diff options
Diffstat (limited to 'drivers')
404 files changed, 89717 insertions, 76777 deletions
diff --git a/drivers/SCsub b/drivers/SCsub index 3028139f50..bc46bf2cec 100644 --- a/drivers/SCsub +++ b/drivers/SCsub @@ -31,15 +31,17 @@ SConscript("rtaudio/SCsub"); SConscript("nedmalloc/SCsub"); SConscript("nrex/SCsub"); SConscript("chibi/SCsub"); -if (env["vorbis"]=="yes" or env["speex"]=="yes" or env["theora"]=="yes"): +if (env["vorbis"]=="yes" or env["speex"]=="yes" or env["theora"]=="yes" or env["opus"]=="yes"): SConscript("ogg/SCsub"); if (env["vorbis"]=="yes"): SConscript("vorbis/SCsub"); +if (env["opus"]=="yes"): + SConscript('opus/SCsub'); if (env["tools"]=="yes"): SConscript("convex_decomp/SCsub"); -if env["theora"]=="yes": - SConscript("theoraplayer/SCsub") +#if env["theora"]=="yes": +# SConscript("theoraplayer/SCsub") if (env["theora"]=="yes"): SConscript("theora/SCsub"); if (env['speex']=='yes'): diff --git a/drivers/convex_decomp/b2Glue.h b/drivers/convex_decomp/b2Glue.h index db765f7eb9..7ec6d7f181 100644 --- a/drivers/convex_decomp/b2Glue.h +++ b/drivers/convex_decomp/b2Glue.h @@ -20,7 +20,8 @@ #define B2GLUE_H #include "math_2d.h" -#include <limits> +#include <limits.h> + namespace b2ConvexDecomp { typedef real_t float32; diff --git a/drivers/convex_decomp/b2Polygon.cpp b/drivers/convex_decomp/b2Polygon.cpp index 668313967e..775f2adfe2 100644 --- a/drivers/convex_decomp/b2Polygon.cpp +++ b/drivers/convex_decomp/b2Polygon.cpp @@ -21,8 +21,8 @@ #include "b2Triangle.h" #include "b2Polygon.h" -#include <cmath> -#include <climits> +#include <math.h> +#include <limits.h> #include <assert.h> #define b2Assert assert diff --git a/drivers/convex_decomp/b2Polygon.h b/drivers/convex_decomp/b2Polygon.h index 82cdc56804..36af2fd9d0 100644 --- a/drivers/convex_decomp/b2Polygon.h +++ b/drivers/convex_decomp/b2Polygon.h @@ -22,7 +22,7 @@ #include "b2Triangle.h" #include "stdio.h" #include <string.h> -#include <limits> +#include <limits.h> namespace b2ConvexDecomp { static bool B2_POLYGON_REPORT_ERRORS = false; diff --git a/drivers/etc1/rg_etc1.cpp 
b/drivers/etc1/rg_etc1.cpp index fd109f003c..47dcb57e6b 100644 --- a/drivers/etc1/rg_etc1.cpp +++ b/drivers/etc1/rg_etc1.cpp @@ -1,2454 +1,2454 @@ -// File: rg_etc1.cpp - Fast, high quality ETC1 block packer/unpacker - Rich Geldreich <richgel99@gmail.com>
-// Please see ZLIB license at the end of rg_etc1.h.
-//
-// For more information Ericsson Texture Compression (ETC/ETC1), see:
-// http://www.khronos.org/registry/gles/extensions/OES/OES_compressed_ETC1_RGB8_texture.txt
-//
-// v1.03 - 5/12/13 - Initial public release
-#include "rg_etc1.h"
-
-#include <stdlib.h>
-#include <string.h>
-#include <assert.h>
-//#include <stdio.h>
-#include <math.h>
-#include <stdio.h>
-#pragma warning (disable: 4201) // nonstandard extension used : nameless struct/union
-
-#if defined(_DEBUG) || defined(DEBUG)
-#define RG_ETC1_BUILD_DEBUG
-#endif
-
-#define RG_ETC1_ASSERT assert
-
-namespace rg_etc1
-{
-
- inline long labs(long val) {
- return val < 0 ? -val : val;
- }
-
- inline int intabs(int val) {
-
- return val<0?-val:val;
- }
-
- typedef unsigned char uint8;
- typedef unsigned short uint16;
- typedef unsigned int uint;
- typedef unsigned int uint32;
- typedef long long int64;
- typedef unsigned long long uint64;
-
- const uint32 cUINT32_MAX = 0xFFFFFFFFU;
- const uint64 cUINT64_MAX = 0xFFFFFFFFFFFFFFFFULL; //0xFFFFFFFFFFFFFFFFui64;
-
- template<typename T> inline T minimum(T a, T b) { return (a < b) ? a : b; }
- template<typename T> inline T minimum(T a, T b, T c) { return minimum(minimum(a, b), c); }
- template<typename T> inline T maximum(T a, T b) { return (a > b) ? a : b; }
- template<typename T> inline T maximum(T a, T b, T c) { return maximum(maximum(a, b), c); }
- template<typename T> inline T clamp(T value, T low, T high) { return (value < low) ? low : ((value > high) ? high : value); }
- template<typename T> inline T square(T value) { return value * value; }
- template<typename T> inline void zero_object(T& obj) { memset((void*)&obj, 0, sizeof(obj)); }
- template<typename T> inline void zero_this(T* pObj) { memset((void*)pObj, 0, sizeof(*pObj)); }
-
- template<class T, size_t N> T decay_array_to_subtype(T (&a)[N]);
-
-#define RG_ETC1_ARRAY_SIZE(X) (sizeof(X) / sizeof(decay_array_to_subtype(X)))
-
- enum eNoClamp { cNoClamp };
-
- struct color_quad_u8
- {
- static inline int clamp(int v) { if (v & 0xFFFFFF00U) v = (~(static_cast<int>(v) >> 31)) & 0xFF; return v; }
-
- struct component_traits { enum { cSigned = false, cFloat = false, cMin = 0U, cMax = 255U }; };
-
- public:
- typedef unsigned char component_t;
- typedef int parameter_t;
-
- enum { cNumComps = 4 };
-
- union
- {
- struct
- {
- component_t r;
- component_t g;
- component_t b;
- component_t a;
- };
-
- component_t c[cNumComps];
-
- uint32 m_u32;
- };
-
- inline color_quad_u8()
- {
- }
-
- inline color_quad_u8(const color_quad_u8& other) : m_u32(other.m_u32)
- {
- }
-
- explicit inline color_quad_u8(parameter_t y, parameter_t alpha = component_traits::cMax)
- {
- set(y, alpha);
- }
-
- inline color_quad_u8(parameter_t red, parameter_t green, parameter_t blue, parameter_t alpha = component_traits::cMax)
- {
- set(red, green, blue, alpha);
- }
-
- explicit inline color_quad_u8(eNoClamp, parameter_t y, parameter_t alpha = component_traits::cMax)
- {
- set_noclamp_y_alpha(y, alpha);
- }
-
- inline color_quad_u8(eNoClamp, parameter_t red, parameter_t green, parameter_t blue, parameter_t alpha = component_traits::cMax)
- {
- set_noclamp_rgba(red, green, blue, alpha);
- }
-
- inline void clear()
- {
- m_u32 = 0;
- }
-
- inline color_quad_u8& operator= (const color_quad_u8& other)
- {
- m_u32 = other.m_u32;
- return *this;
- }
-
- inline color_quad_u8& set_rgb(const color_quad_u8& other)
- {
- r = other.r;
- g = other.g;
- b = other.b;
- return *this;
- }
-
- inline color_quad_u8& operator= (parameter_t y)
- {
- set(y, component_traits::cMax);
- return *this;
- }
-
- inline color_quad_u8& set(parameter_t y, parameter_t alpha = component_traits::cMax)
- {
- y = clamp(y);
- alpha = clamp(alpha);
- r = static_cast<component_t>(y);
- g = static_cast<component_t>(y);
- b = static_cast<component_t>(y);
- a = static_cast<component_t>(alpha);
- return *this;
- }
-
- inline color_quad_u8& set_noclamp_y_alpha(parameter_t y, parameter_t alpha = component_traits::cMax)
- {
- RG_ETC1_ASSERT( (y >= component_traits::cMin) && (y <= component_traits::cMax) );
- RG_ETC1_ASSERT( (alpha >= component_traits::cMin) && (alpha <= component_traits::cMax) );
-
- r = static_cast<component_t>(y);
- g = static_cast<component_t>(y);
- b = static_cast<component_t>(y);
- a = static_cast<component_t>(alpha);
- return *this;
- }
-
- inline color_quad_u8& set(parameter_t red, parameter_t green, parameter_t blue, parameter_t alpha = component_traits::cMax)
- {
- r = static_cast<component_t>(clamp(red));
- g = static_cast<component_t>(clamp(green));
- b = static_cast<component_t>(clamp(blue));
- a = static_cast<component_t>(clamp(alpha));
- return *this;
- }
-
- inline color_quad_u8& set_noclamp_rgba(parameter_t red, parameter_t green, parameter_t blue, parameter_t alpha)
- {
- RG_ETC1_ASSERT( (red >= component_traits::cMin) && (red <= component_traits::cMax) );
- RG_ETC1_ASSERT( (green >= component_traits::cMin) && (green <= component_traits::cMax) );
- RG_ETC1_ASSERT( (blue >= component_traits::cMin) && (blue <= component_traits::cMax) );
- RG_ETC1_ASSERT( (alpha >= component_traits::cMin) && (alpha <= component_traits::cMax) );
-
- r = static_cast<component_t>(red);
- g = static_cast<component_t>(green);
- b = static_cast<component_t>(blue);
- a = static_cast<component_t>(alpha);
- return *this;
- }
-
- inline color_quad_u8& set_noclamp_rgb(parameter_t red, parameter_t green, parameter_t blue)
- {
- RG_ETC1_ASSERT( (red >= component_traits::cMin) && (red <= component_traits::cMax) );
- RG_ETC1_ASSERT( (green >= component_traits::cMin) && (green <= component_traits::cMax) );
- RG_ETC1_ASSERT( (blue >= component_traits::cMin) && (blue <= component_traits::cMax) );
-
- r = static_cast<component_t>(red);
- g = static_cast<component_t>(green);
- b = static_cast<component_t>(blue);
- return *this;
- }
-
- static inline parameter_t get_min_comp() { return component_traits::cMin; }
- static inline parameter_t get_max_comp() { return component_traits::cMax; }
- static inline bool get_comps_are_signed() { return component_traits::cSigned; }
-
- inline component_t operator[] (uint i) const { RG_ETC1_ASSERT(i < cNumComps); return c[i]; }
- inline component_t& operator[] (uint i) { RG_ETC1_ASSERT(i < cNumComps); return c[i]; }
-
- inline color_quad_u8& set_component(uint i, parameter_t f)
- {
- RG_ETC1_ASSERT(i < cNumComps);
-
- c[i] = static_cast<component_t>(clamp(f));
-
- return *this;
- }
-
- inline color_quad_u8& set_grayscale(parameter_t l)
- {
- component_t x = static_cast<component_t>(clamp(l));
- c[0] = x;
- c[1] = x;
- c[2] = x;
- return *this;
- }
-
- inline color_quad_u8& clamp(const color_quad_u8& l, const color_quad_u8& h)
- {
- for (uint i = 0; i < cNumComps; i++)
- c[i] = static_cast<component_t>(rg_etc1::clamp<parameter_t>(c[i], l[i], h[i]));
- return *this;
- }
-
- inline color_quad_u8& clamp(parameter_t l, parameter_t h)
- {
- for (uint i = 0; i < cNumComps; i++)
- c[i] = static_cast<component_t>(rg_etc1::clamp<parameter_t>(c[i], l, h));
- return *this;
- }
-
- // Returns CCIR 601 luma (consistent with color_utils::RGB_To_Y).
- inline parameter_t get_luma() const
- {
- return static_cast<parameter_t>((19595U * r + 38470U * g + 7471U * b + 32768U) >> 16U);
- }
-
- // Returns REC 709 luma.
- inline parameter_t get_luma_rec709() const
- {
- return static_cast<parameter_t>((13938U * r + 46869U * g + 4729U * b + 32768U) >> 16U);
- }
-
- inline uint squared_distance_rgb(const color_quad_u8& c) const
- {
- return rg_etc1::square(r - c.r) + rg_etc1::square(g - c.g) + rg_etc1::square(b - c.b);
- }
-
- inline uint squared_distance_rgba(const color_quad_u8& c) const
- {
- return rg_etc1::square(r - c.r) + rg_etc1::square(g - c.g) + rg_etc1::square(b - c.b) + rg_etc1::square(a - c.a);
- }
-
- inline bool rgb_equals(const color_quad_u8& rhs) const
- {
- return (r == rhs.r) && (g == rhs.g) && (b == rhs.b);
- }
-
- inline bool operator== (const color_quad_u8& rhs) const
- {
- return m_u32 == rhs.m_u32;
- }
-
- color_quad_u8& operator+= (const color_quad_u8& other)
- {
- for (uint i = 0; i < 4; i++)
- c[i] = static_cast<component_t>(clamp(c[i] + other.c[i]));
- return *this;
- }
-
- color_quad_u8& operator-= (const color_quad_u8& other)
- {
- for (uint i = 0; i < 4; i++)
- c[i] = static_cast<component_t>(clamp(c[i] - other.c[i]));
- return *this;
- }
-
- friend color_quad_u8 operator+ (const color_quad_u8& lhs, const color_quad_u8& rhs)
- {
- color_quad_u8 result(lhs);
- result += rhs;
- return result;
- }
-
- friend color_quad_u8 operator- (const color_quad_u8& lhs, const color_quad_u8& rhs)
- {
- color_quad_u8 result(lhs);
- result -= rhs;
- return result;
- }
- }; // class color_quad_u8
-
- struct vec3F
- {
- float m_s[3];
-
- inline vec3F() { }
- inline vec3F(float s) { m_s[0] = s; m_s[1] = s; m_s[2] = s; }
- inline vec3F(float x, float y, float z) { m_s[0] = x; m_s[1] = y; m_s[2] = z; }
-
- inline float operator[] (uint i) const { RG_ETC1_ASSERT(i < 3); return m_s[i]; }
-
- inline vec3F& operator += (const vec3F& other) { for (uint i = 0; i < 3; i++) m_s[i] += other.m_s[i]; return *this; }
-
- inline vec3F& operator *= (float s) { for (uint i = 0; i < 3; i++) m_s[i] *= s; return *this; }
- };
-
- enum etc_constants
- {
- cETC1BytesPerBlock = 8U,
-
- cETC1SelectorBits = 2U,
- cETC1SelectorValues = 1U << cETC1SelectorBits,
- cETC1SelectorMask = cETC1SelectorValues - 1U,
-
- cETC1BlockShift = 2U,
- cETC1BlockSize = 1U << cETC1BlockShift,
-
- cETC1LSBSelectorIndicesBitOffset = 0,
- cETC1MSBSelectorIndicesBitOffset = 16,
-
- cETC1FlipBitOffset = 32,
- cETC1DiffBitOffset = 33,
-
- cETC1IntenModifierNumBits = 3,
- cETC1IntenModifierValues = 1 << cETC1IntenModifierNumBits,
- cETC1RightIntenModifierTableBitOffset = 34,
- cETC1LeftIntenModifierTableBitOffset = 37,
-
- // Base+Delta encoding (5 bit bases, 3 bit delta)
- cETC1BaseColorCompNumBits = 5,
- cETC1BaseColorCompMax = 1 << cETC1BaseColorCompNumBits,
-
- cETC1DeltaColorCompNumBits = 3,
- cETC1DeltaColorComp = 1 << cETC1DeltaColorCompNumBits,
- cETC1DeltaColorCompMax = 1 << cETC1DeltaColorCompNumBits,
-
- cETC1BaseColor5RBitOffset = 59,
- cETC1BaseColor5GBitOffset = 51,
- cETC1BaseColor5BBitOffset = 43,
-
- cETC1DeltaColor3RBitOffset = 56,
- cETC1DeltaColor3GBitOffset = 48,
- cETC1DeltaColor3BBitOffset = 40,
-
- // Absolute (non-delta) encoding (two 4-bit per component bases)
- cETC1AbsColorCompNumBits = 4,
- cETC1AbsColorCompMax = 1 << cETC1AbsColorCompNumBits,
-
- cETC1AbsColor4R1BitOffset = 60,
- cETC1AbsColor4G1BitOffset = 52,
- cETC1AbsColor4B1BitOffset = 44,
-
- cETC1AbsColor4R2BitOffset = 56,
- cETC1AbsColor4G2BitOffset = 48,
- cETC1AbsColor4B2BitOffset = 40,
-
- cETC1ColorDeltaMin = -4,
- cETC1ColorDeltaMax = 3,
-
- // Delta3:
- // 0 1 2 3 4 5 6 7
- // 000 001 010 011 100 101 110 111
- // 0 1 2 3 -4 -3 -2 -1
- };
-
- static uint8 g_quant5_tab[256+16];
-
-
- static const int g_etc1_inten_tables[cETC1IntenModifierValues][cETC1SelectorValues] =
- {
- { -8, -2, 2, 8 }, { -17, -5, 5, 17 }, { -29, -9, 9, 29 }, { -42, -13, 13, 42 },
- { -60, -18, 18, 60 }, { -80, -24, 24, 80 }, { -106, -33, 33, 106 }, { -183, -47, 47, 183 }
- };
-
- static const uint8 g_etc1_to_selector_index[cETC1SelectorValues] = { 2, 3, 1, 0 };
- static const uint8 g_selector_index_to_etc1[cETC1SelectorValues] = { 3, 2, 0, 1 };
-
- // Given an ETC1 diff/inten_table/selector, and an 8-bit desired color, this table encodes the best packed_color in the low byte, and the abs error in the high byte.
- static uint16 g_etc1_inverse_lookup[2*8*4][256]; // [diff/inten_table/selector][desired_color]
-
- // g_color8_to_etc_block_config[color][table_index] = Supplies for each 8-bit color value a list of packed ETC1 diff/intensity table/selectors/packed_colors that map to that color.
- // To pack: diff | (inten << 1) | (selector << 4) | (packed_c << 8)
- static const uint16 g_color8_to_etc_block_config_0_255[2][33] =
- {
- { 0x0000, 0x0010, 0x0002, 0x0012, 0x0004, 0x0014, 0x0006, 0x0016, 0x0008, 0x0018, 0x000A, 0x001A, 0x000C, 0x001C, 0x000E, 0x001E,
- 0x0001, 0x0011, 0x0003, 0x0013, 0x0005, 0x0015, 0x0007, 0x0017, 0x0009, 0x0019, 0x000B, 0x001B, 0x000D, 0x001D, 0x000F, 0x001F, 0xFFFF },
- { 0x0F20, 0x0F30, 0x0E32, 0x0F22, 0x0E34, 0x0F24, 0x0D36, 0x0F26, 0x0C38, 0x0E28, 0x0B3A, 0x0E2A, 0x093C, 0x0E2C, 0x053E, 0x0D2E,
- 0x1E31, 0x1F21, 0x1D33, 0x1F23, 0x1C35, 0x1E25, 0x1A37, 0x1E27, 0x1839, 0x1D29, 0x163B, 0x1C2B, 0x133D, 0x1B2D, 0x093F, 0x1A2F, 0xFFFF },
- };
-
- // Really only [254][11].
- static const uint16 g_color8_to_etc_block_config_1_to_254[254][12] =
- {
- { 0x021C, 0x0D0D, 0xFFFF }, { 0x0020, 0x0021, 0x0A0B, 0x061F, 0xFFFF }, { 0x0113, 0x0217, 0xFFFF }, { 0x0116, 0x031E,
- 0x0B0E, 0x0405, 0xFFFF }, { 0x0022, 0x0204, 0x050A, 0x0023, 0xFFFF }, { 0x0111, 0x0319, 0x0809, 0x170F, 0xFFFF }, {
- 0x0303, 0x0215, 0x0607, 0xFFFF }, { 0x0030, 0x0114, 0x0408, 0x0031, 0x0201, 0x051D, 0xFFFF }, { 0x0100, 0x0024, 0x0306,
- 0x0025, 0x041B, 0x0E0D, 0xFFFF }, { 0x021A, 0x0121, 0x0B0B, 0x071F, 0xFFFF }, { 0x0213, 0x0317, 0xFFFF }, { 0x0112,
- 0x0505, 0xFFFF }, { 0x0026, 0x070C, 0x0123, 0x0027, 0xFFFF }, { 0x0211, 0x0909, 0xFFFF }, { 0x0110, 0x0315, 0x0707,
- 0x0419, 0x180F, 0xFFFF }, { 0x0218, 0x0131, 0x0301, 0x0403, 0x061D, 0xFFFF }, { 0x0032, 0x0202, 0x0033, 0x0125, 0x051B,
- 0x0F0D, 0xFFFF }, { 0x0028, 0x031C, 0x0221, 0x0029, 0xFFFF }, { 0x0120, 0x0313, 0x0C0B, 0x081F, 0xFFFF }, { 0x0605,
- 0x0417, 0xFFFF }, { 0x0216, 0x041E, 0x0C0E, 0x0223, 0x0127, 0xFFFF }, { 0x0122, 0x0304, 0x060A, 0x0311, 0x0A09, 0xFFFF
- }, { 0x0519, 0x190F, 0xFFFF }, { 0x002A, 0x0231, 0x0503, 0x0415, 0x0807, 0x002B, 0x071D, 0xFFFF }, { 0x0130, 0x0214,
- 0x0508, 0x0401, 0x0133, 0x0225, 0x061B, 0xFFFF }, { 0x0200, 0x0124, 0x0406, 0x0321, 0x0129, 0x100D, 0xFFFF }, { 0x031A,
- 0x0D0B, 0x091F, 0xFFFF }, { 0x0413, 0x0705, 0x0517, 0xFFFF }, { 0x0212, 0x0034, 0x0323, 0x0035, 0x0227, 0xFFFF }, {
- 0x0126, 0x080C, 0x0B09, 0xFFFF }, { 0x0411, 0x0619, 0x1A0F, 0xFFFF }, { 0x0210, 0x0331, 0x0603, 0x0515, 0x0907, 0x012B,
- 0xFFFF }, { 0x0318, 0x002C, 0x0501, 0x0233, 0x0325, 0x071B, 0x002D, 0x081D, 0xFFFF }, { 0x0132, 0x0302, 0x0229, 0x110D,
- 0xFFFF }, { 0x0128, 0x041C, 0x0421, 0x0E0B, 0x0A1F, 0xFFFF }, { 0x0220, 0x0513, 0x0617, 0xFFFF }, { 0x0135, 0x0805,
- 0x0327, 0xFFFF }, { 0x0316, 0x051E, 0x0D0E, 0x0423, 0xFFFF }, { 0x0222, 0x0404, 0x070A, 0x0511, 0x0719, 0x0C09, 0x1B0F,
- 0xFFFF }, { 0x0703, 0x0615, 0x0A07, 0x022B, 0xFFFF }, { 0x012A, 0x0431, 0x0601, 0x0333, 0x012D, 0x091D, 0xFFFF }, {
- 0x0230, 0x0314, 0x0036, 0x0608, 0x0425, 0x0037, 0x0329, 0x081B, 0x120D, 0xFFFF }, { 0x0300, 0x0224, 0x0506, 0x0521,
- 0x0F0B, 0x0B1F, 0xFFFF }, { 0x041A, 0x0613, 0x0717, 0xFFFF }, { 0x0235, 0x0905, 0xFFFF }, { 0x0312, 0x0134, 0x0523,
- 0x0427, 0xFFFF }, { 0x0226, 0x090C, 0x002E, 0x0611, 0x0D09, 0x002F, 0xFFFF }, { 0x0715, 0x0B07, 0x0819, 0x032B, 0x1C0F,
- 0xFFFF }, { 0x0310, 0x0531, 0x0701, 0x0803, 0x022D, 0x0A1D, 0xFFFF }, { 0x0418, 0x012C, 0x0433, 0x0525, 0x0137, 0x091B,
- 0x130D, 0xFFFF }, { 0x0232, 0x0402, 0x0621, 0x0429, 0xFFFF }, { 0x0228, 0x051C, 0x0713, 0x100B, 0x0C1F, 0xFFFF }, {
- 0x0320, 0x0335, 0x0A05, 0x0817, 0xFFFF }, { 0x0623, 0x0527, 0xFFFF }, { 0x0416, 0x061E, 0x0E0E, 0x0711, 0x0E09, 0x012F,
- 0xFFFF }, { 0x0322, 0x0504, 0x080A, 0x0919, 0x1D0F, 0xFFFF }, { 0x0631, 0x0903, 0x0815, 0x0C07, 0x042B, 0x032D, 0x0B1D,
- 0xFFFF }, { 0x022A, 0x0801, 0x0533, 0x0625, 0x0237, 0x0A1B, 0xFFFF }, { 0x0330, 0x0414, 0x0136, 0x0708, 0x0721, 0x0529,
- 0x140D, 0xFFFF }, { 0x0400, 0x0324, 0x0606, 0x0038, 0x0039, 0x110B, 0x0D1F, 0xFFFF }, { 0x051A, 0x0813, 0x0B05, 0x0917,
- 0xFFFF }, { 0x0723, 0x0435, 0x0627, 0xFFFF }, { 0x0412, 0x0234, 0x0F09, 0x022F, 0xFFFF }, { 0x0326, 0x0A0C, 0x012E,
- 0x0811, 0x0A19, 0x1E0F, 0xFFFF }, { 0x0731, 0x0A03, 0x0915, 0x0D07, 0x052B, 0xFFFF }, { 0x0410, 0x0901, 0x0633, 0x0725,
- 0x0337, 0x0B1B, 0x042D, 0x0C1D, 0xFFFF }, { 0x0518, 0x022C, 0x0629, 0x150D, 0xFFFF }, { 0x0332, 0x0502, 0x0821, 0x0139,
- 0x120B, 0x0E1F, 0xFFFF }, { 0x0328, 0x061C, 0x0913, 0x0A17, 0xFFFF }, { 0x0420, 0x0535, 0x0C05, 0x0727, 0xFFFF }, {
- 0x0823, 0x032F, 0xFFFF }, { 0x0516, 0x071E, 0x0F0E, 0x0911, 0x0B19, 0x1009, 0x1F0F, 0xFFFF }, { 0x0422, 0x0604, 0x090A,
- 0x0B03, 0x0A15, 0x0E07, 0x062B, 0xFFFF }, { 0x0831, 0x0A01, 0x0733, 0x052D, 0x0D1D, 0xFFFF }, { 0x032A, 0x0825, 0x0437,
- 0x0729, 0x0C1B, 0x160D, 0xFFFF }, { 0x0430, 0x0514, 0x0236, 0x0808, 0x0921, 0x0239, 0x130B, 0x0F1F, 0xFFFF }, { 0x0500,
- 0x0424, 0x0706, 0x0138, 0x0A13, 0x0B17, 0xFFFF }, { 0x061A, 0x0635, 0x0D05, 0xFFFF }, { 0x0923, 0x0827, 0xFFFF }, {
- 0x0512, 0x0334, 0x003A, 0x0A11, 0x1109, 0x003B, 0x042F, 0xFFFF }, { 0x0426, 0x0B0C, 0x022E, 0x0B15, 0x0F07, 0x0C19,
- 0x072B, 0xFFFF }, { 0x0931, 0x0B01, 0x0C03, 0x062D, 0x0E1D, 0xFFFF }, { 0x0510, 0x0833, 0x0925, 0x0537, 0x0D1B, 0x170D,
- 0xFFFF }, { 0x0618, 0x032C, 0x0A21, 0x0339, 0x0829, 0xFFFF }, { 0x0432, 0x0602, 0x0B13, 0x140B, 0x101F, 0xFFFF }, {
- 0x0428, 0x071C, 0x0735, 0x0E05, 0x0C17, 0xFFFF }, { 0x0520, 0x0A23, 0x0927, 0xFFFF }, { 0x0B11, 0x1209, 0x013B, 0x052F,
- 0xFFFF }, { 0x0616, 0x081E, 0x0D19, 0xFFFF }, { 0x0522, 0x0704, 0x0A0A, 0x0A31, 0x0D03, 0x0C15, 0x1007, 0x082B, 0x072D,
- 0x0F1D, 0xFFFF }, { 0x0C01, 0x0933, 0x0A25, 0x0637, 0x0E1B, 0xFFFF }, { 0x042A, 0x0B21, 0x0929, 0x180D, 0xFFFF }, {
- 0x0530, 0x0614, 0x0336, 0x0908, 0x0439, 0x150B, 0x111F, 0xFFFF }, { 0x0600, 0x0524, 0x0806, 0x0238, 0x0C13, 0x0F05,
- 0x0D17, 0xFFFF }, { 0x071A, 0x0B23, 0x0835, 0x0A27, 0xFFFF }, { 0x1309, 0x023B, 0x062F, 0xFFFF }, { 0x0612, 0x0434,
- 0x013A, 0x0C11, 0x0E19, 0xFFFF }, { 0x0526, 0x0C0C, 0x032E, 0x0B31, 0x0E03, 0x0D15, 0x1107, 0x092B, 0xFFFF }, { 0x0D01,
- 0x0A33, 0x0B25, 0x0737, 0x0F1B, 0x082D, 0x101D, 0xFFFF }, { 0x0610, 0x0A29, 0x190D, 0xFFFF }, { 0x0718, 0x042C, 0x0C21,
- 0x0539, 0x160B, 0x121F, 0xFFFF }, { 0x0532, 0x0702, 0x0D13, 0x0E17, 0xFFFF }, { 0x0528, 0x081C, 0x0935, 0x1005, 0x0B27,
- 0xFFFF }, { 0x0620, 0x0C23, 0x033B, 0x072F, 0xFFFF }, { 0x0D11, 0x0F19, 0x1409, 0xFFFF }, { 0x0716, 0x003C, 0x091E,
- 0x0F03, 0x0E15, 0x1207, 0x0A2B, 0x003D, 0xFFFF }, { 0x0622, 0x0804, 0x0B0A, 0x0C31, 0x0E01, 0x0B33, 0x092D, 0x111D,
- 0xFFFF }, { 0x0C25, 0x0837, 0x0B29, 0x101B, 0x1A0D, 0xFFFF }, { 0x052A, 0x0D21, 0x0639, 0x170B, 0x131F, 0xFFFF }, {
- 0x0630, 0x0714, 0x0436, 0x0A08, 0x0E13, 0x0F17, 0xFFFF }, { 0x0700, 0x0624, 0x0906, 0x0338, 0x0A35, 0x1105, 0xFFFF }, {
- 0x081A, 0x0D23, 0x0C27, 0xFFFF }, { 0x0E11, 0x1509, 0x043B, 0x082F, 0xFFFF }, { 0x0712, 0x0534, 0x023A, 0x0F15, 0x1307,
- 0x1019, 0x0B2B, 0x013D, 0xFFFF }, { 0x0626, 0x0D0C, 0x042E, 0x0D31, 0x0F01, 0x1003, 0x0A2D, 0x121D, 0xFFFF }, { 0x0C33,
- 0x0D25, 0x0937, 0x111B, 0x1B0D, 0xFFFF }, { 0x0710, 0x0E21, 0x0739, 0x0C29, 0xFFFF }, { 0x0818, 0x052C, 0x0F13, 0x180B,
- 0x141F, 0xFFFF }, { 0x0632, 0x0802, 0x0B35, 0x1205, 0x1017, 0xFFFF }, { 0x0628, 0x091C, 0x0E23, 0x0D27, 0xFFFF }, {
- 0x0720, 0x0F11, 0x1609, 0x053B, 0x092F, 0xFFFF }, { 0x1119, 0x023D, 0xFFFF }, { 0x0816, 0x013C, 0x0A1E, 0x0E31, 0x1103,
- 0x1015, 0x1407, 0x0C2B, 0x0B2D, 0x131D, 0xFFFF }, { 0x0722, 0x0904, 0x0C0A, 0x1001, 0x0D33, 0x0E25, 0x0A37, 0x121B,
- 0xFFFF }, { 0x0F21, 0x0D29, 0x1C0D, 0xFFFF }, { 0x062A, 0x0839, 0x190B, 0x151F, 0xFFFF }, { 0x0730, 0x0814, 0x0536,
- 0x0B08, 0x1013, 0x1305, 0x1117, 0xFFFF }, { 0x0800, 0x0724, 0x0A06, 0x0438, 0x0F23, 0x0C35, 0x0E27, 0xFFFF }, { 0x091A,
- 0x1709, 0x063B, 0x0A2F, 0xFFFF }, { 0x1011, 0x1219, 0x033D, 0xFFFF }, { 0x0812, 0x0634, 0x033A, 0x0F31, 0x1203, 0x1115,
- 0x1507, 0x0D2B, 0xFFFF }, { 0x0726, 0x0E0C, 0x052E, 0x1101, 0x0E33, 0x0F25, 0x0B37, 0x131B, 0x0C2D, 0x141D, 0xFFFF }, {
- 0x0E29, 0x1D0D, 0xFFFF }, { 0x0810, 0x1021, 0x0939, 0x1A0B, 0x161F, 0xFFFF }, { 0x0918, 0x062C, 0x1113, 0x1217, 0xFFFF
- }, { 0x0732, 0x0902, 0x0D35, 0x1405, 0x0F27, 0xFFFF }, { 0x0728, 0x0A1C, 0x1023, 0x073B, 0x0B2F, 0xFFFF }, { 0x0820,
- 0x1111, 0x1319, 0x1809, 0xFFFF }, { 0x1303, 0x1215, 0x1607, 0x0E2B, 0x043D, 0xFFFF }, { 0x0916, 0x023C, 0x0B1E, 0x1031,
- 0x1201, 0x0F33, 0x0D2D, 0x151D, 0xFFFF }, { 0x0822, 0x0A04, 0x0D0A, 0x1025, 0x0C37, 0x0F29, 0x141B, 0x1E0D, 0xFFFF }, {
- 0x1121, 0x0A39, 0x1B0B, 0x171F, 0xFFFF }, { 0x072A, 0x1213, 0x1317, 0xFFFF }, { 0x0830, 0x0914, 0x0636, 0x0C08, 0x0E35,
- 0x1505, 0xFFFF }, { 0x0900, 0x0824, 0x0B06, 0x0538, 0x1123, 0x1027, 0xFFFF }, { 0x0A1A, 0x1211, 0x1909, 0x083B, 0x0C2F,
- 0xFFFF }, { 0x1315, 0x1707, 0x1419, 0x0F2B, 0x053D, 0xFFFF }, { 0x0912, 0x0734, 0x043A, 0x1131, 0x1301, 0x1403, 0x0E2D,
- 0x161D, 0xFFFF }, { 0x0826, 0x0F0C, 0x062E, 0x1033, 0x1125, 0x0D37, 0x151B, 0x1F0D, 0xFFFF }, { 0x1221, 0x0B39, 0x1029,
- 0xFFFF }, { 0x0910, 0x1313, 0x1C0B, 0x181F, 0xFFFF }, { 0x0A18, 0x072C, 0x0F35, 0x1605, 0x1417, 0xFFFF }, { 0x0832,
- 0x0A02, 0x1223, 0x1127, 0xFFFF }, { 0x0828, 0x0B1C, 0x1311, 0x1A09, 0x093B, 0x0D2F, 0xFFFF }, { 0x0920, 0x1519, 0x063D,
- 0xFFFF }, { 0x1231, 0x1503, 0x1415, 0x1807, 0x102B, 0x0F2D, 0x171D, 0xFFFF }, { 0x0A16, 0x033C, 0x0C1E, 0x1401, 0x1133,
- 0x1225, 0x0E37, 0x161B, 0xFFFF }, { 0x0922, 0x0B04, 0x0E0A, 0x1321, 0x1129, 0xFFFF }, { 0x0C39, 0x1D0B, 0x191F, 0xFFFF
- }, { 0x082A, 0x1413, 0x1705, 0x1517, 0xFFFF }, { 0x0930, 0x0A14, 0x0736, 0x0D08, 0x1323, 0x1035, 0x1227, 0xFFFF }, {
- 0x0A00, 0x0924, 0x0C06, 0x0638, 0x1B09, 0x0A3B, 0x0E2F, 0xFFFF }, { 0x0B1A, 0x1411, 0x1619, 0x073D, 0xFFFF }, { 0x1331,
- 0x1603, 0x1515, 0x1907, 0x112B, 0xFFFF }, { 0x0A12, 0x0834, 0x053A, 0x1501, 0x1233, 0x1325, 0x0F37, 0x171B, 0x102D,
- 0x181D, 0xFFFF }, { 0x0926, 0x072E, 0x1229, 0xFFFF }, { 0x1421, 0x0D39, 0x1E0B, 0x1A1F, 0xFFFF }, { 0x0A10, 0x1513,
- 0x1617, 0xFFFF }, { 0x0B18, 0x082C, 0x1135, 0x1805, 0x1327, 0xFFFF }, { 0x0932, 0x0B02, 0x1423, 0x0B3B, 0x0F2F, 0xFFFF
- }, { 0x0928, 0x0C1C, 0x1511, 0x1719, 0x1C09, 0xFFFF }, { 0x0A20, 0x1703, 0x1615, 0x1A07, 0x122B, 0x083D, 0xFFFF }, {
- 0x1431, 0x1601, 0x1333, 0x112D, 0x191D, 0xFFFF }, { 0x0B16, 0x043C, 0x0D1E, 0x1425, 0x1037, 0x1329, 0x181B, 0xFFFF }, {
- 0x0A22, 0x0C04, 0x0F0A, 0x1521, 0x0E39, 0x1F0B, 0x1B1F, 0xFFFF }, { 0x1613, 0x1717, 0xFFFF }, { 0x092A, 0x1235, 0x1905,
- 0xFFFF }, { 0x0A30, 0x0B14, 0x0836, 0x0E08, 0x1523, 0x1427, 0xFFFF }, { 0x0B00, 0x0A24, 0x0D06, 0x0738, 0x1611, 0x1D09,
- 0x0C3B, 0x102F, 0xFFFF }, { 0x0C1A, 0x1715, 0x1B07, 0x1819, 0x132B, 0x093D, 0xFFFF }, { 0x1531, 0x1701, 0x1803, 0x122D,
- 0x1A1D, 0xFFFF }, { 0x0B12, 0x0934, 0x063A, 0x1433, 0x1525, 0x1137, 0x191B, 0xFFFF }, { 0x0A26, 0x003E, 0x082E, 0x1621,
- 0x0F39, 0x1429, 0x003F, 0xFFFF }, { 0x1713, 0x1C1F, 0xFFFF }, { 0x0B10, 0x1335, 0x1A05, 0x1817, 0xFFFF }, { 0x0C18,
- 0x092C, 0x1623, 0x1527, 0xFFFF }, { 0x0A32, 0x0C02, 0x1711, 0x1E09, 0x0D3B, 0x112F, 0xFFFF }, { 0x0A28, 0x0D1C, 0x1919,
- 0x0A3D, 0xFFFF }, { 0x0B20, 0x1631, 0x1903, 0x1815, 0x1C07, 0x142B, 0x132D, 0x1B1D, 0xFFFF }, { 0x1801, 0x1533, 0x1625,
- 0x1237, 0x1A1B, 0xFFFF }, { 0x0C16, 0x053C, 0x0E1E, 0x1721, 0x1529, 0x013F, 0xFFFF }, { 0x0B22, 0x0D04, 0x1039, 0x1D1F,
- 0xFFFF }, { 0x1813, 0x1B05, 0x1917, 0xFFFF }, { 0x0A2A, 0x1723, 0x1435, 0x1627, 0xFFFF }, { 0x0B30, 0x0C14, 0x0936,
- 0x0F08, 0x1F09, 0x0E3B, 0x122F, 0xFFFF }, { 0x0C00, 0x0B24, 0x0E06, 0x0838, 0x1811, 0x1A19, 0x0B3D, 0xFFFF }, { 0x0D1A,
- 0x1731, 0x1A03, 0x1915, 0x1D07, 0x152B, 0xFFFF }, { 0x1901, 0x1633, 0x1725, 0x1337, 0x1B1B, 0x142D, 0x1C1D, 0xFFFF }, {
- 0x0C12, 0x0A34, 0x073A, 0x1629, 0x023F, 0xFFFF }, { 0x0B26, 0x013E, 0x092E, 0x1821, 0x1139, 0x1E1F, 0xFFFF }, { 0x1913,
- 0x1A17, 0xFFFF }, { 0x0C10, 0x1535, 0x1C05, 0x1727, 0xFFFF }, { 0x0D18, 0x0A2C, 0x1823, 0x0F3B, 0x132F, 0xFFFF }, {
- 0x0B32, 0x0D02, 0x1911, 0x1B19, 0xFFFF }, { 0x0B28, 0x0E1C, 0x1B03, 0x1A15, 0x1E07, 0x162B, 0x0C3D, 0xFFFF }, { 0x0C20,
- 0x1831, 0x1A01, 0x1733, 0x152D, 0x1D1D, 0xFFFF }, { 0x1825, 0x1437, 0x1729, 0x1C1B, 0x033F, 0xFFFF }, { 0x0D16, 0x063C,
- 0x0F1E, 0x1921, 0x1239, 0x1F1F, 0xFFFF }, { 0x0C22, 0x0E04, 0x1A13, 0x1B17, 0xFFFF }, { 0x1635, 0x1D05, 0xFFFF }, {
- 0x0B2A, 0x1923, 0x1827, 0xFFFF }, { 0x0C30, 0x0D14, 0x0A36, 0x1A11, 0x103B, 0x142F, 0xFFFF }, { 0x0D00, 0x0C24, 0x0F06,
- 0x0938, 0x1B15, 0x1F07, 0x1C19, 0x172B, 0x0D3D, 0xFFFF }, { 0x0E1A, 0x1931, 0x1B01, 0x1C03, 0x162D, 0x1E1D, 0xFFFF }, {
- 0x1833, 0x1925, 0x1537, 0x1D1B, 0xFFFF }, { 0x0D12, 0x0B34, 0x083A, 0x1A21, 0x1339, 0x1829, 0x043F, 0xFFFF }, { 0x0C26,
- 0x023E, 0x0A2E, 0x1B13, 0xFFFF }, { 0x1735, 0x1E05, 0x1C17, 0xFFFF }, { 0x0D10, 0x1A23, 0x1927, 0xFFFF }, { 0x0E18,
- 0x0B2C, 0x1B11, 0x113B, 0x152F, 0xFFFF }, { 0x0C32, 0x0E02, 0x1D19, 0x0E3D, 0xFFFF }, { 0x0C28, 0x0F1C, 0x1A31, 0x1D03,
- 0x1C15, 0x182B, 0x172D, 0x1F1D, 0xFFFF }, { 0x0D20, 0x1C01, 0x1933, 0x1A25, 0x1637, 0x1E1B, 0xFFFF }, { 0x1B21, 0x1929,
- 0x053F, 0xFFFF }, { 0x0E16, 0x073C, 0x1439, 0xFFFF }, { 0x0D22, 0x0F04, 0x1C13, 0x1F05, 0x1D17, 0xFFFF }, { 0x1B23,
- 0x1835, 0x1A27, 0xFFFF }, { 0x0C2A, 0x123B, 0x162F, 0xFFFF }, { 0x0D30, 0x0E14, 0x0B36, 0x1C11, 0x1E19, 0x0F3D, 0xFFFF
- }, { 0x0E00, 0x0D24, 0x0A38, 0x1B31, 0x1E03, 0x1D15, 0x192B, 0xFFFF }, { 0x0F1A, 0x1D01, 0x1A33, 0x1B25, 0x1737, 0x1F1B,
- 0x182D, 0xFFFF }, { 0x1A29, 0x063F, 0xFFFF }, { 0x0E12, 0x0C34, 0x093A, 0x1C21, 0x1539, 0xFFFF }, { 0x0D26, 0x033E,
- 0x0B2E, 0x1D13, 0x1E17, 0xFFFF }, { 0x1935, 0x1B27, 0xFFFF }, { 0x0E10, 0x1C23, 0x133B, 0x172F, 0xFFFF }, { 0x0F18,
- 0x0C2C, 0x1D11, 0x1F19, 0xFFFF }, { 0x0D32, 0x0F02, 0x1F03, 0x1E15, 0x1A2B, 0x103D, 0xFFFF }, { 0x0D28, 0x1C31, 0x1E01,
- 0x1B33, 0x192D, 0xFFFF }, { 0x0E20, 0x1C25, 0x1837, 0x1B29, 0x073F, 0xFFFF }, { 0x1D21, 0x1639, 0xFFFF }, { 0x0F16,
- 0x083C, 0x1E13, 0x1F17, 0xFFFF }, { 0x0E22, 0x1A35, 0xFFFF }, { 0x1D23, 0x1C27, 0xFFFF }, { 0x0D2A, 0x1E11, 0x143B,
- 0x182F, 0xFFFF }, { 0x0E30, 0x0F14, 0x0C36, 0x1F15, 0x1B2B, 0x113D, 0xFFFF }, { 0x0F00, 0x0E24, 0x0B38, 0x1D31, 0x1F01,
- 0x1A2D, 0xFFFF }, { 0x1C33, 0x1D25, 0x1937, 0xFFFF }, { 0x1E21, 0x1739, 0x1C29, 0x083F, 0xFFFF }, { 0x0F12, 0x0D34,
- 0x0A3A, 0x1F13, 0xFFFF }, { 0x0E26, 0x043E, 0x0C2E, 0x1B35, 0xFFFF }, { 0x1E23, 0x1D27, 0xFFFF }, { 0x0F10, 0x1F11,
- 0x153B, 0x192F, 0xFFFF }, { 0x0D2C, 0x123D, 0xFFFF },
- };
-
- struct etc1_block
- {
- // big endian uint64:
- // bit ofs: 56 48 40 32 24 16 8 0
- // byte ofs: b0, b1, b2, b3, b4, b5, b6, b7
- union
- {
- uint64 m_uint64;
- uint8 m_bytes[8];
- };
-
- uint8 m_low_color[2];
- uint8 m_high_color[2];
-
- enum { cNumSelectorBytes = 4 };
- uint8 m_selectors[cNumSelectorBytes];
-
- inline void clear()
- {
- zero_this(this);
- }
-
- inline uint get_byte_bits(uint ofs, uint num) const
- {
- RG_ETC1_ASSERT((ofs + num) <= 64U);
- RG_ETC1_ASSERT(num && (num <= 8U));
- RG_ETC1_ASSERT((ofs >> 3) == ((ofs + num - 1) >> 3));
- const uint byte_ofs = 7 - (ofs >> 3);
- const uint byte_bit_ofs = ofs & 7;
- return (m_bytes[byte_ofs] >> byte_bit_ofs) & ((1 << num) - 1);
- }
-
- inline void set_byte_bits(uint ofs, uint num, uint bits)
- {
- RG_ETC1_ASSERT((ofs + num) <= 64U);
- RG_ETC1_ASSERT(num && (num < 32U));
- RG_ETC1_ASSERT((ofs >> 3) == ((ofs + num - 1) >> 3));
- RG_ETC1_ASSERT(bits < (1U << num));
- const uint byte_ofs = 7 - (ofs >> 3);
- const uint byte_bit_ofs = ofs & 7;
- const uint mask = (1 << num) - 1;
- m_bytes[byte_ofs] &= ~(mask << byte_bit_ofs);
- m_bytes[byte_ofs] |= (bits << byte_bit_ofs);
- }
-
- // false = left/right subblocks
- // true = upper/lower subblocks
- inline bool get_flip_bit() const
- {
- return (m_bytes[3] & 1) != 0;
- }
-
- inline void set_flip_bit(bool flip)
- {
- m_bytes[3] &= ~1;
- m_bytes[3] |= static_cast<uint8>(flip);
- }
-
- inline bool get_diff_bit() const
- {
- return (m_bytes[3] & 2) != 0;
- }
-
- inline void set_diff_bit(bool diff)
- {
- m_bytes[3] &= ~2;
- m_bytes[3] |= (static_cast<uint>(diff) << 1);
- }
-
- // Returns intensity modifier table (0-7) used by subblock subblock_id.
- // subblock_id=0 left/top (CW 1), 1=right/bottom (CW 2)
- inline uint get_inten_table(uint subblock_id) const
- {
- RG_ETC1_ASSERT(subblock_id < 2);
- const uint ofs = subblock_id ? 2 : 5;
- return (m_bytes[3] >> ofs) & 7;
- }
-
- // Sets intensity modifier table (0-7) used by subblock subblock_id (0 or 1)
- inline void set_inten_table(uint subblock_id, uint t)
- {
- RG_ETC1_ASSERT(subblock_id < 2);
- RG_ETC1_ASSERT(t < 8);
- const uint ofs = subblock_id ? 2 : 5;
- m_bytes[3] &= ~(7 << ofs);
- m_bytes[3] |= (t << ofs);
- }
-
- // Returned selector value ranges from 0-3 and is a direct index into g_etc1_inten_tables.
- inline uint get_selector(uint x, uint y) const
- {
- RG_ETC1_ASSERT((x | y) < 4);
-
- const uint bit_index = x * 4 + y;
- const uint byte_bit_ofs = bit_index & 7;
- const uint8 *p = &m_bytes[7 - (bit_index >> 3)];
- const uint lsb = (p[0] >> byte_bit_ofs) & 1;
- const uint msb = (p[-2] >> byte_bit_ofs) & 1;
- const uint val = lsb | (msb << 1);
-
- return g_etc1_to_selector_index[val];
- }
-
- // Selector "val" ranges from 0-3 and is a direct index into g_etc1_inten_tables.
- inline void set_selector(uint x, uint y, uint val)
- {
- RG_ETC1_ASSERT((x | y | val) < 4);
- const uint bit_index = x * 4 + y;
-
- uint8 *p = &m_bytes[7 - (bit_index >> 3)];
-
- const uint byte_bit_ofs = bit_index & 7;
- const uint mask = 1 << byte_bit_ofs;
-
- const uint etc1_val = g_selector_index_to_etc1[val];
-
- const uint lsb = etc1_val & 1;
- const uint msb = etc1_val >> 1;
-
- p[0] &= ~mask;
- p[0] |= (lsb << byte_bit_ofs);
-
- p[-2] &= ~mask;
- p[-2] |= (msb << byte_bit_ofs);
- }
-
- inline void set_base4_color(uint idx, uint16 c)
- {
- if (idx)
- {
- set_byte_bits(cETC1AbsColor4R2BitOffset, 4, (c >> 8) & 15);
- set_byte_bits(cETC1AbsColor4G2BitOffset, 4, (c >> 4) & 15);
- set_byte_bits(cETC1AbsColor4B2BitOffset, 4, c & 15);
- }
- else
- {
- set_byte_bits(cETC1AbsColor4R1BitOffset, 4, (c >> 8) & 15);
- set_byte_bits(cETC1AbsColor4G1BitOffset, 4, (c >> 4) & 15);
- set_byte_bits(cETC1AbsColor4B1BitOffset, 4, c & 15);
- }
- }
-
- inline uint16 get_base4_color(uint idx) const
- {
- uint r, g, b;
- if (idx)
- {
- r = get_byte_bits(cETC1AbsColor4R2BitOffset, 4);
- g = get_byte_bits(cETC1AbsColor4G2BitOffset, 4);
- b = get_byte_bits(cETC1AbsColor4B2BitOffset, 4);
- }
- else
- {
- r = get_byte_bits(cETC1AbsColor4R1BitOffset, 4);
- g = get_byte_bits(cETC1AbsColor4G1BitOffset, 4);
- b = get_byte_bits(cETC1AbsColor4B1BitOffset, 4);
- }
- return static_cast<uint16>(b | (g << 4U) | (r << 8U));
- }
-
- inline void set_base5_color(uint16 c)
- {
- set_byte_bits(cETC1BaseColor5RBitOffset, 5, (c >> 10) & 31);
- set_byte_bits(cETC1BaseColor5GBitOffset, 5, (c >> 5) & 31);
- set_byte_bits(cETC1BaseColor5BBitOffset, 5, c & 31);
- }
-
- inline uint16 get_base5_color() const
- {
- const uint r = get_byte_bits(cETC1BaseColor5RBitOffset, 5);
- const uint g = get_byte_bits(cETC1BaseColor5GBitOffset, 5);
- const uint b = get_byte_bits(cETC1BaseColor5BBitOffset, 5);
- return static_cast<uint16>(b | (g << 5U) | (r << 10U));
- }
-
- void set_delta3_color(uint16 c)
- {
- set_byte_bits(cETC1DeltaColor3RBitOffset, 3, (c >> 6) & 7);
- set_byte_bits(cETC1DeltaColor3GBitOffset, 3, (c >> 3) & 7);
- set_byte_bits(cETC1DeltaColor3BBitOffset, 3, c & 7);
- }
-
- inline uint16 get_delta3_color() const
- {
- const uint r = get_byte_bits(cETC1DeltaColor3RBitOffset, 3);
- const uint g = get_byte_bits(cETC1DeltaColor3GBitOffset, 3);
- const uint b = get_byte_bits(cETC1DeltaColor3BBitOffset, 3);
- return static_cast<uint16>(b | (g << 3U) | (r << 6U));
- }
-
- // Base color 5
- static uint16 pack_color5(const color_quad_u8& color, bool scaled, uint bias = 127U);
- static uint16 pack_color5(uint r, uint g, uint b, bool scaled, uint bias = 127U);
-
- static color_quad_u8 unpack_color5(uint16 packed_color5, bool scaled, uint alpha = 255U);
- static void unpack_color5(uint& r, uint& g, uint& b, uint16 packed_color, bool scaled);
-
- static bool unpack_color5(color_quad_u8& result, uint16 packed_color5, uint16 packed_delta3, bool scaled, uint alpha = 255U);
- static bool unpack_color5(uint& r, uint& g, uint& b, uint16 packed_color5, uint16 packed_delta3, bool scaled, uint alpha = 255U);
-
- // Delta color 3
- // Inputs range from -4 to 3 (cETC1ColorDeltaMin to cETC1ColorDeltaMax)
- static uint16 pack_delta3(int r, int g, int b);
-
- // Results range from -4 to 3 (cETC1ColorDeltaMin to cETC1ColorDeltaMax)
- static void unpack_delta3(int& r, int& g, int& b, uint16 packed_delta3);
-
- // Abs color 4
- static uint16 pack_color4(const color_quad_u8& color, bool scaled, uint bias = 127U);
- static uint16 pack_color4(uint r, uint g, uint b, bool scaled, uint bias = 127U);
-
- static color_quad_u8 unpack_color4(uint16 packed_color4, bool scaled, uint alpha = 255U);
- static void unpack_color4(uint& r, uint& g, uint& b, uint16 packed_color4, bool scaled);
-
- // subblock colors
- static void get_diff_subblock_colors(color_quad_u8* pDst, uint16 packed_color5, uint table_idx);
- static bool get_diff_subblock_colors(color_quad_u8* pDst, uint16 packed_color5, uint16 packed_delta3, uint table_idx);
- static void get_abs_subblock_colors(color_quad_u8* pDst, uint16 packed_color4, uint table_idx);
-
- static inline void unscaled_to_scaled_color(color_quad_u8& dst, const color_quad_u8& src, bool color4)
- {
- if (color4)
- {
- dst.r = src.r | (src.r << 4);
- dst.g = src.g | (src.g << 4);
- dst.b = src.b | (src.b << 4);
- }
- else
- {
- dst.r = (src.r >> 2) | (src.r << 3);
- dst.g = (src.g >> 2) | (src.g << 3);
- dst.b = (src.b >> 2) | (src.b << 3);
- }
- dst.a = src.a;
- }
- };
-
- // Returns pointer to sorted array.
- template<typename T, typename Q>
- T* indirect_radix_sort(uint num_indices, T* pIndices0, T* pIndices1, const Q* pKeys, uint key_ofs, uint key_size, bool init_indices)
- {
- RG_ETC1_ASSERT((key_ofs >= 0) && (key_ofs < sizeof(T)));
- RG_ETC1_ASSERT((key_size >= 1) && (key_size <= 4));
-
- if (init_indices)
- {
- T* p = pIndices0;
- T* q = pIndices0 + (num_indices >> 1) * 2;
- uint i;
- for (i = 0; p != q; p += 2, i += 2)
- {
- p[0] = static_cast<T>(i);
- p[1] = static_cast<T>(i + 1);
- }
-
- if (num_indices & 1)
- *p = static_cast<T>(i);
- }
-
- uint hist[256 * 4];
-
- memset(hist, 0, sizeof(hist[0]) * 256 * key_size);
-
-#define RG_ETC1_GET_KEY(p) (*(const uint*)((const uint8*)(pKeys + *(p)) + key_ofs))
-#define RG_ETC1_GET_KEY_FROM_INDEX(i) (*(const uint*)((const uint8*)(pKeys + (i)) + key_ofs))
-
- if (key_size == 4)
- {
- T* p = pIndices0;
- T* q = pIndices0 + num_indices;
- for ( ; p != q; p++)
- {
- const uint key = RG_ETC1_GET_KEY(p);
-
- hist[ key & 0xFF]++;
- hist[256 + ((key >> 8) & 0xFF)]++;
- hist[512 + ((key >> 16) & 0xFF)]++;
- hist[768 + ((key >> 24) & 0xFF)]++;
- }
- }
- else if (key_size == 3)
- {
- T* p = pIndices0;
- T* q = pIndices0 + num_indices;
- for ( ; p != q; p++)
- {
- const uint key = RG_ETC1_GET_KEY(p);
-
- hist[ key & 0xFF]++;
- hist[256 + ((key >> 8) & 0xFF)]++;
- hist[512 + ((key >> 16) & 0xFF)]++;
- }
- }
- else if (key_size == 2)
- {
- T* p = pIndices0;
- T* q = pIndices0 + (num_indices >> 1) * 2;
-
- for ( ; p != q; p += 2)
- {
- const uint key0 = RG_ETC1_GET_KEY(p);
- const uint key1 = RG_ETC1_GET_KEY(p+1);
-
- hist[ key0 & 0xFF]++;
- hist[256 + ((key0 >> 8) & 0xFF)]++;
-
- hist[ key1 & 0xFF]++;
- hist[256 + ((key1 >> 8) & 0xFF)]++;
- }
-
- if (num_indices & 1)
- {
- const uint key = RG_ETC1_GET_KEY(p);
-
- hist[ key & 0xFF]++;
- hist[256 + ((key >> 8) & 0xFF)]++;
- }
- }
- else
- {
- RG_ETC1_ASSERT(key_size == 1);
- if (key_size != 1)
- return NULL;
-
- T* p = pIndices0;
- T* q = pIndices0 + (num_indices >> 1) * 2;
-
- for ( ; p != q; p += 2)
- {
- const uint key0 = RG_ETC1_GET_KEY(p);
- const uint key1 = RG_ETC1_GET_KEY(p+1);
-
- hist[key0 & 0xFF]++;
- hist[key1 & 0xFF]++;
- }
-
- if (num_indices & 1)
- {
- const uint key = RG_ETC1_GET_KEY(p);
-
- hist[key & 0xFF]++;
- }
- }
-
- T* pCur = pIndices0;
- T* pNew = pIndices1;
-
- for (uint pass = 0; pass < key_size; pass++)
- {
- const uint* pHist = &hist[pass << 8];
-
- uint offsets[256];
-
- uint cur_ofs = 0;
- for (uint i = 0; i < 256; i += 2)
- {
- offsets[i] = cur_ofs;
- cur_ofs += pHist[i];
-
- offsets[i+1] = cur_ofs;
- cur_ofs += pHist[i+1];
- }
-
- const uint pass_shift = pass << 3;
-
- T* p = pCur;
- T* q = pCur + (num_indices >> 1) * 2;
-
- for ( ; p != q; p += 2)
- {
- uint index0 = p[0];
- uint index1 = p[1];
-
- uint c0 = (RG_ETC1_GET_KEY_FROM_INDEX(index0) >> pass_shift) & 0xFF;
- uint c1 = (RG_ETC1_GET_KEY_FROM_INDEX(index1) >> pass_shift) & 0xFF;
-
- if (c0 == c1)
- {
- uint dst_offset0 = offsets[c0];
-
- offsets[c0] = dst_offset0 + 2;
-
- pNew[dst_offset0] = static_cast<T>(index0);
- pNew[dst_offset0 + 1] = static_cast<T>(index1);
- }
- else
- {
- uint dst_offset0 = offsets[c0]++;
- uint dst_offset1 = offsets[c1]++;
-
- pNew[dst_offset0] = static_cast<T>(index0);
- pNew[dst_offset1] = static_cast<T>(index1);
- }
- }
-
- if (num_indices & 1)
- {
- uint index = *p;
- uint c = (RG_ETC1_GET_KEY_FROM_INDEX(index) >> pass_shift) & 0xFF;
-
- uint dst_offset = offsets[c];
- offsets[c] = dst_offset + 1;
-
- pNew[dst_offset] = static_cast<T>(index);
- }
-
- T* t = pCur;
- pCur = pNew;
- pNew = t;
- }
-
- return pCur;
- }
-
-#undef RG_ETC1_GET_KEY
-#undef RG_ETC1_GET_KEY_FROM_INDEX
-
- uint16 etc1_block::pack_color5(const color_quad_u8& color, bool scaled, uint bias)
- {
- return pack_color5(color.r, color.g, color.b, scaled, bias);
- }
-
- uint16 etc1_block::pack_color5(uint r, uint g, uint b, bool scaled, uint bias)
- {
- if (scaled)
- {
- r = (r * 31U + bias) / 255U;
- g = (g * 31U + bias) / 255U;
- b = (b * 31U + bias) / 255U;
- }
-
- r = rg_etc1::minimum(r, 31U);
- g = rg_etc1::minimum(g, 31U);
- b = rg_etc1::minimum(b, 31U);
-
- return static_cast<uint16>(b | (g << 5U) | (r << 10U));
- }
-
- color_quad_u8 etc1_block::unpack_color5(uint16 packed_color5, bool scaled, uint alpha)
- {
- uint b = packed_color5 & 31U;
- uint g = (packed_color5 >> 5U) & 31U;
- uint r = (packed_color5 >> 10U) & 31U;
-
- if (scaled)
- {
- b = (b << 3U) | (b >> 2U);
- g = (g << 3U) | (g >> 2U);
- r = (r << 3U) | (r >> 2U);
- }
-
- return color_quad_u8(cNoClamp, r, g, b, rg_etc1::minimum(alpha, 255U));
- }
-
- void etc1_block::unpack_color5(uint& r, uint& g, uint& b, uint16 packed_color5, bool scaled)
- {
- color_quad_u8 c(unpack_color5(packed_color5, scaled, 0));
- r = c.r;
- g = c.g;
- b = c.b;
- }
-
- bool etc1_block::unpack_color5(color_quad_u8& result, uint16 packed_color5, uint16 packed_delta3, bool scaled, uint alpha)
- {
- int dc_r, dc_g, dc_b;
- unpack_delta3(dc_r, dc_g, dc_b, packed_delta3);
-
- int b = (packed_color5 & 31U) + dc_b;
- int g = ((packed_color5 >> 5U) & 31U) + dc_g;
- int r = ((packed_color5 >> 10U) & 31U) + dc_r;
-
- bool success = true;
- if (static_cast<uint>(r | g | b) > 31U)
- {
- success = false;
- r = rg_etc1::clamp<int>(r, 0, 31);
- g = rg_etc1::clamp<int>(g, 0, 31);
- b = rg_etc1::clamp<int>(b, 0, 31);
- }
-
- if (scaled)
- {
- b = (b << 3U) | (b >> 2U);
- g = (g << 3U) | (g >> 2U);
- r = (r << 3U) | (r >> 2U);
- }
-
- result.set_noclamp_rgba(r, g, b, rg_etc1::minimum(alpha, 255U));
- return success;
- }
-
- bool etc1_block::unpack_color5(uint& r, uint& g, uint& b, uint16 packed_color5, uint16 packed_delta3, bool scaled, uint alpha)
- {
- color_quad_u8 result;
- const bool success = unpack_color5(result, packed_color5, packed_delta3, scaled, alpha);
- r = result.r;
- g = result.g;
- b = result.b;
- return success;
- }
-
- uint16 etc1_block::pack_delta3(int r, int g, int b)
- {
- RG_ETC1_ASSERT((r >= cETC1ColorDeltaMin) && (r <= cETC1ColorDeltaMax));
- RG_ETC1_ASSERT((g >= cETC1ColorDeltaMin) && (g <= cETC1ColorDeltaMax));
- RG_ETC1_ASSERT((b >= cETC1ColorDeltaMin) && (b <= cETC1ColorDeltaMax));
- if (r < 0) r += 8;
- if (g < 0) g += 8;
- if (b < 0) b += 8;
- return static_cast<uint16>(b | (g << 3) | (r << 6));
- }
-
- void etc1_block::unpack_delta3(int& r, int& g, int& b, uint16 packed_delta3)
- {
- r = (packed_delta3 >> 6) & 7;
- g = (packed_delta3 >> 3) & 7;
- b = packed_delta3 & 7;
- if (r >= 4) r -= 8;
- if (g >= 4) g -= 8;
- if (b >= 4) b -= 8;
- }
-
- uint16 etc1_block::pack_color4(const color_quad_u8& color, bool scaled, uint bias)
- {
- return pack_color4(color.r, color.g, color.b, scaled, bias);
- }
-
- uint16 etc1_block::pack_color4(uint r, uint g, uint b, bool scaled, uint bias)
- {
- if (scaled)
- {
- r = (r * 15U + bias) / 255U;
- g = (g * 15U + bias) / 255U;
- b = (b * 15U + bias) / 255U;
- }
-
- r = rg_etc1::minimum(r, 15U);
- g = rg_etc1::minimum(g, 15U);
- b = rg_etc1::minimum(b, 15U);
-
- return static_cast<uint16>(b | (g << 4U) | (r << 8U));
- }
-
- color_quad_u8 etc1_block::unpack_color4(uint16 packed_color4, bool scaled, uint alpha)
- {
- uint b = packed_color4 & 15U;
- uint g = (packed_color4 >> 4U) & 15U;
- uint r = (packed_color4 >> 8U) & 15U;
-
- if (scaled)
- {
- b = (b << 4U) | b;
- g = (g << 4U) | g;
- r = (r << 4U) | r;
- }
-
- return color_quad_u8(cNoClamp, r, g, b, rg_etc1::minimum(alpha, 255U));
- }
-
- void etc1_block::unpack_color4(uint& r, uint& g, uint& b, uint16 packed_color4, bool scaled)
- {
- color_quad_u8 c(unpack_color4(packed_color4, scaled, 0));
- r = c.r;
- g = c.g;
- b = c.b;
- }
-
- void etc1_block::get_diff_subblock_colors(color_quad_u8* pDst, uint16 packed_color5, uint table_idx)
- {
- RG_ETC1_ASSERT(table_idx < cETC1IntenModifierValues);
- const int *pInten_modifer_table = &g_etc1_inten_tables[table_idx][0];
-
- uint r, g, b;
- unpack_color5(r, g, b, packed_color5, true);
-
- const int ir = static_cast<int>(r), ig = static_cast<int>(g), ib = static_cast<int>(b);
-
- const int y0 = pInten_modifer_table[0];
- pDst[0].set(ir + y0, ig + y0, ib + y0);
-
- const int y1 = pInten_modifer_table[1];
- pDst[1].set(ir + y1, ig + y1, ib + y1);
-
- const int y2 = pInten_modifer_table[2];
- pDst[2].set(ir + y2, ig + y2, ib + y2);
-
- const int y3 = pInten_modifer_table[3];
- pDst[3].set(ir + y3, ig + y3, ib + y3);
- }
-
- bool etc1_block::get_diff_subblock_colors(color_quad_u8* pDst, uint16 packed_color5, uint16 packed_delta3, uint table_idx)
- {
- RG_ETC1_ASSERT(table_idx < cETC1IntenModifierValues);
- const int *pInten_modifer_table = &g_etc1_inten_tables[table_idx][0];
-
- uint r, g, b;
- bool success = unpack_color5(r, g, b, packed_color5, packed_delta3, true);
-
- const int ir = static_cast<int>(r), ig = static_cast<int>(g), ib = static_cast<int>(b);
-
- const int y0 = pInten_modifer_table[0];
- pDst[0].set(ir + y0, ig + y0, ib + y0);
-
- const int y1 = pInten_modifer_table[1];
- pDst[1].set(ir + y1, ig + y1, ib + y1);
-
- const int y2 = pInten_modifer_table[2];
- pDst[2].set(ir + y2, ig + y2, ib + y2);
-
- const int y3 = pInten_modifer_table[3];
- pDst[3].set(ir + y3, ig + y3, ib + y3);
-
- return success;
- }
-
- void etc1_block::get_abs_subblock_colors(color_quad_u8* pDst, uint16 packed_color4, uint table_idx)
- {
- RG_ETC1_ASSERT(table_idx < cETC1IntenModifierValues);
- const int *pInten_modifer_table = &g_etc1_inten_tables[table_idx][0];
-
- uint r, g, b;
- unpack_color4(r, g, b, packed_color4, true);
-
- const int ir = static_cast<int>(r), ig = static_cast<int>(g), ib = static_cast<int>(b);
-
- const int y0 = pInten_modifer_table[0];
- pDst[0].set(ir + y0, ig + y0, ib + y0);
-
- const int y1 = pInten_modifer_table[1];
- pDst[1].set(ir + y1, ig + y1, ib + y1);
-
- const int y2 = pInten_modifer_table[2];
- pDst[2].set(ir + y2, ig + y2, ib + y2);
-
- const int y3 = pInten_modifer_table[3];
- pDst[3].set(ir + y3, ig + y3, ib + y3);
- }
-
- bool unpack_etc1_block(const void* pETC1_block, unsigned int* pDst_pixels_rgba, bool preserve_alpha)
- {
- color_quad_u8* pDst = reinterpret_cast<color_quad_u8*>(pDst_pixels_rgba);
- const etc1_block& block = *static_cast<const etc1_block*>(pETC1_block);
-
- const bool diff_flag = block.get_diff_bit();
- const bool flip_flag = block.get_flip_bit();
- const uint table_index0 = block.get_inten_table(0);
- const uint table_index1 = block.get_inten_table(1);
-
- color_quad_u8 subblock_colors0[4];
- color_quad_u8 subblock_colors1[4];
- bool success = true;
-
- if (diff_flag)
- {
- const uint16 base_color5 = block.get_base5_color();
- const uint16 delta_color3 = block.get_delta3_color();
- etc1_block::get_diff_subblock_colors(subblock_colors0, base_color5, table_index0);
-
- if (!etc1_block::get_diff_subblock_colors(subblock_colors1, base_color5, delta_color3, table_index1))
- success = false;
- }
- else
- {
- const uint16 base_color4_0 = block.get_base4_color(0);
- etc1_block::get_abs_subblock_colors(subblock_colors0, base_color4_0, table_index0);
-
- const uint16 base_color4_1 = block.get_base4_color(1);
- etc1_block::get_abs_subblock_colors(subblock_colors1, base_color4_1, table_index1);
- }
-
- if (preserve_alpha)
- {
- if (flip_flag)
- {
- for (uint y = 0; y < 2; y++)
- {
- pDst[0].set_rgb(subblock_colors0[block.get_selector(0, y)]);
- pDst[1].set_rgb(subblock_colors0[block.get_selector(1, y)]);
- pDst[2].set_rgb(subblock_colors0[block.get_selector(2, y)]);
- pDst[3].set_rgb(subblock_colors0[block.get_selector(3, y)]);
- pDst += 4;
- }
-
- for (uint y = 2; y < 4; y++)
- {
- pDst[0].set_rgb(subblock_colors1[block.get_selector(0, y)]);
- pDst[1].set_rgb(subblock_colors1[block.get_selector(1, y)]);
- pDst[2].set_rgb(subblock_colors1[block.get_selector(2, y)]);
- pDst[3].set_rgb(subblock_colors1[block.get_selector(3, y)]);
- pDst += 4;
- }
- }
- else
- {
- for (uint y = 0; y < 4; y++)
- {
- pDst[0].set_rgb(subblock_colors0[block.get_selector(0, y)]);
- pDst[1].set_rgb(subblock_colors0[block.get_selector(1, y)]);
- pDst[2].set_rgb(subblock_colors1[block.get_selector(2, y)]);
- pDst[3].set_rgb(subblock_colors1[block.get_selector(3, y)]);
- pDst += 4;
- }
- }
- }
- else
- {
- if (flip_flag)
- {
- // 0000
- // 0000
- // 1111
- // 1111
- for (uint y = 0; y < 2; y++)
- {
- pDst[0] = subblock_colors0[block.get_selector(0, y)];
- pDst[1] = subblock_colors0[block.get_selector(1, y)];
- pDst[2] = subblock_colors0[block.get_selector(2, y)];
- pDst[3] = subblock_colors0[block.get_selector(3, y)];
- pDst += 4;
- }
-
- for (uint y = 2; y < 4; y++)
- {
- pDst[0] = subblock_colors1[block.get_selector(0, y)];
- pDst[1] = subblock_colors1[block.get_selector(1, y)];
- pDst[2] = subblock_colors1[block.get_selector(2, y)];
- pDst[3] = subblock_colors1[block.get_selector(3, y)];
- pDst += 4;
- }
- }
- else
- {
- // 0011
- // 0011
- // 0011
- // 0011
- for (uint y = 0; y < 4; y++)
- {
- pDst[0] = subblock_colors0[block.get_selector(0, y)];
- pDst[1] = subblock_colors0[block.get_selector(1, y)];
- pDst[2] = subblock_colors1[block.get_selector(2, y)];
- pDst[3] = subblock_colors1[block.get_selector(3, y)];
- pDst += 4;
- }
- }
- }
-
- return success;
- }
-
- struct etc1_solution_coordinates
- {
- inline etc1_solution_coordinates() :
- m_unscaled_color(0, 0, 0, 0),
- m_inten_table(0),
- m_color4(false)
- {
- }
-
- inline etc1_solution_coordinates(uint r, uint g, uint b, uint inten_table, bool color4) :
- m_unscaled_color(r, g, b, 255),
- m_inten_table(inten_table),
- m_color4(color4)
- {
- }
-
- inline etc1_solution_coordinates(const color_quad_u8& c, uint inten_table, bool color4) :
- m_unscaled_color(c),
- m_inten_table(inten_table),
- m_color4(color4)
- {
- }
-
- inline etc1_solution_coordinates(const etc1_solution_coordinates& other)
- {
- *this = other;
- }
-
- inline etc1_solution_coordinates& operator= (const etc1_solution_coordinates& rhs)
- {
- m_unscaled_color = rhs.m_unscaled_color;
- m_inten_table = rhs.m_inten_table;
- m_color4 = rhs.m_color4;
- return *this;
- }
-
- inline void clear()
- {
- m_unscaled_color.clear();
- m_inten_table = 0;
- m_color4 = false;
- }
-
- inline color_quad_u8 get_scaled_color() const
- {
- int br, bg, bb;
- if (m_color4)
- {
- br = m_unscaled_color.r | (m_unscaled_color.r << 4);
- bg = m_unscaled_color.g | (m_unscaled_color.g << 4);
- bb = m_unscaled_color.b | (m_unscaled_color.b << 4);
- }
- else
- {
- br = (m_unscaled_color.r >> 2) | (m_unscaled_color.r << 3);
- bg = (m_unscaled_color.g >> 2) | (m_unscaled_color.g << 3);
- bb = (m_unscaled_color.b >> 2) | (m_unscaled_color.b << 3);
- }
- return color_quad_u8(br, bg, bb);
- }
-
- inline void get_block_colors(color_quad_u8* pBlock_colors)
- {
- int br, bg, bb;
- if (m_color4)
- {
- br = m_unscaled_color.r | (m_unscaled_color.r << 4);
- bg = m_unscaled_color.g | (m_unscaled_color.g << 4);
- bb = m_unscaled_color.b | (m_unscaled_color.b << 4);
- }
- else
- {
- br = (m_unscaled_color.r >> 2) | (m_unscaled_color.r << 3);
- bg = (m_unscaled_color.g >> 2) | (m_unscaled_color.g << 3);
- bb = (m_unscaled_color.b >> 2) | (m_unscaled_color.b << 3);
- }
- const int* pInten_table = g_etc1_inten_tables[m_inten_table];
- pBlock_colors[0].set(br + pInten_table[0], bg + pInten_table[0], bb + pInten_table[0]);
- pBlock_colors[1].set(br + pInten_table[1], bg + pInten_table[1], bb + pInten_table[1]);
- pBlock_colors[2].set(br + pInten_table[2], bg + pInten_table[2], bb + pInten_table[2]);
- pBlock_colors[3].set(br + pInten_table[3], bg + pInten_table[3], bb + pInten_table[3]);
- }
-
- color_quad_u8 m_unscaled_color;
- uint m_inten_table;
- bool m_color4;
- };
-
- class etc1_optimizer
- {
- etc1_optimizer(const etc1_optimizer&);
- etc1_optimizer& operator= (const etc1_optimizer&);
-
- public:
- etc1_optimizer()
- {
- clear();
- }
-
- void clear()
- {
- m_pParams = NULL;
- m_pResult = NULL;
- m_pSorted_luma = NULL;
- m_pSorted_luma_indices = NULL;
- }
-
- struct params : etc1_pack_params
- {
- params()
- {
- clear();
- }
-
- params(const etc1_pack_params& base_params) :
- etc1_pack_params(base_params)
- {
- clear_optimizer_params();
- }
-
- void clear()
- {
- etc1_pack_params::clear();
- clear_optimizer_params();
- }
-
- void clear_optimizer_params()
- {
- m_num_src_pixels = 0;
- m_pSrc_pixels = 0;
-
- m_use_color4 = false;
- static const int s_default_scan_delta[] = { 0 };
- m_pScan_deltas = s_default_scan_delta;
- m_scan_delta_size = 1;
-
- m_base_color5.clear();
- m_constrain_against_base_color5 = false;
- }
-
- uint m_num_src_pixels;
- const color_quad_u8* m_pSrc_pixels;
-
- bool m_use_color4;
- const int* m_pScan_deltas;
- uint m_scan_delta_size;
-
- color_quad_u8 m_base_color5;
- bool m_constrain_against_base_color5;
- };
-
- struct results
- {
- uint64 m_error;
- color_quad_u8 m_block_color_unscaled;
- uint m_block_inten_table;
- uint m_n;
- uint8* m_pSelectors;
- bool m_block_color4;
-
- inline results& operator= (const results& rhs)
- {
- m_block_color_unscaled = rhs.m_block_color_unscaled;
- m_block_color4 = rhs.m_block_color4;
- m_block_inten_table = rhs.m_block_inten_table;
- m_error = rhs.m_error;
- RG_ETC1_ASSERT(m_n == rhs.m_n);
- memcpy(m_pSelectors, rhs.m_pSelectors, rhs.m_n);
- return *this;
- }
- };
-
- void init(const params& params, results& result);
- bool compute();
-
- private:
- struct potential_solution
- {
- potential_solution() : m_coords(), m_error(cUINT64_MAX), m_valid(false)
- {
- }
-
- etc1_solution_coordinates m_coords;
- uint8 m_selectors[8];
- uint64 m_error;
- bool m_valid;
-
- void clear()
- {
- m_coords.clear();
- m_error = cUINT64_MAX;
- m_valid = false;
- }
- };
-
- const params* m_pParams;
- results* m_pResult;
-
- int m_limit;
-
- vec3F m_avg_color;
- int m_br, m_bg, m_bb;
- uint16 m_luma[8];
- uint32 m_sorted_luma[2][8];
- const uint32* m_pSorted_luma_indices;
- uint32* m_pSorted_luma;
-
- uint8 m_selectors[8];
- uint8 m_best_selectors[8];
-
- potential_solution m_best_solution;
- potential_solution m_trial_solution;
- uint8 m_temp_selectors[8];
-
- bool evaluate_solution(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution);
- bool evaluate_solution_fast(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution);
- };
-
- bool etc1_optimizer::compute()
- {
- const uint n = m_pParams->m_num_src_pixels;
- const int scan_delta_size = m_pParams->m_scan_delta_size;
-
- // Scan through a subset of the 3D lattice centered around the avg block color trying each 3D (555 or 444) lattice point as a potential block color.
- // Each time a better solution is found try to refine the current solution's block color based of the current selectors and intensity table index.
- for (int zdi = 0; zdi < scan_delta_size; zdi++)
- {
- const int zd = m_pParams->m_pScan_deltas[zdi];
- const int mbb = m_bb + zd;
- if (mbb < 0) continue; else if (mbb > m_limit) break;
-
- for (int ydi = 0; ydi < scan_delta_size; ydi++)
- {
- const int yd = m_pParams->m_pScan_deltas[ydi];
- const int mbg = m_bg + yd;
- if (mbg < 0) continue; else if (mbg > m_limit) break;
-
- for (int xdi = 0; xdi < scan_delta_size; xdi++)
- {
- const int xd = m_pParams->m_pScan_deltas[xdi];
- const int mbr = m_br + xd;
- if (mbr < 0) continue; else if (mbr > m_limit) break;
-
- etc1_solution_coordinates coords(mbr, mbg, mbb, 0, m_pParams->m_use_color4);
- if (m_pParams->m_quality == cHighQuality)
- {
- if (!evaluate_solution(coords, m_trial_solution, &m_best_solution))
- continue;
- }
- else
- {
- if (!evaluate_solution_fast(coords, m_trial_solution, &m_best_solution))
- continue;
- }
-
- // Now we have the input block, the avg. color of the input pixels, a set of trial selector indices, and the block color+intensity index.
- // Now, for each component, attempt to refine the current solution by solving a simple linear equation. For example, for 4 colors:
- // The goal is:
- // pixel0 - (block_color+inten_table[selector0]) + pixel1 - (block_color+inten_table[selector1]) + pixel2 - (block_color+inten_table[selector2]) + pixel3 - (block_color+inten_table[selector3]) = 0
- // Rearranging this:
- // (pixel0 + pixel1 + pixel2 + pixel3) - (block_color+inten_table[selector0]) - (block_color+inten_table[selector1]) - (block_color+inten_table[selector2]) - (block_color+inten_table[selector3]) = 0
- // (pixel0 + pixel1 + pixel2 + pixel3) - block_color - inten_table[selector0] - block_color-inten_table[selector1] - block_color-inten_table[selector2] - block_color-inten_table[selector3] = 0
- // (pixel0 + pixel1 + pixel2 + pixel3) - 4*block_color - inten_table[selector0] - inten_table[selector1] - inten_table[selector2] - inten_table[selector3] = 0
- // (pixel0 + pixel1 + pixel2 + pixel3) - 4*block_color - (inten_table[selector0] + inten_table[selector1] + inten_table[selector2] + inten_table[selector3]) = 0
- // (pixel0 + pixel1 + pixel2 + pixel3)/4 - block_color - (inten_table[selector0] + inten_table[selector1] + inten_table[selector2] + inten_table[selector3])/4 = 0
- // block_color = (pixel0 + pixel1 + pixel2 + pixel3)/4 - (inten_table[selector0] + inten_table[selector1] + inten_table[selector2] + inten_table[selector3])/4
- // So what this means:
- // optimal_block_color = avg_input - avg_inten_delta
- // So the optimal block color can be computed by taking the average block color and subtracting the current average of the intensity delta.
- // Unfortunately, optimal_block_color must then be quantized to 555 or 444 so it's not always possible to improve matters using this formula.
- // Also, the above formula is for unclamped intensity deltas. The actual implementation takes into account clamping.
-
- const uint max_refinement_trials = (m_pParams->m_quality == cLowQuality) ? 2 : (((xd | yd | zd) == 0) ? 4 : 2);
- for (uint refinement_trial = 0; refinement_trial < max_refinement_trials; refinement_trial++)
- {
- const uint8* pSelectors = m_best_solution.m_selectors;
- const int* pInten_table = g_etc1_inten_tables[m_best_solution.m_coords.m_inten_table];
-
- int delta_sum_r = 0, delta_sum_g = 0, delta_sum_b = 0;
- const color_quad_u8 base_color(m_best_solution.m_coords.get_scaled_color());
- for (uint r = 0; r < n; r++)
- {
- const uint s = *pSelectors++;
- const int yd = pInten_table[s];
- // Compute actual delta being applied to each pixel, taking into account clamping.
- delta_sum_r += rg_etc1::clamp<int>(base_color.r + yd, 0, 255) - base_color.r;
- delta_sum_g += rg_etc1::clamp<int>(base_color.g + yd, 0, 255) - base_color.g;
- delta_sum_b += rg_etc1::clamp<int>(base_color.b + yd, 0, 255) - base_color.b;
- }
- if ((!delta_sum_r) && (!delta_sum_g) && (!delta_sum_b))
- break;
- const float avg_delta_r_f = static_cast<float>(delta_sum_r) / n;
- const float avg_delta_g_f = static_cast<float>(delta_sum_g) / n;
- const float avg_delta_b_f = static_cast<float>(delta_sum_b) / n;
- const int br1 = rg_etc1::clamp<int>(static_cast<uint>((m_avg_color[0] - avg_delta_r_f) * m_limit / 255.0f + .5f), 0, m_limit);
- const int bg1 = rg_etc1::clamp<int>(static_cast<uint>((m_avg_color[1] - avg_delta_g_f) * m_limit / 255.0f + .5f), 0, m_limit);
- const int bb1 = rg_etc1::clamp<int>(static_cast<uint>((m_avg_color[2] - avg_delta_b_f) * m_limit / 255.0f + .5f), 0, m_limit);
-
- bool skip = false;
-
- if ((mbr == br1) && (mbg == bg1) && (mbb == bb1))
- skip = true;
- else if ((br1 == m_best_solution.m_coords.m_unscaled_color.r) && (bg1 == m_best_solution.m_coords.m_unscaled_color.g) && (bb1 == m_best_solution.m_coords.m_unscaled_color.b))
- skip = true;
- else if ((m_br == br1) && (m_bg == bg1) && (m_bb == bb1))
- skip = true;
-
- if (skip)
- break;
-
- etc1_solution_coordinates coords1(br1, bg1, bb1, 0, m_pParams->m_use_color4);
- if (m_pParams->m_quality == cHighQuality)
- {
- if (!evaluate_solution(coords1, m_trial_solution, &m_best_solution))
- break;
- }
- else
- {
- if (!evaluate_solution_fast(coords1, m_trial_solution, &m_best_solution))
- break;
- }
-
- } // refinement_trial
-
- } // xdi
- } // ydi
- } // zdi
-
- if (!m_best_solution.m_valid)
- {
- m_pResult->m_error = cUINT32_MAX;
- return false;
- }
-
- const uint8* pSelectors = m_best_solution.m_selectors;
-
-#ifdef RG_ETC1_BUILD_DEBUG
- {
- color_quad_u8 block_colors[4];
- m_best_solution.m_coords.get_block_colors(block_colors);
-
- const color_quad_u8* pSrc_pixels = m_pParams->m_pSrc_pixels;
- uint64 actual_error = 0;
- for (uint i = 0; i < n; i++)
- actual_error += pSrc_pixels[i].squared_distance_rgb(block_colors[pSelectors[i]]);
-
- RG_ETC1_ASSERT(actual_error == m_best_solution.m_error);
- }
-#endif
-
- m_pResult->m_error = m_best_solution.m_error;
-
- m_pResult->m_block_color_unscaled = m_best_solution.m_coords.m_unscaled_color;
- m_pResult->m_block_color4 = m_best_solution.m_coords.m_color4;
-
- m_pResult->m_block_inten_table = m_best_solution.m_coords.m_inten_table;
- memcpy(m_pResult->m_pSelectors, pSelectors, n);
- m_pResult->m_n = n;
-
- return true;
- }
-
- void etc1_optimizer::init(const params& p, results& r)
- {
- // This version is hardcoded for 8 pixel subblocks.
- RG_ETC1_ASSERT(p.m_num_src_pixels == 8);
-
- m_pParams = &p;
- m_pResult = &r;
-
- const uint n = 8;
-
- m_limit = m_pParams->m_use_color4 ? 15 : 31;
-
- vec3F avg_color(0.0f);
-
- for (uint i = 0; i < n; i++)
- {
- const color_quad_u8& c = m_pParams->m_pSrc_pixels[i];
- const vec3F fc(c.r, c.g, c.b);
-
- avg_color += fc;
-
- m_luma[i] = static_cast<uint16>(c.r + c.g + c.b);
- m_sorted_luma[0][i] = i;
- }
- avg_color *= (1.0f / static_cast<float>(n));
- m_avg_color = avg_color;
-
- m_br = rg_etc1::clamp<int>(static_cast<uint>(m_avg_color[0] * m_limit / 255.0f + .5f), 0, m_limit);
- m_bg = rg_etc1::clamp<int>(static_cast<uint>(m_avg_color[1] * m_limit / 255.0f + .5f), 0, m_limit);
- m_bb = rg_etc1::clamp<int>(static_cast<uint>(m_avg_color[2] * m_limit / 255.0f + .5f), 0, m_limit);
-
- if (m_pParams->m_quality <= cMediumQuality)
- {
- m_pSorted_luma_indices = indirect_radix_sort(n, m_sorted_luma[0], m_sorted_luma[1], m_luma, 0, sizeof(m_luma[0]), false);
- m_pSorted_luma = m_sorted_luma[0];
- if (m_pSorted_luma_indices == m_sorted_luma[0])
- m_pSorted_luma = m_sorted_luma[1];
-
- for (uint i = 0; i < n; i++)
- m_pSorted_luma[i] = m_luma[m_pSorted_luma_indices[i]];
- }
-
- m_best_solution.m_coords.clear();
- m_best_solution.m_valid = false;
- m_best_solution.m_error = cUINT64_MAX;
- }
-
- bool etc1_optimizer::evaluate_solution(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution)
- {
- trial_solution.m_valid = false;
-
- if (m_pParams->m_constrain_against_base_color5)
- {
- const int dr = coords.m_unscaled_color.r - m_pParams->m_base_color5.r;
- const int dg = coords.m_unscaled_color.g - m_pParams->m_base_color5.g;
- const int db = coords.m_unscaled_color.b - m_pParams->m_base_color5.b;
-
- if ((rg_etc1::minimum(dr, dg, db) < cETC1ColorDeltaMin) || (rg_etc1::maximum(dr, dg, db) > cETC1ColorDeltaMax))
- return false;
- }
-
- const color_quad_u8 base_color(coords.get_scaled_color());
-
- const uint n = 8;
-
- trial_solution.m_error = cUINT64_MAX;
-
- for (uint inten_table = 0; inten_table < cETC1IntenModifierValues; inten_table++)
- {
- const int* pInten_table = g_etc1_inten_tables[inten_table];
-
- color_quad_u8 block_colors[4];
- for (uint s = 0; s < 4; s++)
- {
- const int yd = pInten_table[s];
- block_colors[s].set(base_color.r + yd, base_color.g + yd, base_color.b + yd, 0);
- }
-
- uint64 total_error = 0;
-
- const color_quad_u8* pSrc_pixels = m_pParams->m_pSrc_pixels;
- for (uint c = 0; c < n; c++)
- {
- const color_quad_u8& src_pixel = *pSrc_pixels++;
-
- uint best_selector_index = 0;
- uint best_error = rg_etc1::square(src_pixel.r - block_colors[0].r) + rg_etc1::square(src_pixel.g - block_colors[0].g) + rg_etc1::square(src_pixel.b - block_colors[0].b);
-
- uint trial_error = rg_etc1::square(src_pixel.r - block_colors[1].r) + rg_etc1::square(src_pixel.g - block_colors[1].g) + rg_etc1::square(src_pixel.b - block_colors[1].b);
- if (trial_error < best_error)
- {
- best_error = trial_error;
- best_selector_index = 1;
- }
-
- trial_error = rg_etc1::square(src_pixel.r - block_colors[2].r) + rg_etc1::square(src_pixel.g - block_colors[2].g) + rg_etc1::square(src_pixel.b - block_colors[2].b);
- if (trial_error < best_error)
- {
- best_error = trial_error;
- best_selector_index = 2;
- }
-
- trial_error = rg_etc1::square(src_pixel.r - block_colors[3].r) + rg_etc1::square(src_pixel.g - block_colors[3].g) + rg_etc1::square(src_pixel.b - block_colors[3].b);
- if (trial_error < best_error)
- {
- best_error = trial_error;
- best_selector_index = 3;
- }
-
- m_temp_selectors[c] = static_cast<uint8>(best_selector_index);
-
- total_error += best_error;
- if (total_error >= trial_solution.m_error)
- break;
- }
-
- if (total_error < trial_solution.m_error)
- {
- trial_solution.m_error = total_error;
- trial_solution.m_coords.m_inten_table = inten_table;
- memcpy(trial_solution.m_selectors, m_temp_selectors, 8);
- trial_solution.m_valid = true;
- }
- }
- trial_solution.m_coords.m_unscaled_color = coords.m_unscaled_color;
- trial_solution.m_coords.m_color4 = m_pParams->m_use_color4;
-
- bool success = false;
- if (pBest_solution)
- {
- if (trial_solution.m_error < pBest_solution->m_error)
- {
- *pBest_solution = trial_solution;
- success = true;
- }
- }
-
- return success;
- }
-
- bool etc1_optimizer::evaluate_solution_fast(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution)
- { // Scores the base color in `coords` against every intensity table, assigning selectors from the presorted luma values instead of testing all four block colors per pixel. Fills trial_solution; returns true only when the trial also replaced *pBest_solution.
- if (m_pParams->m_constrain_against_base_color5)
- {
- const int dr = coords.m_unscaled_color.r - m_pParams->m_base_color5.r;
- const int dg = coords.m_unscaled_color.g - m_pParams->m_base_color5.g;
- const int db = coords.m_unscaled_color.b - m_pParams->m_base_color5.b;
- // In constrained mode every channel must lie within [cETC1ColorDeltaMin, cETC1ColorDeltaMax] of m_base_color5; otherwise the candidate is rejected and marked invalid.
- if ((rg_etc1::minimum(dr, dg, db) < cETC1ColorDeltaMin) || (rg_etc1::maximum(dr, dg, db) > cETC1ColorDeltaMax))
- {
- trial_solution.m_valid = false;
- return false;
- }
- }
-
- const color_quad_u8 base_color(coords.get_scaled_color());
-
- const uint n = 8; // number of source pixels scored; hard-coded here rather than read from m_pParams
-
- trial_solution.m_error = cUINT64_MAX;
- // Try every intensity table, highest index first, keeping the lowest-error one in trial_solution.
- for (int inten_table = cETC1IntenModifierValues - 1; inten_table >= 0; --inten_table)
- {
- const int* pInten_table = g_etc1_inten_tables[inten_table];
- // Build the four candidate block colors (base color plus each modifier) and their r+g+b intensity sums.
- uint block_inten[4];
- color_quad_u8 block_colors[4];
- for (uint s = 0; s < 4; s++)
- {
- const int yd = pInten_table[s];
- color_quad_u8 block_color(base_color.r + yd, base_color.g + yd, base_color.b + yd, 0);
- block_colors[s] = block_color;
- block_inten[s] = block_color.r + block_color.g + block_color.b;
- }
-
- // evaluate_solution_fast() enforces/assumes a total ordering of the input colors along the intensity (1,1,1) axis to more quickly classify the inputs to selectors.
- // The input colors have been presorted along the projection onto this axis, and ETC1 block colors are always ordered along the intensity axis, so this classification is fast.
- // 0 1 2 3
- // 01 12 23
- const uint block_inten_midpoints[3] = { block_inten[0] + block_inten[1], block_inten[1] + block_inten[2], block_inten[2] + block_inten[3] };
- // Each entry is the sum of two adjacent intensities (i.e. twice their midpoint), which is why luma values are doubled before being compared below.
- uint64 total_error = 0;
- const color_quad_u8* pSrc_pixels = m_pParams->m_pSrc_pixels;
- if ((m_pSorted_luma[n - 1] * 2) < block_inten_midpoints[0])
- { // The last sorted luma (the largest, given the presort) is below the 0/1 midpoint, so every pixel takes selector 0.
- if (block_inten[0] > m_pSorted_luma[n - 1])
- {
- const uint min_error = intabs(block_inten[0] - m_pSorted_luma[n - 1]);
- if (min_error >= trial_solution.m_error)
- continue;
- }
- // NOTE(review): the early-out above compares a linear intensity gap against an accumulated squared error; presumably intended as a cheap lower bound - confirm.
- memset(&m_temp_selectors[0], 0, n);
-
- for (uint c = 0; c < n; c++)
- total_error += block_colors[0].squared_distance_rgb(pSrc_pixels[c]);
- }
- else if ((m_pSorted_luma[0] * 2) >= block_inten_midpoints[2])
- { // The first sorted luma (the smallest, given the presort) is at or above the 2/3 midpoint, so every pixel takes selector 3.
- if (m_pSorted_luma[0] > block_inten[3])
- {
- const uint min_error = intabs(m_pSorted_luma[0] - block_inten[3]);
- if (min_error >= trial_solution.m_error)
- continue;
- }
- // Same style of early-out as in the selector-0 case, mirrored for the top of the range.
- memset(&m_temp_selectors[0], 3, n);
-
- for (uint c = 0; c < n; c++)
- total_error += block_colors[3].squared_distance_rgb(pSrc_pixels[c]);
- }
- else
- {
- uint cur_selector = 0, c; // mixed case: walk the pixels in sorted-luma order, only ever advancing the selector
- for (c = 0; c < n; c++)
- {
- const uint y = m_pSorted_luma[c];
- while ((y * 2) >= block_inten_midpoints[cur_selector])
- if (++cur_selector > 2)
- goto done; // past the last midpoint: this pixel and all remaining ones take selector 3
- const uint sorted_pixel_index = m_pSorted_luma_indices[c];
- m_temp_selectors[sorted_pixel_index] = static_cast<uint8>(cur_selector);
- total_error += block_colors[cur_selector].squared_distance_rgb(pSrc_pixels[sorted_pixel_index]);
- }
-done:
- while (c < n)
- {
- const uint sorted_pixel_index = m_pSorted_luma_indices[c];
- m_temp_selectors[sorted_pixel_index] = 3;
- total_error += block_colors[3].squared_distance_rgb(pSrc_pixels[sorted_pixel_index]);
- ++c;
- }
- }
- // Keep this table if it beats the best seen so far; an exact match (zero error) ends the search.
- if (total_error < trial_solution.m_error)
- {
- trial_solution.m_error = total_error;
- trial_solution.m_coords.m_inten_table = inten_table;
- memcpy(trial_solution.m_selectors, m_temp_selectors, n);
- trial_solution.m_valid = true;
- if (!total_error)
- break;
- }
- }
- trial_solution.m_coords.m_unscaled_color = coords.m_unscaled_color;
- trial_solution.m_coords.m_color4 = m_pParams->m_use_color4;
- // Promote the trial to *pBest_solution only when a best-solution slot was supplied and the trial is strictly better.
- bool success = false;
- if (pBest_solution)
- {
- if (trial_solution.m_error < pBest_solution->m_error)
- {
- *pBest_solution = trial_solution;
- success = true;
- }
- }
-
- return success;
- }
-
- static uint etc1_decode_value(uint diff, uint inten, uint selector, uint packed_c) // Decodes one packed color component to its final 8-bit value for the given diff flag, intensity table and selector.
- {
- const uint limit = diff ? 32 : 16; limit; // 5-bit range in diff mode, 4-bit otherwise; the bare `limit;` presumably only keeps the variable referenced when RG_ETC1_ASSERT compiles to nothing
- RG_ETC1_ASSERT((diff < 2) && (inten < 8) && (selector < 4) && (packed_c < limit));
- int c;
- if (diff)
- c = (packed_c >> 2) | (packed_c << 3); // expand 5 bits to 8 by replicating the top bits into the low bits
- else
- c = packed_c | (packed_c << 4); // expand 4 bits to 8 by duplicating the nibble
- c += g_etc1_inten_tables[inten][selector]; // apply the signed intensity modifier
- c = rg_etc1::clamp<int>(c, 0, 255);
- return c;
- }
-
- static inline int mul_8bit(int a, int b) { int t = a*b + 128; return (t + (t >> 8)) >> 8; } // Integer-only rounded approximation of (a * b) / 255, intended for 8-bit operands.
-
- void pack_etc1_block_init() // Fills the runtime lookup tables g_etc1_inverse_lookup and g_quant5_tab that the packing routines read.
- {
- for (uint diff = 0; diff < 2; diff++)
- {
- const uint limit = diff ? 32 : 16; // number of packed color values: 5-bit in diff mode, 4-bit otherwise
-
- for (uint inten = 0; inten < 8; inten++)
- {
- for (uint selector = 0; selector < 4; selector++)
- {
- const uint inverse_table_index = diff + (inten << 1) + (selector << 4); // same bit layout as the low byte of the g_color8_to_etc_block_config entries
- for (uint color = 0; color < 256; color++)
- {
- uint best_error = cUINT32_MAX, best_packed_c = 0;
- for (uint packed_c = 0; packed_c < limit; packed_c++) // exhaustive search for the packed value that decodes closest to `color`
- {
- int v = etc1_decode_value(diff, inten, selector, packed_c);
- uint err = labs(v - static_cast<int>(color));
- //printf("err: %d - %u = %u\n",v,color,err);
- if (err < best_error)
- {
- best_error = err;
- best_packed_c = packed_c;
- if (!best_error)
- break;
- }
- }
- RG_ETC1_ASSERT(best_error <= 255);
- g_etc1_inverse_lookup[inverse_table_index][color] = static_cast<uint16>(best_packed_c | (best_error << 8)); // low byte: best packed color, high byte: its absolute error
- }
- }
- }
- }
- // Second table: 8-bit -> 5-bit -> 8-bit quantization used by the dithering code.
- uint expand5[32];
- for(int i = 0; i < 32; i++)
- expand5[i] = (i << 3) | (i >> 2);
- // g_quant5_tab has 8 extra entries on each side: index i corresponds to the value (i - 8), clamped to 0..255 before quantizing.
- for(int i = 0; i < 256 + 16; i++)
- {
- int v = clamp<int>(i - 8, 0, 255);
- g_quant5_tab[i] = static_cast<uint8>(expand5[mul_8bit(v,31)]);
- }
- }
-
- // Packs solid color blocks efficiently using a set of small precomputed tables. Writes all 8 bytes of `block` and returns the squared RGB error for a single pixel.
- // For random 888 inputs, MSE results are better than Ericsson's ETC1 packer in "slow" mode ~9.5% of the time, is slightly worse only ~.01% of the time, and is equal the rest of the time.
- static uint64 pack_etc1_block_solid_color(etc1_block& block, const uint8* pColor, etc1_pack_params& pack_params)
- {
- pack_params; // no-op reference to the otherwise unused parameter
- RG_ETC1_ASSERT(g_etc1_inverse_lookup[0][255]); // presumably verifies that pack_etc1_block_init() has populated the inverse lookup table
-
- static uint s_next_comp[4] = { 1, 2, 0, 1 }; // for component i, the other two components are s_next_comp[i] and s_next_comp[i + 1]
-
- uint best_error = cUINT32_MAX, best_i = 0;
- int best_x = 0, best_packed_c1 = 0, best_packed_c2 = 0;
-
- // For each possible 8-bit value, there is a precomputed list of diff/inten/selector configurations that allow that 8-bit value to be encoded with no error.
- for (uint i = 0; i < 3; i++)
- {
- const uint c1 = pColor[s_next_comp[i]], c2 = pColor[s_next_comp[i + 1]];
- // Also try the component value one step below and above, in case a slightly-off anchor lets the other two components fit better.
- const int delta_range = 1;
- for (int delta = -delta_range; delta <= delta_range; delta++)
- {
- const int c_plus_delta = rg_etc1::clamp<int>(pColor[i] + delta, 0, 255);
- // Values 0 and 255 have their own table; values 1..254 are stored at index (value - 1).
- const uint16* pTable;
- if (!c_plus_delta)
- pTable = g_color8_to_etc_block_config_0_255[0];
- else if (c_plus_delta == 255)
- pTable = g_color8_to_etc_block_config_0_255[1];
- else
- pTable = g_color8_to_etc_block_config_1_to_254[c_plus_delta - 1];
- // Walk the 0xFFFF-terminated list of configurations that reproduce c_plus_delta exactly.
- do
- {
- const uint x = *pTable++;
-
-#ifdef RG_ETC1_BUILD_DEBUG
- const uint diff = x & 1;
- const uint inten = (x >> 1) & 7;
- const uint selector = (x >> 4) & 3;
- const uint p0 = (x >> 8) & 255;
- RG_ETC1_ASSERT(etc1_decode_value(diff, inten, selector, p0) == (uint)c_plus_delta);
-#endif
- // With diff/inten/selector fixed by x, look up the best packed value (low byte) and its error (high byte) for the other two components.
- const uint16* pInverse_table = g_etc1_inverse_lookup[x & 0xFF];
- uint16 p1 = pInverse_table[c1];
- uint16 p2 = pInverse_table[c2];
- const uint trial_error = rg_etc1::square(c_plus_delta - pColor[i]) + rg_etc1::square(p1 >> 8) + rg_etc1::square(p2 >> 8);
- if (trial_error < best_error)
- {
- best_error = trial_error;
- best_x = x;
- best_packed_c1 = p1 & 0xFF;
- best_packed_c2 = p2 & 0xFF;
- best_i = i;
- if (!best_error)
- goto found_perfect_match;
- }
- } while (*pTable != 0xFFFF);
- }
- }
-found_perfect_match:
- // Emit the block: both subblocks share the same intensity table, selector and base color.
- const uint diff = best_x & 1;
- const uint inten = (best_x >> 1) & 7;
-
- block.m_bytes[3] = static_cast<uint8>(((inten | (inten << 3)) << 2) | (diff << 1)); // intensity table index for both subblocks plus the diff bit; flip bit left at 0
-
- const uint etc1_selector = g_selector_index_to_etc1[(best_x >> 4) & 3];
- *reinterpret_cast<uint16*>(&block.m_bytes[4]) = (etc1_selector & 2) ? 0xFFFF : 0; // NOTE(review): 16-bit store through a uint8 buffer - assumes m_bytes is suitably aligned; confirm
- *reinterpret_cast<uint16*>(&block.m_bytes[6]) = (etc1_selector & 1) ? 0xFFFF : 0; // all 16 pixels get the same selector, so each bit plane is all ones or all zeros
-
- const uint best_packed_c0 = (best_x >> 8) & 255;
- if (diff)
- { // diff mode: 5-bit base color in the top bits, 3-bit delta left at zero
- block.m_bytes[best_i] = static_cast<uint8>(best_packed_c0 << 3);
- block.m_bytes[s_next_comp[best_i]] = static_cast<uint8>(best_packed_c1 << 3);
- block.m_bytes[s_next_comp[best_i+1]] = static_cast<uint8>(best_packed_c2 << 3);
- }
- else
- { // absolute mode: the same 4-bit color is written to both nibbles
- block.m_bytes[best_i] = static_cast<uint8>(best_packed_c0 | (best_packed_c0 << 4));
- block.m_bytes[s_next_comp[best_i]] = static_cast<uint8>(best_packed_c1 | (best_packed_c1 << 4));
- block.m_bytes[s_next_comp[best_i+1]] = static_cast<uint8>(best_packed_c2 | (best_packed_c2 << 4));
- }
-
- return best_error;
- }
-
- static uint pack_etc1_block_solid_color_constrained( // Solid-color search restricted to one encoding mode (use_diff) and, optionally, to colors within the delta range of *pBase_color5_unscaled.
- etc1_optimizer::results& results, // filled only when a configuration is found
- uint num_colors, const uint8* pColor, // num_colors scales the returned error and sizes the selector fill; pColor points at the r,g,b bytes
- etc1_pack_params& pack_params, // not used by the visible body
- bool use_diff, // only table entries whose diff bit equals this are considered
- const color_quad_u8* pBase_color5_unscaled) // may be NULL; when set in diff mode, each packed component must stay within the delta range of this base
- {
- RG_ETC1_ASSERT(g_etc1_inverse_lookup[0][255]); // presumably verifies that pack_etc1_block_init() has populated the inverse lookup table
-
- pack_params; // no-op reference to the otherwise unused parameter
- static uint s_next_comp[4] = { 1, 2, 0, 1 }; // for component i, the other two components are s_next_comp[i] and s_next_comp[i + 1]
-
- uint best_error = cUINT32_MAX, best_i = 0;
- int best_x = 0, best_packed_c1 = 0, best_packed_c2 = 0;
-
- // For each possible 8-bit value, there is a precomputed list of diff/inten/selector configurations that allow that 8-bit value to be encoded with no error.
- for (uint i = 0; i < 3; i++)
- {
- const uint c1 = pColor[s_next_comp[i]], c2 = pColor[s_next_comp[i + 1]];
-
- const int delta_range = 1;
- for (int delta = -delta_range; delta <= delta_range; delta++)
- {
- const int c_plus_delta = rg_etc1::clamp<int>(pColor[i] + delta, 0, 255);
- // Values 0 and 255 have their own table; values 1..254 are stored at index (value - 1).
- const uint16* pTable;
- if (!c_plus_delta)
- pTable = g_color8_to_etc_block_config_0_255[0];
- else if (c_plus_delta == 255)
- pTable = g_color8_to_etc_block_config_0_255[1];
- else
- pTable = g_color8_to_etc_block_config_1_to_254[c_plus_delta - 1];
- // Walk the 0xFFFF-terminated configuration list; each rejected entry checks for the terminator itself before continuing.
- do
- {
- const uint x = *pTable++;
- const uint diff = x & 1;
- if (static_cast<uint>(use_diff) != diff) // wrong encoding mode for this call
- {
- if (*pTable == 0xFFFF)
- break;
- continue;
- }
-
- if ((diff) && (pBase_color5_unscaled))
- {
- const int p0 = (x >> 8) & 255;
- int delta = p0 - static_cast<int>(pBase_color5_unscaled->c[i]); // note: this `delta` shadows the loop variable of the same name
- if ((delta < cETC1ColorDeltaMin) || (delta > cETC1ColorDeltaMax))
- {
- if (*pTable == 0xFFFF)
- break;
- continue;
- }
- }
-
-#ifdef RG_ETC1_BUILD_DEBUG
- {
- const uint inten = (x >> 1) & 7;
- const uint selector = (x >> 4) & 3;
- const uint p0 = (x >> 8) & 255;
- RG_ETC1_ASSERT(etc1_decode_value(diff, inten, selector, p0) == (uint)c_plus_delta);
- }
-#endif
- // Best packed values (low byte) and errors (high byte) for the other two components under this configuration.
- const uint16* pInverse_table = g_etc1_inverse_lookup[x & 0xFF];
- uint16 p1 = pInverse_table[c1];
- uint16 p2 = pInverse_table[c2];
-
- if ((diff) && (pBase_color5_unscaled))
- { // the other two components must also be reachable from the base color with an in-range delta
- int delta1 = (p1 & 0xFF) - static_cast<int>(pBase_color5_unscaled->c[s_next_comp[i]]);
- int delta2 = (p2 & 0xFF) - static_cast<int>(pBase_color5_unscaled->c[s_next_comp[i + 1]]);
- if ((delta1 < cETC1ColorDeltaMin) || (delta1 > cETC1ColorDeltaMax) || (delta2 < cETC1ColorDeltaMin) || (delta2 > cETC1ColorDeltaMax))
- {
- if (*pTable == 0xFFFF)
- break;
- continue;
- }
- }
-
- const uint trial_error = rg_etc1::square(c_plus_delta - pColor[i]) + rg_etc1::square(p1 >> 8) + rg_etc1::square(p2 >> 8);
- if (trial_error < best_error)
- {
- best_error = trial_error;
- best_x = x;
- best_packed_c1 = p1 & 0xFF;
- best_packed_c2 = p2 & 0xFF;
- best_i = i;
- if (!best_error)
- goto found_perfect_match;
- }
- } while (*pTable != 0xFFFF);
- }
- }
-found_perfect_match:
- // No configuration satisfied the constraints: report cUINT32_MAX and leave `results` untouched.
- if (best_error == cUINT32_MAX)
- return best_error;
-
- best_error *= num_colors; // the search measured a single pixel; scale to the whole group
-
- results.m_n = num_colors;
- results.m_block_color4 = !(best_x & 1); // color4 (absolute) mode is the inverse of the diff bit
- results.m_block_inten_table = (best_x >> 1) & 7;
- memset(results.m_pSelectors, (best_x >> 4) & 3, num_colors); // every pixel uses the same selector
-
- const uint best_packed_c0 = (best_x >> 8) & 255;
- results.m_block_color_unscaled[best_i] = static_cast<uint8>(best_packed_c0);
- results.m_block_color_unscaled[s_next_comp[best_i]] = static_cast<uint8>(best_packed_c1);
- results.m_block_color_unscaled[s_next_comp[best_i + 1]] = static_cast<uint8>(best_packed_c2);
- results.m_error = best_error;
-
- return best_error;
- }
-
- // Function originally from RYG's public domain real-time DXT1 compressor, modified for 555. Writes a dithered, 5-bit-per-channel quantized copy of the 16 pixels in `block` to `dest`.
- static void dither_block_555(color_quad_u8* dest, const color_quad_u8* block)
- {
- int err[8],*ep1 = err,*ep2 = err+4; // two rows of four error terms: ep1 is the row being written, ep2 the row above
- uint8 *quant = g_quant5_tab+8; // skip the 8 leading padding entries so slightly negative indices stay inside the table
-
- memset(dest, 0xFF, sizeof(color_quad_u8)*16); // only channels 0..2 are written below, so the fourth byte of every pixel stays 0xFF
-
- // process channels separately
- for(int ch=0;ch<3;ch++)
- {
- uint8* bp = (uint8*)block;
- uint8* dp = (uint8*)dest;
-
- bp += ch; dp += ch;
- // Byte offsets 0/4/8/12 step across the four pixels of a row; +16 moves to the next row.
- memset(err,0, sizeof(err));
- for(int y = 0; y < 4; y++)
- { // error diffusion with Floyd-Steinberg-style weights (7, 3, 5, 1 over 16) taken from the left pixel and the row above
- // pixel 0
- dp[ 0] = quant[bp[ 0] + ((3*ep2[1] + 5*ep2[0]) >> 4)];
- ep1[0] = bp[ 0] - dp[ 0];
-
- // pixel 1
- dp[ 4] = quant[bp[ 4] + ((7*ep1[0] + 3*ep2[2] + 5*ep2[1] + ep2[0]) >> 4)];
- ep1[1] = bp[ 4] - dp[ 4];
-
- // pixel 2
- dp[ 8] = quant[bp[ 8] + ((7*ep1[1] + 3*ep2[3] + 5*ep2[2] + ep2[1]) >> 4)];
- ep1[2] = bp[ 8] - dp[ 8];
-
- // pixel 3
- dp[12] = quant[bp[12] + ((7*ep1[2] + 5*ep2[3] + ep2[2]) >> 4)];
- ep1[3] = bp[12] - dp[12];
-
- // advance to next line
- int* tmp = ep1; ep1 = ep2; ep2 = tmp;
- bp += 16;
- dp += 16;
- }
- }
- }
-
- unsigned int pack_etc1_block(void* pETC1_block, const unsigned int* pSrc_pixels_rgba, etc1_pack_params& pack_params)
- { // Compresses 16 source pixels (one 4x4 block) into the 8-byte ETC1 block at pETC1_block and returns the accumulated squared error of the chosen encoding, truncated to unsigned int.
- const color_quad_u8* pSrc_pixels = reinterpret_cast<const color_quad_u8*>(pSrc_pixels_rgba);
- etc1_block& dst_block = *static_cast<etc1_block*>(pETC1_block);
-
-#ifdef RG_ETC1_BUILD_DEBUG
- // Ensure all alpha values are 0xFF.
- for (uint i = 0; i < 16; i++)
- {
- RG_ETC1_ASSERT(pSrc_pixels[i].a == 255);
- }
-#endif
-
- color_quad_u8 src_pixel0(pSrc_pixels[0]); // NOTE(review): not referenced again in this function
-
- // Check for solid block.
- const uint32 first_pixel_u32 = pSrc_pixels->m_u32;
- int r;
- for (r = 15; r >= 1; --r)
- if (pSrc_pixels[r].m_u32 != first_pixel_u32)
- break;
- if (!r) // the loop reached 0 without a mismatch, so all 16 pixels are identical: use the table-driven solid packer (per-pixel error times 16)
- return static_cast<unsigned int>(16 * pack_etc1_block_solid_color(dst_block, &pSrc_pixels[0].r, pack_params));
- // With dithering enabled, the rest of the function works on a dithered copy instead of the caller's pixels.
- color_quad_u8 dithered_pixels[16];
- if (pack_params.m_dithering)
- {
- dither_block_555(dithered_pixels, pSrc_pixels);
- pSrc_pixels = dithered_pixels;
- }
-
- etc1_optimizer optimizer;
-
- uint64 best_error = cUINT64_MAX;
- uint best_flip = false, best_use_color4 = false;
- // best_results[0..1]: the winning per-subblock results, each with its own selector storage.
- uint8 best_selectors[2][8];
- etc1_optimizer::results best_results[2];
- for (uint i = 0; i < 2; i++)
- {
- best_results[i].m_n = 8;
- best_results[i].m_pSelectors = best_selectors[i];
- }
- // results[0..1]: per-subblock results for the combination being tried; results[2]: scratch for the constrained solid-color attempt.
- uint8 selectors[3][8];
- etc1_optimizer::results results[3];
-
- for (uint i = 0; i < 3; i++)
- {
- results[i].m_n = 8;
- results[i].m_pSelectors = selectors[i];
- }
-
- color_quad_u8 subblock_pixels[8];
-
- etc1_optimizer::params params(pack_params);
- params.m_num_src_pixels = 8;
- params.m_pSrc_pixels = subblock_pixels;
- // Try all four combinations of flip and color4 (absolute) vs. differential mode.
- for (uint flip = 0; flip < 2; flip++)
- {
- for (uint use_color4 = 0; use_color4 < 2; use_color4++)
- {
- uint64 trial_error = 0;
-
- uint subblock;
- for (subblock = 0; subblock < 2; subblock++)
- {
- if (flip) // flipped: each subblock is 8 consecutive source pixels
- memcpy(subblock_pixels, pSrc_pixels + subblock * 8, sizeof(color_quad_u8) * 8);
- else
- { // non-flipped: gather source indices 0,4,8,12 then 1,5,9,13, offset by subblock * 2
- const color_quad_u8* pSrc_col = pSrc_pixels + subblock * 2;
- subblock_pixels[0] = pSrc_col[0]; subblock_pixels[1] = pSrc_col[4]; subblock_pixels[2] = pSrc_col[8]; subblock_pixels[3] = pSrc_col[12];
- subblock_pixels[4] = pSrc_col[1]; subblock_pixels[5] = pSrc_col[5]; subblock_pixels[6] = pSrc_col[9]; subblock_pixels[7] = pSrc_col[13];
- }
- // At medium quality and above, a solid-colored subblock also gets a table-driven candidate stored in results[2].
- results[2].m_error = cUINT64_MAX;
- if ((params.m_quality >= cMediumQuality) && ((subblock) || (use_color4)))
- {
- const uint32 subblock_pixel0_u32 = subblock_pixels[0].m_u32;
- for (r = 7; r >= 1; --r)
- if (subblock_pixels[r].m_u32 != subblock_pixel0_u32)
- break;
- if (!r)
- {
- pack_etc1_block_solid_color_constrained(results[2], 8, &subblock_pixels[0].r, pack_params, !use_color4, (subblock && !use_color4) ? &results[0].m_block_color_unscaled : NULL);
- }
- }
-
- params.m_use_color4 = (use_color4 != 0);
- params.m_constrain_against_base_color5 = false;
- // In differential mode the second subblock must stay within the delta range of the first subblock's base color.
- if ((!use_color4) && (subblock))
- {
- params.m_constrain_against_base_color5 = true;
- params.m_base_color5 = results[0].m_block_color_unscaled;
- }
- // The quality level selects which offsets the optimizer is given to scan.
- if (params.m_quality == cHighQuality)
- {
- static const int s_scan_delta_0_to_4[] = { -4, -3, -2, -1, 0, 1, 2, 3, 4 };
- params.m_scan_delta_size = RG_ETC1_ARRAY_SIZE(s_scan_delta_0_to_4);
- params.m_pScan_deltas = s_scan_delta_0_to_4;
- }
- else if (params.m_quality == cMediumQuality)
- {
- static const int s_scan_delta_0_to_1[] = { -1, 0, 1 };
- params.m_scan_delta_size = RG_ETC1_ARRAY_SIZE(s_scan_delta_0_to_1);
- params.m_pScan_deltas = s_scan_delta_0_to_1;
- }
- else
- {
- static const int s_scan_delta_0[] = { 0 };
- params.m_scan_delta_size = RG_ETC1_ARRAY_SIZE(s_scan_delta_0);
- params.m_pScan_deltas = s_scan_delta_0;
- }
-
- optimizer.init(params, results[subblock]);
- if (!optimizer.compute()) // on failure, leave the subblock loop early; the combination is then skipped by the `subblock < 2` check below
- break;
-
- if (params.m_quality >= cMediumQuality)
- {
- // TODO: Fix fairly arbitrary/unrefined thresholds that control how far away to scan for potentially better solutions.
- const uint refinement_error_thresh0 = 3000;
- const uint refinement_error_thresh1 = 6000;
- if (results[subblock].m_error > refinement_error_thresh0)
- { // error still high: run the optimizer again with offsets outside the range already scanned
- if (params.m_quality == cMediumQuality)
- {
- static const int s_scan_delta_2_to_3[] = { -3, -2, 2, 3 };
- params.m_scan_delta_size = RG_ETC1_ARRAY_SIZE(s_scan_delta_2_to_3);
- params.m_pScan_deltas = s_scan_delta_2_to_3;
- }
- else
- {
- static const int s_scan_delta_5_to_5[] = { -5, 5 };
- static const int s_scan_delta_5_to_8[] = { -8, -7, -6, -5, 5, 6, 7, 8 };
- if (results[subblock].m_error > refinement_error_thresh1)
- {
- params.m_scan_delta_size = RG_ETC1_ARRAY_SIZE(s_scan_delta_5_to_8);
- params.m_pScan_deltas = s_scan_delta_5_to_8;
- }
- else
- {
- params.m_scan_delta_size = RG_ETC1_ARRAY_SIZE(s_scan_delta_5_to_5);
- params.m_pScan_deltas = s_scan_delta_5_to_5;
- }
- }
-
- if (!optimizer.compute())
- break;
- }
- // Prefer the solid-color candidate when it beat the optimizer (results[2].m_error stays cUINT64_MAX when no candidate was produced).
- if (results[2].m_error < results[subblock].m_error)
- results[subblock] = results[2];
- }
-
- trial_error += results[subblock].m_error;
- if (trial_error >= best_error) // already no better than the best combination so far: abandon this one
- break;
- }
- // subblock < 2 means the loop above exited early, so this combination is not a candidate.
- if (subblock < 2)
- continue;
-
- best_error = trial_error;
- best_results[0] = results[0];
- best_results[1] = results[1];
- best_flip = flip;
- best_use_color4 = use_color4;
-
- } // use_color4
-
- } // flip
- // Encode the winning configuration. In differential mode the second base color is stored as a per-channel delta from the first.
- int dr = best_results[1].m_block_color_unscaled.r - best_results[0].m_block_color_unscaled.r;
- int dg = best_results[1].m_block_color_unscaled.g - best_results[0].m_block_color_unscaled.g;
- int db = best_results[1].m_block_color_unscaled.b - best_results[0].m_block_color_unscaled.b;
- RG_ETC1_ASSERT(best_use_color4 || ((rg_etc1::minimum(dr, dg, db) >= cETC1ColorDeltaMin) && (rg_etc1::maximum(dr, dg, db) <= cETC1ColorDeltaMax)));
-
- if (best_use_color4)
- { // absolute mode: two 4-bit colors per byte, subblock 0 in the high nibble
- dst_block.m_bytes[0] = static_cast<uint8>(best_results[1].m_block_color_unscaled.r | (best_results[0].m_block_color_unscaled.r << 4));
- dst_block.m_bytes[1] = static_cast<uint8>(best_results[1].m_block_color_unscaled.g | (best_results[0].m_block_color_unscaled.g << 4));
- dst_block.m_bytes[2] = static_cast<uint8>(best_results[1].m_block_color_unscaled.b | (best_results[0].m_block_color_unscaled.b << 4));
- }
- else
- { // differential mode: 5-bit base in the top bits, 3-bit delta in the low bits
- if (dr < 0) dr += 8; dst_block.m_bytes[0] = static_cast<uint8>((best_results[0].m_block_color_unscaled.r << 3) | dr); // two statements on one line: only `dr += 8` is conditional; it maps -4..-1 onto the 3-bit codes 4..7
- if (dg < 0) dg += 8; dst_block.m_bytes[1] = static_cast<uint8>((best_results[0].m_block_color_unscaled.g << 3) | dg);
- if (db < 0) db += 8; dst_block.m_bytes[2] = static_cast<uint8>((best_results[0].m_block_color_unscaled.b << 3) | db);
- }
- // Byte 3: intensity table of subblock 0 (bits 5-7) and subblock 1 (bits 2-4), diff bit (bit 1, set when not color4), flip bit (bit 0).
- dst_block.m_bytes[3] = static_cast<uint8>( (best_results[1].m_block_inten_table << 2) | (best_results[0].m_block_inten_table << 5) | ((~best_use_color4 & 1) << 1) | best_flip );
- // Build the two 16-bit selector bit planes: selector0 collects the low bit of each ETC1 selector code, selector1 the high bit.
- uint selector0 = 0, selector1 = 0;
- if (best_flip)
- {
- // flipped:
- // { 0, 0 }, { 1, 0 }, { 2, 0 }, { 3, 0 },
- // { 0, 1 }, { 1, 1 }, { 2, 1 }, { 3, 1 }
- //
- // { 0, 2 }, { 1, 2 }, { 2, 2 }, { 3, 2 },
- // { 0, 3 }, { 1, 3 }, { 2, 3 }, { 3, 3 }
- const uint8* pSelectors0 = best_results[0].m_pSelectors;
- const uint8* pSelectors1 = best_results[1].m_pSelectors;
- for (int x = 3; x >= 0; --x)
- {
- uint b;
- b = g_selector_index_to_etc1[pSelectors1[4 + x]];
- selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1);
-
- b = g_selector_index_to_etc1[pSelectors1[x]];
- selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1);
-
- b = g_selector_index_to_etc1[pSelectors0[4 + x]];
- selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1);
-
- b = g_selector_index_to_etc1[pSelectors0[x]];
- selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1);
- }
- }
- else
- {
- // non-flipped:
- // { 0, 0 }, { 0, 1 }, { 0, 2 }, { 0, 3 },
- // { 1, 0 }, { 1, 1 }, { 1, 2 }, { 1, 3 }
- //
- // { 2, 0 }, { 2, 1 }, { 2, 2 }, { 2, 3 },
- // { 3, 0 }, { 3, 1 }, { 3, 2 }, { 3, 3 }
- for (int subblock = 1; subblock >= 0; --subblock)
- {
- const uint8* pSelectors = best_results[subblock].m_pSelectors + 4; // start at the second group of four selectors, then step back to the first
- for (uint i = 0; i < 2; i++)
- {
- uint b;
- b = g_selector_index_to_etc1[pSelectors[3]];
- selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1);
-
- b = g_selector_index_to_etc1[pSelectors[2]];
- selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1);
-
- b = g_selector_index_to_etc1[pSelectors[1]];
- selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1);
-
- b = g_selector_index_to_etc1[pSelectors[0]];
- selector0 = (selector0 << 1) | (b & 1);selector1 = (selector1 << 1) | (b >> 1);
-
- pSelectors -= 4;
- }
- }
- }
- // Store each bit plane high byte first: selector1 in bytes 4-5, selector0 in bytes 6-7.
- dst_block.m_bytes[4] = static_cast<uint8>(selector1 >> 8); dst_block.m_bytes[5] = static_cast<uint8>(selector1 & 0xFF);
- dst_block.m_bytes[6] = static_cast<uint8>(selector0 >> 8); dst_block.m_bytes[7] = static_cast<uint8>(selector0 & 0xFF);
-
- return static_cast<unsigned int>(best_error);
- }
-
-} // namespace rg_etc1
+// File: rg_etc1.cpp - Fast, high quality ETC1 block packer/unpacker - Rich Geldreich <richgel99@gmail.com> +// Please see ZLIB license at the end of rg_etc1.h. +// +// For more information Ericsson Texture Compression (ETC/ETC1), see: +// http://www.khronos.org/registry/gles/extensions/OES/OES_compressed_ETC1_RGB8_texture.txt +// +// v1.03 - 5/12/13 - Initial public release +#include "rg_etc1.h" + +#include <stdlib.h> +#include <string.h> +#include <assert.h> +//#include <stdio.h> +#include <math.h> +#include <stdio.h> +#pragma warning (disable: 4201) // nonstandard extension used : nameless struct/union + +#if defined(_DEBUG) || defined(DEBUG) +#define RG_ETC1_BUILD_DEBUG +#endif + +#define RG_ETC1_ASSERT assert + +namespace rg_etc1 +{ + + inline long labs(long val) { + return val < 0 ? -val : val; + } + + inline int intabs(int val) { + + return val<0?-val:val; + } + + typedef unsigned char uint8; + typedef unsigned short uint16; + typedef unsigned int uint; + typedef unsigned int uint32; + typedef long long int64; + typedef unsigned long long uint64; + + const uint32 cUINT32_MAX = 0xFFFFFFFFU; + const uint64 cUINT64_MAX = 0xFFFFFFFFFFFFFFFFULL; //0xFFFFFFFFFFFFFFFFui64; + + template<typename T> inline T minimum(T a, T b) { return (a < b) ? a : b; } + template<typename T> inline T minimum(T a, T b, T c) { return minimum(minimum(a, b), c); } + template<typename T> inline T maximum(T a, T b) { return (a > b) ? a : b; } + template<typename T> inline T maximum(T a, T b, T c) { return maximum(maximum(a, b), c); } + template<typename T> inline T clamp(T value, T low, T high) { return (value < low) ? low : ((value > high) ? 
high : value); } + template<typename T> inline T square(T value) { return value * value; } + template<typename T> inline void zero_object(T& obj) { memset((void*)&obj, 0, sizeof(obj)); } + template<typename T> inline void zero_this(T* pObj) { memset((void*)pObj, 0, sizeof(*pObj)); } + + template<class T, size_t N> T decay_array_to_subtype(T (&a)[N]); + +#define RG_ETC1_ARRAY_SIZE(X) (sizeof(X) / sizeof(decay_array_to_subtype(X))) + + enum eNoClamp { cNoClamp }; + + struct color_quad_u8 + { + static inline int clamp(int v) { if (v & 0xFFFFFF00U) v = (~(static_cast<int>(v) >> 31)) & 0xFF; return v; } + + struct component_traits { enum { cSigned = false, cFloat = false, cMin = 0U, cMax = 255U }; }; + + public: + typedef unsigned char component_t; + typedef int parameter_t; + + enum { cNumComps = 4 }; + + union + { + struct + { + component_t r; + component_t g; + component_t b; + component_t a; + }; + + component_t c[cNumComps]; + + uint32 m_u32; + }; + + inline color_quad_u8() + { + } + + inline color_quad_u8(const color_quad_u8& other) : m_u32(other.m_u32) + { + } + + explicit inline color_quad_u8(parameter_t y, parameter_t alpha = component_traits::cMax) + { + set(y, alpha); + } + + inline color_quad_u8(parameter_t red, parameter_t green, parameter_t blue, parameter_t alpha = component_traits::cMax) + { + set(red, green, blue, alpha); + } + + explicit inline color_quad_u8(eNoClamp, parameter_t y, parameter_t alpha = component_traits::cMax) + { + set_noclamp_y_alpha(y, alpha); + } + + inline color_quad_u8(eNoClamp, parameter_t red, parameter_t green, parameter_t blue, parameter_t alpha = component_traits::cMax) + { + set_noclamp_rgba(red, green, blue, alpha); + } + + inline void clear() + { + m_u32 = 0; + } + + inline color_quad_u8& operator= (const color_quad_u8& other) + { + m_u32 = other.m_u32; + return *this; + } + + inline color_quad_u8& set_rgb(const color_quad_u8& other) + { + r = other.r; + g = other.g; + b = other.b; + return *this; + } + + inline 
color_quad_u8& operator= (parameter_t y) + { + set(y, component_traits::cMax); + return *this; + } + + inline color_quad_u8& set(parameter_t y, parameter_t alpha = component_traits::cMax) + { + y = clamp(y); + alpha = clamp(alpha); + r = static_cast<component_t>(y); + g = static_cast<component_t>(y); + b = static_cast<component_t>(y); + a = static_cast<component_t>(alpha); + return *this; + } + + inline color_quad_u8& set_noclamp_y_alpha(parameter_t y, parameter_t alpha = component_traits::cMax) + { + RG_ETC1_ASSERT( (y >= component_traits::cMin) && (y <= component_traits::cMax) ); + RG_ETC1_ASSERT( (alpha >= component_traits::cMin) && (alpha <= component_traits::cMax) ); + + r = static_cast<component_t>(y); + g = static_cast<component_t>(y); + b = static_cast<component_t>(y); + a = static_cast<component_t>(alpha); + return *this; + } + + inline color_quad_u8& set(parameter_t red, parameter_t green, parameter_t blue, parameter_t alpha = component_traits::cMax) + { + r = static_cast<component_t>(clamp(red)); + g = static_cast<component_t>(clamp(green)); + b = static_cast<component_t>(clamp(blue)); + a = static_cast<component_t>(clamp(alpha)); + return *this; + } + + inline color_quad_u8& set_noclamp_rgba(parameter_t red, parameter_t green, parameter_t blue, parameter_t alpha) + { + RG_ETC1_ASSERT( (red >= component_traits::cMin) && (red <= component_traits::cMax) ); + RG_ETC1_ASSERT( (green >= component_traits::cMin) && (green <= component_traits::cMax) ); + RG_ETC1_ASSERT( (blue >= component_traits::cMin) && (blue <= component_traits::cMax) ); + RG_ETC1_ASSERT( (alpha >= component_traits::cMin) && (alpha <= component_traits::cMax) ); + + r = static_cast<component_t>(red); + g = static_cast<component_t>(green); + b = static_cast<component_t>(blue); + a = static_cast<component_t>(alpha); + return *this; + } + + inline color_quad_u8& set_noclamp_rgb(parameter_t red, parameter_t green, parameter_t blue) + { + RG_ETC1_ASSERT( (red >= component_traits::cMin) && (red <= 
component_traits::cMax) ); + RG_ETC1_ASSERT( (green >= component_traits::cMin) && (green <= component_traits::cMax) ); + RG_ETC1_ASSERT( (blue >= component_traits::cMin) && (blue <= component_traits::cMax) ); + + r = static_cast<component_t>(red); + g = static_cast<component_t>(green); + b = static_cast<component_t>(blue); + return *this; + } + + static inline parameter_t get_min_comp() { return component_traits::cMin; } + static inline parameter_t get_max_comp() { return component_traits::cMax; } + static inline bool get_comps_are_signed() { return component_traits::cSigned; } + + inline component_t operator[] (uint i) const { RG_ETC1_ASSERT(i < cNumComps); return c[i]; } + inline component_t& operator[] (uint i) { RG_ETC1_ASSERT(i < cNumComps); return c[i]; } + + inline color_quad_u8& set_component(uint i, parameter_t f) + { + RG_ETC1_ASSERT(i < cNumComps); + + c[i] = static_cast<component_t>(clamp(f)); + + return *this; + } + + inline color_quad_u8& set_grayscale(parameter_t l) + { + component_t x = static_cast<component_t>(clamp(l)); + c[0] = x; + c[1] = x; + c[2] = x; + return *this; + } + + inline color_quad_u8& clamp(const color_quad_u8& l, const color_quad_u8& h) + { + for (uint i = 0; i < cNumComps; i++) + c[i] = static_cast<component_t>(rg_etc1::clamp<parameter_t>(c[i], l[i], h[i])); + return *this; + } + + inline color_quad_u8& clamp(parameter_t l, parameter_t h) + { + for (uint i = 0; i < cNumComps; i++) + c[i] = static_cast<component_t>(rg_etc1::clamp<parameter_t>(c[i], l, h)); + return *this; + } + + // Returns CCIR 601 luma (consistent with color_utils::RGB_To_Y). + inline parameter_t get_luma() const + { + return static_cast<parameter_t>((19595U * r + 38470U * g + 7471U * b + 32768U) >> 16U); + } + + // Returns REC 709 luma. 
+ inline parameter_t get_luma_rec709() const + { + return static_cast<parameter_t>((13938U * r + 46869U * g + 4729U * b + 32768U) >> 16U); + } + + inline uint squared_distance_rgb(const color_quad_u8& c) const + { + return rg_etc1::square(r - c.r) + rg_etc1::square(g - c.g) + rg_etc1::square(b - c.b); + } + + inline uint squared_distance_rgba(const color_quad_u8& c) const + { + return rg_etc1::square(r - c.r) + rg_etc1::square(g - c.g) + rg_etc1::square(b - c.b) + rg_etc1::square(a - c.a); + } + + inline bool rgb_equals(const color_quad_u8& rhs) const + { + return (r == rhs.r) && (g == rhs.g) && (b == rhs.b); + } + + inline bool operator== (const color_quad_u8& rhs) const + { + return m_u32 == rhs.m_u32; + } + + color_quad_u8& operator+= (const color_quad_u8& other) + { + for (uint i = 0; i < 4; i++) + c[i] = static_cast<component_t>(clamp(c[i] + other.c[i])); + return *this; + } + + color_quad_u8& operator-= (const color_quad_u8& other) + { + for (uint i = 0; i < 4; i++) + c[i] = static_cast<component_t>(clamp(c[i] - other.c[i])); + return *this; + } + + friend color_quad_u8 operator+ (const color_quad_u8& lhs, const color_quad_u8& rhs) + { + color_quad_u8 result(lhs); + result += rhs; + return result; + } + + friend color_quad_u8 operator- (const color_quad_u8& lhs, const color_quad_u8& rhs) + { + color_quad_u8 result(lhs); + result -= rhs; + return result; + } + }; // class color_quad_u8 + + struct vec3F + { + float m_s[3]; + + inline vec3F() { } + inline vec3F(float s) { m_s[0] = s; m_s[1] = s; m_s[2] = s; } + inline vec3F(float x, float y, float z) { m_s[0] = x; m_s[1] = y; m_s[2] = z; } + + inline float operator[] (uint i) const { RG_ETC1_ASSERT(i < 3); return m_s[i]; } + + inline vec3F& operator += (const vec3F& other) { for (uint i = 0; i < 3; i++) m_s[i] += other.m_s[i]; return *this; } + + inline vec3F& operator *= (float s) { for (uint i = 0; i < 3; i++) m_s[i] *= s; return *this; } + }; + + enum etc_constants + { + cETC1BytesPerBlock = 8U, + + 
cETC1SelectorBits = 2U, + cETC1SelectorValues = 1U << cETC1SelectorBits, + cETC1SelectorMask = cETC1SelectorValues - 1U, + + cETC1BlockShift = 2U, + cETC1BlockSize = 1U << cETC1BlockShift, + + cETC1LSBSelectorIndicesBitOffset = 0, + cETC1MSBSelectorIndicesBitOffset = 16, + + cETC1FlipBitOffset = 32, + cETC1DiffBitOffset = 33, + + cETC1IntenModifierNumBits = 3, + cETC1IntenModifierValues = 1 << cETC1IntenModifierNumBits, + cETC1RightIntenModifierTableBitOffset = 34, + cETC1LeftIntenModifierTableBitOffset = 37, + + // Base+Delta encoding (5 bit bases, 3 bit delta) + cETC1BaseColorCompNumBits = 5, + cETC1BaseColorCompMax = 1 << cETC1BaseColorCompNumBits, + + cETC1DeltaColorCompNumBits = 3, + cETC1DeltaColorComp = 1 << cETC1DeltaColorCompNumBits, + cETC1DeltaColorCompMax = 1 << cETC1DeltaColorCompNumBits, + + cETC1BaseColor5RBitOffset = 59, + cETC1BaseColor5GBitOffset = 51, + cETC1BaseColor5BBitOffset = 43, + + cETC1DeltaColor3RBitOffset = 56, + cETC1DeltaColor3GBitOffset = 48, + cETC1DeltaColor3BBitOffset = 40, + + // Absolute (non-delta) encoding (two 4-bit per component bases) + cETC1AbsColorCompNumBits = 4, + cETC1AbsColorCompMax = 1 << cETC1AbsColorCompNumBits, + + cETC1AbsColor4R1BitOffset = 60, + cETC1AbsColor4G1BitOffset = 52, + cETC1AbsColor4B1BitOffset = 44, + + cETC1AbsColor4R2BitOffset = 56, + cETC1AbsColor4G2BitOffset = 48, + cETC1AbsColor4B2BitOffset = 40, + + cETC1ColorDeltaMin = -4, + cETC1ColorDeltaMax = 3, + + // Delta3: + // 0 1 2 3 4 5 6 7 + // 000 001 010 011 100 101 110 111 + // 0 1 2 3 -4 -3 -2 -1 + }; + + static uint8 g_quant5_tab[256+16]; + + + static const int g_etc1_inten_tables[cETC1IntenModifierValues][cETC1SelectorValues] = + { + { -8, -2, 2, 8 }, { -17, -5, 5, 17 }, { -29, -9, 9, 29 }, { -42, -13, 13, 42 }, + { -60, -18, 18, 60 }, { -80, -24, 24, 80 }, { -106, -33, 33, 106 }, { -183, -47, 47, 183 } + }; + + static const uint8 g_etc1_to_selector_index[cETC1SelectorValues] = { 2, 3, 1, 0 }; + static const uint8 
g_selector_index_to_etc1[cETC1SelectorValues] = { 3, 2, 0, 1 }; + + // Given an ETC1 diff/inten_table/selector, and an 8-bit desired color, this table encodes the best packed_color in the low byte, and the abs error in the high byte. + static uint16 g_etc1_inverse_lookup[2*8*4][256]; // [diff/inten_table/selector][desired_color] + + // g_color8_to_etc_block_config[color][table_index] = Supplies for each 8-bit color value a list of packed ETC1 diff/intensity table/selectors/packed_colors that map to that color. + // To pack: diff | (inten << 1) | (selector << 4) | (packed_c << 8) + static const uint16 g_color8_to_etc_block_config_0_255[2][33] = + { + { 0x0000, 0x0010, 0x0002, 0x0012, 0x0004, 0x0014, 0x0006, 0x0016, 0x0008, 0x0018, 0x000A, 0x001A, 0x000C, 0x001C, 0x000E, 0x001E, + 0x0001, 0x0011, 0x0003, 0x0013, 0x0005, 0x0015, 0x0007, 0x0017, 0x0009, 0x0019, 0x000B, 0x001B, 0x000D, 0x001D, 0x000F, 0x001F, 0xFFFF }, + { 0x0F20, 0x0F30, 0x0E32, 0x0F22, 0x0E34, 0x0F24, 0x0D36, 0x0F26, 0x0C38, 0x0E28, 0x0B3A, 0x0E2A, 0x093C, 0x0E2C, 0x053E, 0x0D2E, + 0x1E31, 0x1F21, 0x1D33, 0x1F23, 0x1C35, 0x1E25, 0x1A37, 0x1E27, 0x1839, 0x1D29, 0x163B, 0x1C2B, 0x133D, 0x1B2D, 0x093F, 0x1A2F, 0xFFFF }, + }; + + // Really only [254][11]. 
+ static const uint16 g_color8_to_etc_block_config_1_to_254[254][12] = + { + { 0x021C, 0x0D0D, 0xFFFF }, { 0x0020, 0x0021, 0x0A0B, 0x061F, 0xFFFF }, { 0x0113, 0x0217, 0xFFFF }, { 0x0116, 0x031E, + 0x0B0E, 0x0405, 0xFFFF }, { 0x0022, 0x0204, 0x050A, 0x0023, 0xFFFF }, { 0x0111, 0x0319, 0x0809, 0x170F, 0xFFFF }, { + 0x0303, 0x0215, 0x0607, 0xFFFF }, { 0x0030, 0x0114, 0x0408, 0x0031, 0x0201, 0x051D, 0xFFFF }, { 0x0100, 0x0024, 0x0306, + 0x0025, 0x041B, 0x0E0D, 0xFFFF }, { 0x021A, 0x0121, 0x0B0B, 0x071F, 0xFFFF }, { 0x0213, 0x0317, 0xFFFF }, { 0x0112, + 0x0505, 0xFFFF }, { 0x0026, 0x070C, 0x0123, 0x0027, 0xFFFF }, { 0x0211, 0x0909, 0xFFFF }, { 0x0110, 0x0315, 0x0707, + 0x0419, 0x180F, 0xFFFF }, { 0x0218, 0x0131, 0x0301, 0x0403, 0x061D, 0xFFFF }, { 0x0032, 0x0202, 0x0033, 0x0125, 0x051B, + 0x0F0D, 0xFFFF }, { 0x0028, 0x031C, 0x0221, 0x0029, 0xFFFF }, { 0x0120, 0x0313, 0x0C0B, 0x081F, 0xFFFF }, { 0x0605, + 0x0417, 0xFFFF }, { 0x0216, 0x041E, 0x0C0E, 0x0223, 0x0127, 0xFFFF }, { 0x0122, 0x0304, 0x060A, 0x0311, 0x0A09, 0xFFFF + }, { 0x0519, 0x190F, 0xFFFF }, { 0x002A, 0x0231, 0x0503, 0x0415, 0x0807, 0x002B, 0x071D, 0xFFFF }, { 0x0130, 0x0214, + 0x0508, 0x0401, 0x0133, 0x0225, 0x061B, 0xFFFF }, { 0x0200, 0x0124, 0x0406, 0x0321, 0x0129, 0x100D, 0xFFFF }, { 0x031A, + 0x0D0B, 0x091F, 0xFFFF }, { 0x0413, 0x0705, 0x0517, 0xFFFF }, { 0x0212, 0x0034, 0x0323, 0x0035, 0x0227, 0xFFFF }, { + 0x0126, 0x080C, 0x0B09, 0xFFFF }, { 0x0411, 0x0619, 0x1A0F, 0xFFFF }, { 0x0210, 0x0331, 0x0603, 0x0515, 0x0907, 0x012B, + 0xFFFF }, { 0x0318, 0x002C, 0x0501, 0x0233, 0x0325, 0x071B, 0x002D, 0x081D, 0xFFFF }, { 0x0132, 0x0302, 0x0229, 0x110D, + 0xFFFF }, { 0x0128, 0x041C, 0x0421, 0x0E0B, 0x0A1F, 0xFFFF }, { 0x0220, 0x0513, 0x0617, 0xFFFF }, { 0x0135, 0x0805, + 0x0327, 0xFFFF }, { 0x0316, 0x051E, 0x0D0E, 0x0423, 0xFFFF }, { 0x0222, 0x0404, 0x070A, 0x0511, 0x0719, 0x0C09, 0x1B0F, + 0xFFFF }, { 0x0703, 0x0615, 0x0A07, 0x022B, 0xFFFF }, { 0x012A, 0x0431, 0x0601, 0x0333, 0x012D, 0x091D, 0xFFFF }, { + 
0x0230, 0x0314, 0x0036, 0x0608, 0x0425, 0x0037, 0x0329, 0x081B, 0x120D, 0xFFFF }, { 0x0300, 0x0224, 0x0506, 0x0521, + 0x0F0B, 0x0B1F, 0xFFFF }, { 0x041A, 0x0613, 0x0717, 0xFFFF }, { 0x0235, 0x0905, 0xFFFF }, { 0x0312, 0x0134, 0x0523, + 0x0427, 0xFFFF }, { 0x0226, 0x090C, 0x002E, 0x0611, 0x0D09, 0x002F, 0xFFFF }, { 0x0715, 0x0B07, 0x0819, 0x032B, 0x1C0F, + 0xFFFF }, { 0x0310, 0x0531, 0x0701, 0x0803, 0x022D, 0x0A1D, 0xFFFF }, { 0x0418, 0x012C, 0x0433, 0x0525, 0x0137, 0x091B, + 0x130D, 0xFFFF }, { 0x0232, 0x0402, 0x0621, 0x0429, 0xFFFF }, { 0x0228, 0x051C, 0x0713, 0x100B, 0x0C1F, 0xFFFF }, { + 0x0320, 0x0335, 0x0A05, 0x0817, 0xFFFF }, { 0x0623, 0x0527, 0xFFFF }, { 0x0416, 0x061E, 0x0E0E, 0x0711, 0x0E09, 0x012F, + 0xFFFF }, { 0x0322, 0x0504, 0x080A, 0x0919, 0x1D0F, 0xFFFF }, { 0x0631, 0x0903, 0x0815, 0x0C07, 0x042B, 0x032D, 0x0B1D, + 0xFFFF }, { 0x022A, 0x0801, 0x0533, 0x0625, 0x0237, 0x0A1B, 0xFFFF }, { 0x0330, 0x0414, 0x0136, 0x0708, 0x0721, 0x0529, + 0x140D, 0xFFFF }, { 0x0400, 0x0324, 0x0606, 0x0038, 0x0039, 0x110B, 0x0D1F, 0xFFFF }, { 0x051A, 0x0813, 0x0B05, 0x0917, + 0xFFFF }, { 0x0723, 0x0435, 0x0627, 0xFFFF }, { 0x0412, 0x0234, 0x0F09, 0x022F, 0xFFFF }, { 0x0326, 0x0A0C, 0x012E, + 0x0811, 0x0A19, 0x1E0F, 0xFFFF }, { 0x0731, 0x0A03, 0x0915, 0x0D07, 0x052B, 0xFFFF }, { 0x0410, 0x0901, 0x0633, 0x0725, + 0x0337, 0x0B1B, 0x042D, 0x0C1D, 0xFFFF }, { 0x0518, 0x022C, 0x0629, 0x150D, 0xFFFF }, { 0x0332, 0x0502, 0x0821, 0x0139, + 0x120B, 0x0E1F, 0xFFFF }, { 0x0328, 0x061C, 0x0913, 0x0A17, 0xFFFF }, { 0x0420, 0x0535, 0x0C05, 0x0727, 0xFFFF }, { + 0x0823, 0x032F, 0xFFFF }, { 0x0516, 0x071E, 0x0F0E, 0x0911, 0x0B19, 0x1009, 0x1F0F, 0xFFFF }, { 0x0422, 0x0604, 0x090A, + 0x0B03, 0x0A15, 0x0E07, 0x062B, 0xFFFF }, { 0x0831, 0x0A01, 0x0733, 0x052D, 0x0D1D, 0xFFFF }, { 0x032A, 0x0825, 0x0437, + 0x0729, 0x0C1B, 0x160D, 0xFFFF }, { 0x0430, 0x0514, 0x0236, 0x0808, 0x0921, 0x0239, 0x130B, 0x0F1F, 0xFFFF }, { 0x0500, + 0x0424, 0x0706, 0x0138, 0x0A13, 0x0B17, 0xFFFF }, { 0x061A, 0x0635, 
0x0D05, 0xFFFF }, { 0x0923, 0x0827, 0xFFFF }, { + 0x0512, 0x0334, 0x003A, 0x0A11, 0x1109, 0x003B, 0x042F, 0xFFFF }, { 0x0426, 0x0B0C, 0x022E, 0x0B15, 0x0F07, 0x0C19, + 0x072B, 0xFFFF }, { 0x0931, 0x0B01, 0x0C03, 0x062D, 0x0E1D, 0xFFFF }, { 0x0510, 0x0833, 0x0925, 0x0537, 0x0D1B, 0x170D, + 0xFFFF }, { 0x0618, 0x032C, 0x0A21, 0x0339, 0x0829, 0xFFFF }, { 0x0432, 0x0602, 0x0B13, 0x140B, 0x101F, 0xFFFF }, { + 0x0428, 0x071C, 0x0735, 0x0E05, 0x0C17, 0xFFFF }, { 0x0520, 0x0A23, 0x0927, 0xFFFF }, { 0x0B11, 0x1209, 0x013B, 0x052F, + 0xFFFF }, { 0x0616, 0x081E, 0x0D19, 0xFFFF }, { 0x0522, 0x0704, 0x0A0A, 0x0A31, 0x0D03, 0x0C15, 0x1007, 0x082B, 0x072D, + 0x0F1D, 0xFFFF }, { 0x0C01, 0x0933, 0x0A25, 0x0637, 0x0E1B, 0xFFFF }, { 0x042A, 0x0B21, 0x0929, 0x180D, 0xFFFF }, { + 0x0530, 0x0614, 0x0336, 0x0908, 0x0439, 0x150B, 0x111F, 0xFFFF }, { 0x0600, 0x0524, 0x0806, 0x0238, 0x0C13, 0x0F05, + 0x0D17, 0xFFFF }, { 0x071A, 0x0B23, 0x0835, 0x0A27, 0xFFFF }, { 0x1309, 0x023B, 0x062F, 0xFFFF }, { 0x0612, 0x0434, + 0x013A, 0x0C11, 0x0E19, 0xFFFF }, { 0x0526, 0x0C0C, 0x032E, 0x0B31, 0x0E03, 0x0D15, 0x1107, 0x092B, 0xFFFF }, { 0x0D01, + 0x0A33, 0x0B25, 0x0737, 0x0F1B, 0x082D, 0x101D, 0xFFFF }, { 0x0610, 0x0A29, 0x190D, 0xFFFF }, { 0x0718, 0x042C, 0x0C21, + 0x0539, 0x160B, 0x121F, 0xFFFF }, { 0x0532, 0x0702, 0x0D13, 0x0E17, 0xFFFF }, { 0x0528, 0x081C, 0x0935, 0x1005, 0x0B27, + 0xFFFF }, { 0x0620, 0x0C23, 0x033B, 0x072F, 0xFFFF }, { 0x0D11, 0x0F19, 0x1409, 0xFFFF }, { 0x0716, 0x003C, 0x091E, + 0x0F03, 0x0E15, 0x1207, 0x0A2B, 0x003D, 0xFFFF }, { 0x0622, 0x0804, 0x0B0A, 0x0C31, 0x0E01, 0x0B33, 0x092D, 0x111D, + 0xFFFF }, { 0x0C25, 0x0837, 0x0B29, 0x101B, 0x1A0D, 0xFFFF }, { 0x052A, 0x0D21, 0x0639, 0x170B, 0x131F, 0xFFFF }, { + 0x0630, 0x0714, 0x0436, 0x0A08, 0x0E13, 0x0F17, 0xFFFF }, { 0x0700, 0x0624, 0x0906, 0x0338, 0x0A35, 0x1105, 0xFFFF }, { + 0x081A, 0x0D23, 0x0C27, 0xFFFF }, { 0x0E11, 0x1509, 0x043B, 0x082F, 0xFFFF }, { 0x0712, 0x0534, 0x023A, 0x0F15, 0x1307, + 0x1019, 0x0B2B, 0x013D, 
0xFFFF }, { 0x0626, 0x0D0C, 0x042E, 0x0D31, 0x0F01, 0x1003, 0x0A2D, 0x121D, 0xFFFF }, { 0x0C33, + 0x0D25, 0x0937, 0x111B, 0x1B0D, 0xFFFF }, { 0x0710, 0x0E21, 0x0739, 0x0C29, 0xFFFF }, { 0x0818, 0x052C, 0x0F13, 0x180B, + 0x141F, 0xFFFF }, { 0x0632, 0x0802, 0x0B35, 0x1205, 0x1017, 0xFFFF }, { 0x0628, 0x091C, 0x0E23, 0x0D27, 0xFFFF }, { + 0x0720, 0x0F11, 0x1609, 0x053B, 0x092F, 0xFFFF }, { 0x1119, 0x023D, 0xFFFF }, { 0x0816, 0x013C, 0x0A1E, 0x0E31, 0x1103, + 0x1015, 0x1407, 0x0C2B, 0x0B2D, 0x131D, 0xFFFF }, { 0x0722, 0x0904, 0x0C0A, 0x1001, 0x0D33, 0x0E25, 0x0A37, 0x121B, + 0xFFFF }, { 0x0F21, 0x0D29, 0x1C0D, 0xFFFF }, { 0x062A, 0x0839, 0x190B, 0x151F, 0xFFFF }, { 0x0730, 0x0814, 0x0536, + 0x0B08, 0x1013, 0x1305, 0x1117, 0xFFFF }, { 0x0800, 0x0724, 0x0A06, 0x0438, 0x0F23, 0x0C35, 0x0E27, 0xFFFF }, { 0x091A, + 0x1709, 0x063B, 0x0A2F, 0xFFFF }, { 0x1011, 0x1219, 0x033D, 0xFFFF }, { 0x0812, 0x0634, 0x033A, 0x0F31, 0x1203, 0x1115, + 0x1507, 0x0D2B, 0xFFFF }, { 0x0726, 0x0E0C, 0x052E, 0x1101, 0x0E33, 0x0F25, 0x0B37, 0x131B, 0x0C2D, 0x141D, 0xFFFF }, { + 0x0E29, 0x1D0D, 0xFFFF }, { 0x0810, 0x1021, 0x0939, 0x1A0B, 0x161F, 0xFFFF }, { 0x0918, 0x062C, 0x1113, 0x1217, 0xFFFF + }, { 0x0732, 0x0902, 0x0D35, 0x1405, 0x0F27, 0xFFFF }, { 0x0728, 0x0A1C, 0x1023, 0x073B, 0x0B2F, 0xFFFF }, { 0x0820, + 0x1111, 0x1319, 0x1809, 0xFFFF }, { 0x1303, 0x1215, 0x1607, 0x0E2B, 0x043D, 0xFFFF }, { 0x0916, 0x023C, 0x0B1E, 0x1031, + 0x1201, 0x0F33, 0x0D2D, 0x151D, 0xFFFF }, { 0x0822, 0x0A04, 0x0D0A, 0x1025, 0x0C37, 0x0F29, 0x141B, 0x1E0D, 0xFFFF }, { + 0x1121, 0x0A39, 0x1B0B, 0x171F, 0xFFFF }, { 0x072A, 0x1213, 0x1317, 0xFFFF }, { 0x0830, 0x0914, 0x0636, 0x0C08, 0x0E35, + 0x1505, 0xFFFF }, { 0x0900, 0x0824, 0x0B06, 0x0538, 0x1123, 0x1027, 0xFFFF }, { 0x0A1A, 0x1211, 0x1909, 0x083B, 0x0C2F, + 0xFFFF }, { 0x1315, 0x1707, 0x1419, 0x0F2B, 0x053D, 0xFFFF }, { 0x0912, 0x0734, 0x043A, 0x1131, 0x1301, 0x1403, 0x0E2D, + 0x161D, 0xFFFF }, { 0x0826, 0x0F0C, 0x062E, 0x1033, 0x1125, 0x0D37, 0x151B, 0x1F0D, 
0xFFFF }, { 0x1221, 0x0B39, 0x1029, + 0xFFFF }, { 0x0910, 0x1313, 0x1C0B, 0x181F, 0xFFFF }, { 0x0A18, 0x072C, 0x0F35, 0x1605, 0x1417, 0xFFFF }, { 0x0832, + 0x0A02, 0x1223, 0x1127, 0xFFFF }, { 0x0828, 0x0B1C, 0x1311, 0x1A09, 0x093B, 0x0D2F, 0xFFFF }, { 0x0920, 0x1519, 0x063D, + 0xFFFF }, { 0x1231, 0x1503, 0x1415, 0x1807, 0x102B, 0x0F2D, 0x171D, 0xFFFF }, { 0x0A16, 0x033C, 0x0C1E, 0x1401, 0x1133, + 0x1225, 0x0E37, 0x161B, 0xFFFF }, { 0x0922, 0x0B04, 0x0E0A, 0x1321, 0x1129, 0xFFFF }, { 0x0C39, 0x1D0B, 0x191F, 0xFFFF + }, { 0x082A, 0x1413, 0x1705, 0x1517, 0xFFFF }, { 0x0930, 0x0A14, 0x0736, 0x0D08, 0x1323, 0x1035, 0x1227, 0xFFFF }, { + 0x0A00, 0x0924, 0x0C06, 0x0638, 0x1B09, 0x0A3B, 0x0E2F, 0xFFFF }, { 0x0B1A, 0x1411, 0x1619, 0x073D, 0xFFFF }, { 0x1331, + 0x1603, 0x1515, 0x1907, 0x112B, 0xFFFF }, { 0x0A12, 0x0834, 0x053A, 0x1501, 0x1233, 0x1325, 0x0F37, 0x171B, 0x102D, + 0x181D, 0xFFFF }, { 0x0926, 0x072E, 0x1229, 0xFFFF }, { 0x1421, 0x0D39, 0x1E0B, 0x1A1F, 0xFFFF }, { 0x0A10, 0x1513, + 0x1617, 0xFFFF }, { 0x0B18, 0x082C, 0x1135, 0x1805, 0x1327, 0xFFFF }, { 0x0932, 0x0B02, 0x1423, 0x0B3B, 0x0F2F, 0xFFFF + }, { 0x0928, 0x0C1C, 0x1511, 0x1719, 0x1C09, 0xFFFF }, { 0x0A20, 0x1703, 0x1615, 0x1A07, 0x122B, 0x083D, 0xFFFF }, { + 0x1431, 0x1601, 0x1333, 0x112D, 0x191D, 0xFFFF }, { 0x0B16, 0x043C, 0x0D1E, 0x1425, 0x1037, 0x1329, 0x181B, 0xFFFF }, { + 0x0A22, 0x0C04, 0x0F0A, 0x1521, 0x0E39, 0x1F0B, 0x1B1F, 0xFFFF }, { 0x1613, 0x1717, 0xFFFF }, { 0x092A, 0x1235, 0x1905, + 0xFFFF }, { 0x0A30, 0x0B14, 0x0836, 0x0E08, 0x1523, 0x1427, 0xFFFF }, { 0x0B00, 0x0A24, 0x0D06, 0x0738, 0x1611, 0x1D09, + 0x0C3B, 0x102F, 0xFFFF }, { 0x0C1A, 0x1715, 0x1B07, 0x1819, 0x132B, 0x093D, 0xFFFF }, { 0x1531, 0x1701, 0x1803, 0x122D, + 0x1A1D, 0xFFFF }, { 0x0B12, 0x0934, 0x063A, 0x1433, 0x1525, 0x1137, 0x191B, 0xFFFF }, { 0x0A26, 0x003E, 0x082E, 0x1621, + 0x0F39, 0x1429, 0x003F, 0xFFFF }, { 0x1713, 0x1C1F, 0xFFFF }, { 0x0B10, 0x1335, 0x1A05, 0x1817, 0xFFFF }, { 0x0C18, + 0x092C, 0x1623, 0x1527, 0xFFFF }, 
{ 0x0A32, 0x0C02, 0x1711, 0x1E09, 0x0D3B, 0x112F, 0xFFFF }, { 0x0A28, 0x0D1C, 0x1919, + 0x0A3D, 0xFFFF }, { 0x0B20, 0x1631, 0x1903, 0x1815, 0x1C07, 0x142B, 0x132D, 0x1B1D, 0xFFFF }, { 0x1801, 0x1533, 0x1625, + 0x1237, 0x1A1B, 0xFFFF }, { 0x0C16, 0x053C, 0x0E1E, 0x1721, 0x1529, 0x013F, 0xFFFF }, { 0x0B22, 0x0D04, 0x1039, 0x1D1F, + 0xFFFF }, { 0x1813, 0x1B05, 0x1917, 0xFFFF }, { 0x0A2A, 0x1723, 0x1435, 0x1627, 0xFFFF }, { 0x0B30, 0x0C14, 0x0936, + 0x0F08, 0x1F09, 0x0E3B, 0x122F, 0xFFFF }, { 0x0C00, 0x0B24, 0x0E06, 0x0838, 0x1811, 0x1A19, 0x0B3D, 0xFFFF }, { 0x0D1A, + 0x1731, 0x1A03, 0x1915, 0x1D07, 0x152B, 0xFFFF }, { 0x1901, 0x1633, 0x1725, 0x1337, 0x1B1B, 0x142D, 0x1C1D, 0xFFFF }, { + 0x0C12, 0x0A34, 0x073A, 0x1629, 0x023F, 0xFFFF }, { 0x0B26, 0x013E, 0x092E, 0x1821, 0x1139, 0x1E1F, 0xFFFF }, { 0x1913, + 0x1A17, 0xFFFF }, { 0x0C10, 0x1535, 0x1C05, 0x1727, 0xFFFF }, { 0x0D18, 0x0A2C, 0x1823, 0x0F3B, 0x132F, 0xFFFF }, { + 0x0B32, 0x0D02, 0x1911, 0x1B19, 0xFFFF }, { 0x0B28, 0x0E1C, 0x1B03, 0x1A15, 0x1E07, 0x162B, 0x0C3D, 0xFFFF }, { 0x0C20, + 0x1831, 0x1A01, 0x1733, 0x152D, 0x1D1D, 0xFFFF }, { 0x1825, 0x1437, 0x1729, 0x1C1B, 0x033F, 0xFFFF }, { 0x0D16, 0x063C, + 0x0F1E, 0x1921, 0x1239, 0x1F1F, 0xFFFF }, { 0x0C22, 0x0E04, 0x1A13, 0x1B17, 0xFFFF }, { 0x1635, 0x1D05, 0xFFFF }, { + 0x0B2A, 0x1923, 0x1827, 0xFFFF }, { 0x0C30, 0x0D14, 0x0A36, 0x1A11, 0x103B, 0x142F, 0xFFFF }, { 0x0D00, 0x0C24, 0x0F06, + 0x0938, 0x1B15, 0x1F07, 0x1C19, 0x172B, 0x0D3D, 0xFFFF }, { 0x0E1A, 0x1931, 0x1B01, 0x1C03, 0x162D, 0x1E1D, 0xFFFF }, { + 0x1833, 0x1925, 0x1537, 0x1D1B, 0xFFFF }, { 0x0D12, 0x0B34, 0x083A, 0x1A21, 0x1339, 0x1829, 0x043F, 0xFFFF }, { 0x0C26, + 0x023E, 0x0A2E, 0x1B13, 0xFFFF }, { 0x1735, 0x1E05, 0x1C17, 0xFFFF }, { 0x0D10, 0x1A23, 0x1927, 0xFFFF }, { 0x0E18, + 0x0B2C, 0x1B11, 0x113B, 0x152F, 0xFFFF }, { 0x0C32, 0x0E02, 0x1D19, 0x0E3D, 0xFFFF }, { 0x0C28, 0x0F1C, 0x1A31, 0x1D03, + 0x1C15, 0x182B, 0x172D, 0x1F1D, 0xFFFF }, { 0x0D20, 0x1C01, 0x1933, 0x1A25, 0x1637, 0x1E1B, 
0xFFFF }, { 0x1B21, 0x1929,
   0x053F, 0xFFFF }, { 0x0E16, 0x073C, 0x1439, 0xFFFF }, { 0x0D22, 0x0F04, 0x1C13, 0x1F05, 0x1D17, 0xFFFF }, { 0x1B23,
   0x1835, 0x1A27, 0xFFFF }, { 0x0C2A, 0x123B, 0x162F, 0xFFFF }, { 0x0D30, 0x0E14, 0x0B36, 0x1C11, 0x1E19, 0x0F3D, 0xFFFF
   }, { 0x0E00, 0x0D24, 0x0A38, 0x1B31, 0x1E03, 0x1D15, 0x192B, 0xFFFF }, { 0x0F1A, 0x1D01, 0x1A33, 0x1B25, 0x1737, 0x1F1B,
   0x182D, 0xFFFF }, { 0x1A29, 0x063F, 0xFFFF }, { 0x0E12, 0x0C34, 0x093A, 0x1C21, 0x1539, 0xFFFF }, { 0x0D26, 0x033E,
   0x0B2E, 0x1D13, 0x1E17, 0xFFFF }, { 0x1935, 0x1B27, 0xFFFF }, { 0x0E10, 0x1C23, 0x133B, 0x172F, 0xFFFF }, { 0x0F18,
   0x0C2C, 0x1D11, 0x1F19, 0xFFFF }, { 0x0D32, 0x0F02, 0x1F03, 0x1E15, 0x1A2B, 0x103D, 0xFFFF }, { 0x0D28, 0x1C31, 0x1E01,
   0x1B33, 0x192D, 0xFFFF }, { 0x0E20, 0x1C25, 0x1837, 0x1B29, 0x073F, 0xFFFF }, { 0x1D21, 0x1639, 0xFFFF }, { 0x0F16,
   0x083C, 0x1E13, 0x1F17, 0xFFFF }, { 0x0E22, 0x1A35, 0xFFFF }, { 0x1D23, 0x1C27, 0xFFFF }, { 0x0D2A, 0x1E11, 0x143B,
   0x182F, 0xFFFF }, { 0x0E30, 0x0F14, 0x0C36, 0x1F15, 0x1B2B, 0x113D, 0xFFFF }, { 0x0F00, 0x0E24, 0x0B38, 0x1D31, 0x1F01,
   0x1A2D, 0xFFFF }, { 0x1C33, 0x1D25, 0x1937, 0xFFFF }, { 0x1E21, 0x1739, 0x1C29, 0x083F, 0xFFFF }, { 0x0F12, 0x0D34,
   0x0A3A, 0x1F13, 0xFFFF }, { 0x0E26, 0x043E, 0x0C2E, 0x1B35, 0xFFFF }, { 0x1E23, 0x1D27, 0xFFFF }, { 0x0F10, 0x1F11,
   0x153B, 0x192F, 0xFFFF }, { 0x0D2C, 0x123D, 0xFFFF },
};

// Accessors for the fields of one encoded ETC1 block.
// All accessors below read and write m_bytes only.
struct etc1_block
{
    // big endian uint64:
    // bit ofs:  56  48  40  32  24  16  8   0
    // byte ofs: b0, b1, b2, b3, b4, b5, b6, b7
    union
    {
        uint64 m_uint64;
        uint8 m_bytes[8];
    };

    // NOTE(review): these arrays sit after the 8-byte union, so sizeof(etc1_block)
    // is larger than cETC1BytesPerBlock. Nothing visible here uses them; confirm
    // that clear() is never applied to a raw 8-byte block buffer.
    uint8 m_low_color[2];
    uint8 m_high_color[2];

    enum { cNumSelectorBytes = 4 };
    uint8 m_selectors[cNumSelectorBytes];

    // Hands the whole object to zero_this() (presumably zero-fills it; defined elsewhere).
    inline void clear()
    {
        zero_this(this);
    }

    // Reads a num-bit field starting at bit offset ofs. The field must lie inside a
    // single byte; bit offset 0 is the low bit of m_bytes[7].
    inline uint get_byte_bits(uint ofs, uint num) const
    {
        RG_ETC1_ASSERT((ofs + num) <= 64U);
        RG_ETC1_ASSERT(num && (num <= 8U));
        RG_ETC1_ASSERT((ofs >> 3) == ((ofs + num - 1) >> 3));

        const uint which_byte = 7 - (ofs >> 3);
        const uint shift = ofs & 7;
        const uint field_mask = (1 << num) - 1;
        return (m_bytes[which_byte] >> shift) & field_mask;
    }

    // Writes bits into the num-bit field starting at bit offset ofs; same layout
    // rules as get_byte_bits().
    inline void set_byte_bits(uint ofs, uint num, uint bits)
    {
        RG_ETC1_ASSERT((ofs + num) <= 64U);
        RG_ETC1_ASSERT(num && (num < 32U));
        RG_ETC1_ASSERT((ofs >> 3) == ((ofs + num - 1) >> 3));
        RG_ETC1_ASSERT(bits < (1U << num));

        const uint which_byte = 7 - (ofs >> 3);
        const uint shift = ofs & 7;
        const uint field_mask = (1 << num) - 1;

        uint8& target = m_bytes[which_byte];
        target &= ~(field_mask << shift);   // clear the old field
        target |= (bits << shift);          // drop in the new value
    }

    // false = left/right subblocks
    // true = upper/lower subblocks
    inline bool get_flip_bit() const
    {
        return (m_bytes[3] & 1) != 0;
    }

    // Stores the flip flag in bit 0 of m_bytes[3].
    inline void set_flip_bit(bool flip)
    {
        uint8& flags = m_bytes[3];
        flags &= ~1;
        flags |= static_cast<uint8>(flip);
    }

    // The diff flag lives in bit 1 of m_bytes[3].
    inline bool get_diff_bit() const
    {
        return (m_bytes[3] & 2) != 0;
    }

    inline void set_diff_bit(bool diff)
    {
        uint8& flags = m_bytes[3];
        flags &= ~2;
        flags |= (static_cast<uint>(diff) << 1);
    }

    // Returns intensity modifier table (0-7) used by subblock subblock_id.
    // subblock_id=0 left/top (CW 1), 1=right/bottom (CW 2)
    inline uint get_inten_table(uint subblock_id) const
    {
        RG_ETC1_ASSERT(subblock_id < 2);
        // Subblock 0 uses bits 5..7 of m_bytes[3], subblock 1 uses bits 2..4.
        const uint shift = subblock_id ? 2 : 5;
        return (m_bytes[3] >> shift) & 7;
    }

    // Sets intensity modifier table (0-7) used by subblock subblock_id (0 or 1)
    inline void set_inten_table(uint subblock_id, uint t)
    {
        RG_ETC1_ASSERT(subblock_id < 2);
        RG_ETC1_ASSERT(t < 8);
        const uint shift = subblock_id ? 2 : 5;

        uint8& flags = m_bytes[3];
        flags &= ~(7 << shift);
        flags |= (t << shift);
    }

    // Returned selector value ranges from 0-3 and is a direct index into g_etc1_inten_tables.
    inline uint get_selector(uint x, uint y) const
    {
        RG_ETC1_ASSERT((x | y) < 4);

        // One bit per pixel in each plane; the MSB plane sits two bytes before the LSB plane.
        const uint pixel_bit = x * 4 + y;
        const uint shift = pixel_bit & 7;
        const uint8* pLsb_byte = &m_bytes[7 - (pixel_bit >> 3)];

        const uint low = (pLsb_byte[0] >> shift) & 1;
        const uint high = (pLsb_byte[-2] >> shift) & 1;

        return g_etc1_to_selector_index[low | (high << 1)];
    }

    // Selector "val" ranges from 0-3 and is a direct index into g_etc1_inten_tables.
    inline void set_selector(uint x, uint y, uint val)
    {
        RG_ETC1_ASSERT((x | y | val) < 4);

        const uint pixel_bit = x * 4 + y;
        const uint shift = pixel_bit & 7;
        const uint bit_mask = 1 << shift;
        uint8* pLsb_byte = &m_bytes[7 - (pixel_bit >> 3)];

        // Translate the table index into the 2-bit code stored in the block.
        const uint code = g_selector_index_to_etc1[val];
        const uint low = code & 1;
        const uint high = code >> 1;

        pLsb_byte[0] &= ~bit_mask;
        pLsb_byte[0] |= (low << shift);

        pLsb_byte[-2] &= ~bit_mask;
        pLsb_byte[-2] |= (high << shift);
    }

    // Stores a packed 4:4:4 base color (r in bits 8..11, g in 4..7, b in 0..3).
    // idx 0 selects the first base color, any other value the second.
    inline void set_base4_color(uint idx, uint16 c)
    {
        const uint r_ofs = idx ? cETC1AbsColor4R2BitOffset : cETC1AbsColor4R1BitOffset;
        const uint g_ofs = idx ? cETC1AbsColor4G2BitOffset : cETC1AbsColor4G1BitOffset;
        const uint b_ofs = idx ? cETC1AbsColor4B2BitOffset : cETC1AbsColor4B1BitOffset;

        set_byte_bits(r_ofs, 4, (c >> 8) & 15);
        set_byte_bits(g_ofs, 4, (c >> 4) & 15);
        set_byte_bits(b_ofs, 4, c & 15);
    }

    // Inverse of set_base4_color().
    inline uint16 get_base4_color(uint idx) const
    {
        const uint r_ofs = idx ? cETC1AbsColor4R2BitOffset : cETC1AbsColor4R1BitOffset;
        const uint g_ofs = idx ? cETC1AbsColor4G2BitOffset : cETC1AbsColor4G1BitOffset;
        const uint b_ofs = idx ? cETC1AbsColor4B2BitOffset : cETC1AbsColor4B1BitOffset;

        const uint r = get_byte_bits(r_ofs, 4);
        const uint g = get_byte_bits(g_ofs, 4);
        const uint b = get_byte_bits(b_ofs, 4);
        return static_cast<uint16>(b | (g << 4U) | (r << 8U));
    }

    // Stores a packed 5:5:5 base color (r in bits 10..14, g in 5..9, b in 0..4).
    inline void set_base5_color(uint16 c)
    {
        const uint r = (c >> 10) & 31;
        const uint g = (c >> 5) & 31;
        const uint b = c & 31;
        set_byte_bits(cETC1BaseColor5RBitOffset, 5, r);
        set_byte_bits(cETC1BaseColor5GBitOffset, 5, g);
        set_byte_bits(cETC1BaseColor5BBitOffset, 5, b);
    }

    // Inverse of set_base5_color().
    inline uint16 get_base5_color() const
    {
        const uint r = get_byte_bits(cETC1BaseColor5RBitOffset, 5);
        const uint g = get_byte_bits(cETC1BaseColor5GBitOffset, 5);
        const uint b = get_byte_bits(cETC1BaseColor5BBitOffset, 5);
        return static_cast<uint16>(b | (g << 5U) | (r << 10U));
    }

    // Stores a packed 3:3:3 delta (r in bits 6..8, g in 3..5, b in 0..2).
    void set_delta3_color(uint16 c)
    {
        const uint r = (c >> 6) & 7;
        const uint g = (c >> 3) & 7;
        const uint b = c & 7;
        set_byte_bits(cETC1DeltaColor3RBitOffset, 3, r);
        set_byte_bits(cETC1DeltaColor3GBitOffset, 3, g);
        set_byte_bits(cETC1DeltaColor3BBitOffset, 3, b);
    }

    // Inverse of set_delta3_color().
    inline uint16 get_delta3_color() const
    {
        const uint r = get_byte_bits(cETC1DeltaColor3RBitOffset, 3);
        const uint g = get_byte_bits(cETC1DeltaColor3GBitOffset, 3);
        const uint b = get_byte_bits(cETC1DeltaColor3BBitOffset, 3);
        return static_cast<uint16>(b | (g << 3U) | (r << 6U));
    }

    // Base color 5
    static uint16 pack_color5(const color_quad_u8& color, bool scaled, uint bias = 127U);
    static uint16 pack_color5(uint r, uint g, uint b, bool scaled, uint bias = 127U);

    static color_quad_u8 unpack_color5(uint16 packed_color5, bool scaled, uint alpha = 255U);
    static void unpack_color5(uint& r, uint& g, uint& b, uint16 packed_color, bool scaled);

    static bool unpack_color5(color_quad_u8& result, uint16 packed_color5, uint16 packed_delta3, bool scaled, uint alpha = 255U);
    static bool unpack_color5(uint& r, uint& g, uint& b, uint16 packed_color5, uint16 packed_delta3, bool scaled, uint alpha = 255U);

    // Delta color 3
    // Inputs range from -4 to 3 (cETC1ColorDeltaMin to cETC1ColorDeltaMax)
    static uint16 pack_delta3(int r, int g, int b);

    // Results range from -4 to 3 (cETC1ColorDeltaMin to cETC1ColorDeltaMax)
    static void unpack_delta3(int& r, int& g, int& b, uint16 packed_delta3);

    // Abs color 4
    static uint16 pack_color4(const color_quad_u8& color, bool scaled, uint bias = 127U);
    static uint16 pack_color4(uint r, uint g, uint b, bool scaled, uint bias = 127U);

    static color_quad_u8 unpack_color4(uint16 packed_color4, bool scaled, uint alpha = 255U);
    static void unpack_color4(uint& r, uint& g, uint& b, uint16 packed_color4, bool scaled);

    // subblock colors
    static void get_diff_subblock_colors(color_quad_u8* pDst, uint16 packed_color5, uint table_idx);
    static bool get_diff_subblock_colors(color_quad_u8* pDst, uint16 packed_color5, uint16 packed_delta3, uint table_idx);
    static void get_abs_subblock_colors(color_quad_u8* pDst, uint16 packed_color4, uint table_idx);

    // Expands 4-bit (color4 == true) or 5-bit components of src to 8 bits by
    // replicating the top bits into the low bits. Alpha is copied through.
    static inline void unscaled_to_scaled_color(color_quad_u8& dst, const color_quad_u8& src, bool color4)
    {
        // 4-bit: v | (v << 4).  5-bit: (v >> 2) | (v << 3).
        const uint up = color4 ? 4 : 3;
        const uint down = color4 ? 0 : 2;

        dst.r = (src.r >> down) | (src.r << up);
        dst.g = (src.g >> down) | (src.g << up);
        dst.b = (src.b >> down) | (src.b << up);
        dst.a = src.a;
    }
};

// Returns pointer to sorted array.
+ template<typename T, typename Q> + T* indirect_radix_sort(uint num_indices, T* pIndices0, T* pIndices1, const Q* pKeys, uint key_ofs, uint key_size, bool init_indices) + { + RG_ETC1_ASSERT((key_ofs >= 0) && (key_ofs < sizeof(T))); + RG_ETC1_ASSERT((key_size >= 1) && (key_size <= 4)); + + if (init_indices) + { + T* p = pIndices0; + T* q = pIndices0 + (num_indices >> 1) * 2; + uint i; + for (i = 0; p != q; p += 2, i += 2) + { + p[0] = static_cast<T>(i); + p[1] = static_cast<T>(i + 1); + } + + if (num_indices & 1) + *p = static_cast<T>(i); + } + + uint hist[256 * 4]; + + memset(hist, 0, sizeof(hist[0]) * 256 * key_size); + +#define RG_ETC1_GET_KEY(p) (*(const uint*)((const uint8*)(pKeys + *(p)) + key_ofs)) +#define RG_ETC1_GET_KEY_FROM_INDEX(i) (*(const uint*)((const uint8*)(pKeys + (i)) + key_ofs)) + + if (key_size == 4) + { + T* p = pIndices0; + T* q = pIndices0 + num_indices; + for ( ; p != q; p++) + { + const uint key = RG_ETC1_GET_KEY(p); + + hist[ key & 0xFF]++; + hist[256 + ((key >> 8) & 0xFF)]++; + hist[512 + ((key >> 16) & 0xFF)]++; + hist[768 + ((key >> 24) & 0xFF)]++; + } + } + else if (key_size == 3) + { + T* p = pIndices0; + T* q = pIndices0 + num_indices; + for ( ; p != q; p++) + { + const uint key = RG_ETC1_GET_KEY(p); + + hist[ key & 0xFF]++; + hist[256 + ((key >> 8) & 0xFF)]++; + hist[512 + ((key >> 16) & 0xFF)]++; + } + } + else if (key_size == 2) + { + T* p = pIndices0; + T* q = pIndices0 + (num_indices >> 1) * 2; + + for ( ; p != q; p += 2) + { + const uint key0 = RG_ETC1_GET_KEY(p); + const uint key1 = RG_ETC1_GET_KEY(p+1); + + hist[ key0 & 0xFF]++; + hist[256 + ((key0 >> 8) & 0xFF)]++; + + hist[ key1 & 0xFF]++; + hist[256 + ((key1 >> 8) & 0xFF)]++; + } + + if (num_indices & 1) + { + const uint key = RG_ETC1_GET_KEY(p); + + hist[ key & 0xFF]++; + hist[256 + ((key >> 8) & 0xFF)]++; + } + } + else + { + RG_ETC1_ASSERT(key_size == 1); + if (key_size != 1) + return NULL; + + T* p = pIndices0; + T* q = pIndices0 + (num_indices >> 1) * 2; + + for ( ; 
p != q; p += 2) + { + const uint key0 = RG_ETC1_GET_KEY(p); + const uint key1 = RG_ETC1_GET_KEY(p+1); + + hist[key0 & 0xFF]++; + hist[key1 & 0xFF]++; + } + + if (num_indices & 1) + { + const uint key = RG_ETC1_GET_KEY(p); + + hist[key & 0xFF]++; + } + } + + T* pCur = pIndices0; + T* pNew = pIndices1; + + for (uint pass = 0; pass < key_size; pass++) + { + const uint* pHist = &hist[pass << 8]; + + uint offsets[256]; + + uint cur_ofs = 0; + for (uint i = 0; i < 256; i += 2) + { + offsets[i] = cur_ofs; + cur_ofs += pHist[i]; + + offsets[i+1] = cur_ofs; + cur_ofs += pHist[i+1]; + } + + const uint pass_shift = pass << 3; + + T* p = pCur; + T* q = pCur + (num_indices >> 1) * 2; + + for ( ; p != q; p += 2) + { + uint index0 = p[0]; + uint index1 = p[1]; + + uint c0 = (RG_ETC1_GET_KEY_FROM_INDEX(index0) >> pass_shift) & 0xFF; + uint c1 = (RG_ETC1_GET_KEY_FROM_INDEX(index1) >> pass_shift) & 0xFF; + + if (c0 == c1) + { + uint dst_offset0 = offsets[c0]; + + offsets[c0] = dst_offset0 + 2; + + pNew[dst_offset0] = static_cast<T>(index0); + pNew[dst_offset0 + 1] = static_cast<T>(index1); + } + else + { + uint dst_offset0 = offsets[c0]++; + uint dst_offset1 = offsets[c1]++; + + pNew[dst_offset0] = static_cast<T>(index0); + pNew[dst_offset1] = static_cast<T>(index1); + } + } + + if (num_indices & 1) + { + uint index = *p; + uint c = (RG_ETC1_GET_KEY_FROM_INDEX(index) >> pass_shift) & 0xFF; + + uint dst_offset = offsets[c]; + offsets[c] = dst_offset + 1; + + pNew[dst_offset] = static_cast<T>(index); + } + + T* t = pCur; + pCur = pNew; + pNew = t; + } + + return pCur; + } + +#undef RG_ETC1_GET_KEY +#undef RG_ETC1_GET_KEY_FROM_INDEX + + uint16 etc1_block::pack_color5(const color_quad_u8& color, bool scaled, uint bias) + { + return pack_color5(color.r, color.g, color.b, scaled, bias); + } + + uint16 etc1_block::pack_color5(uint r, uint g, uint b, bool scaled, uint bias) + { + if (scaled) + { + r = (r * 31U + bias) / 255U; + g = (g * 31U + bias) / 255U; + b = (b * 31U + bias) / 255U; + } 
+ + r = rg_etc1::minimum(r, 31U); + g = rg_etc1::minimum(g, 31U); + b = rg_etc1::minimum(b, 31U); + + return static_cast<uint16>(b | (g << 5U) | (r << 10U)); + } + + color_quad_u8 etc1_block::unpack_color5(uint16 packed_color5, bool scaled, uint alpha) + { + uint b = packed_color5 & 31U; + uint g = (packed_color5 >> 5U) & 31U; + uint r = (packed_color5 >> 10U) & 31U; + + if (scaled) + { + b = (b << 3U) | (b >> 2U); + g = (g << 3U) | (g >> 2U); + r = (r << 3U) | (r >> 2U); + } + + return color_quad_u8(cNoClamp, r, g, b, rg_etc1::minimum(alpha, 255U)); + } + + void etc1_block::unpack_color5(uint& r, uint& g, uint& b, uint16 packed_color5, bool scaled) + { + color_quad_u8 c(unpack_color5(packed_color5, scaled, 0)); + r = c.r; + g = c.g; + b = c.b; + } + + bool etc1_block::unpack_color5(color_quad_u8& result, uint16 packed_color5, uint16 packed_delta3, bool scaled, uint alpha) + { + int dc_r, dc_g, dc_b; + unpack_delta3(dc_r, dc_g, dc_b, packed_delta3); + + int b = (packed_color5 & 31U) + dc_b; + int g = ((packed_color5 >> 5U) & 31U) + dc_g; + int r = ((packed_color5 >> 10U) & 31U) + dc_r; + + bool success = true; + if (static_cast<uint>(r | g | b) > 31U) + { + success = false; + r = rg_etc1::clamp<int>(r, 0, 31); + g = rg_etc1::clamp<int>(g, 0, 31); + b = rg_etc1::clamp<int>(b, 0, 31); + } + + if (scaled) + { + b = (b << 3U) | (b >> 2U); + g = (g << 3U) | (g >> 2U); + r = (r << 3U) | (r >> 2U); + } + + result.set_noclamp_rgba(r, g, b, rg_etc1::minimum(alpha, 255U)); + return success; + } + + bool etc1_block::unpack_color5(uint& r, uint& g, uint& b, uint16 packed_color5, uint16 packed_delta3, bool scaled, uint alpha) + { + color_quad_u8 result; + const bool success = unpack_color5(result, packed_color5, packed_delta3, scaled, alpha); + r = result.r; + g = result.g; + b = result.b; + return success; + } + + uint16 etc1_block::pack_delta3(int r, int g, int b) + { + RG_ETC1_ASSERT((r >= cETC1ColorDeltaMin) && (r <= cETC1ColorDeltaMax)); + RG_ETC1_ASSERT((g >= 
cETC1ColorDeltaMin) && (g <= cETC1ColorDeltaMax)); + RG_ETC1_ASSERT((b >= cETC1ColorDeltaMin) && (b <= cETC1ColorDeltaMax)); + if (r < 0) r += 8; + if (g < 0) g += 8; + if (b < 0) b += 8; + return static_cast<uint16>(b | (g << 3) | (r << 6)); + } + + void etc1_block::unpack_delta3(int& r, int& g, int& b, uint16 packed_delta3) + { + r = (packed_delta3 >> 6) & 7; + g = (packed_delta3 >> 3) & 7; + b = packed_delta3 & 7; + if (r >= 4) r -= 8; + if (g >= 4) g -= 8; + if (b >= 4) b -= 8; + } + + uint16 etc1_block::pack_color4(const color_quad_u8& color, bool scaled, uint bias) + { + return pack_color4(color.r, color.g, color.b, scaled, bias); + } + + uint16 etc1_block::pack_color4(uint r, uint g, uint b, bool scaled, uint bias) + { + if (scaled) + { + r = (r * 15U + bias) / 255U; + g = (g * 15U + bias) / 255U; + b = (b * 15U + bias) / 255U; + } + + r = rg_etc1::minimum(r, 15U); + g = rg_etc1::minimum(g, 15U); + b = rg_etc1::minimum(b, 15U); + + return static_cast<uint16>(b | (g << 4U) | (r << 8U)); + } + + color_quad_u8 etc1_block::unpack_color4(uint16 packed_color4, bool scaled, uint alpha) + { + uint b = packed_color4 & 15U; + uint g = (packed_color4 >> 4U) & 15U; + uint r = (packed_color4 >> 8U) & 15U; + + if (scaled) + { + b = (b << 4U) | b; + g = (g << 4U) | g; + r = (r << 4U) | r; + } + + return color_quad_u8(cNoClamp, r, g, b, rg_etc1::minimum(alpha, 255U)); + } + + void etc1_block::unpack_color4(uint& r, uint& g, uint& b, uint16 packed_color4, bool scaled) + { + color_quad_u8 c(unpack_color4(packed_color4, scaled, 0)); + r = c.r; + g = c.g; + b = c.b; + } + + void etc1_block::get_diff_subblock_colors(color_quad_u8* pDst, uint16 packed_color5, uint table_idx) + { + RG_ETC1_ASSERT(table_idx < cETC1IntenModifierValues); + const int *pInten_modifer_table = &g_etc1_inten_tables[table_idx][0]; + + uint r, g, b; + unpack_color5(r, g, b, packed_color5, true); + + const int ir = static_cast<int>(r), ig = static_cast<int>(g), ib = static_cast<int>(b); + + const int y0 = 
pInten_modifer_table[0]; + pDst[0].set(ir + y0, ig + y0, ib + y0); + + const int y1 = pInten_modifer_table[1]; + pDst[1].set(ir + y1, ig + y1, ib + y1); + + const int y2 = pInten_modifer_table[2]; + pDst[2].set(ir + y2, ig + y2, ib + y2); + + const int y3 = pInten_modifer_table[3]; + pDst[3].set(ir + y3, ig + y3, ib + y3); + } + + bool etc1_block::get_diff_subblock_colors(color_quad_u8* pDst, uint16 packed_color5, uint16 packed_delta3, uint table_idx) + { + RG_ETC1_ASSERT(table_idx < cETC1IntenModifierValues); + const int *pInten_modifer_table = &g_etc1_inten_tables[table_idx][0]; + + uint r, g, b; + bool success = unpack_color5(r, g, b, packed_color5, packed_delta3, true); + + const int ir = static_cast<int>(r), ig = static_cast<int>(g), ib = static_cast<int>(b); + + const int y0 = pInten_modifer_table[0]; + pDst[0].set(ir + y0, ig + y0, ib + y0); + + const int y1 = pInten_modifer_table[1]; + pDst[1].set(ir + y1, ig + y1, ib + y1); + + const int y2 = pInten_modifer_table[2]; + pDst[2].set(ir + y2, ig + y2, ib + y2); + + const int y3 = pInten_modifer_table[3]; + pDst[3].set(ir + y3, ig + y3, ib + y3); + + return success; + } + + void etc1_block::get_abs_subblock_colors(color_quad_u8* pDst, uint16 packed_color4, uint table_idx) + { + RG_ETC1_ASSERT(table_idx < cETC1IntenModifierValues); + const int *pInten_modifer_table = &g_etc1_inten_tables[table_idx][0]; + + uint r, g, b; + unpack_color4(r, g, b, packed_color4, true); + + const int ir = static_cast<int>(r), ig = static_cast<int>(g), ib = static_cast<int>(b); + + const int y0 = pInten_modifer_table[0]; + pDst[0].set(ir + y0, ig + y0, ib + y0); + + const int y1 = pInten_modifer_table[1]; + pDst[1].set(ir + y1, ig + y1, ib + y1); + + const int y2 = pInten_modifer_table[2]; + pDst[2].set(ir + y2, ig + y2, ib + y2); + + const int y3 = pInten_modifer_table[3]; + pDst[3].set(ir + y3, ig + y3, ib + y3); + } + + bool unpack_etc1_block(const void* pETC1_block, unsigned int* pDst_pixels_rgba, bool preserve_alpha) + { + 
/* The destination is addressed as one color_quad_u8 per 32-bit pixel. */ color_quad_u8* pDst = reinterpret_cast<color_quad_u8*>(pDst_pixels_rgba); + const etc1_block& block = *static_cast<const etc1_block*>(pETC1_block); + + const bool diff_flag = block.get_diff_bit(); + const bool flip_flag = block.get_flip_bit(); + const uint table_index0 = block.get_inten_table(0); + const uint table_index1 = block.get_inten_table(1); + + /* Four candidate colors per sub-block; each pixel's selector indexes into one of these arrays. */ color_quad_u8 subblock_colors0[4]; + color_quad_u8 subblock_colors1[4]; + bool success = true; + + /* Differential mode: both sub-blocks derive from the same base5 color, the second one with delta3 applied. */ if (diff_flag) + { + const uint16 base_color5 = block.get_base5_color(); + const uint16 delta_color3 = block.get_delta3_color(); + etc1_block::get_diff_subblock_colors(subblock_colors0, base_color5, table_index0); + + if (!etc1_block::get_diff_subblock_colors(subblock_colors1, base_color5, delta_color3, table_index1)) + success = false; + } + else + { + /* Individual mode: each sub-block has its own base4 color. */ const uint16 base_color4_0 = block.get_base4_color(0); + etc1_block::get_abs_subblock_colors(subblock_colors0, base_color4_0, table_index0); + + const uint16 base_color4_1 = block.get_base4_color(1); + etc1_block::get_abs_subblock_colors(subblock_colors1, base_color4_1, table_index1); + } + + /* set_rgb() path. NOTE(review): presumably leaves the alpha already stored in the destination untouched; confirm in color_quad_u8. */ if (preserve_alpha) + { + if (flip_flag) + { + /* Flipped: rows 0-1 use sub-block 0, rows 2-3 use sub-block 1. */ for (uint y = 0; y < 2; y++) + { + pDst[0].set_rgb(subblock_colors0[block.get_selector(0, y)]); + pDst[1].set_rgb(subblock_colors0[block.get_selector(1, y)]); + pDst[2].set_rgb(subblock_colors0[block.get_selector(2, y)]); + pDst[3].set_rgb(subblock_colors0[block.get_selector(3, y)]); + pDst += 4; + } + + for (uint y = 2; y < 4; y++) + { + pDst[0].set_rgb(subblock_colors1[block.get_selector(0, y)]); + pDst[1].set_rgb(subblock_colors1[block.get_selector(1, y)]); + pDst[2].set_rgb(subblock_colors1[block.get_selector(2, y)]); + pDst[3].set_rgb(subblock_colors1[block.get_selector(3, y)]); + pDst += 4; + } + } + else + { + /* Not flipped: columns 0-1 use sub-block 0, columns 2-3 use sub-block 1. */ for (uint y = 0; y < 4; y++) + { + pDst[0].set_rgb(subblock_colors0[block.get_selector(0, y)]); + pDst[1].set_rgb(subblock_colors0[block.get_selector(1, y)]); + pDst[2].set_rgb(subblock_colors1[block.get_selector(2, y)]); 
+ pDst[3].set_rgb(subblock_colors1[block.get_selector(3, y)]); + pDst += 4; + } + } + } + else + { + if (flip_flag) + { + // 0000 + // 0000 + // 1111 + // 1111 + for (uint y = 0; y < 2; y++) + { + pDst[0] = subblock_colors0[block.get_selector(0, y)]; + pDst[1] = subblock_colors0[block.get_selector(1, y)]; + pDst[2] = subblock_colors0[block.get_selector(2, y)]; + pDst[3] = subblock_colors0[block.get_selector(3, y)]; + pDst += 4; + } + + for (uint y = 2; y < 4; y++) + { + pDst[0] = subblock_colors1[block.get_selector(0, y)]; + pDst[1] = subblock_colors1[block.get_selector(1, y)]; + pDst[2] = subblock_colors1[block.get_selector(2, y)]; + pDst[3] = subblock_colors1[block.get_selector(3, y)]; + pDst += 4; + } + } + else + { + // 0011 + // 0011 + // 0011 + // 0011 + for (uint y = 0; y < 4; y++) + { + pDst[0] = subblock_colors0[block.get_selector(0, y)]; + pDst[1] = subblock_colors0[block.get_selector(1, y)]; + pDst[2] = subblock_colors1[block.get_selector(2, y)]; + pDst[3] = subblock_colors1[block.get_selector(3, y)]; + pDst += 4; + } + } + } + + return success; + } + + /* One candidate solution position: an unscaled base color, an index into g_etc1_inten_tables, and m_color4, which selects how the unscaled color is widened to 8 bits per component (4-bit replication when true, 5-bit replication when false). */ struct etc1_solution_coordinates + { + /* Default: all-zero color (including alpha), intensity table 0, m_color4 false. */ inline etc1_solution_coordinates() : + m_unscaled_color(0, 0, 0, 0), + m_inten_table(0), + m_color4(false) + { + } + + /* Component constructor: stores r, g, b with alpha 255. */ inline etc1_solution_coordinates(uint r, uint g, uint b, uint inten_table, bool color4) : + m_unscaled_color(r, g, b, 255), + m_inten_table(inten_table), + m_color4(color4) + { + } + + inline etc1_solution_coordinates(const color_quad_u8& c, uint inten_table, bool color4) : + m_unscaled_color(c), + m_inten_table(inten_table), + m_color4(color4) + { + } + + /* Copy construction is implemented through operator=. */ inline etc1_solution_coordinates(const etc1_solution_coordinates& other) + { + *this = other; + } + + inline etc1_solution_coordinates& operator= (const etc1_solution_coordinates& rhs) + { + m_unscaled_color = rhs.m_unscaled_color; + m_inten_table = rhs.m_inten_table; + m_color4 = rhs.m_color4; + return *this; + } + + inline void clear() + { + m_unscaled_color.clear(); + m_inten_table = 0; + 
m_color4 = false; + } + + /* Returns the base color widened to 8 bits per component: v | (v << 4) when m_color4 is set, otherwise (v >> 2) | (v << 3). The intensity table is not applied. */ inline color_quad_u8 get_scaled_color() const + { + int br, bg, bb; + if (m_color4) + { + br = m_unscaled_color.r | (m_unscaled_color.r << 4); + bg = m_unscaled_color.g | (m_unscaled_color.g << 4); + bb = m_unscaled_color.b | (m_unscaled_color.b << 4); + } + else + { + br = (m_unscaled_color.r >> 2) | (m_unscaled_color.r << 3); + bg = (m_unscaled_color.g >> 2) | (m_unscaled_color.g << 3); + bb = (m_unscaled_color.b >> 2) | (m_unscaled_color.b << 3); + } + return color_quad_u8(br, bg, bb); + } + + /* Writes four colors to pBlock_colors[0..3]: the widened base color (same widening as get_scaled_color) with entry i of g_etc1_inten_tables[m_inten_table] added to each of r, g, b and passed to set(). */ inline void get_block_colors(color_quad_u8* pBlock_colors) + { + int br, bg, bb; + if (m_color4) + { + br = m_unscaled_color.r | (m_unscaled_color.r << 4); + bg = m_unscaled_color.g | (m_unscaled_color.g << 4); + bb = m_unscaled_color.b | (m_unscaled_color.b << 4); + } + else + { + br = (m_unscaled_color.r >> 2) | (m_unscaled_color.r << 3); + bg = (m_unscaled_color.g >> 2) | (m_unscaled_color.g << 3); + bb = (m_unscaled_color.b >> 2) | (m_unscaled_color.b << 3); + } + const int* pInten_table = g_etc1_inten_tables[m_inten_table]; + pBlock_colors[0].set(br + pInten_table[0], bg + pInten_table[0], bb + pInten_table[0]); + pBlock_colors[1].set(br + pInten_table[1], bg + pInten_table[1], bb + pInten_table[1]); + pBlock_colors[2].set(br + pInten_table[2], bg + pInten_table[2], bb + pInten_table[2]); + pBlock_colors[3].set(br + pInten_table[3], bg + pInten_table[3], bb + pInten_table[3]); + } + + color_quad_u8 m_unscaled_color; + uint m_inten_table; + bool m_color4; + }; + + /* Searches for a block color and intensity table for a set of source pixels. It is configured with init(params, results) and run with compute(). The copy constructor and copy assignment are declared in the private section, so the class cannot be copied from outside. */ class etc1_optimizer + { + etc1_optimizer(const etc1_optimizer&); + etc1_optimizer& operator= (const etc1_optimizer&); + + public: + etc1_optimizer() + { + clear(); + } + + /* Nulls the four pointer members; the other members are left as they are. */ void clear() + { + m_pParams = NULL; + m_pResult = NULL; + m_pSorted_luma = NULL; + m_pSorted_luma_indices = NULL; + } + + /* Optimizer inputs: the inherited etc1_pack_params plus the source pixels and search settings declared below. */ struct params : etc1_pack_params + { + params() + { + clear(); + } + + /* Copies the base pack parameters and resets only the optimizer-specific fields. */ params(const etc1_pack_params& base_params) : + etc1_pack_params(base_params) + { + clear_optimizer_params(); + } + + 
void clear() + { + etc1_pack_params::clear(); + clear_optimizer_params(); + } + + void clear_optimizer_params() + { + m_num_src_pixels = 0; + m_pSrc_pixels = 0; + + m_use_color4 = false; + static const int s_default_scan_delta[] = { 0 }; + m_pScan_deltas = s_default_scan_delta; + m_scan_delta_size = 1; + + m_base_color5.clear(); + m_constrain_against_base_color5 = false; + } + + uint m_num_src_pixels; + const color_quad_u8* m_pSrc_pixels; + + bool m_use_color4; + const int* m_pScan_deltas; + uint m_scan_delta_size; + + color_quad_u8 m_base_color5; + bool m_constrain_against_base_color5; + }; + + struct results + { + uint64 m_error; + color_quad_u8 m_block_color_unscaled; + uint m_block_inten_table; + uint m_n; + uint8* m_pSelectors; + bool m_block_color4; + + inline results& operator= (const results& rhs) + { + m_block_color_unscaled = rhs.m_block_color_unscaled; + m_block_color4 = rhs.m_block_color4; + m_block_inten_table = rhs.m_block_inten_table; + m_error = rhs.m_error; + RG_ETC1_ASSERT(m_n == rhs.m_n); + memcpy(m_pSelectors, rhs.m_pSelectors, rhs.m_n); + return *this; + } + }; + + void init(const params& params, results& result); + bool compute(); + + private: + struct potential_solution + { + potential_solution() : m_coords(), m_error(cUINT64_MAX), m_valid(false) + { + } + + etc1_solution_coordinates m_coords; + uint8 m_selectors[8]; + uint64 m_error; + bool m_valid; + + void clear() + { + m_coords.clear(); + m_error = cUINT64_MAX; + m_valid = false; + } + }; + + const params* m_pParams; + results* m_pResult; + + int m_limit; + + vec3F m_avg_color; + int m_br, m_bg, m_bb; + uint16 m_luma[8]; + uint32 m_sorted_luma[2][8]; + const uint32* m_pSorted_luma_indices; + uint32* m_pSorted_luma; + + uint8 m_selectors[8]; + uint8 m_best_selectors[8]; + + potential_solution m_best_solution; + potential_solution m_trial_solution; + uint8 m_temp_selectors[8]; + + bool evaluate_solution(const etc1_solution_coordinates& coords, potential_solution& trial_solution, 
potential_solution* pBest_solution); + bool evaluate_solution_fast(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution); + }; + + bool etc1_optimizer::compute() + { + const uint n = m_pParams->m_num_src_pixels; + const int scan_delta_size = m_pParams->m_scan_delta_size; + + // Scan through a subset of the 3D lattice centered around the avg block color trying each 3D (555 or 444) lattice point as a potential block color. + // Each time a better solution is found try to refine the current solution's block color based of the current selectors and intensity table index. + for (int zdi = 0; zdi < scan_delta_size; zdi++) + { + const int zd = m_pParams->m_pScan_deltas[zdi]; + const int mbb = m_bb + zd; + if (mbb < 0) continue; else if (mbb > m_limit) break; + + for (int ydi = 0; ydi < scan_delta_size; ydi++) + { + const int yd = m_pParams->m_pScan_deltas[ydi]; + const int mbg = m_bg + yd; + if (mbg < 0) continue; else if (mbg > m_limit) break; + + for (int xdi = 0; xdi < scan_delta_size; xdi++) + { + const int xd = m_pParams->m_pScan_deltas[xdi]; + const int mbr = m_br + xd; + if (mbr < 0) continue; else if (mbr > m_limit) break; + + etc1_solution_coordinates coords(mbr, mbg, mbb, 0, m_pParams->m_use_color4); + if (m_pParams->m_quality == cHighQuality) + { + if (!evaluate_solution(coords, m_trial_solution, &m_best_solution)) + continue; + } + else + { + if (!evaluate_solution_fast(coords, m_trial_solution, &m_best_solution)) + continue; + } + + // Now we have the input block, the avg. color of the input pixels, a set of trial selector indices, and the block color+intensity index. + // Now, for each component, attempt to refine the current solution by solving a simple linear equation. 
For example, for 4 colors: + // The goal is: + // pixel0 - (block_color+inten_table[selector0]) + pixel1 - (block_color+inten_table[selector1]) + pixel2 - (block_color+inten_table[selector2]) + pixel3 - (block_color+inten_table[selector3]) = 0 + // Rearranging this: + // (pixel0 + pixel1 + pixel2 + pixel3) - (block_color+inten_table[selector0]) - (block_color+inten_table[selector1]) - (block_color+inten_table[selector2]) - (block_color+inten_table[selector3]) = 0 + // (pixel0 + pixel1 + pixel2 + pixel3) - block_color - inten_table[selector0] - block_color-inten_table[selector1] - block_color-inten_table[selector2] - block_color-inten_table[selector3] = 0 + // (pixel0 + pixel1 + pixel2 + pixel3) - 4*block_color - inten_table[selector0] - inten_table[selector1] - inten_table[selector2] - inten_table[selector3] = 0 + // (pixel0 + pixel1 + pixel2 + pixel3) - 4*block_color - (inten_table[selector0] + inten_table[selector1] + inten_table[selector2] + inten_table[selector3]) = 0 + // (pixel0 + pixel1 + pixel2 + pixel3)/4 - block_color - (inten_table[selector0] + inten_table[selector1] + inten_table[selector2] + inten_table[selector3])/4 = 0 + // block_color = (pixel0 + pixel1 + pixel2 + pixel3)/4 - (inten_table[selector0] + inten_table[selector1] + inten_table[selector2] + inten_table[selector3])/4 + // So what this means: + // optimal_block_color = avg_input - avg_inten_delta + // So the optimal block color can be computed by taking the average block color and subtracting the current average of the intensity delta. + // Unfortunately, optimal_block_color must then be quantized to 555 or 444 so it's not always possible to improve matters using this formula. + // Also, the above formula is for unclamped intensity deltas. The actual implementation takes into account clamping. + + const uint max_refinement_trials = (m_pParams->m_quality == cLowQuality) ? 2 : (((xd | yd | zd) == 0) ? 
4 : 2); + for (uint refinement_trial = 0; refinement_trial < max_refinement_trials; refinement_trial++) + { + const uint8* pSelectors = m_best_solution.m_selectors; + const int* pInten_table = g_etc1_inten_tables[m_best_solution.m_coords.m_inten_table]; + + int delta_sum_r = 0, delta_sum_g = 0, delta_sum_b = 0; + const color_quad_u8 base_color(m_best_solution.m_coords.get_scaled_color()); + for (uint r = 0; r < n; r++) + { + const uint s = *pSelectors++; + const int yd = pInten_table[s]; + // Compute actual delta being applied to each pixel, taking into account clamping. + delta_sum_r += rg_etc1::clamp<int>(base_color.r + yd, 0, 255) - base_color.r; + delta_sum_g += rg_etc1::clamp<int>(base_color.g + yd, 0, 255) - base_color.g; + delta_sum_b += rg_etc1::clamp<int>(base_color.b + yd, 0, 255) - base_color.b; + } + if ((!delta_sum_r) && (!delta_sum_g) && (!delta_sum_b)) + break; + const float avg_delta_r_f = static_cast<float>(delta_sum_r) / n; + const float avg_delta_g_f = static_cast<float>(delta_sum_g) / n; + const float avg_delta_b_f = static_cast<float>(delta_sum_b) / n; + const int br1 = rg_etc1::clamp<int>(static_cast<uint>((m_avg_color[0] - avg_delta_r_f) * m_limit / 255.0f + .5f), 0, m_limit); + const int bg1 = rg_etc1::clamp<int>(static_cast<uint>((m_avg_color[1] - avg_delta_g_f) * m_limit / 255.0f + .5f), 0, m_limit); + const int bb1 = rg_etc1::clamp<int>(static_cast<uint>((m_avg_color[2] - avg_delta_b_f) * m_limit / 255.0f + .5f), 0, m_limit); + + bool skip = false; + + if ((mbr == br1) && (mbg == bg1) && (mbb == bb1)) + skip = true; + else if ((br1 == m_best_solution.m_coords.m_unscaled_color.r) && (bg1 == m_best_solution.m_coords.m_unscaled_color.g) && (bb1 == m_best_solution.m_coords.m_unscaled_color.b)) + skip = true; + else if ((m_br == br1) && (m_bg == bg1) && (m_bb == bb1)) + skip = true; + + if (skip) + break; + + etc1_solution_coordinates coords1(br1, bg1, bb1, 0, m_pParams->m_use_color4); + if (m_pParams->m_quality == cHighQuality) + { + if 
(!evaluate_solution(coords1, m_trial_solution, &m_best_solution)) + break; + } + else + { + if (!evaluate_solution_fast(coords1, m_trial_solution, &m_best_solution)) + break; + } + + } // refinement_trial + + } // xdi + } // ydi + } // zdi + + if (!m_best_solution.m_valid) + { + m_pResult->m_error = cUINT32_MAX; + return false; + } + + const uint8* pSelectors = m_best_solution.m_selectors; + +#ifdef RG_ETC1_BUILD_DEBUG + { + color_quad_u8 block_colors[4]; + m_best_solution.m_coords.get_block_colors(block_colors); + + const color_quad_u8* pSrc_pixels = m_pParams->m_pSrc_pixels; + uint64 actual_error = 0; + for (uint i = 0; i < n; i++) + actual_error += pSrc_pixels[i].squared_distance_rgb(block_colors[pSelectors[i]]); + + RG_ETC1_ASSERT(actual_error == m_best_solution.m_error); + } +#endif + + m_pResult->m_error = m_best_solution.m_error; + + m_pResult->m_block_color_unscaled = m_best_solution.m_coords.m_unscaled_color; + m_pResult->m_block_color4 = m_best_solution.m_coords.m_color4; + + m_pResult->m_block_inten_table = m_best_solution.m_coords.m_inten_table; + memcpy(m_pResult->m_pSelectors, pSelectors, n); + m_pResult->m_n = n; + + return true; + } + + void etc1_optimizer::init(const params& p, results& r) + { + // This version is hardcoded for 8 pixel subblocks. + RG_ETC1_ASSERT(p.m_num_src_pixels == 8); + + m_pParams = &p; + m_pResult = &r; + + const uint n = 8; + + m_limit = m_pParams->m_use_color4 ? 
15 : 31; + + vec3F avg_color(0.0f); + + for (uint i = 0; i < n; i++) + { + const color_quad_u8& c = m_pParams->m_pSrc_pixels[i]; + const vec3F fc(c.r, c.g, c.b); + + avg_color += fc; + + m_luma[i] = static_cast<uint16>(c.r + c.g + c.b); + m_sorted_luma[0][i] = i; + } + avg_color *= (1.0f / static_cast<float>(n)); + m_avg_color = avg_color; + + m_br = rg_etc1::clamp<int>(static_cast<uint>(m_avg_color[0] * m_limit / 255.0f + .5f), 0, m_limit); + m_bg = rg_etc1::clamp<int>(static_cast<uint>(m_avg_color[1] * m_limit / 255.0f + .5f), 0, m_limit); + m_bb = rg_etc1::clamp<int>(static_cast<uint>(m_avg_color[2] * m_limit / 255.0f + .5f), 0, m_limit); + + if (m_pParams->m_quality <= cMediumQuality) + { + m_pSorted_luma_indices = indirect_radix_sort(n, m_sorted_luma[0], m_sorted_luma[1], m_luma, 0, sizeof(m_luma[0]), false); + m_pSorted_luma = m_sorted_luma[0]; + if (m_pSorted_luma_indices == m_sorted_luma[0]) + m_pSorted_luma = m_sorted_luma[1]; + + for (uint i = 0; i < n; i++) + m_pSorted_luma[i] = m_luma[m_pSorted_luma_indices[i]]; + } + + m_best_solution.m_coords.clear(); + m_best_solution.m_valid = false; + m_best_solution.m_error = cUINT64_MAX; + } + + bool etc1_optimizer::evaluate_solution(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution) + { + trial_solution.m_valid = false; + + if (m_pParams->m_constrain_against_base_color5) + { + const int dr = coords.m_unscaled_color.r - m_pParams->m_base_color5.r; + const int dg = coords.m_unscaled_color.g - m_pParams->m_base_color5.g; + const int db = coords.m_unscaled_color.b - m_pParams->m_base_color5.b; + + if ((rg_etc1::minimum(dr, dg, db) < cETC1ColorDeltaMin) || (rg_etc1::maximum(dr, dg, db) > cETC1ColorDeltaMax)) + return false; + } + + const color_quad_u8 base_color(coords.get_scaled_color()); + + const uint n = 8; + + trial_solution.m_error = cUINT64_MAX; + + for (uint inten_table = 0; inten_table < cETC1IntenModifierValues; inten_table++) + { + const int* 
pInten_table = g_etc1_inten_tables[inten_table]; + + color_quad_u8 block_colors[4]; + for (uint s = 0; s < 4; s++) + { + const int yd = pInten_table[s]; + block_colors[s].set(base_color.r + yd, base_color.g + yd, base_color.b + yd, 0); + } + + uint64 total_error = 0; + + const color_quad_u8* pSrc_pixels = m_pParams->m_pSrc_pixels; + for (uint c = 0; c < n; c++) + { + const color_quad_u8& src_pixel = *pSrc_pixels++; + + uint best_selector_index = 0; + uint best_error = rg_etc1::square(src_pixel.r - block_colors[0].r) + rg_etc1::square(src_pixel.g - block_colors[0].g) + rg_etc1::square(src_pixel.b - block_colors[0].b); + + uint trial_error = rg_etc1::square(src_pixel.r - block_colors[1].r) + rg_etc1::square(src_pixel.g - block_colors[1].g) + rg_etc1::square(src_pixel.b - block_colors[1].b); + if (trial_error < best_error) + { + best_error = trial_error; + best_selector_index = 1; + } + + trial_error = rg_etc1::square(src_pixel.r - block_colors[2].r) + rg_etc1::square(src_pixel.g - block_colors[2].g) + rg_etc1::square(src_pixel.b - block_colors[2].b); + if (trial_error < best_error) + { + best_error = trial_error; + best_selector_index = 2; + } + + trial_error = rg_etc1::square(src_pixel.r - block_colors[3].r) + rg_etc1::square(src_pixel.g - block_colors[3].g) + rg_etc1::square(src_pixel.b - block_colors[3].b); + if (trial_error < best_error) + { + best_error = trial_error; + best_selector_index = 3; + } + + m_temp_selectors[c] = static_cast<uint8>(best_selector_index); + + total_error += best_error; + if (total_error >= trial_solution.m_error) + break; + } + + if (total_error < trial_solution.m_error) + { + trial_solution.m_error = total_error; + trial_solution.m_coords.m_inten_table = inten_table; + memcpy(trial_solution.m_selectors, m_temp_selectors, 8); + trial_solution.m_valid = true; + } + } + trial_solution.m_coords.m_unscaled_color = coords.m_unscaled_color; + trial_solution.m_coords.m_color4 = m_pParams->m_use_color4; + + bool success = false; + if 
(pBest_solution) + { + if (trial_solution.m_error < pBest_solution->m_error) + { + *pBest_solution = trial_solution; + success = true; + } + } + + return success; + } + + bool etc1_optimizer::evaluate_solution_fast(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution) + { + if (m_pParams->m_constrain_against_base_color5) + { + const int dr = coords.m_unscaled_color.r - m_pParams->m_base_color5.r; + const int dg = coords.m_unscaled_color.g - m_pParams->m_base_color5.g; + const int db = coords.m_unscaled_color.b - m_pParams->m_base_color5.b; + + if ((rg_etc1::minimum(dr, dg, db) < cETC1ColorDeltaMin) || (rg_etc1::maximum(dr, dg, db) > cETC1ColorDeltaMax)) + { + trial_solution.m_valid = false; + return false; + } + } + + const color_quad_u8 base_color(coords.get_scaled_color()); + + const uint n = 8; + + trial_solution.m_error = cUINT64_MAX; + + for (int inten_table = cETC1IntenModifierValues - 1; inten_table >= 0; --inten_table) + { + const int* pInten_table = g_etc1_inten_tables[inten_table]; + + uint block_inten[4]; + color_quad_u8 block_colors[4]; + for (uint s = 0; s < 4; s++) + { + const int yd = pInten_table[s]; + color_quad_u8 block_color(base_color.r + yd, base_color.g + yd, base_color.b + yd, 0); + block_colors[s] = block_color; + block_inten[s] = block_color.r + block_color.g + block_color.b; + } + + // evaluate_solution_fast() enforces/assumesd a total ordering of the input colors along the intensity (1,1,1) axis to more quickly classify the inputs to selectors. + // The inputs colors have been presorted along the projection onto this axis, and ETC1 block colors are always ordered along the intensity axis, so this classification is fast. 
+ // 0 1 2 3 + // 01 12 23 + const uint block_inten_midpoints[3] = { block_inten[0] + block_inten[1], block_inten[1] + block_inten[2], block_inten[2] + block_inten[3] }; + + uint64 total_error = 0; + const color_quad_u8* pSrc_pixels = m_pParams->m_pSrc_pixels; + if ((m_pSorted_luma[n - 1] * 2) < block_inten_midpoints[0]) + { + if (block_inten[0] > m_pSorted_luma[n - 1]) + { + const uint min_error = intabs(block_inten[0] - m_pSorted_luma[n - 1]); + if (min_error >= trial_solution.m_error) + continue; + } + + memset(&m_temp_selectors[0], 0, n); + + for (uint c = 0; c < n; c++) + total_error += block_colors[0].squared_distance_rgb(pSrc_pixels[c]); + } + else if ((m_pSorted_luma[0] * 2) >= block_inten_midpoints[2]) + { + if (m_pSorted_luma[0] > block_inten[3]) + { + const uint min_error = intabs(m_pSorted_luma[0] - block_inten[3]); + if (min_error >= trial_solution.m_error) + continue; + } + + memset(&m_temp_selectors[0], 3, n); + + for (uint c = 0; c < n; c++) + total_error += block_colors[3].squared_distance_rgb(pSrc_pixels[c]); + } + else + { + uint cur_selector = 0, c; + for (c = 0; c < n; c++) + { + const uint y = m_pSorted_luma[c]; + while ((y * 2) >= block_inten_midpoints[cur_selector]) + if (++cur_selector > 2) + goto done; + const uint sorted_pixel_index = m_pSorted_luma_indices[c]; + m_temp_selectors[sorted_pixel_index] = static_cast<uint8>(cur_selector); + total_error += block_colors[cur_selector].squared_distance_rgb(pSrc_pixels[sorted_pixel_index]); + } +done: + while (c < n) + { + const uint sorted_pixel_index = m_pSorted_luma_indices[c]; + m_temp_selectors[sorted_pixel_index] = 3; + total_error += block_colors[3].squared_distance_rgb(pSrc_pixels[sorted_pixel_index]); + ++c; + } + } + + if (total_error < trial_solution.m_error) + { + trial_solution.m_error = total_error; + trial_solution.m_coords.m_inten_table = inten_table; + memcpy(trial_solution.m_selectors, m_temp_selectors, n); + trial_solution.m_valid = true; + if (!total_error) + break; + } + } + 
trial_solution.m_coords.m_unscaled_color = coords.m_unscaled_color; + trial_solution.m_coords.m_color4 = m_pParams->m_use_color4; + + bool success = false; + if (pBest_solution) + { + if (trial_solution.m_error < pBest_solution->m_error) + { + *pBest_solution = trial_solution; + success = true; + } + } + + return success; + } + + static uint etc1_decode_value(uint diff, uint inten, uint selector, uint packed_c) + { + const uint limit = diff ? 32 : 16; limit; + RG_ETC1_ASSERT((diff < 2) && (inten < 8) && (selector < 4) && (packed_c < limit)); + int c; + if (diff) + c = (packed_c >> 2) | (packed_c << 3); + else + c = packed_c | (packed_c << 4); + c += g_etc1_inten_tables[inten][selector]; + c = rg_etc1::clamp<int>(c, 0, 255); + return c; + } + + static inline int mul_8bit(int a, int b) { int t = a*b + 128; return (t + (t >> 8)) >> 8; } + + void pack_etc1_block_init() + { + for (uint diff = 0; diff < 2; diff++) + { + const uint limit = diff ? 32 : 16; + + for (uint inten = 0; inten < 8; inten++) + { + for (uint selector = 0; selector < 4; selector++) + { + const uint inverse_table_index = diff + (inten << 1) + (selector << 4); + for (uint color = 0; color < 256; color++) + { + uint best_error = cUINT32_MAX, best_packed_c = 0; + for (uint packed_c = 0; packed_c < limit; packed_c++) + { + int v = etc1_decode_value(diff, inten, selector, packed_c); + uint err = labs(v - static_cast<int>(color)); + //printf("err: %d - %u = %u\n",v,color,err); + if (err < best_error) + { + best_error = err; + best_packed_c = packed_c; + if (!best_error) + break; + } + } + RG_ETC1_ASSERT(best_error <= 255); + g_etc1_inverse_lookup[inverse_table_index][color] = static_cast<uint16>(best_packed_c | (best_error << 8)); + } + } + } + } + + uint expand5[32]; + for(int i = 0; i < 32; i++) + expand5[i] = (i << 3) | (i >> 2); + + for(int i = 0; i < 256 + 16; i++) + { + int v = clamp<int>(i - 8, 0, 255); + g_quant5_tab[i] = static_cast<uint8>(expand5[mul_8bit(v,31)]); + } + } + + // Packs solid color 
blocks efficiently using a set of small precomputed tables. + // For random 888 inputs, MSE results are better than Erricson's ETC1 packer in "slow" mode ~9.5% of the time, is slightly worse only ~.01% of the time, and is equal the rest of the time. + static uint64 pack_etc1_block_solid_color(etc1_block& block, const uint8* pColor, etc1_pack_params& pack_params) + { + pack_params; + RG_ETC1_ASSERT(g_etc1_inverse_lookup[0][255]); + + static uint s_next_comp[4] = { 1, 2, 0, 1 }; + + uint best_error = cUINT32_MAX, best_i = 0; + int best_x = 0, best_packed_c1 = 0, best_packed_c2 = 0; + + // For each possible 8-bit value, there is a precomputed list of diff/inten/selector configurations that allow that 8-bit value to be encoded with no error. + for (uint i = 0; i < 3; i++) + { + const uint c1 = pColor[s_next_comp[i]], c2 = pColor[s_next_comp[i + 1]]; + + const int delta_range = 1; + for (int delta = -delta_range; delta <= delta_range; delta++) + { + const int c_plus_delta = rg_etc1::clamp<int>(pColor[i] + delta, 0, 255); + + const uint16* pTable; + if (!c_plus_delta) + pTable = g_color8_to_etc_block_config_0_255[0]; + else if (c_plus_delta == 255) + pTable = g_color8_to_etc_block_config_0_255[1]; + else + pTable = g_color8_to_etc_block_config_1_to_254[c_plus_delta - 1]; + + do + { + const uint x = *pTable++; + +#ifdef RG_ETC1_BUILD_DEBUG + const uint diff = x & 1; + const uint inten = (x >> 1) & 7; + const uint selector = (x >> 4) & 3; + const uint p0 = (x >> 8) & 255; + RG_ETC1_ASSERT(etc1_decode_value(diff, inten, selector, p0) == (uint)c_plus_delta); +#endif + + const uint16* pInverse_table = g_etc1_inverse_lookup[x & 0xFF]; + uint16 p1 = pInverse_table[c1]; + uint16 p2 = pInverse_table[c2]; + const uint trial_error = rg_etc1::square(c_plus_delta - pColor[i]) + rg_etc1::square(p1 >> 8) + rg_etc1::square(p2 >> 8); + if (trial_error < best_error) + { + best_error = trial_error; + best_x = x; + best_packed_c1 = p1 & 0xFF; + best_packed_c2 = p2 & 0xFF; + best_i = i; + if 
(!best_error) + goto found_perfect_match; + } + } while (*pTable != 0xFFFF); + } + } +found_perfect_match: + + const uint diff = best_x & 1; + const uint inten = (best_x >> 1) & 7; + + block.m_bytes[3] = static_cast<uint8>(((inten | (inten << 3)) << 2) | (diff << 1)); + + const uint etc1_selector = g_selector_index_to_etc1[(best_x >> 4) & 3]; + *reinterpret_cast<uint16*>(&block.m_bytes[4]) = (etc1_selector & 2) ? 0xFFFF : 0; + *reinterpret_cast<uint16*>(&block.m_bytes[6]) = (etc1_selector & 1) ? 0xFFFF : 0; + + const uint best_packed_c0 = (best_x >> 8) & 255; + if (diff) + { + block.m_bytes[best_i] = static_cast<uint8>(best_packed_c0 << 3); + block.m_bytes[s_next_comp[best_i]] = static_cast<uint8>(best_packed_c1 << 3); + block.m_bytes[s_next_comp[best_i+1]] = static_cast<uint8>(best_packed_c2 << 3); + } + else + { + block.m_bytes[best_i] = static_cast<uint8>(best_packed_c0 | (best_packed_c0 << 4)); + block.m_bytes[s_next_comp[best_i]] = static_cast<uint8>(best_packed_c1 | (best_packed_c1 << 4)); + block.m_bytes[s_next_comp[best_i+1]] = static_cast<uint8>(best_packed_c2 | (best_packed_c2 << 4)); + } + + return best_error; + } + + static uint pack_etc1_block_solid_color_constrained( + etc1_optimizer::results& results, + uint num_colors, const uint8* pColor, + etc1_pack_params& pack_params, + bool use_diff, + const color_quad_u8* pBase_color5_unscaled) + { + RG_ETC1_ASSERT(g_etc1_inverse_lookup[0][255]); + + pack_params; + static uint s_next_comp[4] = { 1, 2, 0, 1 }; + + uint best_error = cUINT32_MAX, best_i = 0; + int best_x = 0, best_packed_c1 = 0, best_packed_c2 = 0; + + // For each possible 8-bit value, there is a precomputed list of diff/inten/selector configurations that allow that 8-bit value to be encoded with no error. 
+ for (uint i = 0; i < 3; i++) + { + const uint c1 = pColor[s_next_comp[i]], c2 = pColor[s_next_comp[i + 1]]; + + const int delta_range = 1; + for (int delta = -delta_range; delta <= delta_range; delta++) + { + const int c_plus_delta = rg_etc1::clamp<int>(pColor[i] + delta, 0, 255); + + const uint16* pTable; + if (!c_plus_delta) + pTable = g_color8_to_etc_block_config_0_255[0]; + else if (c_plus_delta == 255) + pTable = g_color8_to_etc_block_config_0_255[1]; + else + pTable = g_color8_to_etc_block_config_1_to_254[c_plus_delta - 1]; + + do + { + const uint x = *pTable++; + const uint diff = x & 1; + if (static_cast<uint>(use_diff) != diff) + { + if (*pTable == 0xFFFF) + break; + continue; + } + + if ((diff) && (pBase_color5_unscaled)) + { + const int p0 = (x >> 8) & 255; + int delta = p0 - static_cast<int>(pBase_color5_unscaled->c[i]); + if ((delta < cETC1ColorDeltaMin) || (delta > cETC1ColorDeltaMax)) + { + if (*pTable == 0xFFFF) + break; + continue; + } + } + +#ifdef RG_ETC1_BUILD_DEBUG + { + const uint inten = (x >> 1) & 7; + const uint selector = (x >> 4) & 3; + const uint p0 = (x >> 8) & 255; + RG_ETC1_ASSERT(etc1_decode_value(diff, inten, selector, p0) == (uint)c_plus_delta); + } +#endif + + const uint16* pInverse_table = g_etc1_inverse_lookup[x & 0xFF]; + uint16 p1 = pInverse_table[c1]; + uint16 p2 = pInverse_table[c2]; + + if ((diff) && (pBase_color5_unscaled)) + { + int delta1 = (p1 & 0xFF) - static_cast<int>(pBase_color5_unscaled->c[s_next_comp[i]]); + int delta2 = (p2 & 0xFF) - static_cast<int>(pBase_color5_unscaled->c[s_next_comp[i + 1]]); + if ((delta1 < cETC1ColorDeltaMin) || (delta1 > cETC1ColorDeltaMax) || (delta2 < cETC1ColorDeltaMin) || (delta2 > cETC1ColorDeltaMax)) + { + if (*pTable == 0xFFFF) + break; + continue; + } + } + + const uint trial_error = rg_etc1::square(c_plus_delta - pColor[i]) + rg_etc1::square(p1 >> 8) + rg_etc1::square(p2 >> 8); + if (trial_error < best_error) + { + best_error = trial_error; + best_x = x; + best_packed_c1 = p1 & 
0xFF; + best_packed_c2 = p2 & 0xFF; + best_i = i; + if (!best_error) + goto found_perfect_match; + } + } while (*pTable != 0xFFFF); + } + } +found_perfect_match: + + if (best_error == cUINT32_MAX) + return best_error; + + best_error *= num_colors; + + results.m_n = num_colors; + results.m_block_color4 = !(best_x & 1); + results.m_block_inten_table = (best_x >> 1) & 7; + memset(results.m_pSelectors, (best_x >> 4) & 3, num_colors); + + const uint best_packed_c0 = (best_x >> 8) & 255; + results.m_block_color_unscaled[best_i] = static_cast<uint8>(best_packed_c0); + results.m_block_color_unscaled[s_next_comp[best_i]] = static_cast<uint8>(best_packed_c1); + results.m_block_color_unscaled[s_next_comp[best_i + 1]] = static_cast<uint8>(best_packed_c2); + results.m_error = best_error; + + return best_error; + } + + // Function originally from RYG's public domain real-time DXT1 compressor, modified for 555. + static void dither_block_555(color_quad_u8* dest, const color_quad_u8* block) + { + int err[8],*ep1 = err,*ep2 = err+4; + uint8 *quant = g_quant5_tab+8; + + memset(dest, 0xFF, sizeof(color_quad_u8)*16); + + // process channels seperately + for(int ch=0;ch<3;ch++) + { + uint8* bp = (uint8*)block; + uint8* dp = (uint8*)dest; + + bp += ch; dp += ch; + + memset(err,0, sizeof(err)); + for(int y = 0; y < 4; y++) + { + // pixel 0 + dp[ 0] = quant[bp[ 0] + ((3*ep2[1] + 5*ep2[0]) >> 4)]; + ep1[0] = bp[ 0] - dp[ 0]; + + // pixel 1 + dp[ 4] = quant[bp[ 4] + ((7*ep1[0] + 3*ep2[2] + 5*ep2[1] + ep2[0]) >> 4)]; + ep1[1] = bp[ 4] - dp[ 4]; + + // pixel 2 + dp[ 8] = quant[bp[ 8] + ((7*ep1[1] + 3*ep2[3] + 5*ep2[2] + ep2[1]) >> 4)]; + ep1[2] = bp[ 8] - dp[ 8]; + + // pixel 3 + dp[12] = quant[bp[12] + ((7*ep1[2] + 5*ep2[3] + ep2[2]) >> 4)]; + ep1[3] = bp[12] - dp[12]; + + // advance to next line + int* tmp = ep1; ep1 = ep2; ep2 = tmp; + bp += 16; + dp += 16; + } + } + } + + unsigned int pack_etc1_block(void* pETC1_block, const unsigned int* pSrc_pixels_rgba, etc1_pack_params& pack_params) + 
{ + const color_quad_u8* pSrc_pixels = reinterpret_cast<const color_quad_u8*>(pSrc_pixels_rgba); + etc1_block& dst_block = *static_cast<etc1_block*>(pETC1_block); + +#ifdef RG_ETC1_BUILD_DEBUG + // Ensure all alpha values are 0xFF. + for (uint i = 0; i < 16; i++) + { + RG_ETC1_ASSERT(pSrc_pixels[i].a == 255); + } +#endif + + color_quad_u8 src_pixel0(pSrc_pixels[0]); + + // Check for solid block. + const uint32 first_pixel_u32 = pSrc_pixels->m_u32; + int r; + for (r = 15; r >= 1; --r) + if (pSrc_pixels[r].m_u32 != first_pixel_u32) + break; + if (!r) + return static_cast<unsigned int>(16 * pack_etc1_block_solid_color(dst_block, &pSrc_pixels[0].r, pack_params)); + + color_quad_u8 dithered_pixels[16]; + if (pack_params.m_dithering) + { + dither_block_555(dithered_pixels, pSrc_pixels); + pSrc_pixels = dithered_pixels; + } + + etc1_optimizer optimizer; + + uint64 best_error = cUINT64_MAX; + uint best_flip = false, best_use_color4 = false; + + uint8 best_selectors[2][8]; + etc1_optimizer::results best_results[2]; + for (uint i = 0; i < 2; i++) + { + best_results[i].m_n = 8; + best_results[i].m_pSelectors = best_selectors[i]; + } + + uint8 selectors[3][8]; + etc1_optimizer::results results[3]; + + for (uint i = 0; i < 3; i++) + { + results[i].m_n = 8; + results[i].m_pSelectors = selectors[i]; + } + + color_quad_u8 subblock_pixels[8]; + + etc1_optimizer::params params(pack_params); + params.m_num_src_pixels = 8; + params.m_pSrc_pixels = subblock_pixels; + + for (uint flip = 0; flip < 2; flip++) + { + for (uint use_color4 = 0; use_color4 < 2; use_color4++) + { + uint64 trial_error = 0; + + uint subblock; + for (subblock = 0; subblock < 2; subblock++) + { + if (flip) + memcpy(subblock_pixels, pSrc_pixels + subblock * 8, sizeof(color_quad_u8) * 8); + else + { + const color_quad_u8* pSrc_col = pSrc_pixels + subblock * 2; + subblock_pixels[0] = pSrc_col[0]; subblock_pixels[1] = pSrc_col[4]; subblock_pixels[2] = pSrc_col[8]; subblock_pixels[3] = pSrc_col[12]; + subblock_pixels[4] 
= pSrc_col[1]; subblock_pixels[5] = pSrc_col[5]; subblock_pixels[6] = pSrc_col[9]; subblock_pixels[7] = pSrc_col[13]; + } + + results[2].m_error = cUINT64_MAX; + if ((params.m_quality >= cMediumQuality) && ((subblock) || (use_color4))) + { + const uint32 subblock_pixel0_u32 = subblock_pixels[0].m_u32; + for (r = 7; r >= 1; --r) + if (subblock_pixels[r].m_u32 != subblock_pixel0_u32) + break; + if (!r) + { + pack_etc1_block_solid_color_constrained(results[2], 8, &subblock_pixels[0].r, pack_params, !use_color4, (subblock && !use_color4) ? &results[0].m_block_color_unscaled : NULL); + } + } + + params.m_use_color4 = (use_color4 != 0); + params.m_constrain_against_base_color5 = false; + + if ((!use_color4) && (subblock)) + { + params.m_constrain_against_base_color5 = true; + params.m_base_color5 = results[0].m_block_color_unscaled; + } + + if (params.m_quality == cHighQuality) + { + static const int s_scan_delta_0_to_4[] = { -4, -3, -2, -1, 0, 1, 2, 3, 4 }; + params.m_scan_delta_size = RG_ETC1_ARRAY_SIZE(s_scan_delta_0_to_4); + params.m_pScan_deltas = s_scan_delta_0_to_4; + } + else if (params.m_quality == cMediumQuality) + { + static const int s_scan_delta_0_to_1[] = { -1, 0, 1 }; + params.m_scan_delta_size = RG_ETC1_ARRAY_SIZE(s_scan_delta_0_to_1); + params.m_pScan_deltas = s_scan_delta_0_to_1; + } + else + { + static const int s_scan_delta_0[] = { 0 }; + params.m_scan_delta_size = RG_ETC1_ARRAY_SIZE(s_scan_delta_0); + params.m_pScan_deltas = s_scan_delta_0; + } + + optimizer.init(params, results[subblock]); + if (!optimizer.compute()) + break; + + if (params.m_quality >= cMediumQuality) + { + // TODO: Fix fairly arbitrary/unrefined thresholds that control how far away to scan for potentially better solutions. 
+ const uint refinement_error_thresh0 = 3000; + const uint refinement_error_thresh1 = 6000; + if (results[subblock].m_error > refinement_error_thresh0) + { + if (params.m_quality == cMediumQuality) + { + static const int s_scan_delta_2_to_3[] = { -3, -2, 2, 3 }; + params.m_scan_delta_size = RG_ETC1_ARRAY_SIZE(s_scan_delta_2_to_3); + params.m_pScan_deltas = s_scan_delta_2_to_3; + } + else + { + static const int s_scan_delta_5_to_5[] = { -5, 5 }; + static const int s_scan_delta_5_to_8[] = { -8, -7, -6, -5, 5, 6, 7, 8 }; + if (results[subblock].m_error > refinement_error_thresh1) + { + params.m_scan_delta_size = RG_ETC1_ARRAY_SIZE(s_scan_delta_5_to_8); + params.m_pScan_deltas = s_scan_delta_5_to_8; + } + else + { + params.m_scan_delta_size = RG_ETC1_ARRAY_SIZE(s_scan_delta_5_to_5); + params.m_pScan_deltas = s_scan_delta_5_to_5; + } + } + + if (!optimizer.compute()) + break; + } + + if (results[2].m_error < results[subblock].m_error) + results[subblock] = results[2]; + } + + trial_error += results[subblock].m_error; + if (trial_error >= best_error) + break; + } + + if (subblock < 2) + continue; + + best_error = trial_error; + best_results[0] = results[0]; + best_results[1] = results[1]; + best_flip = flip; + best_use_color4 = use_color4; + + } // use_color4 + + } // flip + + int dr = best_results[1].m_block_color_unscaled.r - best_results[0].m_block_color_unscaled.r; + int dg = best_results[1].m_block_color_unscaled.g - best_results[0].m_block_color_unscaled.g; + int db = best_results[1].m_block_color_unscaled.b - best_results[0].m_block_color_unscaled.b; + RG_ETC1_ASSERT(best_use_color4 || ((rg_etc1::minimum(dr, dg, db) >= cETC1ColorDeltaMin) && (rg_etc1::maximum(dr, dg, db) <= cETC1ColorDeltaMax))); + + if (best_use_color4) + { + dst_block.m_bytes[0] = static_cast<uint8>(best_results[1].m_block_color_unscaled.r | (best_results[0].m_block_color_unscaled.r << 4)); + dst_block.m_bytes[1] = static_cast<uint8>(best_results[1].m_block_color_unscaled.g | 
(best_results[0].m_block_color_unscaled.g << 4)); + dst_block.m_bytes[2] = static_cast<uint8>(best_results[1].m_block_color_unscaled.b | (best_results[0].m_block_color_unscaled.b << 4)); + } + else + { + if (dr < 0) dr += 8; dst_block.m_bytes[0] = static_cast<uint8>((best_results[0].m_block_color_unscaled.r << 3) | dr); + if (dg < 0) dg += 8; dst_block.m_bytes[1] = static_cast<uint8>((best_results[0].m_block_color_unscaled.g << 3) | dg); + if (db < 0) db += 8; dst_block.m_bytes[2] = static_cast<uint8>((best_results[0].m_block_color_unscaled.b << 3) | db); + } + + dst_block.m_bytes[3] = static_cast<uint8>( (best_results[1].m_block_inten_table << 2) | (best_results[0].m_block_inten_table << 5) | ((~best_use_color4 & 1) << 1) | best_flip ); + + uint selector0 = 0, selector1 = 0; + if (best_flip) + { + // flipped: + // { 0, 0 }, { 1, 0 }, { 2, 0 }, { 3, 0 }, + // { 0, 1 }, { 1, 1 }, { 2, 1 }, { 3, 1 } + // + // { 0, 2 }, { 1, 2 }, { 2, 2 }, { 3, 2 }, + // { 0, 3 }, { 1, 3 }, { 2, 3 }, { 3, 3 } + const uint8* pSelectors0 = best_results[0].m_pSelectors; + const uint8* pSelectors1 = best_results[1].m_pSelectors; + for (int x = 3; x >= 0; --x) + { + uint b; + b = g_selector_index_to_etc1[pSelectors1[4 + x]]; + selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1); + + b = g_selector_index_to_etc1[pSelectors1[x]]; + selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1); + + b = g_selector_index_to_etc1[pSelectors0[4 + x]]; + selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1); + + b = g_selector_index_to_etc1[pSelectors0[x]]; + selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1); + } + } + else + { + // non-flipped: + // { 0, 0 }, { 0, 1 }, { 0, 2 }, { 0, 3 }, + // { 1, 0 }, { 1, 1 }, { 1, 2 }, { 1, 3 } + // + // { 2, 0 }, { 2, 1 }, { 2, 2 }, { 2, 3 }, + // { 3, 0 }, { 3, 1 }, { 3, 2 }, { 3, 3 } + for (int subblock = 1; subblock >= 0; --subblock) + { + const uint8* 
pSelectors = best_results[subblock].m_pSelectors + 4; + for (uint i = 0; i < 2; i++) + { + uint b; + b = g_selector_index_to_etc1[pSelectors[3]]; + selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1); + + b = g_selector_index_to_etc1[pSelectors[2]]; + selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1); + + b = g_selector_index_to_etc1[pSelectors[1]]; + selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1); + + b = g_selector_index_to_etc1[pSelectors[0]]; + selector0 = (selector0 << 1) | (b & 1);selector1 = (selector1 << 1) | (b >> 1); + + pSelectors -= 4; + } + } + } + + dst_block.m_bytes[4] = static_cast<uint8>(selector1 >> 8); dst_block.m_bytes[5] = static_cast<uint8>(selector1 & 0xFF); + dst_block.m_bytes[6] = static_cast<uint8>(selector0 >> 8); dst_block.m_bytes[7] = static_cast<uint8>(selector0 & 0xFF); + + return static_cast<unsigned int>(best_error); + } + +} // namespace rg_etc1 diff --git a/drivers/etc1/rg_etc1.h b/drivers/etc1/rg_etc1.h index 9a701506fd..9ce89a6cc6 100644 --- a/drivers/etc1/rg_etc1.h +++ b/drivers/etc1/rg_etc1.h @@ -1,76 +1,76 @@ -// File: rg_etc1.h - Fast, high quality ETC1 block packer/unpacker - Rich Geldreich <richgel99@gmail.com>
-// Please see ZLIB license at the end of this file.
-#pragma once
-
-namespace rg_etc1
-{
- // Unpacks an 8-byte ETC1 compressed block to a block of 4x4 32bpp RGBA pixels.
- // Returns false if the block is invalid. Invalid blocks will still be unpacked with clamping.
- // This function is thread safe, and does not dynamically allocate any memory.
- // If preserve_alpha is true, the alpha channel of the destination pixels will not be overwritten. Otherwise, alpha will be set to 255.
- bool unpack_etc1_block(const void *pETC1_block, unsigned int* pDst_pixels_rgba, bool preserve_alpha = false);
-
- // Quality setting = the higher the quality, the slower.
- // To pack large textures, it is highly recommended to call pack_etc1_block() in parallel, on different blocks, from multiple threads (particularly when using cHighQuality).
- enum etc1_quality
- {
- cLowQuality,
- cMediumQuality,
- cHighQuality,
- };
-
- struct etc1_pack_params
- {
- etc1_quality m_quality;
- bool m_dithering;
-
- inline etc1_pack_params()
- {
- clear();
- }
-
- void clear()
- {
- m_quality = cHighQuality;
- m_dithering = false;
- }
- };
-
- // Important: pack_etc1_block_init() must be called before calling pack_etc1_block().
- void pack_etc1_block_init();
-
- // Packs a 4x4 block of 32bpp RGBA pixels to an 8-byte ETC1 block.
- // 32-bit RGBA pixels must always be arranged as (R,G,B,A) (R first, A last) in memory, independent of platform endianness. A should always be 255.
- // Returns squared error of result.
- // This function is thread safe, and does not dynamically allocate any memory.
- // pack_etc1_block() does not currently support "perceptual" colorspace metrics - it primarily optimizes for RGB RMSE.
- unsigned int pack_etc1_block(void* pETC1_block, const unsigned int* pSrc_pixels_rgba, etc1_pack_params& pack_params);
-
-} // namespace rg_etc1
-
-//------------------------------------------------------------------------------
-//
-// rg_etc1 uses the ZLIB license:
-// http://opensource.org/licenses/Zlib
-//
-// Copyright (c) 2012 Rich Geldreich
-//
-// This software is provided 'as-is', without any express or implied
-// warranty. In no event will the authors be held liable for any damages
-// arising from the use of this software.
-//
-// Permission is granted to anyone to use this software for any purpose,
-// including commercial applications, and to alter it and redistribute it
-// freely, subject to the following restrictions:
-//
-// 1. The origin of this software must not be misrepresented; you must not
-// claim that you wrote the original software. If you use this software
-// in a product, an acknowledgment in the product documentation would be
-// appreciated but is not required.
-//
-// 2. Altered source versions must be plainly marked as such, and must not be
-// misrepresented as being the original software.
-//
-// 3. This notice may not be removed or altered from any source distribution.
-//
-//------------------------------------------------------------------------------
+// File: rg_etc1.h - Fast, high quality ETC1 block packer/unpacker - Rich Geldreich <richgel99@gmail.com> +// Please see ZLIB license at the end of this file. +#pragma once + +namespace rg_etc1 +{ + // Unpacks an 8-byte ETC1 compressed block to a block of 4x4 32bpp RGBA pixels. + // Returns false if the block is invalid. Invalid blocks will still be unpacked with clamping. + // This function is thread safe, and does not dynamically allocate any memory. + // If preserve_alpha is true, the alpha channel of the destination pixels will not be overwritten. Otherwise, alpha will be set to 255. + bool unpack_etc1_block(const void *pETC1_block, unsigned int* pDst_pixels_rgba, bool preserve_alpha = false); + + // Quality setting = the higher the quality, the slower. + // To pack large textures, it is highly recommended to call pack_etc1_block() in parallel, on different blocks, from multiple threads (particularly when using cHighQuality). + enum etc1_quality + { + cLowQuality, + cMediumQuality, + cHighQuality, + }; + + struct etc1_pack_params + { + etc1_quality m_quality; + bool m_dithering; + + inline etc1_pack_params() + { + clear(); + } + + void clear() + { + m_quality = cHighQuality; + m_dithering = false; + } + }; + + // Important: pack_etc1_block_init() must be called before calling pack_etc1_block(). + void pack_etc1_block_init(); + + // Packs a 4x4 block of 32bpp RGBA pixels to an 8-byte ETC1 block. + // 32-bit RGBA pixels must always be arranged as (R,G,B,A) (R first, A last) in memory, independent of platform endianness. A should always be 255. + // Returns squared error of result. + // This function is thread safe, and does not dynamically allocate any memory. + // pack_etc1_block() does not currently support "perceptual" colorspace metrics - it primarily optimizes for RGB RMSE. 
+ unsigned int pack_etc1_block(void* pETC1_block, const unsigned int* pSrc_pixels_rgba, etc1_pack_params& pack_params); + +} // namespace rg_etc1 + +//------------------------------------------------------------------------------ +// +// rg_etc1 uses the ZLIB license: +// http://opensource.org/licenses/Zlib +// +// Copyright (c) 2012 Rich Geldreich +// +// This software is provided 'as-is', without any express or implied +// warranty. In no event will the authors be held liable for any damages +// arising from the use of this software. +// +// Permission is granted to anyone to use this software for any purpose, +// including commercial applications, and to alter it and redistribute it +// freely, subject to the following restrictions: +// +// 1. The origin of this software must not be misrepresented; you must not +// claim that you wrote the original software. If you use this software +// in a product, an acknowledgment in the product documentation would be +// appreciated but is not required. +// +// 2. Altered source versions must be plainly marked as such, and must not be +// misrepresented as being the original software. +// +// 3. This notice may not be removed or altered from any source distribution. 
+// +//------------------------------------------------------------------------------ diff --git a/drivers/gles2/rasterizer_gles2.cpp b/drivers/gles2/rasterizer_gles2.cpp index d3a5f3b5bc..3d75ed29f3 100644 --- a/drivers/gles2/rasterizer_gles2.cpp +++ b/drivers/gles2/rasterizer_gles2.cpp @@ -4145,7 +4145,7 @@ void RasterizerGLES2::begin_frame() { //fragment_lighting=Globals::get_singleton()->get("rasterizer/use_fragment_lighting"); #ifdef TOOLS_ENABLED - canvas_shader.set_conditional(CanvasShaderGLES2::USE_PIXEL_SNAP,GLOBAL_DEF("rasterizer/use_pixel_snap",false)); + canvas_shader.set_conditional(CanvasShaderGLES2::USE_PIXEL_SNAP,GLOBAL_DEF("display/use_2d_pixel_snap",false)); shadow_filter=ShadowFilterTechnique(int(Globals::get_singleton()->get("rasterizer/shadow_filter"))); #endif @@ -4160,7 +4160,6 @@ void RasterizerGLES2::begin_frame() { time_delta=time-last_time; last_time=time; frame++; - clear_viewport(Color(1,0,0.5)); _rinfo.vertex_count=0; _rinfo.object_count=0; @@ -5970,6 +5969,10 @@ void RasterizerGLES2::_render(const Geometry *p_geometry,const Material *p_mater if (element_count==0) return; + if (mm->visible>=0) { + element_count=MIN(element_count,mm->visible); + } + const MultiMesh::Element *elements=&mm->elements[0]; _rinfo.vertex_count+=s->array_len*element_count; @@ -10804,7 +10807,7 @@ void RasterizerGLES2::init() { copy_shader.set_conditional(CopyShaderGLES2::USE_8BIT_HDR,!use_fp16_fb); canvas_shader.set_conditional(CanvasShaderGLES2::USE_DEPTH_SHADOWS,read_depth_supported); - canvas_shader.set_conditional(CanvasShaderGLES2::USE_PIXEL_SNAP,GLOBAL_DEF("rasterizer/use_pixel_snap",false)); + canvas_shader.set_conditional(CanvasShaderGLES2::USE_PIXEL_SNAP,GLOBAL_DEF("display/use_2d_pixel_snap",false)); npo2_textures_available=true; //fragment_lighting=false; @@ -11188,6 +11191,12 @@ RasterizerGLES2::RasterizerGLES2(bool p_compress_arrays,bool p_keep_ram_copy,boo tc0_idx=0; }; +void RasterizerGLES2::restore_framebuffer() { + + 
glBindFramebuffer(GL_FRAMEBUFFER, base_framebuffer); + +} + RasterizerGLES2::~RasterizerGLES2() { memdelete_arr(skinned_buffer); diff --git a/drivers/gles2/rasterizer_gles2.h b/drivers/gles2/rasterizer_gles2.h index d337ecfb64..f759e84b53 100644 --- a/drivers/gles2/rasterizer_gles2.h +++ b/drivers/gles2/rasterizer_gles2.h @@ -1695,6 +1695,8 @@ public: void reload_vram(); virtual bool has_feature(VS::Features p_feature) const; + + virtual void restore_framebuffer(); static RasterizerGLES2* get_singleton(); diff --git a/drivers/gles2/shader_compiler_gles2.cpp b/drivers/gles2/shader_compiler_gles2.cpp index 157f2e398b..d57512c936 100644 --- a/drivers/gles2/shader_compiler_gles2.cpp +++ b/drivers/gles2/shader_compiler_gles2.cpp @@ -132,18 +132,18 @@ String ShaderCompilerGLES2::dump_node_code(SL::Node *p_node,int p_level,bool p_a SL::BlockNode *bnode=(SL::BlockNode*)p_node; //variables - code+="{"ENDL; + code+="{" ENDL; for(Map<StringName,SL::DataType>::Element *E=bnode->variables.front();E;E=E->next()) { - code+=_mktab(p_level)+_typestr(E->value())+" "+replace_string(E->key())+";"ENDL; + code+=_mktab(p_level)+_typestr(E->value())+" "+replace_string(E->key())+";" ENDL; } for(int i=0;i<bnode->statements.size();i++) { - code+=_mktab(p_level)+dump_node_code(bnode->statements[i],p_level)+";"ENDL; + code+=_mktab(p_level)+dump_node_code(bnode->statements[i],p_level)+";" ENDL; } - code+="}"ENDL; + code+="}" ENDL; } break; case SL::Node::TYPE_VARIABLE: { @@ -489,15 +489,15 @@ String ShaderCompilerGLES2::dump_node_code(SL::Node *p_node,int p_level,bool p_a SL::ControlFlowNode *cfnode=(SL::ControlFlowNode*)p_node; if (cfnode->flow_op==SL::FLOW_OP_IF) { - code+="if ("+dump_node_code(cfnode->statements[0],p_level)+") {"ENDL; + code+="if ("+dump_node_code(cfnode->statements[0],p_level)+") {" ENDL; code+=dump_node_code(cfnode->statements[1],p_level+1); if (cfnode->statements.size()==3) { - code+="} else {"ENDL; + code+="} else {" ENDL; 
code+=dump_node_code(cfnode->statements[2],p_level+1); } - code+="}"ENDL; + code+="}" ENDL; } else if (cfnode->flow_op==SL::FLOW_OP_RETURN) { @@ -560,7 +560,7 @@ Error ShaderCompilerGLES2::compile_node(SL::ProgramNode *p_program) { ubase=uniforms->size(); for(Map<StringName,SL::Uniform>::Element *E=p_program->uniforms.front();E;E=E->next()) { - String uline="uniform "+_typestr(E->get().type)+" _"+E->key().operator String()+";"ENDL; + String uline="uniform "+_typestr(E->get().type)+" _"+E->key().operator String()+";" ENDL; global_code+=uline; if (uniforms) { @@ -593,10 +593,10 @@ Error ShaderCompilerGLES2::compile_node(SL::ProgramNode *p_program) { header+=_typestr(fnode->arguments[i].type)+" "+replace_string(fnode->arguments[i].name); } - header+=") {"ENDL; + header+=") {" ENDL; String fcode=header; fcode+=dump_node_code(fnode->body,1); - fcode+="}"ENDL; + fcode+="}" ENDL; global_code+=fcode; } @@ -605,7 +605,7 @@ Error ShaderCompilerGLES2::compile_node(SL::ProgramNode *p_program) { StringName varname=E->key(); String newvarname=replace_string(varname); - global_code+="uniform "+_typestr(E->get())+" "+newvarname+";"ENDL; + global_code+="uniform "+_typestr(E->get())+" "+newvarname+";" ENDL; }*/ code=dump_node_code(p_program,0); diff --git a/drivers/mpc/audio_stream_mpc.cpp b/drivers/mpc/audio_stream_mpc.cpp index 67f21f922c..fe6aa05d00 100644 --- a/drivers/mpc/audio_stream_mpc.cpp +++ b/drivers/mpc/audio_stream_mpc.cpp @@ -1,7 +1,7 @@ #include "audio_stream_mpc.h" -Error AudioStreamMPC::_open_file() { +Error AudioStreamPlaybackMPC::_open_file() { if (f) { memdelete(f); @@ -41,7 +41,7 @@ Error AudioStreamMPC::_open_file() { return OK; } -void AudioStreamMPC::_close_file() { +void AudioStreamPlaybackMPC::_close_file() { if (f) { memdelete(f); @@ -52,7 +52,7 @@ void AudioStreamMPC::_close_file() { data_ofs=0; } -int AudioStreamMPC::_read_file(void *p_dst,int p_bytes) { +int AudioStreamPlaybackMPC::_read_file(void *p_dst,int p_bytes) { if (f) return 
f->get_buffer((uint8_t*)p_dst,p_bytes); @@ -68,7 +68,7 @@ int AudioStreamMPC::_read_file(void *p_dst,int p_bytes) { return p_bytes; } -bool AudioStreamMPC::_seek_file(int p_pos){ +bool AudioStreamPlaybackMPC::_seek_file(int p_pos){ if (p_pos<0 || p_pos>streamlen) return false; @@ -83,7 +83,7 @@ bool AudioStreamMPC::_seek_file(int p_pos){ return true; } -int AudioStreamMPC::_tell_file() const{ +int AudioStreamPlaybackMPC::_tell_file() const{ if (f) return f->get_pos(); @@ -93,13 +93,13 @@ int AudioStreamMPC::_tell_file() const{ } -int AudioStreamMPC::_sizeof_file() const{ +int AudioStreamPlaybackMPC::_sizeof_file() const{ //print_line("sizeof file, get: "+itos(streamlen)); return streamlen; } -bool AudioStreamMPC::_canseek_file() const{ +bool AudioStreamPlaybackMPC::_canseek_file() const{ //print_line("canseek file, get true"); return true; @@ -107,51 +107,46 @@ bool AudioStreamMPC::_canseek_file() const{ ///////////////////// -mpc_int32_t AudioStreamMPC::_mpc_read(mpc_reader *p_reader,void *p_dst, mpc_int32_t p_bytes) { +mpc_int32_t AudioStreamPlaybackMPC::_mpc_read(mpc_reader *p_reader,void *p_dst, mpc_int32_t p_bytes) { - AudioStreamMPC *smpc=(AudioStreamMPC *)p_reader->data; + AudioStreamPlaybackMPC *smpc=(AudioStreamPlaybackMPC *)p_reader->data; return smpc->_read_file(p_dst,p_bytes); } -mpc_bool_t AudioStreamMPC::_mpc_seek(mpc_reader *p_reader,mpc_int32_t p_offset) { +mpc_bool_t AudioStreamPlaybackMPC::_mpc_seek(mpc_reader *p_reader,mpc_int32_t p_offset) { - AudioStreamMPC *smpc=(AudioStreamMPC *)p_reader->data; + AudioStreamPlaybackMPC *smpc=(AudioStreamPlaybackMPC *)p_reader->data; return smpc->_seek_file(p_offset); } -mpc_int32_t AudioStreamMPC::_mpc_tell(mpc_reader *p_reader) { +mpc_int32_t AudioStreamPlaybackMPC::_mpc_tell(mpc_reader *p_reader) { - AudioStreamMPC *smpc=(AudioStreamMPC *)p_reader->data; + AudioStreamPlaybackMPC *smpc=(AudioStreamPlaybackMPC *)p_reader->data; return smpc->_tell_file(); } -mpc_int32_t AudioStreamMPC::_mpc_get_size(mpc_reader 
*p_reader) { +mpc_int32_t AudioStreamPlaybackMPC::_mpc_get_size(mpc_reader *p_reader) { - AudioStreamMPC *smpc=(AudioStreamMPC *)p_reader->data; + AudioStreamPlaybackMPC *smpc=(AudioStreamPlaybackMPC *)p_reader->data; return smpc->_sizeof_file(); } -mpc_bool_t AudioStreamMPC::_mpc_canseek(mpc_reader *p_reader) { +mpc_bool_t AudioStreamPlaybackMPC::_mpc_canseek(mpc_reader *p_reader) { - AudioStreamMPC *smpc=(AudioStreamMPC *)p_reader->data; + AudioStreamPlaybackMPC *smpc=(AudioStreamPlaybackMPC *)p_reader->data; return smpc->_canseek_file(); } -bool AudioStreamMPC::_can_mix() const { - return /*active &&*/ !paused; -} - - -void AudioStreamMPC::update() { +int AudioStreamPlaybackMPC::mix(int16_t* p_bufer,int p_frames) { if (!active || paused) - return; + return 0; - int todo=get_todo(); + int todo=p_frames; while(todo>MPC_DECODER_BUFFER_LENGTH/si.channels) { @@ -162,7 +157,7 @@ void AudioStreamMPC::update() { mpc_status err = mpc_demux_decode(demux, &frame); if (frame.bits!=-1) { - int16_t *dst_buff = get_write_buffer(); + int16_t *dst_buff = p_bufer; #ifdef MPC_FIXED_POINT @@ -185,21 +180,21 @@ void AudioStreamMPC::update() { #endif int frames = frame.samples; - write(frames); + p_bufer+=si.channels*frames; todo-=frames; } else { if (err != MPC_STATUS_OK) { stop(); - ERR_EXPLAIN("Error decoding MPC"); - ERR_FAIL(); + ERR_PRINT("Error decoding MPC"); + break; } else { //finished if (!loop) { stop(); - return; + break; } else { @@ -213,9 +208,11 @@ void AudioStreamMPC::update() { } } } + + return p_frames-todo; } -Error AudioStreamMPC::_reload() { +Error AudioStreamPlaybackMPC::_reload() { ERR_FAIL_COND_V(demux!=NULL, ERR_FILE_ALREADY_IN_USE); @@ -224,31 +221,40 @@ Error AudioStreamMPC::_reload() { demux = mpc_demux_init(&reader); ERR_FAIL_COND_V(!demux,ERR_CANT_CREATE); - mpc_demux_get_info(demux, &si); - _setup(si.channels,si.sample_freq,MPC_DECODER_BUFFER_LENGTH*2/si.channels); return OK; } -void AudioStreamMPC::set_file(const String& p_file) { +void 
AudioStreamPlaybackMPC::set_file(const String& p_file) { file=p_file; + Error err = _open_file(); + ERR_FAIL_COND(err!=OK); + demux = mpc_demux_init(&reader); + ERR_FAIL_COND(!demux); + mpc_demux_get_info(demux, &si); + stream_min_size=MPC_DECODER_BUFFER_LENGTH*2/si.channels; + stream_rate=si.sample_freq; + stream_channels=si.channels; + + mpc_demux_exit(demux); + demux=NULL; + _close_file(); + } -String AudioStreamMPC::get_file() const { +String AudioStreamPlaybackMPC::get_file() const { return file; } -void AudioStreamMPC::play() { +void AudioStreamPlaybackMPC::play(float p_offset) { - _THREAD_SAFE_METHOD_ - if (active) stop(); active=false; @@ -262,9 +268,9 @@ void AudioStreamMPC::play() { } -void AudioStreamMPC::stop() { +void AudioStreamPlaybackMPC::stop() { + - _THREAD_SAFE_METHOD_ if (!active) return; if (demux) { @@ -275,70 +281,58 @@ void AudioStreamMPC::stop() { active=false; } -bool AudioStreamMPC::is_playing() const { +bool AudioStreamPlaybackMPC::is_playing() const { - return active || (get_total() - get_todo() -1 > 0); + return active; } -void AudioStreamMPC::set_paused(bool p_paused) { - paused=p_paused; -} -bool AudioStreamMPC::is_paused(bool p_paused) const { - - return paused; -} - -void AudioStreamMPC::set_loop(bool p_enable) { +void AudioStreamPlaybackMPC::set_loop(bool p_enable) { loop=p_enable; } -bool AudioStreamMPC::has_loop() const { +bool AudioStreamPlaybackMPC::has_loop() const { return loop; } -float AudioStreamMPC::get_length() const { +float AudioStreamPlaybackMPC::get_length() const { return 0; } -String AudioStreamMPC::get_stream_name() const { +String AudioStreamPlaybackMPC::get_stream_name() const { return ""; } -int AudioStreamMPC::get_loop_count() const { +int AudioStreamPlaybackMPC::get_loop_count() const { return 0; } -float AudioStreamMPC::get_pos() const { +float AudioStreamPlaybackMPC::get_pos() const { return 0; } -void AudioStreamMPC::seek_pos(float p_time) { +void AudioStreamPlaybackMPC::seek_pos(float p_time) { } 
-AudioStream::UpdateMode AudioStreamMPC::get_update_mode() const { - - return UPDATE_THREAD; -} -void AudioStreamMPC::_bind_methods() { +void AudioStreamPlaybackMPC::_bind_methods() { - ObjectTypeDB::bind_method(_MD("set_file","name"),&AudioStreamMPC::set_file); - ObjectTypeDB::bind_method(_MD("get_file"),&AudioStreamMPC::get_file); + ObjectTypeDB::bind_method(_MD("set_file","name"),&AudioStreamPlaybackMPC::set_file); + ObjectTypeDB::bind_method(_MD("get_file"),&AudioStreamPlaybackMPC::get_file); ADD_PROPERTYNZ( PropertyInfo(Variant::STRING,"file",PROPERTY_HINT_FILE,"mpc"), _SCS("set_file"), _SCS("get_file")); } -AudioStreamMPC::AudioStreamMPC() { +AudioStreamPlaybackMPC::AudioStreamPlaybackMPC() { - preload=true; + preload=false; f=NULL; streamlen=0; data_ofs=0; @@ -356,7 +350,7 @@ AudioStreamMPC::AudioStreamMPC() { } -AudioStreamMPC::~AudioStreamMPC() { +AudioStreamPlaybackMPC::~AudioStreamPlaybackMPC() { stop(); diff --git a/drivers/mpc/audio_stream_mpc.h b/drivers/mpc/audio_stream_mpc.h index 8fb0ed13de..122d0d0bbb 100644 --- a/drivers/mpc/audio_stream_mpc.h +++ b/drivers/mpc/audio_stream_mpc.h @@ -1,18 +1,17 @@ #ifndef AUDIO_STREAM_MPC_H #define AUDIO_STREAM_MPC_H -#include "scene/resources/audio_stream_resampled.h" +#include "scene/resources/audio_stream.h" #include "os/file_access.h" #include "mpc/mpcdec.h" #include "os/thread_safe.h" #include "io/resource_loader.h" //#include "../libmpcdec/decoder.h" //#include "../libmpcdec/internal.h" -class AudioStreamMPC : public AudioStreamResampled { - OBJ_TYPE( AudioStreamMPC, AudioStreamResampled ); +class AudioStreamPlaybackMPC : public AudioStreamPlayback { - _THREAD_SAFE_CLASS_ + OBJ_TYPE( AudioStreamPlaybackMPC, AudioStreamPlayback ); bool preload; FileAccess *f; @@ -39,7 +38,9 @@ class AudioStreamMPC : public AudioStreamResampled { static mpc_int32_t _mpc_get_size(mpc_reader *p_reader); static mpc_bool_t _mpc_canseek(mpc_reader *p_reader); - virtual bool _can_mix() const ; + int stream_min_size; + int 
stream_rate; + int stream_channels; protected: Error _open_file(); @@ -59,12 +60,10 @@ public: void set_file(const String& p_file); String get_file() const; - virtual void play(); + virtual void play(float p_offset=0); virtual void stop(); virtual bool is_playing() const; - virtual void set_paused(bool p_paused); - virtual bool is_paused(bool p_paused) const; virtual void set_loop(bool p_enable); virtual bool has_loop() const; @@ -78,13 +77,35 @@ public: virtual float get_pos() const; virtual void seek_pos(float p_time); - virtual UpdateMode get_update_mode() const; - virtual void update(); + virtual int get_channels() const { return stream_channels; } + virtual int get_mix_rate() const { return stream_rate; } - AudioStreamMPC(); - ~AudioStreamMPC(); + virtual int get_minimum_buffer_size() const { return stream_min_size; } + virtual int mix(int16_t* p_bufer,int p_frames); + + virtual void set_loop_restart_time(float p_time) { } + + AudioStreamPlaybackMPC(); + ~AudioStreamPlaybackMPC(); }; +class AudioStreamMPC : public AudioStream { + + OBJ_TYPE( AudioStreamMPC, AudioStream ); + + String file; +public: + + Ref<AudioStreamPlayback> instance_playback() { + Ref<AudioStreamPlaybackMPC> pb = memnew( AudioStreamPlaybackMPC ); + pb->set_file(file); + return pb; + } + + void set_file(const String& p_file) { file=p_file; } + + +}; class ResourceFormatLoaderAudioStreamMPC : public ResourceFormatLoader { public: diff --git a/drivers/nedmalloc/malloc.c.h b/drivers/nedmalloc/malloc.c.h index b9e65637d5..4fec5cc9d4 100644 --- a/drivers/nedmalloc/malloc.c.h +++ b/drivers/nedmalloc/malloc.c.h @@ -1,5814 +1,5814 @@ -#ifdef NEDMALLOC_ENABLED
-/*
- This is a version (aka dlmalloc) of malloc/free/realloc written by
- Doug Lea and released to the public domain, as explained at
- http://creativecommons.org/licenses/publicdomain. Send questions,
- comments, complaints, performance data, etc to dl@cs.oswego.edu
-
-* Version 2.8.4 Wed May 27 09:56:23 2009 Doug Lea (dl at gee)
-
- Note: There may be an updated version of this malloc obtainable at
- ftp://gee.cs.oswego.edu/pub/misc/malloc.c
- Check before installing!
-
-* Quickstart
-
- This library is all in one file to simplify the most common usage:
- ftp it, compile it (-O3), and link it into another program. All of
- the compile-time options default to reasonable values for use on
- most platforms. You might later want to step through various
- compile-time and dynamic tuning options.
-
- For convenience, an include file for code using this malloc is at:
- ftp://gee.cs.oswego.edu/pub/misc/malloc-2.8.4.h
- You don't really need this .h file unless you call functions not
- defined in your system include files. The .h file contains only the
- excerpts from this file needed for using this malloc on ANSI C/C++
- systems, so long as you haven't changed compile-time options about
- naming and tuning parameters. If you do, then you can create your
- own malloc.h that does include all settings by cutting at the point
- indicated below. Note that you may already by default be using a C
- library containing a malloc that is based on some version of this
- malloc (for example in linux). You might still want to use the one
- in this file to customize settings or to avoid overheads associated
- with library versions.
-
-* Vital statistics:
-
- Supported pointer/size_t representation: 4 or 8 bytes
- size_t MUST be an unsigned type of the same width as
- pointers. (If you are using an ancient system that declares
- size_t as a signed type, or need it to be a different width
- than pointers, you can use a previous release of this malloc
- (e.g. 2.7.2) supporting these.)
-
- Alignment: 8 bytes (default)
- This suffices for nearly all current machines and C compilers.
- However, you can define MALLOC_ALIGNMENT to be wider than this
- if necessary (up to 128bytes), at the expense of using more space.
-
- Minimum overhead per allocated chunk: 4 or 8 bytes (if 4byte sizes)
- 8 or 16 bytes (if 8byte sizes)
- Each malloced chunk has a hidden word of overhead holding size
- and status information, and additional cross-check word
- if FOOTERS is defined.
-
- Minimum allocated size: 4-byte ptrs: 16 bytes (including overhead)
- 8-byte ptrs: 32 bytes (including overhead)
-
- Even a request for zero bytes (i.e., malloc(0)) returns a
- pointer to something of the minimum allocatable size.
- The maximum overhead wastage (i.e., number of bytes allocated
- beyond those requested in malloc) is less than or equal
- to the minimum size, except for requests >= mmap_threshold that
- are serviced via mmap(), where the worst case wastage is about
- 32 bytes plus the remainder from a system page (the minimal
- mmap unit); typically 4096 or 8192 bytes.
-
- Security: static-safe; optionally more or less
- The "security" of malloc refers to the ability of malicious
- code to accentuate the effects of errors (for example, freeing
- space that is not currently malloc'ed or overwriting past the
- ends of chunks) in code that calls malloc. This malloc
- guarantees not to modify any memory locations below the base of
- heap, i.e., static variables, even in the presence of usage
- errors. The routines additionally detect most improper frees
- and reallocs. All this holds as long as the static bookkeeping
- for malloc itself is not corrupted by some other means. This
- is only one aspect of security -- these checks do not, and
- cannot, detect all possible programming errors.
-
- If FOOTERS is defined nonzero, then each allocated chunk
- carries an additional check word to verify that it was malloced
- from its space. These check words are the same within each
- execution of a program using malloc, but differ across
- executions, so externally crafted fake chunks cannot be
- freed. This improves security by rejecting frees/reallocs that
- could corrupt heap memory, in addition to the checks preventing
- writes to statics that are always on. This may further improve
- security at the expense of time and space overhead. (Note that
- FOOTERS may also be worth using with MSPACES.)
-
- By default detected errors cause the program to abort (calling
- "abort()"). You can override this to instead proceed past
- errors by defining PROCEED_ON_ERROR. In this case, a bad free
- has no effect, and a malloc that encounters a bad address
- caused by user overwrites will ignore the bad address by
- dropping pointers and indices to all known memory. This may
- be appropriate for programs that should continue if at all
- possible in the face of programming errors, although they may
- run out of memory because dropped memory is never reclaimed.
-
- If you don't like either of these options, you can define
- CORRUPTION_ERROR_ACTION and USAGE_ERROR_ACTION to do anything
- else. And if you are sure that your program using malloc has
- no errors or vulnerabilities, you can define INSECURE to 1,
- which might (or might not) provide a small performance improvement.
-
- Thread-safety: NOT thread-safe unless USE_LOCKS defined
- When USE_LOCKS is defined, each public call to malloc, free,
- etc is surrounded with either a pthread mutex or a win32
- spinlock (depending on WIN32). This is not especially fast, and
- can be a major bottleneck. It is designed only to provide
- minimal protection in concurrent environments, and to provide a
- basis for extensions. If you are using malloc in a concurrent
- program, consider instead using nedmalloc
- (http://www.nedprod.com/programs/portable/nedmalloc/) or
- ptmalloc (See http://www.malloc.de), which are derived
- from versions of this malloc.
-
- System requirements: Any combination of MORECORE and/or MMAP/MUNMAP
- This malloc can use unix sbrk or any emulation (invoked using
- the CALL_MORECORE macro) and/or mmap/munmap or any emulation
- (invoked using CALL_MMAP/CALL_MUNMAP) to get and release system
- memory. On most unix systems, it tends to work best if both
- MORECORE and MMAP are enabled. On Win32, it uses emulations
- based on VirtualAlloc. It also uses common C library functions
- like memset.
-
- Compliance: I believe it is compliant with the Single Unix Specification
- (See http://www.unix.org). Also SVID/XPG, ANSI C, and probably
- others as well.
-
-* Overview of algorithms
-
- This is not the fastest, most space-conserving, most portable, or
- most tunable malloc ever written. However it is among the fastest
- while also being among the most space-conserving, portable and
- tunable. Consistent balance across these factors results in a good
- general-purpose allocator for malloc-intensive programs.
-
- In most ways, this malloc is a best-fit allocator. Generally, it
- chooses the best-fitting existing chunk for a request, with ties
- broken in approximately least-recently-used order. (This strategy
- normally maintains low fragmentation.) However, for requests less
- than 256 bytes, it deviates from best-fit when there is not an
- exactly fitting available chunk by preferring to use space adjacent
- to that used for the previous small request, as well as by breaking
- ties in approximately most-recently-used order. (These enhance
- locality of series of small allocations.) And for very large requests
- (>= 256Kb by default), it relies on system memory mapping
- facilities, if supported. (This helps avoid carrying around and
- possibly fragmenting memory used only for large chunks.)
-
- All operations (except malloc_stats and mallinfo) have execution
- times that are bounded by a constant factor of the number of bits in
- a size_t, not counting any clearing in calloc or copying in realloc,
- or actions surrounding MORECORE and MMAP that have times
- proportional to the number of non-contiguous regions returned by
- system allocation routines, which is often just 1. In real-time
- applications, you can optionally suppress segment traversals using
- NO_SEGMENT_TRAVERSAL, which assures bounded execution even when
- system allocators return non-contiguous spaces, at the typical
- expense of carrying around more memory and increased fragmentation.
-
- The implementation is not very modular and seriously overuses
- macros. Perhaps someday all C compilers will do as good a job
- inlining modular code as can now be done by brute-force expansion,
- but now, enough of them seem not to.
-
- Some compilers issue a lot of warnings about code that is
- dead/unreachable only on some platforms, and also about intentional
- uses of negation on unsigned types. All known cases of each can be
- ignored.
-
- For a longer but out of date high-level description, see
- http://gee.cs.oswego.edu/dl/html/malloc.html
-
-* MSPACES
- If MSPACES is defined, then in addition to malloc, free, etc.,
- this file also defines mspace_malloc, mspace_free, etc. These
- are versions of malloc routines that take an "mspace" argument
- obtained using create_mspace, to control all internal bookkeeping.
- If ONLY_MSPACES is defined, only these versions are compiled.
- So if you would like to use this allocator for only some allocations,
- and your system malloc for others, you can compile with
- ONLY_MSPACES and then do something like...
- static mspace mymspace = create_mspace(0,0); // for example
- #define mymalloc(bytes) mspace_malloc(mymspace, bytes)
-
- (Note: If you only need one instance of an mspace, you can instead
- use "USE_DL_PREFIX" to relabel the global malloc.)
-
- You can similarly create thread-local allocators by storing
- mspaces as thread-locals. For example:
- static __thread mspace tlms = 0;
- void* tlmalloc(size_t bytes) {
- if (tlms == 0) tlms = create_mspace(0, 0);
- return mspace_malloc(tlms, bytes);
- }
- void tlfree(void* mem) { mspace_free(tlms, mem); }
-
- Unless FOOTERS is defined, each mspace is completely independent.
- You cannot allocate from one and free to another (although
- conformance is only weakly checked, so usage errors are not always
- caught). If FOOTERS is defined, then each chunk carries around a tag
- indicating its originating mspace, and frees are directed to their
- originating spaces.
-
- ------------------------- Compile-time options ---------------------------
-
-Be careful in setting #define values for numerical constants of type
-size_t. On some systems, literal values are not automatically extended
-to size_t precision unless they are explicitly cast. You can also
-use the symbolic values MAX_SIZE_T, SIZE_T_ONE, etc below.
-
-WIN32 default: defined if _WIN32 defined
- Defining WIN32 sets up defaults for MS environment and compilers.
- Otherwise defaults are for unix. Beware that there seem to be some
- cases where this malloc might not be a pure drop-in replacement for
- Win32 malloc: Random-looking failures from Win32 GDI APIs (e.g.,
- SetDIBits()) may be due to bugs in some video driver implementations
- when pixel buffers are malloc()ed, and the region spans more than
- one VirtualAlloc()ed region. Because dlmalloc uses a small (64Kb)
- default granularity, pixel buffers may straddle virtual allocation
- regions more often than when using the Microsoft allocator. You can
- avoid this by using VirtualAlloc() and VirtualFree() for all pixel
- buffers rather than using malloc(). If this is not possible,
- recompile this malloc with a larger DEFAULT_GRANULARITY.
-
-MALLOC_ALIGNMENT default: (size_t)8
- Controls the minimum alignment for malloc'ed chunks. It must be a
- power of two and at least 8, even on machines for which smaller
- alignments would suffice. It may be defined as larger than this
- though. Note however that code and data structures are optimized for
- the case of 8-byte alignment.
-
-MSPACES default: 0 (false)
- If true, compile in support for independent allocation spaces.
- This is only supported if HAVE_MMAP is true.
-
-ONLY_MSPACES default: 0 (false)
- If true, only compile in mspace versions, not regular versions.
-
-USE_LOCKS default: 0 (false)
- Causes each call to each public routine to be surrounded with
- pthread or WIN32 mutex lock/unlock. (If set true, this can be
- overridden on a per-mspace basis for mspace versions.) If set to a
- non-zero value other than 1, locks are used, but their
- implementation is left out, so lock functions must be supplied manually,
- as described below.
-
-USE_SPIN_LOCKS default: 1 iff USE_LOCKS and on x86 using gcc or MSC
- If true, uses custom spin locks for locking. This is currently
- supported only for x86 platforms using gcc or recent MS compilers.
- Otherwise, posix locks or win32 critical sections are used.
-
-FOOTERS default: 0
- If true, provide extra checking and dispatching by placing
- information in the footers of allocated chunks. This adds
- space and time overhead.
-
-INSECURE default: 0
- If true, omit checks for usage errors and heap space overwrites.
-
-USE_DL_PREFIX default: NOT defined
- Causes compiler to prefix all public routines with the string 'dl'.
- This can be useful when you only want to use this malloc in one part
- of a program, using your regular system malloc elsewhere.
-
-ABORT default: defined as abort()
- Defines how to abort on failed checks. On most systems, a failed
- check cannot die with an "assert" or even print an informative
- message, because the underlying print routines in turn call malloc,
- which will fail again. Generally, the best policy is to simply call
- abort(). It's not very useful to do more than this because many
- errors due to overwriting will show up as address faults (null, odd
- addresses etc) rather than malloc-triggered checks, so will also
- abort. Also, most compilers know that abort() does not return, so
- can better optimize code conditionally calling it.
-
-PROCEED_ON_ERROR default: defined as 0 (false)
- Controls whether detected bad addresses cause them to be bypassed
- rather than aborting. If set, detected bad arguments to free and
- realloc are ignored. And all bookkeeping information is zeroed out
- upon a detected overwrite of freed heap space, thus losing the
- ability to ever return it from malloc again, but enabling the
- application to proceed. If PROCEED_ON_ERROR is defined, the
- static variable malloc_corruption_error_count is compiled in
- and can be examined to see if errors have occurred. This option
- generates slower code than the default abort policy.
-
-DEBUG default: NOT defined
- The DEBUG setting is mainly intended for people trying to modify
- this code or diagnose problems when porting to new platforms.
- However, it may also be able to better isolate user errors than just
- using runtime checks. The assertions in the check routines spell
- out in more detail the assumptions and invariants underlying the
- algorithms. The checking is fairly extensive, and will slow down
- execution noticeably. Calling malloc_stats or mallinfo with DEBUG
- set will attempt to check every non-mmapped allocated and free chunk
- in the course of computing the summaries.
-
-ABORT_ON_ASSERT_FAILURE default: defined as 1 (true)
- Debugging assertion failures can be nearly impossible if your
- version of the assert macro causes malloc to be called, which will
- lead to a cascade of further failures, blowing the runtime stack.
- ABORT_ON_ASSERT_FAILURE causes assertion failures to call abort(),
- which will usually make debugging easier.
-
-MALLOC_FAILURE_ACTION default: sets errno to ENOMEM, or no-op on win32
- The action to take before "return 0" when malloc fails to be able to
- return memory because there is none available.
-
-HAVE_MORECORE default: 1 (true) unless win32 or ONLY_MSPACES
- True if this system supports sbrk or an emulation of it.
-
-MORECORE default: sbrk
- The name of the sbrk-style system routine to call to obtain more
- memory. See below for guidance on writing custom MORECORE
- functions. The type of the argument to sbrk/MORECORE varies across
- systems. It cannot be size_t, because it supports negative
- arguments, so it is normally the signed type of the same width as
- size_t (sometimes declared as "intptr_t"). It doesn't much matter
- though. Internally, we only call it with arguments less than half
- the max value of a size_t, which should work across all reasonable
- possibilities, although sometimes generating compiler warnings.
-
-MORECORE_CONTIGUOUS default: 1 (true) if HAVE_MORECORE
- If true, take advantage of fact that consecutive calls to MORECORE
- with positive arguments always return contiguous increasing
- addresses. This is true of unix sbrk. It does not hurt too much to
- set it true anyway, since malloc copes with non-contiguities.
- Setting it false when definitely non-contiguous saves time
- and possibly wasted space it would take to discover this though.
-
-MORECORE_CANNOT_TRIM default: NOT defined
- True if MORECORE cannot release space back to the system when given
- negative arguments. This is generally necessary only if you are
- using a hand-crafted MORECORE function that cannot handle negative
- arguments.
-
-NO_SEGMENT_TRAVERSAL default: 0
- If non-zero, suppresses traversals of memory segments
- returned by either MORECORE or CALL_MMAP. This disables
- merging of segments that are contiguous, and selectively
- releasing them to the OS if unused, but bounds execution times.
-
-HAVE_MMAP default: 1 (true)
- True if this system supports mmap or an emulation of it. If so, and
- HAVE_MORECORE is not true, MMAP is used for all system
- allocation. If set and HAVE_MORECORE is true as well, MMAP is
- primarily used to directly allocate very large blocks. It is also
- used as a backup strategy in cases where MORECORE fails to provide
- space from system. Note: A single call to MUNMAP is assumed to be
- able to unmap memory that may have been allocated using multiple calls
- to MMAP, so long as they are adjacent.
-
-HAVE_MREMAP default: 1 on linux, else 0
- If true realloc() uses mremap() to re-allocate large blocks and
- extend or shrink allocation spaces.
-
-MMAP_CLEARS default: 1 except on WINCE.
- True if mmap clears memory so calloc doesn't need to. This is true
- for standard unix mmap using /dev/zero and on WIN32 except for WINCE.
-
-USE_BUILTIN_FFS default: 0 (i.e., not used)
- Causes malloc to use the builtin ffs() function to compute indices.
- Some compilers may recognize and intrinsify ffs to be faster than the
- supplied C version. Also, the case of x86 using gcc is special-cased
- to an asm instruction, so is already as fast as it can be, and so
- this setting has no effect. Similarly for Win32 under recent MS compilers.
- (On most x86s, the asm version is only slightly faster than the C version.)
-
-malloc_getpagesize default: derive from system includes, or 4096.
- The system page size. To the extent possible, this malloc manages
- memory from the system in page-size units. This may be (and
- usually is) a function rather than a constant. This is ignored
- if WIN32, where page size is determined using GetSystemInfo during
- initialization. This may be several megabytes if ENABLE_LARGE_PAGES
- is enabled.
-
-ENABLE_LARGE_PAGES default: NOT defined
- Causes the system page size to be the value of GetLargePageMinimum()
- if that function is available (Windows Server 2003/Vista or later).
- This allows the use of large page entries in the MMU which can
- significantly improve performance in large working set applications
- as TLB cache load is reduced by a factor of three. Note that enabling
- this option is equal to locking the process' memory in current
- implementations of Windows and requires the SE_LOCK_MEMORY_PRIVILEGE
- to be held by the process in order to succeed.
-
-USE_DEV_RANDOM default: 0 (i.e., not used)
- Causes malloc to use /dev/random to initialize secure magic seed for
- stamping footers. Otherwise, the current time is used.
-
-NO_MALLINFO default: 0
- If defined, don't compile "mallinfo". This can be a simple way
- of dealing with mismatches between system declarations and
- those in this file.
-
-MALLINFO_FIELD_TYPE default: size_t
- The type of the fields in the mallinfo struct. This was originally
- defined as "int" in SVID etc, but is more usefully defined as
- size_t. The value is used only if HAVE_USR_INCLUDE_MALLOC_H is not set
-
-REALLOC_ZERO_BYTES_FREES default: not defined
- This should be set if a call to realloc with zero bytes should
- be the same as a call to free. Some people think it should. Otherwise,
- since this malloc returns a unique pointer for malloc(0), so does
- realloc(p, 0).
-
-LACKS_UNISTD_H, LACKS_FCNTL_H, LACKS_SYS_PARAM_H, LACKS_SYS_MMAN_H
-LACKS_STRINGS_H, LACKS_STRING_H, LACKS_SYS_TYPES_H, LACKS_ERRNO_H
-LACKS_STDLIB_H default: NOT defined unless on WIN32
- Define these if your system does not have these header files.
- You might need to manually insert some of the declarations they provide.
-
-DEFAULT_GRANULARITY default: page size if MORECORE_CONTIGUOUS,
- system_info.dwAllocationGranularity in WIN32,
- GetLargePageMinimum() if ENABLE_LARGE_PAGES,
- otherwise 64K.
- Also settable using mallopt(M_GRANULARITY, x)
- The unit for allocating and deallocating memory from the system. On
- most systems with contiguous MORECORE, there is no reason to
- make this more than a page. However, systems with MMAP tend to
- either require or encourage larger granularities. You can increase
- this value to prevent system allocation functions from being called so
- often, especially if they are slow. The value must be at least one
- page and must be a power of two. Setting to 0 causes initialization
- to either page size or win32 region size. (Note: In previous
- versions of malloc, the equivalent of this option was called
- "TOP_PAD")
-
-DEFAULT_GRANULARITY_ALIGNED default: undefined (which means page size)
- Whether to enforce alignment when allocating and deallocating memory
- from the system i.e. the base address of all allocations will be
- aligned to DEFAULT_GRANULARITY if it is set. Note that enabling this carries
- some overhead as multiple calls must now be made when probing for a valid
- aligned value, however it does greatly ease the checking for whether
- a given memory pointer was allocated by this allocator rather than
- some other.
-
-DEFAULT_TRIM_THRESHOLD default: 2MB
- Also settable using mallopt(M_TRIM_THRESHOLD, x)
- The maximum amount of unused top-most memory to keep before
- releasing via malloc_trim in free(). Automatic trimming is mainly
- useful in long-lived programs using contiguous MORECORE. Because
- trimming via sbrk can be slow on some systems, and can sometimes be
- wasteful (in cases where programs immediately afterward allocate
- more large chunks) the value should be high enough so that your
- overall system performance would improve by releasing this much
- memory. As a rough guide, you might set to a value close to the
- average size of a process (program) running on your system.
- Releasing this much memory would allow such a process to run in
- memory. Generally, it is worth tuning trim thresholds when a
- program undergoes phases where several large chunks are allocated
- and released in ways that can reuse each other's storage, perhaps
- mixed with phases where there are no such chunks at all. The trim
- value must be greater than page size to have any useful effect. To
- disable trimming completely, you can set to MAX_SIZE_T. Note that the trick
- some people use of mallocing a huge space and then freeing it at
- program startup, in an attempt to reserve system memory, doesn't
- have the intended effect under automatic trimming, since that memory
- will immediately be returned to the system.
-
-DEFAULT_MMAP_THRESHOLD default: 256K
- Also settable using mallopt(M_MMAP_THRESHOLD, x)
- The request size threshold for using MMAP to directly service a
- request. Requests of at least this size that cannot be allocated
- using already-existing space will be serviced via mmap. (If enough
- normal freed space already exists it is used instead.) Using mmap
- segregates relatively large chunks of memory so that they can be
- individually obtained and released from the host system. A request
- serviced through mmap is never reused by any other request (at least
- not directly; the system may just so happen to remap successive
- requests to the same locations). Segregating space in this way has
- the benefits that: Mmapped space can always be individually released
- back to the system, which helps keep the system level memory demands
- of a long-lived program low. Also, mapped memory doesn't become
- `locked' between other chunks, as can happen with normally allocated
- chunks, which means that even trimming via malloc_trim would not
- release them. However, it has the disadvantage that the space
- cannot be reclaimed, consolidated, and then used to service later
- requests, as happens with normal chunks. The advantages of mmap
- nearly always outweigh disadvantages for "large" chunks, but the
- value of "large" may vary across systems. The default is an
- empirically derived value that works well in most systems. You can
- disable mmap by setting to MAX_SIZE_T.
-
-MAX_RELEASE_CHECK_RATE default: 4095 unless not HAVE_MMAP
- The number of consolidated frees between checks to release
- unused segments when freeing. When using non-contiguous segments,
- especially with multiple mspaces, checking only for topmost space
- doesn't always suffice to trigger trimming. To compensate for this,
- free() will, with a period of MAX_RELEASE_CHECK_RATE (or the
- current number of segments, if greater) try to release unused
- segments to the OS when freeing chunks that result in
- consolidation. The best value for this parameter is a compromise
- between slowing down frees with relatively costly checks that
- rarely trigger versus holding on to unused memory. To effectively
- disable, set to MAX_SIZE_T. This may lead to a very slight speed
- improvement at the expense of carrying around more memory.
-*/
-
-/* Version identifier to allow people to support multiple versions */
-#ifndef DLMALLOC_VERSION
-#define DLMALLOC_VERSION 20804
-#endif /* DLMALLOC_VERSION */
-
-#ifndef WIN32
-#ifdef _WIN32
-#define WIN32 1
-#endif /* _WIN32 */
-#ifdef _WIN32_WCE
-#define LACKS_FCNTL_H
-#define WIN32 1
-#endif /* _WIN32_WCE */
-#endif /* WIN32 */
-#ifdef WIN32
-#define WIN32_LEAN_AND_MEAN
-#include <windows.h>
-#include <tchar.h>
-#define HAVE_MMAP 1
-#define HAVE_MORECORE 0
-#define LACKS_UNISTD_H
-#define LACKS_SYS_PARAM_H
-#define LACKS_SYS_MMAN_H
-#define LACKS_STRING_H
-#define LACKS_STRINGS_H
-#define LACKS_SYS_TYPES_H
-#define LACKS_ERRNO_H
-#ifndef MALLOC_FAILURE_ACTION
-#define MALLOC_FAILURE_ACTION
-#endif /* MALLOC_FAILURE_ACTION */
-#ifdef _WIN32_WCE /* WINCE reportedly does not clear */
-#define MMAP_CLEARS 0
-#else
-#define MMAP_CLEARS 1
-#endif /* _WIN32_WCE */
-#endif /* WIN32 */
-
-#if defined(DARWIN) || defined(_DARWIN)
-/* Mac OSX docs advise not to use sbrk; it seems better to use mmap */
-#ifndef HAVE_MORECORE
-#define HAVE_MORECORE 0
-#define HAVE_MMAP 1
-/* OSX allocators provide 16 byte alignment */
-#ifndef MALLOC_ALIGNMENT
-#define MALLOC_ALIGNMENT ((size_t)16U)
-#endif
-#endif /* HAVE_MORECORE */
-#endif /* DARWIN */
-
-#ifndef LACKS_SYS_TYPES_H
-#include <sys/types.h> /* For size_t */
-#endif /* LACKS_SYS_TYPES_H */
-
-#if (defined(__GNUC__) && ((defined(__i386__) || defined(__x86_64__)))) || (defined(_MSC_VER) && _MSC_VER>=1310)
-#define SPIN_LOCKS_AVAILABLE 1
-#else
-#define SPIN_LOCKS_AVAILABLE 0
-#endif
-
-/* The maximum possible size_t value has all bits set */
-#define MAX_SIZE_T (~(size_t)0)
-
-#ifndef ONLY_MSPACES
-#define ONLY_MSPACES 0 /* define to a value */
-#else
-#define ONLY_MSPACES 1
-#endif /* ONLY_MSPACES */
-#ifndef MSPACES
-#if ONLY_MSPACES
-#define MSPACES 1
-#else /* ONLY_MSPACES */
-#define MSPACES 0
-#endif /* ONLY_MSPACES */
-#endif /* MSPACES */
-#ifndef MALLOC_ALIGNMENT
-#define MALLOC_ALIGNMENT ((size_t)8U)
-#endif /* MALLOC_ALIGNMENT */
-#ifndef FOOTERS
-#define FOOTERS 0
-#endif /* FOOTERS */
-#ifndef ABORT
-#define ABORT abort()
-#endif /* ABORT */
-#ifndef ABORT_ON_ASSERT_FAILURE
-#define ABORT_ON_ASSERT_FAILURE 1
-#endif /* ABORT_ON_ASSERT_FAILURE */
-#ifndef PROCEED_ON_ERROR
-#define PROCEED_ON_ERROR 0
-#endif /* PROCEED_ON_ERROR */
-#ifndef USE_LOCKS
-#define USE_LOCKS 0
-#endif /* USE_LOCKS */
-#ifndef USE_SPIN_LOCKS
-#if USE_LOCKS && SPIN_LOCKS_AVAILABLE
-#define USE_SPIN_LOCKS 1
-#else
-#define USE_SPIN_LOCKS 0
-#endif /* USE_LOCKS && SPIN_LOCKS_AVAILABLE. */
-#endif /* USE_SPIN_LOCKS */
-#ifndef INSECURE
-#define INSECURE 0
-#endif /* INSECURE */
-#ifndef HAVE_MMAP
-#define HAVE_MMAP 1
-#endif /* HAVE_MMAP */
-#ifndef MMAP_CLEARS
-#define MMAP_CLEARS 1
-#endif /* MMAP_CLEARS */
-#ifndef HAVE_MREMAP
-#ifdef linux
-#define HAVE_MREMAP 1
-#else /* linux */
-#define HAVE_MREMAP 0
-#endif /* linux */
-#endif /* HAVE_MREMAP */
-#ifndef MALLOC_FAILURE_ACTION
-#define MALLOC_FAILURE_ACTION errno = ENOMEM;
-#endif /* MALLOC_FAILURE_ACTION */
-#ifndef HAVE_MORECORE
-#if ONLY_MSPACES
-#define HAVE_MORECORE 0
-#else /* ONLY_MSPACES */
-#define HAVE_MORECORE 1
-#endif /* ONLY_MSPACES */
-#endif /* HAVE_MORECORE */
-#if !HAVE_MORECORE
-#define MORECORE_CONTIGUOUS 0
-#else /* !HAVE_MORECORE */
-#define MORECORE_DEFAULT sbrk
-#ifndef MORECORE_CONTIGUOUS
-#define MORECORE_CONTIGUOUS 1
-#endif /* MORECORE_CONTIGUOUS */
-#endif /* HAVE_MORECORE */
-#ifndef DEFAULT_GRANULARITY
-#if (MORECORE_CONTIGUOUS || defined(WIN32))
-#define DEFAULT_GRANULARITY (0) /* 0 means to compute in init_mparams */
-#else /* MORECORE_CONTIGUOUS */
-#define DEFAULT_GRANULARITY ((size_t)64U * (size_t)1024U)
-#endif /* MORECORE_CONTIGUOUS */
-#endif /* DEFAULT_GRANULARITY */
-#ifndef DEFAULT_TRIM_THRESHOLD
-#ifndef MORECORE_CANNOT_TRIM
-#define DEFAULT_TRIM_THRESHOLD ((size_t)2U * (size_t)1024U * (size_t)1024U)
-#else /* MORECORE_CANNOT_TRIM */
-#define DEFAULT_TRIM_THRESHOLD MAX_SIZE_T
-#endif /* MORECORE_CANNOT_TRIM */
-#endif /* DEFAULT_TRIM_THRESHOLD */
-#ifndef DEFAULT_MMAP_THRESHOLD
-#if HAVE_MMAP
-#define DEFAULT_MMAP_THRESHOLD ((size_t)256U * (size_t)1024U)
-#else /* HAVE_MMAP */
-#define DEFAULT_MMAP_THRESHOLD MAX_SIZE_T
-#endif /* HAVE_MMAP */
-#endif /* DEFAULT_MMAP_THRESHOLD */
-#ifndef MAX_RELEASE_CHECK_RATE
-#if HAVE_MMAP
-#define MAX_RELEASE_CHECK_RATE 4095
-#else
-#define MAX_RELEASE_CHECK_RATE MAX_SIZE_T
-#endif /* HAVE_MMAP */
-#endif /* MAX_RELEASE_CHECK_RATE */
-#ifndef USE_BUILTIN_FFS
-#define USE_BUILTIN_FFS 0
-#endif /* USE_BUILTIN_FFS */
-#ifndef USE_DEV_RANDOM
-#define USE_DEV_RANDOM 0
-#endif /* USE_DEV_RANDOM */
-#ifndef NO_MALLINFO
-#define NO_MALLINFO 0
-#endif /* NO_MALLINFO */
-#ifndef MALLINFO_FIELD_TYPE
-#define MALLINFO_FIELD_TYPE size_t
-#endif /* MALLINFO_FIELD_TYPE */
-#ifndef NO_SEGMENT_TRAVERSAL
-#define NO_SEGMENT_TRAVERSAL 0
-#endif /* NO_SEGMENT_TRAVERSAL */
-
-/*
- mallopt tuning options. SVID/XPG defines four standard parameter
- numbers for mallopt, normally defined in malloc.h. None of these
- are used in this malloc, so setting them has no effect. But this
- malloc does support the following options.
-*/
-
-#define M_TRIM_THRESHOLD (-1)
-#define M_GRANULARITY (-2)
-#define M_MMAP_THRESHOLD (-3)
-
-/* ------------------------ Mallinfo declarations ------------------------ */
-
-#if !NO_MALLINFO
-/*
- This version of malloc supports the standard SVID/XPG mallinfo
- routine that returns a struct containing usage properties and
- statistics. It should work on any system that has a
- /usr/include/malloc.h defining struct mallinfo. The main
- declaration needed is the mallinfo struct that is returned (by-copy)
- by mallinfo(). The mallinfo struct contains a bunch of fields that
- are not even meaningful in this version of malloc. These fields
- are instead filled by mallinfo() with other numbers that might be of
- interest.
-
- HAVE_USR_INCLUDE_MALLOC_H should be set if you have a
- /usr/include/malloc.h file that includes a declaration of struct
- mallinfo. If so, it is included; else a compliant version is
- declared below. These must be precisely the same for mallinfo() to
- work. The original SVID version of this struct, defined on most
- systems with mallinfo, declares all fields as ints. But some others
- define as unsigned long. If your system defines the fields using a
- type of different width than listed here, you MUST #include your
- system version and #define HAVE_USR_INCLUDE_MALLOC_H.
-*/
-
-/* #define HAVE_USR_INCLUDE_MALLOC_H */
-
-#ifdef HAVE_USR_INCLUDE_MALLOC_H
-#include "/usr/include/malloc.h"
-#else /* HAVE_USR_INCLUDE_MALLOC_H */
-#ifndef STRUCT_MALLINFO_DECLARED
-#define STRUCT_MALLINFO_DECLARED 1
-struct mallinfo {
- MALLINFO_FIELD_TYPE arena; /* non-mmapped space allocated from system */
- MALLINFO_FIELD_TYPE ordblks; /* number of free chunks */
- MALLINFO_FIELD_TYPE smblks; /* always 0 */
- MALLINFO_FIELD_TYPE hblks; /* always 0 */
- MALLINFO_FIELD_TYPE hblkhd; /* space in mmapped regions */
- MALLINFO_FIELD_TYPE usmblks; /* maximum total allocated space */
- MALLINFO_FIELD_TYPE fsmblks; /* always 0 */
- MALLINFO_FIELD_TYPE uordblks; /* total allocated space */
- MALLINFO_FIELD_TYPE fordblks; /* total free space */
- MALLINFO_FIELD_TYPE keepcost; /* releasable (via malloc_trim) space */
-};
-#endif /* STRUCT_MALLINFO_DECLARED */
-#endif /* HAVE_USR_INCLUDE_MALLOC_H */
-#endif /* NO_MALLINFO */
-
-/*
- Try to persuade compilers to inline. The most critical functions for
- inlining are defined as macros, so these aren't used for them.
-*/
-
-#ifndef FORCEINLINE
- #if defined(__GNUC__)
-#define FORCEINLINE __inline __attribute__ ((always_inline))
- #elif defined(_MSC_VER)
- #define FORCEINLINE __forceinline
- #endif
-#endif
-#ifndef NOINLINE
- #if defined(__GNUC__)
- #define NOINLINE __attribute__ ((noinline))
- #elif defined(_MSC_VER)
- #define NOINLINE __declspec(noinline)
- #else
- #define NOINLINE
- #endif
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#ifndef FORCEINLINE
- #define FORCEINLINE inline
-#endif
-#endif /* __cplusplus */
-#ifndef FORCEINLINE
- #define FORCEINLINE
-#endif
-
-#if !ONLY_MSPACES
-
-/* ------------------- Declarations of public routines ------------------- */
-
-#ifndef USE_DL_PREFIX
-#define dlcalloc calloc
-#define dlfree free
-#define dlmalloc malloc
-#define dlmemalign memalign
-#define dlrealloc realloc
-#define dlvalloc valloc
-#define dlpvalloc pvalloc
-#define dlmallinfo mallinfo
-#define dlmallopt mallopt
-#define dlmalloc_trim malloc_trim
-#define dlmalloc_stats malloc_stats
-#define dlmalloc_usable_size malloc_usable_size
-#define dlmalloc_footprint malloc_footprint
-#define dlmalloc_max_footprint malloc_max_footprint
-#define dlindependent_calloc independent_calloc
-#define dlindependent_comalloc independent_comalloc
-#endif /* USE_DL_PREFIX */
-
-
-/*
- malloc(size_t n)
- Returns a pointer to a newly allocated chunk of at least n bytes, or
- null if no space is available, in which case errno is set to ENOMEM
- on ANSI C systems.
-
- If n is zero, malloc returns a minimum-sized chunk. (The minimum
- size is 16 bytes on most 32bit systems, and 32 bytes on 64bit
- systems.) Note that size_t is an unsigned type, so calls with
- arguments that would be negative if signed are interpreted as
- requests for huge amounts of space, which will often fail. The
- maximum supported value of n differs across systems, but is in all
- cases less than the maximum representable value of a size_t.
-*/
-void* dlmalloc(size_t);
-
-/*
- free(void* p)
- Releases the chunk of memory pointed to by p, that had been previously
- allocated using malloc or a related routine such as realloc.
- It has no effect if p is null. If p was not malloced or already
- freed, free(p) will by default cause the current program to abort.
-*/
-void dlfree(void*);
-
-/*
- calloc(size_t n_elements, size_t element_size);
- Returns a pointer to n_elements * element_size bytes, with all locations
- set to zero.
-*/
-void* dlcalloc(size_t, size_t);
-
-/*
- realloc(void* p, size_t n)
- Returns a pointer to a chunk of size n that contains the same data
- as does chunk p up to the minimum of (n, p's size) bytes, or null
- if no space is available.
-
- The returned pointer may or may not be the same as p. The algorithm
- prefers extending p in most cases when possible, otherwise it
- employs the equivalent of a malloc-copy-free sequence.
-
- If p is null, realloc is equivalent to malloc.
-
- If space is not available, realloc returns null, errno is set (if on
- ANSI) and p is NOT freed.
-
- if n is for fewer bytes than already held by p, the newly unused
- space is lopped off and freed if possible. realloc with a size
- argument of zero (re)allocates a minimum-sized chunk.
-
- The old unix realloc convention of allowing the last-free'd chunk
- to be used as an argument to realloc is not supported.
-*/
-
-void* dlrealloc(void*, size_t);
-
-/*
- memalign(size_t alignment, size_t n);
- Returns a pointer to a newly allocated chunk of n bytes, aligned
- in accord with the alignment argument.
-
- The alignment argument should be a power of two. If the argument is
- not a power of two, the nearest greater power is used.
- 8-byte alignment is guaranteed by normal malloc calls, so don't
- bother calling memalign with an argument of 8 or less.
-
- Overreliance on memalign is a sure way to fragment space.
-*/
-void* dlmemalign(size_t, size_t);
-
-/*
- valloc(size_t n);
- Equivalent to memalign(pagesize, n), where pagesize is the page
- size of the system. If the pagesize is unknown, 4096 is used.
-*/
-void* dlvalloc(size_t);
-
-/*
- mallopt(int parameter_number, int parameter_value)
- Sets tunable parameters. The format is to provide a
- (parameter-number, parameter-value) pair. mallopt then sets the
- corresponding parameter to the argument value if it can (i.e., so
- long as the value is meaningful), and returns 1 if successful else
- 0. To workaround the fact that mallopt is specified to use int,
- not size_t parameters, the value -1 is specially treated as the
- maximum unsigned size_t value.
-
- SVID/XPG/ANSI defines four standard param numbers for mallopt,
- normally defined in malloc.h. None of these are used in this malloc,
- so setting them has no effect. But this malloc also supports other
- options in mallopt. See below for details. Briefly, supported
- parameters are as follows (listed defaults are for "typical"
- configurations).
-
- Symbol param # default allowed param values
- M_TRIM_THRESHOLD -1 2*1024*1024 any (-1 disables)
- M_GRANULARITY -2 page size any power of 2 >= page size
- M_MMAP_THRESHOLD -3 256*1024 any (or 0 if no MMAP support)
-*/
-int dlmallopt(int, int);
-
-/*
- malloc_footprint();
- Returns the number of bytes obtained from the system. The total
- number of bytes allocated by malloc, realloc etc., is less than this
- value. Unlike mallinfo, this function returns only a precomputed
- result, so can be called frequently to monitor memory consumption.
- Even if locks are otherwise defined, this function does not use them,
- so results might not be up to date.
-*/
-size_t dlmalloc_footprint(void);
-
-/*
- malloc_max_footprint();
- Returns the maximum number of bytes obtained from the system. This
- value will be greater than current footprint if deallocated space
- has been reclaimed by the system. The peak number of bytes allocated
- by malloc, realloc etc., is less than this value. Unlike mallinfo,
- this function returns only a precomputed result, so can be called
- frequently to monitor memory consumption. Even if locks are
- otherwise defined, this function does not use them, so results might
- not be up to date.
-*/
-size_t dlmalloc_max_footprint(void);
-
-#if !NO_MALLINFO
-/*
- mallinfo()
- Returns (by copy) a struct containing various summary statistics:
-
- arena: current total non-mmapped bytes allocated from system
- ordblks: the number of free chunks
- smblks: always zero.
- hblks: current number of mmapped regions
- hblkhd: total bytes held in mmapped regions
- usmblks: the maximum total allocated space. This will be greater
- than current total if trimming has occurred.
- fsmblks: always zero
- uordblks: current total allocated space (normal or mmapped)
- fordblks: total free space
- keepcost: the maximum number of bytes that could ideally be released
- back to system via malloc_trim. ("ideally" means that
- it ignores page restrictions etc.)
-
- Because these fields are ints, but internal bookkeeping may
- be kept as longs, the reported values may wrap around zero and
- thus be inaccurate.
-*/
-struct mallinfo dlmallinfo(void);
-#endif /* NO_MALLINFO */
-
-/*
- independent_calloc(size_t n_elements, size_t element_size, void* chunks[]);
-
- independent_calloc is similar to calloc, but instead of returning a
- single cleared space, it returns an array of pointers to n_elements
- independent elements that can hold contents of size elem_size, each
- of which starts out cleared, and can be independently freed,
- realloc'ed etc. The elements are guaranteed to be adjacently
- allocated (this is not guaranteed to occur with multiple callocs or
- mallocs), which may also improve cache locality in some
- applications.
-
- The "chunks" argument is optional (i.e., may be null, which is
- probably the most typical usage). If it is null, the returned array
- is itself dynamically allocated and should also be freed when it is
- no longer needed. Otherwise, the chunks array must be of at least
- n_elements in length. It is filled in with the pointers to the
- chunks.
-
- In either case, independent_calloc returns this pointer array, or
- null if the allocation failed. If n_elements is zero and "chunks"
- is null, it returns a chunk representing an array with zero elements
- (which should be freed if not wanted).
-
- Each element must be individually freed when it is no longer
- needed. If you'd like to instead be able to free all at once, you
- should instead use regular calloc and assign pointers into this
- space to represent elements. (In this case though, you cannot
- independently free elements.)
-
- independent_calloc simplifies and speeds up implementations of many
- kinds of pools. It may also be useful when constructing large data
- structures that initially have a fixed number of fixed-sized nodes,
- but the number is not known at compile time, and some of the nodes
- may later need to be freed. For example:
-
- struct Node { int item; struct Node* next; };
-
- struct Node* build_list() {
- struct Node** pool;
- int n = read_number_of_nodes_needed();
- if (n <= 0) return 0;
- pool = (struct Node**)(independent_calloc(n, sizeof(struct Node), 0));
- if (pool == 0) die();
- // organize into a linked list...
- struct Node* first = pool[0];
- for (i = 0; i < n-1; ++i)
- pool[i]->next = pool[i+1];
- free(pool); // Can now free the array (or not, if it is needed later)
- return first;
- }
-*/
-void** dlindependent_calloc(size_t, size_t, void**);
-
-/*
- independent_comalloc(size_t n_elements, size_t sizes[], void* chunks[]);
-
- independent_comalloc allocates, all at once, a set of n_elements
- chunks with sizes indicated in the "sizes" array. It returns
- an array of pointers to these elements, each of which can be
- independently freed, realloc'ed etc. The elements are guaranteed to
- be adjacently allocated (this is not guaranteed to occur with
- multiple callocs or mallocs), which may also improve cache locality
- in some applications.
-
- The "chunks" argument is optional (i.e., may be null). If it is null
- the returned array is itself dynamically allocated and should also
- be freed when it is no longer needed. Otherwise, the chunks array
- must be of at least n_elements in length. It is filled in with the
- pointers to the chunks.
-
- In either case, independent_comalloc returns this pointer array, or
- null if the allocation failed. If n_elements is zero and chunks is
- null, it returns a chunk representing an array with zero elements
- (which should be freed if not wanted).
-
- Each element must be individually freed when it is no longer
- needed. If you'd like to instead be able to free all at once, you
- should instead use a single regular malloc, and assign pointers at
- particular offsets in the aggregate space. (In this case though, you
- cannot independently free elements.)
-
- independent_comalloc differs from independent_calloc in that each
- element may have a different size, and also that it does not
- automatically clear elements.
-
- independent_comalloc can be used to speed up allocation in cases
- where several structs or objects must always be allocated at the
- same time. For example:
-
- struct Head { ... }
- struct Foot { ... }
-
- void send_message(char* msg) {
- int msglen = strlen(msg);
- size_t sizes[3] = { sizeof(struct Head), msglen, sizeof(struct Foot) };
- void* chunks[3];
- if (independent_comalloc(3, sizes, chunks) == 0)
- die();
- struct Head* head = (struct Head*)(chunks[0]);
- char* body = (char*)(chunks[1]);
- struct Foot* foot = (struct Foot*)(chunks[2]);
- // ...
- }
-
- In general though, independent_comalloc is worth using only for
- larger values of n_elements. For small values, you probably won't
- detect enough difference from series of malloc calls to bother.
-
- Overuse of independent_comalloc can increase overall memory usage,
- since it cannot reuse existing noncontiguous small chunks that
- might be available for some of the elements.
-*/
-void** dlindependent_comalloc(size_t, size_t*, void**);
-
-
-/*
- pvalloc(size_t n);
- Equivalent to valloc(minimum-page-that-holds(n)), that is,
- round up n to nearest pagesize.
- */
-void* dlpvalloc(size_t);
-
-/*
- malloc_trim(size_t pad);
-
- If possible, gives memory back to the system (via negative arguments
- to sbrk) if there is unused memory at the `high' end of the malloc
- pool or in unused MMAP segments. You can call this after freeing
- large blocks of memory to potentially reduce the system-level memory
- requirements of a program. However, it cannot guarantee to reduce
- memory. Under some allocation patterns, some large free blocks of
- memory will be locked between two used chunks, so they cannot be
- given back to the system.
-
- The `pad' argument to malloc_trim represents the amount of free
- trailing space to leave untrimmed. If this argument is zero, only
- the minimum amount of memory to maintain internal data structures
- will be left. Non-zero arguments can be supplied to maintain enough
- trailing space to service future expected allocations without having
- to re-obtain memory from the system.
-
- Malloc_trim returns 1 if it actually released any memory, else 0.
-*/
-int dlmalloc_trim(size_t);
-
-/*
- malloc_stats();
- Prints on stderr the amount of space obtained from the system (both
- via sbrk and mmap), the maximum amount (which may be more than
- current if malloc_trim and/or munmap got called), and the current
- number of bytes allocated via malloc (or realloc, etc) but not yet
- freed. Note that this is the number of bytes allocated, not the
- number requested. It will be larger than the number requested
- because of alignment and bookkeeping overhead. Because it includes
- alignment wastage as being in use, this figure may be greater than
- zero even when no user-level chunks are allocated.
-
- The reported current and maximum system memory can be inaccurate if
- a program makes other calls to system memory allocation functions
- (normally sbrk) outside of malloc.
-
- malloc_stats prints only the most commonly interesting statistics.
- More information can be obtained by calling mallinfo.
-*/
-void dlmalloc_stats(void);
-
-#endif /* ONLY_MSPACES */
-
-/*
- malloc_usable_size(void* p);
-
- Returns the number of bytes you can actually use in
- an allocated chunk, which may be more than you requested (although
- often not) due to alignment and minimum size constraints.
- You can use this many bytes without worrying about
- overwriting other allocated objects. This is not a particularly great
- programming practice. malloc_usable_size can be more useful in
- debugging and assertions, for example:
-
- p = malloc(n);
- assert(malloc_usable_size(p) >= 256);
-*/
-size_t dlmalloc_usable_size(void*);
-
-
-#if MSPACES
-
-/*
- mspace is an opaque type representing an independent
- region of space that supports mspace_malloc, etc.
-*/
-typedef void* mspace;
-
-/*
- create_mspace creates and returns a new independent space with the
- given initial capacity, or, if 0, the default granularity size. It
- returns null if there is no system memory available to create the
- space. If argument locked is non-zero, the space uses a separate
- lock to control access. The capacity of the space will grow
- dynamically as needed to service mspace_malloc requests. You can
- control the sizes of incremental increases of this space by
- compiling with a different DEFAULT_GRANULARITY or dynamically
- setting with mallopt(M_GRANULARITY, value).
-*/
-mspace create_mspace(size_t capacity, int locked);
-
-/*
- destroy_mspace destroys the given space, and attempts to return all
- of its memory back to the system, returning the total number of
- bytes freed. After destruction, the results of access to all memory
- used by the space become undefined.
-*/
-size_t destroy_mspace(mspace msp);
-
-/*
- create_mspace_with_base uses the memory supplied as the initial base
- of a new mspace. Part (less than 128*sizeof(size_t) bytes) of this
- space is used for bookkeeping, so the capacity must be at least this
- large. (Otherwise 0 is returned.) When this initial space is
- exhausted, additional memory will be obtained from the system.
- Destroying this space will deallocate all additionally allocated
- space (if possible) but not the initial base.
-*/
-mspace create_mspace_with_base(void* base, size_t capacity, int locked);
-
-/*
- mspace_track_large_chunks controls whether requests for large chunks
- are allocated in their own untracked mmapped regions, separate from
- others in this mspace. By default large chunks are not tracked,
- which reduces fragmentation. However, such chunks are not
- necessarily released to the system upon destroy_mspace. Enabling
- tracking by setting to true may increase fragmentation, but avoids
- leakage when relying on destroy_mspace to release all memory
- allocated using this space. The function returns the previous
- setting.
-*/
-int mspace_track_large_chunks(mspace msp, int enable);
-
-
-/*
- mspace_malloc behaves as malloc, but operates within
- the given space.
-*/
-void* mspace_malloc(mspace msp, size_t bytes);
-
-/*
- mspace_free behaves as free, but operates within
- the given space.
-
- If compiled with FOOTERS==1, mspace_free is not actually needed.
- free may be called instead of mspace_free because freed chunks from
- any space are handled by their originating spaces.
-*/
-void mspace_free(mspace msp, void* mem);
-
-/*
- mspace_realloc behaves as realloc, but operates within
- the given space.
-
- If compiled with FOOTERS==1, mspace_realloc is not actually
- needed. realloc may be called instead of mspace_realloc because
- realloced chunks from any space are handled by their originating
- spaces.
-*/
-void* mspace_realloc(mspace msp, void* mem, size_t newsize);
-
-/*
- mspace_calloc behaves as calloc, but operates within
- the given space.
-*/
-void* mspace_calloc(mspace msp, size_t n_elements, size_t elem_size);
-
-/*
- mspace_memalign behaves as memalign, but operates within
- the given space.
-*/
-void* mspace_memalign(mspace msp, size_t alignment, size_t bytes);
-
-/*
- mspace_independent_calloc behaves as independent_calloc, but
- operates within the given space.
-*/
-void** mspace_independent_calloc(mspace msp, size_t n_elements,
- size_t elem_size, void* chunks[]);
-
-/*
- mspace_independent_comalloc behaves as independent_comalloc, but
- operates within the given space.
-*/
-void** mspace_independent_comalloc(mspace msp, size_t n_elements,
- size_t sizes[], void* chunks[]);
-
-/*
- mspace_footprint() returns the number of bytes obtained from the
- system for this space.
-*/
-size_t mspace_footprint(mspace msp);
-
-/*
- mspace_max_footprint() returns the peak number of bytes obtained from the
- system for this space.
-*/
-size_t mspace_max_footprint(mspace msp);
-
-
-#if !NO_MALLINFO
-/*
- mspace_mallinfo behaves as mallinfo, but reports properties of
- the given space.
-*/
-struct mallinfo mspace_mallinfo(mspace msp);
-#endif /* NO_MALLINFO */
-
-/*
- mspace_usable_size(void* mem) behaves the same as malloc_usable_size.
-*/
- size_t mspace_usable_size(void* mem);
-
-/*
- mspace_malloc_stats behaves as malloc_stats, but reports
- properties of the given space.
-*/
-void mspace_malloc_stats(mspace msp);
-
-/*
- mspace_trim behaves as malloc_trim, but
- operates within the given space.
-*/
-int mspace_trim(mspace msp, size_t pad);
-
-/*
- An alias for mallopt.
-*/
-int mspace_mallopt(int, int);
-
-#endif /* MSPACES */
-
-#ifdef __cplusplus
-} /* end of extern "C" */
-#endif /* __cplusplus */
-
-/*
- ========================================================================
- To make a fully customizable malloc.h header file, cut everything
- above this line, put into file malloc.h, edit to suit, and #include it
- on the next line, as well as in programs that use this malloc.
- ========================================================================
-*/
-
-/* #include "malloc.h" */
-
-/*------------------------------ internal #includes ---------------------- */
-
-#ifdef WIN32
-#pragma warning( disable : 4146 ) /* no "unsigned" warnings */
-#endif /* WIN32 */
-
-#include <stdio.h> /* for printing in malloc_stats */
-
-#ifndef LACKS_ERRNO_H
-#include <errno.h> /* for MALLOC_FAILURE_ACTION */
-#endif /* LACKS_ERRNO_H */
-#if FOOTERS || DEBUG
-#include <time.h> /* for magic initialization */
-#endif /* FOOTERS */
-#ifndef LACKS_STDLIB_H
-#include <stdlib.h> /* for abort() */
-#endif /* LACKS_STDLIB_H */
-#ifdef DEBUG
-#if ABORT_ON_ASSERT_FAILURE
-#undef assert
-#define assert(x) if(!(x)) ABORT
-#else /* ABORT_ON_ASSERT_FAILURE */
-#include <assert.h>
-#endif /* ABORT_ON_ASSERT_FAILURE */
-#else /* DEBUG */
-#ifndef assert
-#define assert(x)
-#endif
-#define DEBUG 0
-#endif /* DEBUG */
-#ifndef LACKS_STRING_H
-#include <string.h> /* for memset etc */
-#endif /* LACKS_STRING_H */
-#if USE_BUILTIN_FFS
-#ifndef LACKS_STRINGS_H
-#include <strings.h> /* for ffs */
-#endif /* LACKS_STRINGS_H */
-#endif /* USE_BUILTIN_FFS */
-#if HAVE_MMAP
-#ifndef LACKS_SYS_MMAN_H
-/* On some versions of linux, mremap decl in mman.h needs __USE_GNU set */
-#if (defined(linux) && !defined(__USE_GNU))
-#define __USE_GNU 1
-#include <sys/mman.h> /* for mmap */
-#undef __USE_GNU
-#else
-#include <sys/mman.h> /* for mmap */
-#endif /* linux */
-#endif /* LACKS_SYS_MMAN_H */
-#ifndef LACKS_FCNTL_H
-#include <fcntl.h>
-#endif /* LACKS_FCNTL_H */
-#endif /* HAVE_MMAP */
-#ifndef LACKS_UNISTD_H
-#include <unistd.h> /* for sbrk, sysconf */
-#else /* LACKS_UNISTD_H */
-#if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__NetBSD__)
-extern void* sbrk(ptrdiff_t);
-#endif /* FreeBSD etc */
-#endif /* LACKS_UNISTD_H */
-
-/* Declarations for locking */
-#if USE_LOCKS
-#ifndef WIN32
-#include <pthread.h>
-#if defined (__SVR4) && defined (__sun) /* solaris */
-#include <thread.h>
-#endif /* solaris */
-#else
-#ifndef _M_AMD64
-/* These are already defined on AMD64 builds */
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-LONG __cdecl _InterlockedCompareExchange(LONG volatile *Dest, LONG Exchange, LONG Comp);
-LONG __cdecl _InterlockedExchange(LONG volatile *Target, LONG Value);
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-#endif /* _M_AMD64 */
-#pragma intrinsic (_InterlockedCompareExchange)
-#pragma intrinsic (_InterlockedExchange)
-#define interlockedcompareexchange _InterlockedCompareExchange
-#define interlockedexchange _InterlockedExchange
-#endif /* Win32 */
-#endif /* USE_LOCKS */
-
-/* Declarations for bit scanning on win32 */
-#if defined(_MSC_VER) && _MSC_VER>=1300
-#ifndef BitScanForward /* Try to avoid pulling in WinNT.h */
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-unsigned char _BitScanForward(unsigned long *index, unsigned long mask);
-unsigned char _BitScanReverse(unsigned long *index, unsigned long mask);
-#ifdef __cplusplus
-}
-#endif /* __cplusplus */
-
-#define BitScanForward _BitScanForward
-#define BitScanReverse _BitScanReverse
-#pragma intrinsic(_BitScanForward)
-#pragma intrinsic(_BitScanReverse)
-#endif /* BitScanForward */
-#endif /* defined(_MSC_VER) && _MSC_VER>=1300 */
-
-#ifndef WIN32
-#ifndef malloc_getpagesize
-# ifdef _SC_PAGESIZE /* some SVR4 systems omit an underscore */
-# ifndef _SC_PAGE_SIZE
-# define _SC_PAGE_SIZE _SC_PAGESIZE
-# endif
-# endif
-# ifdef _SC_PAGE_SIZE
-# define malloc_getpagesize sysconf(_SC_PAGE_SIZE)
-# else
-# if defined(BSD) || defined(DGUX) || defined(HAVE_GETPAGESIZE)
- extern size_t getpagesize();
-# define malloc_getpagesize getpagesize()
-# else
-# ifdef WIN32 /* use supplied emulation of getpagesize */
-# define malloc_getpagesize getpagesize()
-# else
-# ifndef LACKS_SYS_PARAM_H
-# include <sys/param.h>
-# endif
-# ifdef EXEC_PAGESIZE
-# define malloc_getpagesize EXEC_PAGESIZE
-# else
-# ifdef NBPG
-# ifndef CLSIZE
-# define malloc_getpagesize NBPG
-# else
-# define malloc_getpagesize (NBPG * CLSIZE)
-# endif
-# else
-# ifdef NBPC
-# define malloc_getpagesize NBPC
-# else
-# ifdef PAGESIZE
-# define malloc_getpagesize PAGESIZE
-# else /* just guess */
-# define malloc_getpagesize ((size_t)4096U)
-# endif
-# endif
-# endif
-# endif
-# endif
-# endif
-# endif
-#endif
-#endif
-
-
-
-/* ------------------- size_t and alignment properties -------------------- */
-
-/* The byte and bit size of a size_t */
-#define SIZE_T_SIZE (sizeof(size_t))
-#define SIZE_T_BITSIZE (sizeof(size_t) << 3)
-
-/* Some constants coerced to size_t */
-/* Annoying but necessary to avoid errors on some platforms */
-#define SIZE_T_ZERO ((size_t)0)
-#define SIZE_T_ONE ((size_t)1)
-#define SIZE_T_TWO ((size_t)2)
-#define SIZE_T_FOUR ((size_t)4)
-#define TWO_SIZE_T_SIZES (SIZE_T_SIZE<<1)
-#define FOUR_SIZE_T_SIZES (SIZE_T_SIZE<<2)
-#define SIX_SIZE_T_SIZES (FOUR_SIZE_T_SIZES+TWO_SIZE_T_SIZES)
-#define HALF_MAX_SIZE_T (MAX_SIZE_T / 2U)
-
-/* The bit mask value corresponding to MALLOC_ALIGNMENT */
-#define CHUNK_ALIGN_MASK (MALLOC_ALIGNMENT - SIZE_T_ONE)
-
-/* True if address a has acceptable alignment */
-#define is_aligned(A) (((size_t)((A)) & (CHUNK_ALIGN_MASK)) == 0)
-
-/* the number of bytes to offset an address to align it */
-#define align_offset(A)\
- ((((size_t)(A) & CHUNK_ALIGN_MASK) == 0)? 0 :\
- ((MALLOC_ALIGNMENT - ((size_t)(A) & CHUNK_ALIGN_MASK)) & CHUNK_ALIGN_MASK))
-
-/*
- malloc_params holds global properties, including those that can be
- dynamically set using mallopt. There is a single instance, mparams,
- initialized in init_mparams. Note that the non-zeroness of "magic"
- also serves as an initialization flag.
-*/
-typedef unsigned int flag_t;
-/* Global allocator properties; the single instance is the static mparams. */
-struct malloc_params {
- volatile size_t magic; /* non-zero once initialized; tested by ensure_initialization() */
- size_t page_size; /* presumably the system page size -- TODO confirm in init_mparams */
- size_t granularity; /* allocation unit; the aligned mmap helpers align mappings to it */
- size_t mmap_threshold; /* presumably the M_MMAP_THRESHOLD mallopt value -- TODO confirm */
- size_t trim_threshold; /* presumably the M_TRIM_THRESHOLD mallopt value -- TODO confirm */
- flag_t default_mflags; /* NOTE(review): looks like the initial flag bits for new mstates -- confirm */
-};
-
-static struct malloc_params mparams;
-
-/* Ensure mparams initialized */
-#define ensure_initialization() (void)(mparams.magic != 0 || init_mparams())
-
-/* -------------------------- MMAP preliminaries ------------------------- */
-
-/*
- If HAVE_MORECORE or HAVE_MMAP are false, we just define calls and
- checks to fail so compiler optimizer can delete code rather than
- using so many "#if"s.
-*/
-
-
-/* MORECORE and MMAP must return MFAIL on failure */
-#define MFAIL ((void*)(MAX_SIZE_T))
-#define CMFAIL ((char*)(MFAIL)) /* defined for convenience */
-
-#if HAVE_MMAP
-
-#ifndef WIN32
-#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
-#define MAP_ANONYMOUS MAP_ANON
-#endif /* MAP_ANON */
-#ifdef DEFAULT_GRANULARITY_ALIGNED
-#define MMAP_IMPL mmap_aligned
-static void* lastAlignedmmap; /* Used as a hint */
-/*
-  mmap wrapper that returns mappings aligned to mparams.granularity.
-
-  When start is null the previous aligned mapping's end (lastAlignedmmap)
-  is used as a placement hint; a misaligned result is unmapped and the
-  request is retried at the next granularity-aligned address above it.
-  When start is non-null the request is passed straight to mmap.
-
-  Returns the mapped address, or MAP_FAILED when mmap fails. mmap signals
-  failure with MAP_FAILED, never with a null pointer, so that is the value
-  tested here; the hint is left untouched on failure.
-
-  NOTE(review): retries at a non-null base add MAP_FIXED, which replaces
-  any mapping already present at that address -- confirm this is intended.
-*/
-static void* mmap_aligned(void *start, size_t length, int prot, int flags, int fd, off_t offset) {
-  const size_t alignmask = mparams.granularity - SIZE_T_ONE;
-  void* baseaddress = 0;
-  void* ptr = MAP_FAILED;
-  if(!start) {
-    baseaddress = lastAlignedmmap;
-    for(;;) {
-      if(baseaddress) flags|=MAP_FIXED;
-      ptr = mmap(baseaddress, length, prot, flags, fd, offset);
-      /* Give up on a real failure instead of unmapping MAP_FAILED and spinning */
-      if(ptr == MAP_FAILED)
-        return ptr;
-      if(((size_t)ptr & alignmask) == 0)
-        break;
-      /* Misaligned: drop it and aim for the next aligned address above */
-      munmap(ptr, length);
-      baseaddress = (void*)(((size_t)ptr + mparams.granularity) & ~alignmask);
-    }
-  }
-  else ptr = mmap(start, length, prot, flags, fd, offset);
-  /* Only a successful mapping may update the placement hint */
-  if(ptr != MAP_FAILED) lastAlignedmmap = (void*)((size_t) ptr + mparams.granularity);
-  return ptr;
-}
-#else
-#define MMAP_IMPL mmap
-#endif /* DEFAULT_GRANULARITY_ALIGNED */
-#define MUNMAP_DEFAULT(a, s) munmap((a), (s))
-#define MMAP_PROT (PROT_READ|PROT_WRITE)
-#ifdef MAP_ANONYMOUS
-#define MMAP_FLAGS (MAP_PRIVATE|MAP_ANONYMOUS)
-#define MMAP_DEFAULT(s) MMAP_IMPL(0, (s), MMAP_PROT, MMAP_FLAGS, -1, 0)
-#else /* MAP_ANONYMOUS */
-/*
- Nearly all versions of mmap support MAP_ANONYMOUS, so the following
- is unlikely to be needed, but is supplied just in case.
-*/
-#define MMAP_FLAGS (MAP_PRIVATE)
-static int dev_zero_fd = -1; /* Cached file descriptor for /dev/zero. */
-#define MMAP_DEFAULT(s) ((dev_zero_fd < 0) ? \
- (dev_zero_fd = open("/dev/zero", O_RDWR), \
- MMAP_IMPL(0, (s), MMAP_PROT, MMAP_FLAGS, dev_zero_fd, 0)) : \
- MMAP_IMPL(0, (s), MMAP_PROT, MMAP_FLAGS, dev_zero_fd, 0))
-#endif /* MAP_ANONYMOUS */
-
-#define DIRECT_MMAP_DEFAULT(s) MMAP_DEFAULT(s)
-
-#else /* WIN32 */
-
-/* Win32 MMAP via VirtualAlloc */
-#ifdef DEFAULT_GRANULARITY_ALIGNED
-static void* lastWin32mmap; /* Used as a hint */
-#endif /* DEFAULT_GRANULARITY_ALIGNED */
-#ifdef ENABLE_LARGE_PAGES
-static int largepagesavailable = 1;
-#endif /* ENABLE_LARGE_PAGES */
-/*
-  Obtains size bytes of committed read/write memory from VirtualAlloc.
-
-  With ENABLE_LARGE_PAGES, requests of 1MB or more first try a large-page
-  allocation; large pages are disabled for later calls once the attempt
-  fails with ERROR_PRIVILEGE_NOT_HELD.
-
-  With DEFAULT_GRANULARITY_ALIGNED, address space is first reserved at an
-  address aligned to mparams.granularity (starting from the lastWin32mmap
-  hint) and then committed in place.
-
-  Returns the block address, or MFAIL on failure.
-*/
-static FORCEINLINE void* win32mmap(size_t size) {
-  void* baseaddress = 0;
-  void* ptr = 0;
-#ifdef ENABLE_LARGE_PAGES
-  /* Note that large pages are *always* allocated on a large page boundary.
-     If however granularity is small then don't waste a kernel call if size
-     isn't around the size of a large page */
-  if(largepagesavailable && size >= 1*1024*1024) {
-    ptr = VirtualAlloc(baseaddress, size, MEM_RESERVE|MEM_COMMIT|MEM_LARGE_PAGES, PAGE_READWRITE);
-    if(!ptr && ERROR_PRIVILEGE_NOT_HELD==GetLastError()) largepagesavailable=0;
-  }
-#endif
-  if(!ptr) {
-#ifdef DEFAULT_GRANULARITY_ALIGNED
-    /* We try to avoid overhead by speculatively reserving at aligned
-       addresses until we succeed */
-    baseaddress = lastWin32mmap;
-    for(;;) {
-      void* reserveaddr = VirtualAlloc(baseaddress, size, MEM_RESERVE, PAGE_READWRITE);
-      if(!reserveaddr) {
-        /* A failure with no address hint means the system has nothing to
-           offer; stop instead of probing forever */
-        if(!baseaddress) break;
-        baseaddress = (void*)((size_t)baseaddress + mparams.granularity);
-      }
-      else if((size_t)reserveaddr & (mparams.granularity - SIZE_T_ONE)) {
-        VirtualFree(reserveaddr, 0, MEM_RELEASE);
-        baseaddress = (void*)(((size_t)reserveaddr + mparams.granularity) & ~(mparams.granularity - SIZE_T_ONE));
-      }
-      else {
-        /* Commit inside the range just reserved; without this a reservation
-           made at a system-chosen address would be leaked */
-        baseaddress = reserveaddr;
-        break;
-      }
-    }
-#endif
-    if(!ptr) ptr = VirtualAlloc(baseaddress, size, baseaddress ? MEM_COMMIT : MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE);
-#ifdef DEFAULT_GRANULARITY_ALIGNED
-    /* Commit failed: give the reserved range back */
-    if(!ptr && baseaddress) VirtualFree(baseaddress, 0, MEM_RELEASE);
-#endif
-#if DEBUG && defined(DEFAULT_GRANULARITY_ALIGNED)
-    /* lastWin32mmap only exists when DEFAULT_GRANULARITY_ALIGNED is defined */
-    if(lastWin32mmap && ptr!=lastWin32mmap) printf("Non-contiguous VirtualAlloc between %p and %p\n", ptr, lastWin32mmap);
-#endif
-#ifdef DEFAULT_GRANULARITY_ALIGNED
-    if(ptr) lastWin32mmap = (void*)((size_t) ptr + mparams.granularity);
-#endif
-  }
-#if DEBUG
-  /* size is cast because %u expects unsigned int, not size_t */
-#ifdef ENABLE_LARGE_PAGES
-  printf("VirtualAlloc returns %p size %u. LargePagesAvailable=%d\n", ptr, (unsigned)size, largepagesavailable);
-#else
-  printf("VirtualAlloc returns %p size %u\n", ptr, (unsigned)size);
-#endif
-#endif
-  return (ptr != 0)? ptr: MFAIL;
-}
-
-/* Direct MMAP: reserve and commit in one call, asking for MEM_TOP_DOWN
-   placement to minimize interference. Returns MFAIL when VirtualAlloc
-   yields no address. */
-static FORCEINLINE void* win32direct_mmap(size_t size) {
-  void* mapped = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN,
-                              PAGE_READWRITE);
-  if (mapped == 0)
-    return MFAIL;
-  return mapped;
-}
-
-/* Releases [ptr, ptr+size), which may span several coalesced segments.
-   Each region is queried, checked and released in turn. Returns 0 once
-   everything is released, or -1 as soon as a query, a check or a release
-   fails. */
-static FORCEINLINE int win32munmap(void* ptr, size_t size) {
-  MEMORY_BASIC_INFORMATION region;
-  char* cursor = (char*)ptr;
-  for (; size != 0; cursor += region.RegionSize, size -= region.RegionSize) {
-    int releasable;
-    if (VirtualQuery(cursor, &region, sizeof(region)) == 0)
-      return -1;
-    /* The region must start exactly here, be its own allocation base,
-       be committed, and fit inside what is left to release */
-    releasable = region.BaseAddress == cursor &&
-                 region.AllocationBase == cursor &&
-                 region.State == MEM_COMMIT &&
-                 region.RegionSize <= size;
-    if (!releasable)
-      return -1;
-    if (VirtualFree(cursor, 0, MEM_RELEASE) == 0)
-      return -1;
-  }
-  return 0;
-}
-
-#define MMAP_DEFAULT(s) win32mmap(s)
-#define MUNMAP_DEFAULT(a, s) win32munmap((a), (s))
-#define DIRECT_MMAP_DEFAULT(s) win32direct_mmap(s)
-#endif /* WIN32 */
-#endif /* HAVE_MMAP */
-
-#if HAVE_MREMAP
-#ifndef WIN32
-#define MREMAP_DEFAULT(addr, osz, nsz, mv) mremap((addr), (osz), (nsz), (mv))
-#endif /* WIN32 */
-#endif /* HAVE_MREMAP */
-
-
-/**
- * Define CALL_MORECORE
- */
-#if HAVE_MORECORE
- #ifdef MORECORE
- #define CALL_MORECORE(S) MORECORE(S)
- #else /* MORECORE */
- #define CALL_MORECORE(S) MORECORE_DEFAULT(S)
- #endif /* MORECORE */
-#else /* HAVE_MORECORE */
- #define CALL_MORECORE(S) MFAIL
-#endif /* HAVE_MORECORE */
-
-/**
- * Define CALL_MMAP/CALL_MUNMAP/CALL_DIRECT_MMAP
- */
-#if HAVE_MMAP
- #define USE_MMAP_BIT (SIZE_T_ONE)
-
- #ifdef MMAP
- #define CALL_MMAP(s) MMAP(s)
- #else /* MMAP */
- #define CALL_MMAP(s) MMAP_DEFAULT(s)
- #endif /* MMAP */
- #ifdef MUNMAP
- #define CALL_MUNMAP(a, s) MUNMAP((a), (s))
- #else /* MUNMAP */
- #define CALL_MUNMAP(a, s) MUNMAP_DEFAULT((a), (s))
- #endif /* MUNMAP */
- #ifdef DIRECT_MMAP
- #define CALL_DIRECT_MMAP(s) DIRECT_MMAP(s)
- #else /* DIRECT_MMAP */
- #define CALL_DIRECT_MMAP(s) DIRECT_MMAP_DEFAULT(s)
- #endif /* DIRECT_MMAP */
-#else /* HAVE_MMAP */
- #define USE_MMAP_BIT (SIZE_T_ZERO)
-
- #define MMAP(s) MFAIL
- #define MUNMAP(a, s) (-1)
- #define DIRECT_MMAP(s) MFAIL
- #define CALL_DIRECT_MMAP(s) DIRECT_MMAP(s)
- #define CALL_MMAP(s) MMAP(s)
- #define CALL_MUNMAP(a, s) MUNMAP((a), (s))
-#endif /* HAVE_MMAP */
-
-/**
- * Define CALL_MREMAP
- */
-#if HAVE_MMAP && HAVE_MREMAP
- #ifdef MREMAP
- #define CALL_MREMAP(addr, osz, nsz, mv) MREMAP((addr), (osz), (nsz), (mv))
- #else /* MREMAP */
- #define CALL_MREMAP(addr, osz, nsz, mv) MREMAP_DEFAULT((addr), (osz), (nsz), (mv))
- #endif /* MREMAP */
-#else /* HAVE_MMAP && HAVE_MREMAP */
- #define CALL_MREMAP(addr, osz, nsz, mv) MFAIL
-#endif /* HAVE_MMAP && HAVE_MREMAP */
-
-/* mstate bit set if contiguous morecore disabled or failed */
-#define USE_NONCONTIGUOUS_BIT (4U)
-
-/* segment bit set in create_mspace_with_base */
-#define EXTERN_BIT (8U)
-
-
-/* --------------------------- Lock preliminaries ------------------------ */
-
-/*
- When locks are defined, there is one global lock, plus
- one per-mspace lock.
-
- The global lock ensures that mparams.magic and other unique
- mparams values are initialized only once. It also protects
- sequences of calls to MORECORE. In many cases sys_alloc requires
- two calls, that should not be interleaved with calls by other
- threads. This does not protect against direct calls to MORECORE
- by other threads not using this lock, so there is still code to
- cope the best we can on interference.
-
- Per-mspace locks surround calls to malloc, free, etc. To enable use
- in layered extensions, per-mspace locks are reentrant.
-
- Because lock-protected regions generally have bounded times, it is
- OK to use the supplied simple spinlocks in the custom versions for
- x86. Spinlocks are likely to improve performance for lightly
- contended applications, but worsen performance under heavy
- contention.
-
- If USE_LOCKS is > 1, the definitions of lock routines here are
- bypassed, in which case you will need to define the type MLOCK_T,
- and at least INITIAL_LOCK, ACQUIRE_LOCK, RELEASE_LOCK and possibly
- TRY_LOCK (which is not used in this malloc, but commonly needed in
- extensions.) You must also declare a
- static MLOCK_T malloc_global_mutex = { initialization values };.
-
-*/
-
-#if USE_LOCKS == 1
-
-#if USE_SPIN_LOCKS && SPIN_LOCKS_AVAILABLE
-#ifndef WIN32
-
-/* Custom pthread-style spin locks on x86 and x64 for gcc */
-/* Recursive spin lock state used by the pthread_*_lock routines. */
-struct pthread_mlock_t {
- volatile unsigned int l; /* lock word: 0 when free, set to 1 by cmpxchgl when taken */
- char cachelinepadding[64]; /* presumably separates the lock word from the fields below -- TODO confirm */
- unsigned int c; /* recursion count held by the owning thread */
- pthread_t threadid; /* owning thread; reset to 0 on final release */
-};
-#define MLOCK_T struct pthread_mlock_t
-#define CURRENT_THREAD pthread_self()
-#define INITIAL_LOCK(sl) ((sl)->threadid = 0, (sl)->l = (sl)->c = 0, 0)
-#define ACQUIRE_LOCK(sl) pthread_acquire_lock(sl)
-#define RELEASE_LOCK(sl) pthread_release_lock(sl)
-#define TRY_LOCK(sl) pthread_try_lock(sl)
-#define SPINS_PER_YIELD 63
-
-static MLOCK_T malloc_global_mutex = { 0, "", 0, 0};
-
-/*
-  Spins until the lock is held by the calling thread, then returns 0.
-  A thread that already owns the lock only has its recursion count
-  incremented. Every (SPINS_PER_YIELD+1)th unsuccessful iteration yields
-  the processor on the platforms that have a yield call below.
-*/
-static FORCEINLINE int pthread_acquire_lock (MLOCK_T *sl) {
- int spins = 0;
- volatile unsigned int* lp = &sl->l;
- for (;;) {
- if (*lp != 0) {
- /* Lock word is set: succeed only if this thread is the owner */
- if (sl->threadid == CURRENT_THREAD) {
- ++sl->c;
- return 0;
- }
- }
- else {
- /* place args to cmpxchgl in locals to evade oddities in some gccs */
- int cmp = 0;
- int val = 1;
- int ret;
- /* Atomic compare-and-swap: store 1 into *lp if it is still 0.
-    ret is 0 only when the swap happened. */
- __asm__ __volatile__ ("lock; cmpxchgl %1, %2"
- : "=a" (ret)
- : "r" (val), "m" (*(lp)), "0"(cmp)
- : "memory", "cc");
- if (!ret) {
- /* First acquisition: record ownership and start the count at 1 */
- assert(!sl->threadid);
- sl->threadid = CURRENT_THREAD;
- sl->c = 1;
- return 0;
- }
- }
- /* SPINS_PER_YIELD is used as a bit mask on the spin counter */
- if ((++spins & SPINS_PER_YIELD) == 0) {
-#if defined (__SVR4) && defined (__sun) /* solaris */
- thr_yield();
-#else
-#if defined(__linux__) || defined(__FreeBSD__) || defined(__APPLE__)
- sched_yield();
-#else /* no-op yield on unknown systems */
- ;
-#endif /* __linux__ || __FreeBSD__ || __APPLE__ */
-#endif /* solaris */
- }
- }
-}
-
-/*
-  Drops one level of ownership. The lock word is cleared only when the
-  recursion count reaches zero; until then the caller still holds the lock.
-  The asserts require that the lock is held and that the caller owns it.
-*/
-static FORCEINLINE void pthread_release_lock (MLOCK_T *sl) {
- volatile unsigned int* lp = &sl->l;
- assert(*lp != 0);
- assert(sl->threadid == CURRENT_THREAD);
- if (--sl->c == 0) {
- /* Clear the owner before the lock word is released */
- sl->threadid = 0;
- int prev = 0;
- int ret;
- /* Atomically exchange 0 into the lock word; ret receives the old
-    value and is not used afterwards */
- __asm__ __volatile__ ("lock; xchgl %0, %1"
- : "=r" (ret)
- : "m" (*(lp)), "0"(prev)
- : "memory");
- }
-}
-
-/*
-  Single, non-spinning attempt to take the lock.
-  Returns 1 if the calling thread now holds it (either freshly acquired or
-  re-entered by the owner), 0 otherwise. Note the return convention is the
-  opposite of pthread_acquire_lock, which returns 0 on success.
-*/
-static FORCEINLINE int pthread_try_lock (MLOCK_T *sl) {
- volatile unsigned int* lp = &sl->l;
- if (*lp != 0) {
- /* Already taken: only the owner may re-enter */
- if (sl->threadid == CURRENT_THREAD) {
- ++sl->c;
- return 1;
- }
- }
- else {
- int cmp = 0;
- int val = 1;
- int ret;
- /* Atomic compare-and-swap: store 1 into *lp if it is still 0.
-    ret is 0 only when the swap happened. */
- __asm__ __volatile__ ("lock; cmpxchgl %1, %2"
- : "=a" (ret)
- : "r" (val), "m" (*(lp)), "0"(cmp)
- : "memory", "cc");
- if (!ret) {
- assert(!sl->threadid);
- sl->threadid = CURRENT_THREAD;
- sl->c = 1;
- return 1;
- }
- }
- return 0;
-}
-
-
-#else /* WIN32 */
-/* Custom win32-style spin locks on x86 and x64 for MSC */
-/* Recursive spin lock state used by the win32_*_lock routines. */
-struct win32_mlock_t {
- volatile long l; /* lock word: 0 when free, set to 1 via interlockedexchange when taken */
- char cachelinepadding[64]; /* presumably separates the lock word from the fields below -- TODO confirm */
- unsigned int c; /* recursion count held by the owning thread */
- long threadid; /* owner's GetCurrentThreadId() value; reset to 0 on final release */
-};
-
-#define MLOCK_T struct win32_mlock_t
-#define CURRENT_THREAD ((long)GetCurrentThreadId())
-#define INITIAL_LOCK(sl) ((sl)->threadid = 0, (sl)->l = (sl)->c = 0, 0)
-#define ACQUIRE_LOCK(sl) win32_acquire_lock(sl)
-#define RELEASE_LOCK(sl) win32_release_lock(sl)
-#define TRY_LOCK(sl) win32_try_lock(sl)
-#define SPINS_PER_YIELD 63
-
-static MLOCK_T malloc_global_mutex = { 0, 0, 0};
-
-/*
-  Spins until the calling thread holds the lock, then returns 0.
-  The owner re-entering only bumps the recursion count. Whenever the
-  attempt counter masked with SPINS_PER_YIELD is zero, the thread gives
-  up its time slice via SleepEx(0, FALSE).
-*/
-static FORCEINLINE int win32_acquire_lock (MLOCK_T *sl) {
-  int attempts;
-  for (attempts = 1; ; ++attempts) {
-    if (sl->l == 0) {
-      /* Looks free: try to claim it; a zero old value means we won */
-      if (interlockedexchange(&sl->l, 1) == 0) {
-        assert(!sl->threadid);
-        sl->threadid = CURRENT_THREAD;
-        sl->c = 1;
-        return 0;
-      }
-    }
-    else if (sl->threadid == CURRENT_THREAD) {
-      /* Recursive acquisition by the current owner */
-      ++sl->c;
-      return 0;
-    }
-    if ((attempts & SPINS_PER_YIELD) == 0)
-      SleepEx(0, FALSE);
-  }
-}
-
-/*
-  Undo one acquisition of `sl` by its owner. The lock word is only cleared
-  (after resetting the owner id) when the outermost hold is released.
-*/
-static FORCEINLINE void win32_release_lock (MLOCK_T *sl) {
-  assert(sl->threadid == CURRENT_THREAD);
-  assert(sl->l != 0);
-  sl->c -= 1;
-  if (sl->c != 0)
-    return;                       /* still held by an outer nesting level */
-  sl->threadid = 0;
-  interlockedexchange (&sl->l, 0);
-}
-
-/*
-  One non-blocking attempt to take `sl`.
-  Returns 1 if the caller now holds the lock (fresh or nested), else 0.
-*/
-static FORCEINLINE int win32_try_lock (MLOCK_T *sl) {
-  if (sl->l == 0) {
-    /* Appears free: a nonzero previous value means another thread won. */
-    if (interlockedexchange(&sl->l, 1) != 0)
-      return 0;
-    assert(!sl->threadid);
-    sl->threadid = CURRENT_THREAD;
-    sl->c = 1;
-    return 1;
-  }
-  if (sl->threadid != CURRENT_THREAD)
-    return 0;
-  ++sl->c;
-  return 1;
-}
-
-#endif /* WIN32 */
-#else /* USE_SPIN_LOCKS */
-
-#ifndef WIN32
-/* pthreads-based locks */
-/* The lock interface maps directly onto pthread mutex calls; INITIAL_LOCK
-   goes through pthread_init_lock so that per-mstate mutexes are created
-   with the recursive attribute. */
-
-#define MLOCK_T pthread_mutex_t
-#define CURRENT_THREAD pthread_self()
-#define INITIAL_LOCK(sl) pthread_init_lock(sl)
-#define ACQUIRE_LOCK(sl) pthread_mutex_lock(sl)
-#define RELEASE_LOCK(sl) pthread_mutex_unlock(sl)
-/* pthread_mutex_trylock returns 0 on success, hence the negation. */
-#define TRY_LOCK(sl) (!pthread_mutex_trylock(sl))
-
-/* NOTE(review): the global mutex uses the default static initializer, not
-   pthread_init_lock, so it is not set up as recursive here. */
-static MLOCK_T malloc_global_mutex = PTHREAD_MUTEX_INITIALIZER;
-
-/* Cope with old-style linux recursive lock initialization by adding */
-/* skipped internal declaration from pthread.h */
-#ifdef linux
-#ifndef PTHREAD_MUTEX_RECURSIVE
-extern int pthread_mutexattr_setkind_np __P ((pthread_mutexattr_t *__attr,
- int __kind));
-#define PTHREAD_MUTEX_RECURSIVE PTHREAD_MUTEX_RECURSIVE_NP
-#define pthread_mutexattr_settype(x,y) pthread_mutexattr_setkind_np(x,y)
-#endif
-#endif
-
-/*
-  Initialize `sl` as a recursive pthread mutex.
-  Returns 0 on success and 1 if any pthread call fails.
-  Once the attribute object has been initialized it is always destroyed,
-  including on the failure paths, so it cannot be leaked.
-*/
-static int pthread_init_lock (MLOCK_T *sl) {
-  pthread_mutexattr_t attr;
-  int failed;
-  if (pthread_mutexattr_init(&attr)) return 1;
-  failed = pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE) != 0 ||
-           pthread_mutex_init(sl, &attr) != 0;
-  /* A failing destroy is reported as an error, as before. */
-  if (pthread_mutexattr_destroy(&attr)) failed = 1;
-  return failed;
-}
-
-#else /* WIN32 */
-/* Win32 critical sections */
-/* Each operation acts on its own argument `s`. INITIAL_LOCK evaluates to 0
-   on success, ACQUIRE_LOCK always evaluates to 0, and TRY_LOCK is nonzero
-   when the critical section was entered. */
-#define MLOCK_T CRITICAL_SECTION
-#define CURRENT_THREAD GetCurrentThreadId()
-#define INITIAL_LOCK(s) (!InitializeCriticalSectionAndSpinCount((s), 0x80000000|4000))
-#define ACQUIRE_LOCK(s) (EnterCriticalSection(s), 0)
-#define RELEASE_LOCK(s) LeaveCriticalSection(s)
-#define TRY_LOCK(s) TryEnterCriticalSection(s)
-/* The global critical section cannot be statically initialized; it is set
-   up at run time by init_malloc_global_mutex. */
-#define NEED_GLOBAL_LOCK_INIT
-
-static MLOCK_T malloc_global_mutex;
-/* 0 = uninitialized, < 0 = initialization in progress, > 0 = ready. */
-static volatile long malloc_global_mutex_status;
-
-/* Use spin loop to initialize global lock */
-/*
-  One-time initialization of malloc_global_mutex, guarded by
-  malloc_global_mutex_status. The thread that swaps the status from 0 to -1
-  performs the initialization and then publishes status 1; every other
-  caller yields via SleepEx until it observes a positive status.
-*/
-static void init_malloc_global_mutex() {
- for (;;) {
- long stat = malloc_global_mutex_status;
- if (stat > 0)
- return;
- /* transition to < 0 while initializing, then to > 0 */
- if (stat == 0 &&
- interlockedcompareexchange(&malloc_global_mutex_status, -1, 0) == 0) {
- InitializeCriticalSection(&malloc_global_mutex);
- interlockedexchange(&malloc_global_mutex_status,1);
- return;
- }
- SleepEx(0, FALSE);
- }
-}
-
-#endif /* WIN32 */
-#endif /* USE_SPIN_LOCKS */
-#endif /* USE_LOCKS == 1 */
-
-/* ----------------------- User-defined locks ------------------------ */
-
-#if USE_LOCKS > 1
-/* Define your own lock implementation here */
-/* #define INITIAL_LOCK(sl) ... */
-/* #define ACQUIRE_LOCK(sl) ... */
-/* #define RELEASE_LOCK(sl) ... */
-/* #define TRY_LOCK(sl) ... */
-/* static MLOCK_T malloc_global_mutex = ... */
-#endif /* USE_LOCKS > 1 */
-
-/* ----------------------- Lock-based state ------------------------ */
-
-/* USE_LOCK_BIT is the mflags bit tested by use_lock(); without lock support
-   it is 0 and INITIAL_LOCK expands to nothing. */
-#if USE_LOCKS
-#define USE_LOCK_BIT (2U)
-#else /* USE_LOCKS */
-#define USE_LOCK_BIT (0U)
-#define INITIAL_LOCK(l)
-#endif /* USE_LOCKS */
-
-/* Global-lock hooks; both may be predefined by the build to override them.
-   NOTE(review): the default definitions already end in ';', so a call site
-   written as `ACQUIRE_MALLOC_GLOBAL_LOCK();` yields an extra empty
-   statement -- harmless except directly before an `else`. */
-#if USE_LOCKS
-#ifndef ACQUIRE_MALLOC_GLOBAL_LOCK
-#define ACQUIRE_MALLOC_GLOBAL_LOCK() ACQUIRE_LOCK(&malloc_global_mutex);
-#endif
-#ifndef RELEASE_MALLOC_GLOBAL_LOCK
-#define RELEASE_MALLOC_GLOBAL_LOCK() RELEASE_LOCK(&malloc_global_mutex);
-#endif
-#else /* USE_LOCKS */
-#define ACQUIRE_MALLOC_GLOBAL_LOCK()
-#define RELEASE_MALLOC_GLOBAL_LOCK()
-#endif /* USE_LOCKS */
-
-
-/* ----------------------- Chunk representations ------------------------ */
-
-/*
- (The following includes lightly edited explanations by Colin Plumb.)
-
- The malloc_chunk declaration below is misleading (but accurate and
- necessary). It declares a "view" into memory allowing access to
- necessary fields at known offsets from a given base.
-
- Chunks of memory are maintained using a `boundary tag' method as
- originally described by Knuth. (See the paper by Paul Wilson
- ftp://ftp.cs.utexas.edu/pub/garbage/allocsrv.ps for a survey of such
- techniques.) Sizes of free chunks are stored both in the front of
- each chunk and at the end. This makes consolidating fragmented
- chunks into bigger chunks fast. The head fields also hold bits
- representing whether chunks are free or in use.
-
- Here are some pictures to make it clearer. They are "exploded" to
- show that the state of a chunk can be thought of as extending from
- the high 31 bits of the head field of its header through the
- prev_foot and PINUSE_BIT bit of the following chunk header.
-
- A chunk that's in use looks like:
-
- chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- | Size of previous chunk (if P = 0) |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |P|
- | Size of this chunk 1| +-+
- mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- | |
- +- -+
- | |
- +- -+
- | :
- +- size - sizeof(size_t) available payload bytes -+
- : |
- chunk-> +- -+
- | |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |1|
- | Size of next chunk (may or may not be in use) | +-+
- mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-
- And if it's free, it looks like this:
-
- chunk-> +- -+
- | User payload (must be in use, or we would have merged!) |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |P|
- | Size of this chunk 0| +-+
- mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- | Next pointer |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- | Prev pointer |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- | :
- +- size - sizeof(struct chunk) unused bytes -+
- : |
- chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- | Size of this chunk |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |0|
- | Size of next chunk (must be in use, or we would have merged)| +-+
- mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- | :
- +- User payload -+
- : |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- |0|
- +-+
- Note that since we always merge adjacent free chunks, the chunks
- adjacent to a free chunk must be in use.
-
- Given a pointer to a chunk (which can be derived trivially from the
- payload pointer) we can, in O(1) time, find out whether the adjacent
- chunks are free, and if so, unlink them from the lists that they
- are on and merge them with the current chunk.
-
- Chunks always begin on even word boundaries, so the mem portion
- (which is returned to the user) is also on an even word boundary, and
- thus at least double-word aligned.
-
- The P (PINUSE_BIT) bit, stored in the unused low-order bit of the
- chunk size (which is always a multiple of two words), is an in-use
- bit for the *previous* chunk. If that bit is *clear*, then the
- word before the current chunk size contains the previous chunk
- size, and can be used to find the front of the previous chunk.
- The very first chunk allocated always has this bit set, preventing
- access to non-existent (or non-owned) memory. If pinuse is set for
- any given chunk, then you CANNOT determine the size of the
- previous chunk, and might even get a memory addressing fault when
- trying to do so.
-
- The C (CINUSE_BIT) bit, stored in the unused second-lowest bit of
- the chunk size redundantly records whether the current chunk is
- inuse (unless the chunk is mmapped). This redundancy enables usage
- checks within free and realloc, and reduces indirection when freeing
- and consolidating chunks.
-
- Each freshly allocated chunk must have both cinuse and pinuse set.
- That is, each allocated chunk borders either a previously allocated
- and still in-use chunk, or the base of its memory arena. This is
- ensured by making all allocations from the `lowest' part of any
- found chunk. Further, no free chunk physically borders another one,
- so each free chunk is known to be preceded and followed by either
- inuse chunks or the ends of memory.
-
- Note that the `foot' of the current chunk is actually represented
- as the prev_foot of the NEXT chunk. This makes it easier to
- deal with alignments etc but can be very confusing when trying
- to extend or adapt this code.
-
- The exceptions to all this are
-
- 1. The special chunk `top' is the top-most available chunk (i.e.,
- the one bordering the end of available memory). It is treated
- specially. Top is never included in any bin, is used only if
- no other chunk is available, and is released back to the
- system if it is very large (see M_TRIM_THRESHOLD). In effect,
- the top chunk is treated as larger (and thus less well
- fitting) than any other available chunk. The top chunk
- doesn't update its trailing size field since there is no next
- contiguous chunk that would have to index off it. However,
- space is still allocated for it (TOP_FOOT_SIZE) to enable
- separation or merging when space is extended.
-
- 2. Chunks allocated via mmap have both cinuse and pinuse bits
- cleared in their head fields. Because they are allocated
- one-by-one, each must carry its own prev_foot field, which is
- also used to hold the offset this chunk has within its mmapped
- region, which is needed to preserve alignment. Each mmapped
- chunk is trailed by the first two fields of a fake next-chunk
- for sake of usage checks.
-
-*/
-
-/* Header "view" laid over every chunk. The user pointer (mem) starts at the
-   fd field, so fd/bk overlap the payload and are meaningful only while the
-   chunk is free. */
-struct malloc_chunk {
- size_t prev_foot; /* Size of previous chunk (if free). */
- size_t head; /* Size and inuse bits. */
- struct malloc_chunk* fd; /* double links -- used only if free. */
- struct malloc_chunk* bk;
-};
-
-typedef struct malloc_chunk mchunk;
-typedef struct malloc_chunk* mchunkptr;
-typedef struct malloc_chunk* sbinptr; /* The type of bins of chunks */
-typedef unsigned int bindex_t; /* Described below */
-typedef unsigned int binmap_t; /* Described below */
-
-/* ------------------- Chunks sizes and alignments ----------------------- */
-
-#define MCHUNK_SIZE (sizeof(mchunk))
-
-/* Per-chunk bookkeeping overhead: one size_t word, or two when FOOTERS is
-   enabled. */
-#if FOOTERS
-#define CHUNK_OVERHEAD (TWO_SIZE_T_SIZES)
-#else /* FOOTERS */
-#define CHUNK_OVERHEAD (SIZE_T_SIZE)
-#endif /* FOOTERS */
-
-/* MMapped chunks need a second word of overhead ... */
-#define MMAP_CHUNK_OVERHEAD (TWO_SIZE_T_SIZES)
-/* ... and additional padding for fake next-chunk at foot */
-#define MMAP_FOOT_PAD (FOUR_SIZE_T_SIZES)
-
-/* The smallest size we can malloc is an aligned minimal chunk */
-#define MIN_CHUNK_SIZE\
- ((MCHUNK_SIZE + CHUNK_ALIGN_MASK) & ~CHUNK_ALIGN_MASK)
-
-/* conversion from malloc headers to user pointers, and back */
-/* (the payload begins right after the prev_foot and head words) */
-#define chunk2mem(p) ((void*)((char*)(p) + TWO_SIZE_T_SIZES))
-#define mem2chunk(mem) ((mchunkptr)((char*)(mem) - TWO_SIZE_T_SIZES))
-/* chunk associated with aligned address A */
-#define align_as_chunk(A) (mchunkptr)((A) + align_offset(chunk2mem(A)))
-
-/* Bounds on request (not chunk) sizes. */
-/* NOTE(review): MAX_REQUEST relies on unsigned wrap-around of the negated
-   size_t value -- confirm CHUNK_ALIGN_MASK is also size_t-typed. */
-#define MAX_REQUEST ((-MIN_CHUNK_SIZE) << 2)
-#define MIN_REQUEST (MIN_CHUNK_SIZE - CHUNK_OVERHEAD - SIZE_T_ONE)
-
-/* pad request bytes into a usable size */
-#define pad_request(req) \
- (((req) + CHUNK_OVERHEAD + CHUNK_ALIGN_MASK) & ~CHUNK_ALIGN_MASK)
-
-/* pad request, checking for minimum (but not maximum) */
-#define request2size(req) \
- (((req) < MIN_REQUEST)? MIN_CHUNK_SIZE : pad_request(req))
-
-
-/* ------------------ Operations on head and foot fields ----------------- */
-
-/*
- The head field of a chunk is or'ed with PINUSE_BIT when previous
- adjacent chunk in use, and or'ed with CINUSE_BIT if this chunk is in
- use, unless mmapped, in which case both bits are cleared.
-
- FLAG4_BIT is not used by this malloc, but might be useful in extensions.
-*/
-
-/* Flag bits kept in the low-order bits of the head word. */
-#define PINUSE_BIT (SIZE_T_ONE)
-#define CINUSE_BIT (SIZE_T_TWO)
-#define FLAG4_BIT (SIZE_T_FOUR)
-#define INUSE_BITS (PINUSE_BIT|CINUSE_BIT)
-#define FLAG_BITS (PINUSE_BIT|CINUSE_BIT|FLAG4_BIT)
-
-/* Head value for fenceposts */
-#define FENCEPOST_HEAD (INUSE_BITS|SIZE_T_SIZE)
-
-/* extraction of fields from head words */
-#define cinuse(p) ((p)->head & CINUSE_BIT)
-#define pinuse(p) ((p)->head & PINUSE_BIT)
-/* is_inuse is false only when PINUSE_BIT alone is set; an mmapped chunk
-   (both bits clear) therefore also counts as in use. */
-#define is_inuse(p) (((p)->head & INUSE_BITS) != PINUSE_BIT)
-#define is_mmapped(p) (((p)->head & INUSE_BITS) == 0)
-
-/* chunk size with all flag bits masked off */
-#define chunksize(p) ((p)->head & ~(FLAG_BITS))
-
-#define clear_pinuse(p) ((p)->head &= ~PINUSE_BIT)
-
-/* Treat space at ptr +/- offset as a chunk */
-#define chunk_plus_offset(p, s) ((mchunkptr)(((char*)(p)) + (s)))
-#define chunk_minus_offset(p, s) ((mchunkptr)(((char*)(p)) - (s)))
-
-/* Ptr to next or previous physical malloc_chunk. */
-/* prev_chunk reads prev_foot, which holds the previous size only while the
-   previous chunk is free (see the PINUSE_BIT discussion above). */
-#define next_chunk(p) ((mchunkptr)( ((char*)(p)) + ((p)->head & ~FLAG_BITS)))
-#define prev_chunk(p) ((mchunkptr)( ((char*)(p)) - ((p)->prev_foot) ))
-
-/* extract next chunk's pinuse bit */
-#define next_pinuse(p) ((next_chunk(p)->head) & PINUSE_BIT)
-
-/* Get/set size at footer */
-/* (the footer of p is the prev_foot field of the chunk s bytes after p) */
-#define get_foot(p, s) (((mchunkptr)((char*)(p) + (s)))->prev_foot)
-#define set_foot(p, s) (((mchunkptr)((char*)(p) + (s)))->prev_foot = (s))
-
-/* Set size, pinuse bit, and foot */
-/* p: chunk being marked free; s: its size. The size argument is
-   parenthesized so that an expression argument combines correctly with
-   PINUSE_BIT. Note that s is evaluated more than once. */
-#define set_size_and_pinuse_of_free_chunk(p, s)\
-  ((p)->head = ((s)|PINUSE_BIT), set_foot(p, s))
-
-/* Set size, pinuse bit, foot, and clear next pinuse */
-/* n: the chunk physically following p; its PINUSE_BIT is cleared first. */
-#define set_free_with_pinuse(p, s, n)\
-  (clear_pinuse(n), set_size_and_pinuse_of_free_chunk(p, s))
-
-/* Get the internal overhead associated with chunk p */
-#define overhead_for(p)\
- (is_mmapped(p)? MMAP_CHUNK_OVERHEAD : CHUNK_OVERHEAD)
-
-/* Return true if malloced space is not necessarily cleared */
-/* With MMAP_CLEARS set, only non-mmapped chunks need explicit clearing;
-   otherwise every chunk does. */
-#if MMAP_CLEARS
-#define calloc_must_clear(p) (!is_mmapped(p))
-#else /* MMAP_CLEARS */
-#define calloc_must_clear(p) (1)
-#endif /* MMAP_CLEARS */
-
-/* ---------------------- Overlaid data structures ----------------------- */
-
-/*
- When chunks are not in use, they are treated as nodes of either
- lists or trees.
-
- "Small" chunks are stored in circular doubly-linked lists, and look
- like this:
-
- chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- | Size of previous chunk |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- `head:' | Size of chunk, in bytes |P|
- mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- | Forward pointer to next chunk in list |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- | Back pointer to previous chunk in list |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- | Unused space (may be 0 bytes long) .
- . .
- . |
-nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- `foot:' | Size of chunk, in bytes |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-
- Larger chunks are kept in a form of bitwise digital trees (aka
- tries) keyed on chunksizes. Because malloc_tree_chunks are only for
- free chunks greater than 256 bytes, their size doesn't impose any
- constraints on user chunk sizes. Each node looks like:
-
- chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- | Size of previous chunk |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- `head:' | Size of chunk, in bytes |P|
- mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- | Forward pointer to next chunk of same size |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- | Back pointer to previous chunk of same size |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- | Pointer to left child (child[0]) |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- | Pointer to right child (child[1]) |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- | Pointer to parent |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- | bin index of this chunk |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- | Unused space .
- . |
-nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- `foot:' | Size of chunk, in bytes |
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-
- Each tree holding treenodes is a tree of unique chunk sizes. Chunks
- of the same size are arranged in a circularly-linked list, with only
- the oldest chunk (the next to be used, in our FIFO ordering)
- actually in the tree. (Tree members are distinguished by a non-null
- parent pointer.) If a chunk with the same size as an existing node
- is inserted, it is linked off the existing node using pointers that
- work in the same way as fd/bk pointers of small chunks.
-
- Each tree contains a power of 2 sized range of chunk sizes (the
- smallest is 0x100 <= x < 0x180), which is divided in half at each
- tree level, with the chunks in the smaller half of the range (0x100
- <= x < 0x140 for the top node) in the left subtree and the larger
- half (0x140 <= x < 0x180) in the right subtree. This is, of course,
- done by inspecting individual bits.
-
- Using these rules, each node's left subtree contains all smaller
- sizes than its right subtree. However, the node at the root of each
- subtree has no particular ordering relationship to either. (The
- dividing line between the subtree sizes is based on trie relation.)
- If we remove the last chunk of a given size from the interior of the
- tree, we need to replace it with a leaf node. The tree ordering
- rules permit a node to be replaced by any leaf below it.
-
- The smallest chunk in a tree (a common operation in a best-fit
- allocator) can be found by walking a path to the leftmost leaf in
- the tree. Unlike a usual binary tree, where we follow left child
- pointers until we reach a null, here we follow the right child
- pointer any time the left one is null, until we reach a leaf with
- both child pointers null. The smallest chunk in the tree will be
- somewhere along that path.
-
- The worst case number of steps to add, find, or remove a node is
- bounded by the number of bits differentiating chunks within
- bins. Under current bin calculations, this ranges from 6 up to 21
- (for 32 bit sizes) or up to 53 (for 64 bit sizes). The typical case
- is of course much better.
-*/
-
-/* Node layout for free chunks kept in the tree bins. */
-struct malloc_tree_chunk {
- /* The first four fields must be compatible with malloc_chunk */
- size_t prev_foot;
- size_t head;
- struct malloc_tree_chunk* fd; /* next chunk of the same size */
- struct malloc_tree_chunk* bk; /* previous chunk of the same size */
-
- struct malloc_tree_chunk* child[2]; /* [0] = left child, [1] = right child */
- struct malloc_tree_chunk* parent; /* non-null only for chunks that are in the tree itself */
- bindex_t index; /* bin index of this chunk */
-};
-
-typedef struct malloc_tree_chunk tchunk;
-typedef struct malloc_tree_chunk* tchunkptr;
-typedef struct malloc_tree_chunk* tbinptr; /* The type of bins of trees */
-
-/* A little helper macro for trees */
-/* Yields child[0] when it is non-null, otherwise child[1] (which may itself
-   be null at a leaf). */
-#define leftmost_child(t) ((t)->child[0] != 0? (t)->child[0] : (t)->child[1])
-
-/* ----------------------------- Segments -------------------------------- */
-
-/*
- Each malloc space may include non-contiguous segments, held in a
- list headed by an embedded malloc_segment record representing the
- top-most space. Segments also include flags holding properties of
- the space. Large chunks that are directly allocated by mmap are not
- included in this list. They are instead independently created and
- destroyed without otherwise keeping track of them.
-
- Segment management mainly comes into play for spaces allocated by
- MMAP. Any call to MMAP might or might not return memory that is
- adjacent to an existing segment. MORECORE normally contiguously
- extends the current space, so this space is almost always adjacent,
- which is simpler and faster to deal with. (This is why MORECORE is
- used preferentially to MMAP when both are available -- see
- sys_alloc.) When allocating using MMAP, we don't use any of the
- hinting mechanisms (inconsistently) supported in various
- implementations of unix mmap, or distinguish reserving from
- committing memory. Instead, we just ask for space, and exploit
- contiguity when we get it. It is probably possible to do
- better than this on some systems, but no general scheme seems
- to be significantly better.
-
- Management entails a simpler variant of the consolidation scheme
- used for chunks to reduce fragmentation -- new adjacent memory is
- normally prepended or appended to an existing segment. However,
- there are limitations compared to chunk consolidation that mostly
- reflect the fact that segment processing is relatively infrequent
- (occurring only when getting memory from system) and that we
- don't expect to have huge numbers of segments:
-
- * Segments are not indexed, so traversal requires linear scans. (It
- would be possible to index these, but is not worth the extra
- overhead and complexity for most programs on most platforms.)
- * New segments are only appended to old ones when holding top-most
- memory; if they cannot be prepended to others, they are held in
- different segments.
-
- Except for the top-most segment of an mstate, each segment record
- is kept at the tail of its segment. Segments are added by pushing
- segment records onto the list headed by &mstate.seg for the
- containing mstate.
-
- Segment flags control allocation/merge/deallocation policies:
- * If EXTERN_BIT set, then we did not allocate this segment,
- and so should not try to deallocate or merge with others.
- (This currently holds only for the initial segment passed
- into create_mspace_with_base.)
- * If USE_MMAP_BIT set, the segment may be merged with
- other surrounding mmapped segments and trimmed/de-allocated
- using munmap.
- * If neither bit is set, then the segment was obtained using
- MORECORE so can be merged with surrounding MORECORE'd segments
- and deallocated/trimmed using MORECORE with negative arguments.
-*/
-
-/* Record describing one region of memory obtained from the system; records
-   are chained through `next`. */
-struct malloc_segment {
- char* base; /* base address */
- size_t size; /* allocated size */
- struct malloc_segment* next; /* ptr to next segment */
- flag_t sflags; /* mmap and extern flag */
-};
-
-/* Both tests yield the masked flag bit (nonzero when set), not strictly 1. */
-#define is_mmapped_segment(S) ((S)->sflags & USE_MMAP_BIT)
-#define is_extern_segment(S) ((S)->sflags & EXTERN_BIT)
-
-typedef struct malloc_segment msegment;
-typedef struct malloc_segment* msegmentptr;
-
-/* ---------------------------- malloc_state ----------------------------- */
-
-/*
- A malloc_state holds all of the bookkeeping for a space.
- The main fields are:
-
- Top
- The topmost chunk of the currently active segment. Its size is
- cached in topsize. The actual size of topmost space is
- topsize+TOP_FOOT_SIZE, which includes space reserved for adding
- fenceposts and segment records if necessary when getting more
- space from the system. The size at which to autotrim top is
- cached from mparams in trim_check, except that it is disabled if
- an autotrim fails.
-
- Designated victim (dv)
- This is the preferred chunk for servicing small requests that
- don't have exact fits. It is normally the chunk split off most
- recently to service another small request. Its size is cached in
- dvsize. The link fields of this chunk are not maintained since it
- is not kept in a bin.
-
- SmallBins
- An array of bin headers for free chunks. These bins hold chunks
- with sizes less than MIN_LARGE_SIZE bytes. Each bin contains
- chunks of all the same size, spaced 8 bytes apart. To simplify
- use in double-linked lists, each bin header acts as a malloc_chunk
- pointing to the real first node, if it exists (else pointing to
- itself). This avoids special-casing for headers. But to avoid
- waste, we allocate only the fd/bk pointers of bins, and then use
- repositioning tricks to treat these as the fields of a chunk.
-
- TreeBins
- Treebins are pointers to the roots of trees holding a range of
- sizes. There are 2 equally spaced treebins for each power of two
- from TREEBIN_SHIFT to TREEBIN_SHIFT+16. The last bin holds anything
- larger.
-
- Bin maps
- There is one bit map for small bins ("smallmap") and one for
- treebins ("treemap"). Each bin sets its bit when non-empty, and
- clears the bit when empty. Bit operations are then used to avoid
- bin-by-bin searching -- nearly all "search" is done without ever
- looking at bins that won't be selected. The bit maps
- conservatively use 32 bits per map word, even if on 64bit system.
- For a good description of some of the bit-based techniques used
- here, see Henry S. Warren Jr's book "Hacker's Delight" (and
- supplement at http://hackersdelight.org/). Many of these are
- intended to reduce the branchiness of paths through malloc etc, as
- well as to reduce the number of memory locations read or written.
-
- Segments
- A list of segments headed by an embedded malloc_segment record
- representing the initial space.
-
- Address check support
- The least_addr field is the least address ever obtained from
- MORECORE or MMAP. Attempted frees and reallocs of any address less
- than this are trapped (unless INSECURE is defined).
-
- Magic tag
- A cross-check field that should always hold same value as mparams.magic.
-
- Flags
- Bits recording whether to use MMAP, locks, or contiguous MORECORE
-
- Statistics
- Each space keeps track of current and maximum system memory
- obtained via MORECORE or MMAP.
-
- Trim support
- Fields holding the amount of unused topmost memory that should trigger
- trimming, and a counter to force periodic scanning to release unused
- non-topmost segments.
-
- Locking
- If USE_LOCKS is defined, the "mutex" lock is acquired and released
- around every public call using this mspace.
-
- Extension support
- A void* pointer and a size_t field that can be used to help implement
- extensions to this malloc.
-*/
-
-/* Bin types, widths and sizes */
-/* With these values small bins are 8 bytes apart and sizes from
-   MIN_LARGE_SIZE (1 << 8 = 256) upward go to the tree bins. */
-#define NSMALLBINS (32U)
-#define NTREEBINS (32U)
-#define SMALLBIN_SHIFT (3U)
-#define SMALLBIN_WIDTH (SIZE_T_ONE << SMALLBIN_SHIFT)
-#define TREEBIN_SHIFT (8U)
-#define MIN_LARGE_SIZE (SIZE_T_ONE << TREEBIN_SHIFT)
-#define MAX_SMALL_SIZE (MIN_LARGE_SIZE - SIZE_T_ONE)
-#define MAX_SMALL_REQUEST (MAX_SMALL_SIZE - CHUNK_ALIGN_MASK - CHUNK_OVERHEAD)
-
-/* All bookkeeping for one malloc space; see the field overview in the
-   comment above. */
-struct malloc_state {
- binmap_t smallmap; /* one bit per small bin, set while the bin is non-empty */
- binmap_t treemap; /* one bit per tree bin, set while the bin is non-empty */
- size_t dvsize; /* cached size of the designated victim */
- size_t topsize; /* cached size of the top chunk */
- char* least_addr; /* least address ever obtained from MORECORE or MMAP */
- mchunkptr dv; /* designated victim chunk */
- mchunkptr top; /* topmost chunk; 0 until initialized (see is_initialized) */
- size_t trim_check; /* top size at which to autotrim */
- size_t release_checks; /* counter forcing periodic scans for releasable segments */
- size_t magic; /* cross-check tag; should equal mparams.magic */
- mchunkptr smallbins[(NSMALLBINS+1)*2]; /* fd/bk pairs only; addressed via smallbin_at */
- tbinptr treebins[NTREEBINS]; /* roots of the size-keyed trees */
- size_t footprint; /* current system memory obtained */
- size_t max_footprint; /* maximum system memory obtained */
- flag_t mflags; /* mmap / lock / noncontiguous flags */
- msegment seg; /* embedded head of the segment list */
-#if USE_LOCKS
- MLOCK_T mutex; /* locate lock among fields that rarely change */
-#endif /* USE_LOCKS */
- void* extp; /* Unused but available for extensions */
- size_t exts;
-};
-
-typedef struct malloc_state* mstate;
-
-/* ------------- Global malloc_state and malloc_params ------------------- */
-
-#if !ONLY_MSPACES
-
-/* The global malloc_state used for all non-"mspace" calls */
-static struct malloc_state _gm_;
-#define gm (&_gm_)
-#define is_global(M) ((M) == &_gm_)
-
-#endif /* !ONLY_MSPACES */
-
-/* A state counts as initialized once it has a top chunk. */
-#define is_initialized(M) ((M)->top != 0)
-
-/* -------------------------- system alloc setup ------------------------- */
-
-/* Operations on mflags */
-/* The use_* tests yield the masked bit (nonzero when set), not strictly 1. */
-
-#define use_lock(M) ((M)->mflags & USE_LOCK_BIT)
-#define enable_lock(M) ((M)->mflags |= USE_LOCK_BIT)
-#define disable_lock(M) ((M)->mflags &= ~USE_LOCK_BIT)
-
-#define use_mmap(M) ((M)->mflags & USE_MMAP_BIT)
-#define enable_mmap(M) ((M)->mflags |= USE_MMAP_BIT)
-#define disable_mmap(M) ((M)->mflags &= ~USE_MMAP_BIT)
-
-#define use_noncontiguous(M) ((M)->mflags & USE_NONCONTIGUOUS_BIT)
-#define disable_contiguous(M) ((M)->mflags |= USE_NONCONTIGUOUS_BIT)
-
-/* Set or clear USE_LOCK_BIT according to the truth value of L. */
-#define set_lock(M,L)\
- ((M)->mflags = (L)?\
- ((M)->mflags | USE_LOCK_BIT) :\
- ((M)->mflags & ~USE_LOCK_BIT))
-
-/* page-align a size */
-/* NOTE(review): the mask arithmetic rounds up correctly only if
-   mparams.page_size is a power of two -- confirm where mparams is set. */
-#define page_align(S)\
- (((S) + (mparams.page_size - SIZE_T_ONE)) & ~(mparams.page_size - SIZE_T_ONE))
-
-/* granularity-align a size */
-/* NOTE(review): same power-of-two assumption for mparams.granularity. */
-#define granularity_align(S)\
- (((S) + (mparams.granularity - SIZE_T_ONE))\
- & ~(mparams.granularity - SIZE_T_ONE))
-
-
-/* For mmap, use granularity alignment on windows, else page-align */
-#ifdef WIN32
-#define mmap_align(S) granularity_align(S)
-#else
-#define mmap_align(S) page_align(S)
-#endif
-
-/* For sys_alloc, enough padding to ensure can malloc request on success */
-#define SYS_ALLOC_PADDING (TOP_FOOT_SIZE + MALLOC_ALIGNMENT)
-
-#define is_page_aligned(S)\
- (((size_t)(S) & (mparams.page_size - SIZE_T_ONE)) == 0)
-#define is_granularity_aligned(S)\
- (((size_t)(S) & (mparams.granularity - SIZE_T_ONE)) == 0)
-
-/* True if segment S holds address A */
-/* S: pointer to a segment record; A: any pointer. The range is half-open,
-   [base, base + size). S is parenthesized so that an expression argument
-   (e.g. `sp + 1`) binds correctly; note that S is evaluated three times. */
-#define segment_holds(S, A)\
-  ((char*)(A) >= (S)->base && (char*)(A) < (S)->base + (S)->size)
-
-/* Return segment holding given address */
-/* Walks the list that starts at the record embedded in m and yields the
-   first segment whose [base, base + size) range contains addr, or 0 when
-   no segment does. */
-static msegmentptr segment_holding(mstate m, char* addr) {
-  msegmentptr seg;
-  for (seg = &m->seg; seg != 0; seg = seg->next) {
-    if (addr >= seg->base && addr < seg->base + seg->size)
-      return seg;
-  }
-  return 0;
-}
-
-/* Return true if segment contains a segment link */
-/* That is: 1 if the address of any segment record on m's list lies inside
-   ss's [base, base + size) range, otherwise 0. */
-static int has_segment_link(mstate m, msegmentptr ss) {
-  msegmentptr rec = &m->seg;
-  do {
-    char* where = (char*)rec;
-    if (where >= ss->base && where < ss->base + ss->size)
-      return 1;
-    rec = rec->next;
-  } while (rec != 0);
-  return 0;
-}
-
-/* should_trim: true when size s exceeds M's trim_check threshold; always
-   false when MORECORE_CANNOT_TRIM is defined. */
-#ifndef MORECORE_CANNOT_TRIM
-#define should_trim(M,s) ((s) > (M)->trim_check)
-#else /* MORECORE_CANNOT_TRIM */
-#define should_trim(M,s) (0)
-#endif /* MORECORE_CANNOT_TRIM */
-
-/*
- TOP_FOOT_SIZE is padding at the end of a segment, including space
- that may be needed to place segment records and fenceposts when new
- noncontiguous segments are added.
-*/
-#define TOP_FOOT_SIZE\
- (align_offset(chunk2mem(0))+pad_request(sizeof(struct malloc_segment))+MIN_CHUNK_SIZE)
-
-
-/* ------------------------------- Hooks -------------------------------- */
-
-/*
- PREACTION should be defined to return 0 on success, and nonzero on
- failure. If you are not using locking, you can redefine these to do
- anything you like.
-*/
-
-#if USE_LOCKS
-
-/* PREACTION takes M's mutex only when its lock flag is set and otherwise
-   evaluates to 0. POSTACTION expands to a brace-enclosed statement, not an
-   expression. */
-#define PREACTION(M) ((use_lock(M))? ACQUIRE_LOCK(&(M)->mutex) : 0)
-#define POSTACTION(M) { if (use_lock(M)) RELEASE_LOCK(&(M)->mutex); }
-#else /* USE_LOCKS */
-
-/* Without locks both hooks default to no-ops unless predefined. */
-#ifndef PREACTION
-#define PREACTION(M) (0)
-#endif /* PREACTION */
-
-#ifndef POSTACTION
-#define POSTACTION(M)
-#endif /* POSTACTION */
-
-#endif /* USE_LOCKS */
-
-/*
- CORRUPTION_ERROR_ACTION is triggered upon detected bad addresses.
- USAGE_ERROR_ACTION is triggered on detected bad frees and
- reallocs. The argument p is an address that might have triggered the
- fault. It is ignored by the two predefined actions, but might be
- useful in custom actions that try to help diagnose errors.
-*/
-
-#if PROCEED_ON_ERROR
-
-/* A count of the number of corruption errors causing resets */
-int malloc_corruption_error_count;
-
-/* default corruption action */
-static void reset_on_error(mstate m);
-
-/* In this mode corruption resets the state and usage errors are ignored
-   (USAGE_ERROR_ACTION expands to nothing). */
-#define CORRUPTION_ERROR_ACTION(m) reset_on_error(m)
-#define USAGE_ERROR_ACTION(m, p)
-
-#else /* PROCEED_ON_ERROR */
-
-/* Default: both actions expand to ABORT unless predefined. */
-#ifndef CORRUPTION_ERROR_ACTION
-#define CORRUPTION_ERROR_ACTION(m) ABORT
-#endif /* CORRUPTION_ERROR_ACTION */
-
-#ifndef USAGE_ERROR_ACTION
-#define USAGE_ERROR_ACTION(m,p) ABORT
-#endif /* USAGE_ERROR_ACTION */
-
-#endif /* PROCEED_ON_ERROR */
-
-/* -------------------------- Debugging setup ---------------------------- */
-
-/* Outside DEBUG builds every check_* macro expands to nothing. */
-#if ! DEBUG
-
-#define check_free_chunk(M,P)
-#define check_inuse_chunk(M,P)
-#define check_malloced_chunk(M,P,N)
-#define check_mmapped_chunk(M,P)
-#define check_malloc_state(M)
-#define check_top_chunk(M,P)
-
-#else /* DEBUG */
-/* In DEBUG builds each check_* macro forwards to the do_check_* function of
-   the same name, declared below. */
-#define check_free_chunk(M,P) do_check_free_chunk(M,P)
-#define check_inuse_chunk(M,P) do_check_inuse_chunk(M,P)
-#define check_top_chunk(M,P) do_check_top_chunk(M,P)
-#define check_malloced_chunk(M,P,N) do_check_malloced_chunk(M,P,N)
-#define check_mmapped_chunk(M,P) do_check_mmapped_chunk(M,P)
-#define check_malloc_state(M) do_check_malloc_state(M)
-
-static void do_check_any_chunk(mstate m, mchunkptr p);
-static void do_check_top_chunk(mstate m, mchunkptr p);
-static void do_check_mmapped_chunk(mstate m, mchunkptr p);
-static void do_check_inuse_chunk(mstate m, mchunkptr p);
-static void do_check_free_chunk(mstate m, mchunkptr p);
-static void do_check_malloced_chunk(mstate m, void* mem, size_t s);
-static void do_check_tree(mstate m, tchunkptr t);
-static void do_check_treebin(mstate m, bindex_t i);
-static void do_check_smallbin(mstate m, bindex_t i);
-static void do_check_malloc_state(mstate m);
-static int bin_find(mstate m, mchunkptr x);
-static size_t traverse_and_check(mstate m);
-#endif /* DEBUG */
-
-/* ---------------------------- Indexing Bins ---------------------------- */
-
-#define is_small(s) (((s) >> SMALLBIN_SHIFT) < NSMALLBINS) /* nonzero when size s maps to a smallbin index */
-#define small_index(s) (bindex_t)((s) >> SMALLBIN_SHIFT) /* smallbin index for size s */
-#define small_index2size(i) ((i) << SMALLBIN_SHIFT) /* inverse of small_index */
-#define MIN_SMALL_INDEX (small_index(MIN_CHUNK_SIZE)) /* index of the bin for the minimum chunk size */
-
-/* addressing by index. See above about smallbin repositioning.
-   smallbin_at yields the address of smallbins[2*i] viewed as a bin header;
-   treebin_at yields the address of the i-th treebin slot. */
-#define smallbin_at(M, i) ((sbinptr)((char*)&((M)->smallbins[(i)<<1])))
-#define treebin_at(M,i) (&((M)->treebins[i]))
-
-/* assign tree index for size S to variable I. Use x86 asm if possible.
-
-   All four variants follow the same scheme, with X = S >> TREEBIN_SHIFT:
-     - X == 0      : I = 0
-     - X > 0xFFFF  : I = NTREEBINS-1
-     - otherwise   : I = (K << 1) plus the bit of S at position
-                     K + TREEBIN_SHIFT - 1, where K is the bit index
-                     produced by the bit scan (bsrl on GCC/x86,
-                     _bit_scan_reverse on the Intel compiler,
-                     _BitScanReverse on MSVC >= 1300, or the portable
-                     shift-and-mask sequence in the last variant).
-
-   The macro expands to a brace block that declares its own locals
-   (X, K, and Y/N in the fallback), so arguments must not use those names.
-   NOTE(review): S is used unparenthesized, so pass a simple expression.
-   NOTE(review): only the portable fallback omits the (bindex_t) cast on
-   the final assignment - confirm whether that is intentional. */
-#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
-#define compute_tree_index(S, I)\
-{\
- unsigned int X = S >> TREEBIN_SHIFT;\
- if (X == 0)\
- I = 0;\
- else if (X > 0xFFFF)\
- I = NTREEBINS-1;\
- else {\
- unsigned int K;\
- __asm__("bsrl\t%1, %0\n\t" : "=r" (K) : "g" (X));\
- I = (bindex_t)((K << 1) + ((S >> (K + (TREEBIN_SHIFT-1)) & 1)));\
- }\
-}
-
-#elif defined (__INTEL_COMPILER)
-#define compute_tree_index(S, I)\
-{\
- size_t X = S >> TREEBIN_SHIFT;\
- if (X == 0)\
- I = 0;\
- else if (X > 0xFFFF)\
- I = NTREEBINS-1;\
- else {\
- unsigned int K = _bit_scan_reverse (X); \
- I = (bindex_t)((K << 1) + ((S >> (K + (TREEBIN_SHIFT-1)) & 1)));\
- }\
-}
-
-#elif defined(_MSC_VER) && _MSC_VER>=1300
-#define compute_tree_index(S, I)\
-{\
- size_t X = S >> TREEBIN_SHIFT;\
- if (X == 0)\
- I = 0;\
- else if (X > 0xFFFF)\
- I = NTREEBINS-1;\
- else {\
- unsigned int K;\
- _BitScanReverse((DWORD *) &K, (DWORD) X);\
- I = (bindex_t)((K << 1) + ((S >> (K + (TREEBIN_SHIFT-1)) & 1)));\
- }\
-}
-
-#else /* GNUC */
-#define compute_tree_index(S, I)\
-{\
- size_t X = S >> TREEBIN_SHIFT;\
- if (X == 0)\
- I = 0;\
- else if (X > 0xFFFF)\
- I = NTREEBINS-1;\
- else {\
- unsigned int Y = (unsigned int)X;\
- unsigned int N = ((Y - 0x100) >> 16) & 8;\
- unsigned int K = (((Y <<= N) - 0x1000) >> 16) & 4;\
- N += K;\
- N += K = (((Y <<= K) - 0x4000) >> 16) & 2;\
- K = 14 - N + ((Y <<= K) >> 15);\
- I = (K << 1) + ((S >> (K + (TREEBIN_SHIFT-1)) & 1));\
- }\
-}
-#endif /* GNUC */
-
-/* Bit representing maximum resolved size in a treebin at i.
-   The expansion is wrapped in parentheses so the conditional cannot be
-   split by operators surrounding the macro call, and the argument is
-   parenthesized so an expression such as (a + b) compares as a whole. */
-#define bit_for_tree_index(i) \
-   (((i) == NTREEBINS-1)? (SIZE_T_BITSIZE-1) : (((i) >> 1) + TREEBIN_SHIFT - 2))
-
-/* Shift placing maximum resolved bit in a treebin at i as sign bit.
-   Yields 0 for the last treebin (index NTREEBINS-1). */
-#define leftshift_for_tree_index(i) \
-   (((i) == NTREEBINS-1)? 0 : \
-    ((SIZE_T_BITSIZE-SIZE_T_ONE) - (((i) >> 1) + TREEBIN_SHIFT - 2)))
-
-/* The size of the smallest chunk held in bin with index i */
-#define minsize_for_tree_index(i) \
-   ((SIZE_T_ONE << (((i) >> 1) + TREEBIN_SHIFT)) | \
-    (((size_t)((i) & SIZE_T_ONE)) << (((i) >> 1) + TREEBIN_SHIFT - 1)))
-
-
-/* ------------------------ Operations on bin maps ----------------------- */
-
-/* bit corresponding to given index */
-#define idx2bit(i) ((binmap_t)(1) << (i))
-
-/* Mark/Clear bits with given index */
-#define mark_smallmap(M,i) ((M)->smallmap |= idx2bit(i))
-#define clear_smallmap(M,i) ((M)->smallmap &= ~idx2bit(i))
-#define smallmap_is_marked(M,i) ((M)->smallmap & idx2bit(i))
-
-#define mark_treemap(M,i) ((M)->treemap |= idx2bit(i))
-#define clear_treemap(M,i) ((M)->treemap &= ~idx2bit(i))
-#define treemap_is_marked(M,i) ((M)->treemap & idx2bit(i))
-
-/* isolate the least set bit of a bitmap */
-#define least_bit(x) ((x) & -(x))
-
-/* mask with all bits to left of least bit of x on.
-   The argument is parenthesized before shifting so that a compound
-   argument (for example a|b) is shifted as a whole. */
-#define left_bits(x) (((x)<<1) | -((x)<<1))
-
-/* mask with all bits to left of or equal to least bit of x on */
-#define same_or_left_bits(x) ((x) | -(x))
-
-/* index corresponding to given bit. Use x86 asm if possible.
-
-   compute_bit2idx(X, I) stores a bit index derived from X into I.
-   Variants, in order of preference: bsfl inline asm on GCC/x86,
-   _bit_scan_forward on the Intel compiler, _BitScanForward on
-   MSVC >= 1300, ffs(X)-1 when USE_BUILTIN_FFS is set, and otherwise a
-   portable shift-and-mask sequence applied to X - 1.
-   NOTE(review): the portable variant presumably relies on X having
-   exactly one bit set (as produced by least_bit) - confirm at call sites. */
-
-#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
-#define compute_bit2idx(X, I)\
-{\
- unsigned int J;\
- __asm__("bsfl\t%1, %0\n\t" : "=r" (J) : "g" (X));\
- I = (bindex_t)J;\
-}
-
-#elif defined (__INTEL_COMPILER)
-#define compute_bit2idx(X, I)\
-{\
- unsigned int J;\
- J = _bit_scan_forward (X); \
- I = (bindex_t)J;\
-}
-
-#elif defined(_MSC_VER) && _MSC_VER>=1300
-#define compute_bit2idx(X, I)\
-{\
- unsigned int J;\
- _BitScanForward((DWORD *) &J, X);\
- I = (bindex_t)J;\
-}
-
-#elif USE_BUILTIN_FFS
-#define compute_bit2idx(X, I) I = ffs(X)-1
-
-#else
-#define compute_bit2idx(X, I)\
-{\
- unsigned int Y = X - 1;\
- unsigned int K = Y >> (16-4) & 16;\
- unsigned int N = K; Y >>= K;\
- N += K = Y >> (8-3) & 8; Y >>= K;\
- N += K = Y >> (4-2) & 4; Y >>= K;\
- N += K = Y >> (2-1) & 2; Y >>= K;\
- N += K = Y >> (1-0) & 1; Y >>= K;\
- I = (bindex_t)(N + Y);\
-}
-#endif /* GNUC */
-
-
-/* ----------------------- Runtime Check Support ------------------------- */
-
-/*
- For security, the main invariant is that malloc/free/etc never
- writes to a static address other than malloc_state, unless static
- malloc_state itself has been corrupted, which cannot occur via
- malloc (because of these checks). In essence this means that we
- believe all pointers, sizes, maps etc held in malloc_state, but
- check all of those linked or offsetted from other embedded data
- structures. These checks are interspersed with main code in a way
- that tends to minimize their run-time cost.
-
- When FOOTERS is defined, in addition to range checking, we also
- verify footer fields of inuse chunks, which can be used to guarantee
- that the mstate controlling malloc/free is intact. This is a
- streamlined version of the approach described by William Robertson
- et al in "Run-time Detection of Heap-based Overflows" LISA'03
- http://www.usenix.org/events/lisa03/tech/robertson.html The footer
- of an inuse chunk holds the xor of its mstate and a random seed,
- that is checked upon calls to free() and realloc(). This is
- (probabilistically) unguessable from outside the program, but can be
- computed by any code successfully malloc'ing any chunk, so does not
- itself provide protection against code that has already broken
- security through some other means. Unlike Robertson et al, we
- always dynamically check addresses of all offset chunks (previous,
- next, etc). This turns out to be cheaper than relying on hashes.
-*/
-
-#if !INSECURE
-/* Check if address a is at least as high as any from MORECORE or MMAP */
-#define ok_address(M, a) ((char*)(a) >= (M)->least_addr)
-/* Check if address of next chunk n is higher than base chunk p */
-#define ok_next(p, n) ((char*)(p) < (char*)(n))
-/* Check if p has inuse status */
-#define ok_inuse(p) is_inuse(p)
-/* Check if p has its pinuse bit on */
-#define ok_pinuse(p) pinuse(p)
-
-#else /* !INSECURE: with INSECURE set, every ok_* check is the constant 1 */
-#define ok_address(M, a) (1)
-#define ok_next(b, n) (1)
-#define ok_inuse(p) (1)
-#define ok_pinuse(p) (1)
-#endif /* !INSECURE */
-
-#if (FOOTERS && !INSECURE)
-/* Check if (alleged) mstate m has expected magic field */
-#define ok_magic(M) ((M)->magic == mparams.magic)
-#else /* (FOOTERS && !INSECURE) */
-#define ok_magic(M) (1)
-#endif /* (FOOTERS && !INSECURE) */
-
-
-/* In gcc, use __builtin_expect to minimize impact of checks.
-   RTCHECK(e) yields e, hinted as expected-true on GCC >= 3. With
-   INSECURE set it is the constant 1 and e is not evaluated at all. */
-#if !INSECURE
-#if defined(__GNUC__) && __GNUC__ >= 3
-#define RTCHECK(e) __builtin_expect(e, 1)
-#else /* GNUC */
-#define RTCHECK(e) (e)
-#endif /* GNUC */
-#else /* !INSECURE */
-#define RTCHECK(e) (1)
-#endif /* !INSECURE */
-
-/* macros to set up inuse chunks with or without footers.
-
-   Without FOOTERS, mark_inuse_foot expands to nothing. With FOOTERS it
-   stores (size_t)(M) ^ mparams.magic into the prev_foot field of the
-   chunk located s bytes after p, and get_mstate_for(p) recovers the
-   mstate from that field by xor-ing with mparams.magic again.
-   NOTE(review): the size argument s is OR-ed into head without
-   parentheses, so callers should pass a simple expression. */
-
-#if !FOOTERS
-
-#define mark_inuse_foot(M,p,s)
-
-/* Macros for setting head/foot of non-mmapped chunks */
-
-/* Set cinuse bit and pinuse bit of next chunk */
-#define set_inuse(M,p,s)\
- ((p)->head = (((p)->head & PINUSE_BIT)|s|CINUSE_BIT),\
- ((mchunkptr)(((char*)(p)) + (s)))->head |= PINUSE_BIT)
-
-/* Set cinuse and pinuse of this chunk and pinuse of next chunk */
-#define set_inuse_and_pinuse(M,p,s)\
- ((p)->head = (s|PINUSE_BIT|CINUSE_BIT),\
- ((mchunkptr)(((char*)(p)) + (s)))->head |= PINUSE_BIT)
-
-/* Set size, cinuse and pinuse bit of this chunk */
-#define set_size_and_pinuse_of_inuse_chunk(M, p, s)\
- ((p)->head = (s|PINUSE_BIT|CINUSE_BIT))
-
-#else /* FOOTERS */
-
-/* Set foot of inuse chunk to be xor of mstate and seed */
-#define mark_inuse_foot(M,p,s)\
- (((mchunkptr)((char*)(p) + (s)))->prev_foot = ((size_t)(M) ^ mparams.magic))
-
-#define get_mstate_for(p)\
- ((mstate)(((mchunkptr)((char*)(p) +\
- (chunksize(p))))->prev_foot ^ mparams.magic))
-
-#define set_inuse(M,p,s)\
- ((p)->head = (((p)->head & PINUSE_BIT)|s|CINUSE_BIT),\
- (((mchunkptr)(((char*)(p)) + (s)))->head |= PINUSE_BIT), \
- mark_inuse_foot(M,p,s))
-
-#define set_inuse_and_pinuse(M,p,s)\
- ((p)->head = (s|PINUSE_BIT|CINUSE_BIT),\
- (((mchunkptr)(((char*)(p)) + (s)))->head |= PINUSE_BIT),\
- mark_inuse_foot(M,p,s))
-
-#define set_size_and_pinuse_of_inuse_chunk(M, p, s)\
- ((p)->head = (s|PINUSE_BIT|CINUSE_BIT),\
- mark_inuse_foot(M, p, s))
-
-#endif /* !FOOTERS */
-
-/* ---------------------------- setting mparams -------------------------- */
-
-#ifdef ENABLE_LARGE_PAGES /* function-pointer type for a GetLargePageMinimum entry point resolved at run time */
-typedef size_t (WINAPI *GetLargePageMinimum_t)(void);
-#endif
-
-/*
-  Initialize mparams.
-
-  Under the malloc global lock, and only while mparams.magic is still 0,
-  this determines the page size and allocation granularity, sanity-checks
-  the build configuration (ABORT on failure), stores the default
-  thresholds and flags, initializes the main arena's lock (unless
-  ONLY_MSPACES), and finally sets a nonzero magic value whose low three
-  bits are clear. Always returns 1.
-
-  The /dev/urandom descriptor is closed on every path once it has been
-  opened, including when the read comes up short; in that case the
-  time-based fallback is used for the magic value.
-*/
-static int init_mparams(void) {
-#ifdef NEED_GLOBAL_LOCK_INIT
-  if (malloc_global_mutex_status <= 0)
-    init_malloc_global_mutex();
-#endif
-
-  ACQUIRE_MALLOC_GLOBAL_LOCK();
-  if (mparams.magic == 0) {
-    size_t magic;
-    size_t psize;
-    size_t gsize;
-
-#ifndef WIN32
-    psize = malloc_getpagesize;
-    gsize = ((DEFAULT_GRANULARITY != 0)? DEFAULT_GRANULARITY : psize);
-#else /* WIN32 */
-    {
-      SYSTEM_INFO system_info;
-      GetSystemInfo(&system_info);
-      psize = system_info.dwPageSize;
-      gsize = ((DEFAULT_GRANULARITY != 0)?
-               DEFAULT_GRANULARITY : system_info.dwAllocationGranularity);
-#ifdef ENABLE_LARGE_PAGES
-      {
-        /* Resolved dynamically; a null pointer or a zero result leaves
-           the regular page size and granularity in place. */
-        GetLargePageMinimum_t GetLargePageMinimum_ = (GetLargePageMinimum_t) GetProcAddress(GetModuleHandle(__T("kernel32.dll")), "GetLargePageMinimum");
-        if(GetLargePageMinimum_) {
-          size_t largepagesize = GetLargePageMinimum_();
-          if(largepagesize) {
-            psize = largepagesize;
-            gsize = ((DEFAULT_GRANULARITY != 0)?
-                     DEFAULT_GRANULARITY : largepagesize);
-            if(gsize < largepagesize) gsize = largepagesize;
-          }
-        }
-      }
-#endif
-    }
-#endif /* WIN32 */
-
-    /* Sanity-check configuration:
-       size_t must be unsigned and as wide as pointer type.
-       ints must be at least 4 bytes.
-       alignment must be at least 8.
-       Alignment, min chunk size, and page size must all be powers of 2.
-    */
-    if ((sizeof(size_t) != sizeof(char*)) ||
-        (MAX_SIZE_T < MIN_CHUNK_SIZE) ||
-        (sizeof(int) < 4) ||
-        (MALLOC_ALIGNMENT < (size_t)8U) ||
-        ((MALLOC_ALIGNMENT & (MALLOC_ALIGNMENT-SIZE_T_ONE)) != 0) ||
-        ((MCHUNK_SIZE & (MCHUNK_SIZE-SIZE_T_ONE)) != 0) ||
-        ((gsize & (gsize-SIZE_T_ONE)) != 0) ||
-        ((psize & (psize-SIZE_T_ONE)) != 0))
-      ABORT;
-
-    mparams.granularity = gsize;
-    mparams.page_size = psize;
-    mparams.mmap_threshold = DEFAULT_MMAP_THRESHOLD;
-    mparams.trim_threshold = DEFAULT_TRIM_THRESHOLD;
-#if MORECORE_CONTIGUOUS
-    mparams.default_mflags = USE_LOCK_BIT|USE_MMAP_BIT;
-#else /* MORECORE_CONTIGUOUS */
-    mparams.default_mflags = USE_LOCK_BIT|USE_MMAP_BIT|USE_NONCONTIGUOUS_BIT;
-#endif /* MORECORE_CONTIGUOUS */
-
-#if !ONLY_MSPACES
-    /* Set up lock for main malloc area */
-    gm->mflags = mparams.default_mflags;
-    INITIAL_LOCK(&gm->mutex);
-#endif
-
-    {
-#if USE_DEV_RANDOM
-      int fd;
-      int have_random = 0;
-      unsigned char buf[sizeof(size_t)];
-      /* Try to use /dev/urandom, else fall back on using time */
-      if ((fd = open("/dev/urandom", O_RDONLY)) >= 0) {
-        if (read(fd, buf, sizeof(buf)) == sizeof(buf)) {
-          magic = *((size_t *) buf);
-          have_random = 1;
-        }
-        close(fd); /* release the descriptor even after a short read */
-      }
-      if (!have_random)
-#endif /* USE_DEV_RANDOM */
-#ifdef WIN32
-      magic = (size_t)(GetTickCount() ^ (size_t)0x55555555U);
-#else
-      magic = (size_t)(time(0) ^ (size_t)0x55555555U);
-#endif
-      magic |= (size_t)8U;    /* ensure nonzero */
-      magic &= ~(size_t)7U;   /* improve chances of fault for bad values */
-      mparams.magic = magic;
-    }
-  }
-
-  RELEASE_MALLOC_GLOBAL_LOCK();
-  return 1;
-}
-
-/*
-  Backend for mallopt: update one tunable in mparams.
-  A value of -1 stands for MAX_SIZE_T. Returns 1 when the parameter was
-  stored and 0 when param_number is unknown or a granularity request is
-  below the page size or not a power of two.
-*/
-static int change_mparam(int param_number, int value) {
-  size_t requested;
-  int accepted = 1;
-  ensure_initialization();
-  if (value == -1)
-    requested = MAX_SIZE_T;
-  else
-    requested = (size_t)value;
-
-  if (param_number == M_TRIM_THRESHOLD)
-    mparams.trim_threshold = requested;
-  else if (param_number == M_MMAP_THRESHOLD)
-    mparams.mmap_threshold = requested;
-  else if (param_number == M_GRANULARITY &&
-           requested >= mparams.page_size &&
-           ((requested & (requested-1)) == 0))
-    mparams.granularity = requested;
-  else
-    accepted = 0;
-
-  return accepted;
-}
-
-#if DEBUG
-/* ------------------------- Debugging Support --------------------------- */
-
-/* Check properties of any chunk, whether free, inuse, mmapped etc.
-   Asserts that the chunk's user pointer is aligned (or that its head
-   equals FENCEPOST_HEAD) and that its address passes ok_address for m. */
-static void do_check_any_chunk(mstate m, mchunkptr p) {
- assert((is_aligned(chunk2mem(p))) || (p->head == FENCEPOST_HEAD));
- assert(ok_address(m, p));
-}
-
-/* Check properties of top chunk.
-   Asserts that p lies in a known segment, that its size (head with
-   INUSE_BITS masked off) is nonzero, equals m->topsize and reaches
-   exactly TOP_FOOT_SIZE bytes short of the segment end, that p has its
-   pinuse bit set, and that the chunk following it does not. */
-static void do_check_top_chunk(mstate m, mchunkptr p) {
- msegmentptr sp = segment_holding(m, (char*)p);
- size_t sz = p->head & ~INUSE_BITS; /* third-lowest bit can be set! */
- assert(sp != 0);
- assert((is_aligned(chunk2mem(p))) || (p->head == FENCEPOST_HEAD));
- assert(ok_address(m, p));
- assert(sz == m->topsize);
- assert(sz > 0);
- assert(sz == ((sp->base + sp->size) - (char*)p) - TOP_FOOT_SIZE);
- assert(pinuse(p));
- assert(!pinuse(chunk_plus_offset(p, sz)));
-}
-
-/* Check properties of (inuse) mmapped chunks.
-   len is the chunk size plus the offset stored in prev_foot plus
-   MMAP_FOOT_PAD; it must be a multiple of the page size. The chunk must
-   not be small, and must be followed by a FENCEPOST_HEAD marker and then
-   a zero head. */
-static void do_check_mmapped_chunk(mstate m, mchunkptr p) {
- size_t sz = chunksize(p);
- size_t len = (sz + (p->prev_foot) + MMAP_FOOT_PAD);
- assert(is_mmapped(p));
- assert(use_mmap(m));
- assert((is_aligned(chunk2mem(p))) || (p->head == FENCEPOST_HEAD));
- assert(ok_address(m, p));
- assert(!is_small(sz));
- assert((len & (mparams.page_size-SIZE_T_ONE)) == 0);
- assert(chunk_plus_offset(p, sz)->head == FENCEPOST_HEAD);
- assert(chunk_plus_offset(p, sz+SIZE_T_SIZE)->head == 0);
-}
-
-/* Check properties of inuse chunks.
-   Runs the generic chunk checks, asserts the inuse state and the
-   next chunk's pinuse bit, and delegates to do_check_mmapped_chunk
-   when the chunk is mmapped. */
-static void do_check_inuse_chunk(mstate m, mchunkptr p) {
- do_check_any_chunk(m, p);
- assert(is_inuse(p));
- assert(next_pinuse(p));
- /* If not pinuse and not mmapped, previous chunk has OK offset */
- assert(is_mmapped(p) || pinuse(p) || next_chunk(prev_chunk(p)) == p);
- if (is_mmapped(p))
- do_check_mmapped_chunk(m, p);
-}
-
-/* Check properties of free chunks.
-   Beyond the generic checks, a free chunk must be neither inuse nor
-   mmapped and the next chunk's pinuse bit must be clear. For chunks
-   other than m->dv and m->top, the size, alignment, footer and the
-   fd/bk links are verified as well; chunks smaller than
-   MIN_CHUNK_SIZE must be exactly SIZE_T_SIZE. */
-static void do_check_free_chunk(mstate m, mchunkptr p) {
- size_t sz = chunksize(p);
- mchunkptr next = chunk_plus_offset(p, sz);
- do_check_any_chunk(m, p);
- assert(!is_inuse(p));
- assert(!next_pinuse(p));
- assert (!is_mmapped(p));
- if (p != m->dv && p != m->top) {
- if (sz >= MIN_CHUNK_SIZE) {
- assert((sz & CHUNK_ALIGN_MASK) == 0);
- assert(is_aligned(chunk2mem(p)));
- assert(next->prev_foot == sz);
- assert(pinuse(p));
- assert (next == m->top || is_inuse(next));
- assert(p->fd->bk == p);
- assert(p->bk->fd == p);
- }
- else /* markers are always of size SIZE_T_SIZE */
- assert(sz == SIZE_T_SIZE);
- }
-}
-
-/* Check properties of malloced chunks at the point they are malloced.
-   mem is the user pointer and s the size it is checked against; a null
-   mem is accepted without any checks. */
-static void do_check_malloced_chunk(mstate m, void* mem, size_t s) {
- if (mem != 0) {
- mchunkptr p = mem2chunk(mem);
- size_t sz = p->head & ~INUSE_BITS;
- do_check_inuse_chunk(m, p);
- assert((sz & CHUNK_ALIGN_MASK) == 0);
- assert(sz >= MIN_CHUNK_SIZE);
- assert(sz >= s);
- /* unless mmapped, size is less than MIN_CHUNK_SIZE more than request */
- assert(is_mmapped(p) || sz < (s + MIN_CHUNK_SIZE));
- }
-}
-
-/* Check a tree and its subtrees.
-   t is the first node of a ring of same-sized chunks linked through
-   fd/bk. The recorded index must match the one computed from the size,
-   and the size must fall inside that index's range. Every node on the
-   ring is checked; exactly one of them must carry a parent link, and
-   its children are checked recursively. */
-static void do_check_tree(mstate m, tchunkptr t) {
- tchunkptr head = 0;
- tchunkptr u = t;
- bindex_t tindex = t->index;
- size_t tsize = chunksize(t);
- bindex_t idx;
- compute_tree_index(tsize, idx);
- assert(tindex == idx);
- assert(tsize >= MIN_LARGE_SIZE);
- assert(tsize >= minsize_for_tree_index(idx));
- assert((idx == NTREEBINS-1) || (tsize < minsize_for_tree_index((idx+1))));
-
- do { /* traverse through chain of same-sized nodes */
- do_check_any_chunk(m, ((mchunkptr)u));
- assert(u->index == tindex);
- assert(chunksize(u) == tsize);
- assert(!is_inuse(u));
- assert(!next_pinuse(u));
- assert(u->fd->bk == u);
- assert(u->bk->fd == u);
- if (u->parent == 0) {
- assert(u->child[0] == 0);
- assert(u->child[1] == 0);
- }
- else {
- assert(head == 0); /* only one node on chain has parent */
- head = u;
- assert(u->parent != u);
- assert (u->parent->child[0] == u ||
- u->parent->child[1] == u ||
- *((tbinptr*)(u->parent)) == u);
- if (u->child[0] != 0) {
- assert(u->child[0]->parent == u);
- assert(u->child[0] != u);
- do_check_tree(m, u->child[0]);
- }
- if (u->child[1] != 0) {
- assert(u->child[1]->parent == u);
- assert(u->child[1] != u);
- do_check_tree(m, u->child[1]);
- }
- if (u->child[0] != 0 && u->child[1] != 0) {
- assert(chunksize(u->child[0]) < chunksize(u->child[1]));
- }
- }
- u = u->fd;
- } while (u != t);
- assert(head != 0);
-}
-
-/* Check all the chunks in a treebin.
-   A null root requires the bin's treemap bit to be clear; a bin whose
-   bit is set has its whole tree checked by do_check_tree. */
-static void do_check_treebin(mstate m, bindex_t i) {
- tbinptr* tb = treebin_at(m, i);
- tchunkptr t = *tb;
- int empty = (m->treemap & (1U << i)) == 0;
- if (t == 0)
- assert(empty);
- if (!empty)
- do_check_tree(m, t);
-}
-
-/* Check all the chunks in a smallbin.
-   A bin whose header points back at itself requires the smallmap bit
-   to be clear. When the bit is set, the bin is walked through its bk
-   links and every chunk is checked as free, as belonging to bin i, and
-   as being followed by an inuse chunk (unless that is a fencepost). */
-static void do_check_smallbin(mstate m, bindex_t i) {
- sbinptr b = smallbin_at(m, i);
- mchunkptr p = b->bk;
- unsigned int empty = (m->smallmap & (1U << i)) == 0;
- if (p == b)
- assert(empty);
- if (!empty) {
- for (; p != b; p = p->bk) {
- size_t size = chunksize(p);
- mchunkptr q;
- /* each chunk claims to be free */
- do_check_free_chunk(m, p);
- /* chunk belongs in bin */
- assert(small_index(size) == i);
- assert(p->bk == b || chunksize(p->bk) == chunksize(p));
- /* chunk is followed by an inuse chunk */
- q = next_chunk(p);
- if (q->head != FENCEPOST_HEAD)
- do_check_inuse_chunk(m, q);
- }
- }
-}
-
-/*
-  Report whether chunk x is currently linked into the bin that matches
-  its size: returns 1 when it is found there, 0 otherwise. Used by the
-  other check functions.
-*/
-static int bin_find(mstate m, mchunkptr x) {
-  size_t size = chunksize(x);
-  if (!is_small(size)) {
-    bindex_t tidx;
-    compute_tree_index(size, tidx);
-    if (treemap_is_marked(m, tidx)) {
-      tchunkptr node = *treebin_at(m, tidx);
-      size_t bits;
-      /* descend by successive size bits until a node of this size is hit */
-      for (bits = size << leftshift_for_tree_index(tidx);
-           node != 0 && chunksize(node) != size;
-           bits <<= 1)
-        node = node->child[(bits >> (SIZE_T_BITSIZE-SIZE_T_ONE)) & 1];
-      if (node != 0) {
-        /* scan the ring of same-sized chunks */
-        tchunkptr peer = node;
-        do {
-          if (peer == (tchunkptr)x)
-            return 1;
-          peer = peer->fd;
-        } while (peer != node);
-      }
-    }
-  }
-  else {
-    bindex_t sidx = small_index(size);
-    if (smallmap_is_marked(m, sidx)) {
-      sbinptr bin = smallbin_at(m, sidx);
-      mchunkptr cur = bin;
-      do {
-        if (cur == x)
-          return 1;
-        cur = cur->fd;
-      } while (cur != bin);
-    }
-  }
-  return 0;
-}
-
-/* Traverse each chunk and check it; return total.
-   For an initialized m the total starts at topsize + TOP_FOOT_SIZE and
-   adds the size of every chunk met while walking each segment up to the
-   top chunk or a fencepost. Inuse chunks must not be found in a bin;
-   free chunks must be m->dv or found in a bin, and must not directly
-   follow another free chunk. Returns 0 for an uninitialized m. */
-static size_t traverse_and_check(mstate m) {
- size_t sum = 0;
- if (is_initialized(m)) {
- msegmentptr s = &m->seg;
- sum += m->topsize + TOP_FOOT_SIZE;
- while (s != 0) {
- mchunkptr q = align_as_chunk(s->base);
- mchunkptr lastq = 0;
- assert(pinuse(q));
- while (segment_holds(s, q) &&
- q != m->top && q->head != FENCEPOST_HEAD) {
- sum += chunksize(q);
- if (is_inuse(q)) {
- assert(!bin_find(m, q));
- do_check_inuse_chunk(m, q);
- }
- else {
- assert(q == m->dv || bin_find(m, q));
- assert(lastq == 0 || is_inuse(lastq)); /* Not 2 consecutive free */
- do_check_free_chunk(m, q);
- }
- lastq = q;
- q = next_chunk(q);
- }
- s = s->next;
- }
- }
- return sum;
-}
-
-/* Check all properties of malloc_state.
-   Checks every smallbin and treebin, then the dv chunk (when dvsize is
-   nonzero) and the top chunk (when present), neither of which may be
-   found in a bin, and finally verifies that the traversed total does
-   not exceed footprint and that footprint does not exceed
-   max_footprint. */
-static void do_check_malloc_state(mstate m) {
- bindex_t i;
- size_t total;
- /* check bins */
- for (i = 0; i < NSMALLBINS; ++i)
- do_check_smallbin(m, i);
- for (i = 0; i < NTREEBINS; ++i)
- do_check_treebin(m, i);
-
- if (m->dvsize != 0) { /* check dv chunk */
- do_check_any_chunk(m, m->dv);
- assert(m->dvsize == chunksize(m->dv));
- assert(m->dvsize >= MIN_CHUNK_SIZE);
- assert(bin_find(m, m->dv) == 0);
- }
-
- if (m->top != 0) { /* check top chunk */
- do_check_top_chunk(m, m->top);
- /*assert(m->topsize == chunksize(m->top)); redundant */
- assert(m->topsize > 0);
- assert(bin_find(m, m->top) == 0);
- }
-
- total = traverse_and_check(m);
- assert(total <= m->footprint);
- assert(m->footprint <= m->max_footprint);
-}
-#endif /* DEBUG */
-
-/* ----------------------------- statistics ------------------------------ */
-
-#if !NO_MALLINFO
-/*
-  Build a struct mallinfo snapshot for mstate m.
-  The result stays all-zero when PREACTION fails or m is not yet
-  initialized; otherwise every segment is walked (stopping at the top
-  chunk or a fencepost) to total the chunk bytes and the free chunks.
-*/
-static struct mallinfo internal_mallinfo(mstate m) {
-  struct mallinfo nm = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
-  ensure_initialization();
-  if (!PREACTION(m)) {
-    check_malloc_state(m);
-    if (is_initialized(m)) {
-      size_t free_chunks = SIZE_T_ONE; /* top always free */
-      size_t free_bytes = m->topsize + TOP_FOOT_SIZE;
-      size_t total_bytes = free_bytes;
-      msegmentptr seg;
-
-      for (seg = &m->seg; seg != 0; seg = seg->next) {
-        mchunkptr q;
-        for (q = align_as_chunk(seg->base);
-             segment_holds(seg, q) &&
-             q != m->top && q->head != FENCEPOST_HEAD;
-             q = next_chunk(q)) {
-          size_t sz = chunksize(q);
-          total_bytes += sz;
-          if (!is_inuse(q)) {
-            free_bytes += sz;
-            ++free_chunks;
-          }
-        }
-      }
-
-      nm.arena = total_bytes;
-      nm.ordblks = free_chunks;
-      nm.hblkhd = m->footprint - total_bytes;
-      nm.usmblks = m->max_footprint;
-      nm.uordblks = m->footprint - free_bytes;
-      nm.fordblks = free_bytes;
-      nm.keepcost = m->topsize;
-    }
-
-    POSTACTION(m);
-  }
-  return nm;
-}
-#endif /* !NO_MALLINFO */
-
-/*
-  Print three usage figures for mstate m to stderr: the peak footprint,
-  the current footprint, and the bytes in use (footprint minus the top
-  chunk, its overhead, and every free chunk found while walking the
-  segments). All three print as 0 when m is not initialized; nothing is
-  printed when PREACTION fails.
-*/
-static void internal_malloc_stats(mstate m) {
-  ensure_initialization();
-  if (!PREACTION(m)) {
-    size_t peak = 0;
-    size_t current = 0;
-    size_t inuse = 0;
-    check_malloc_state(m);
-    if (is_initialized(m)) {
-      msegmentptr seg;
-      peak = m->max_footprint;
-      current = m->footprint;
-      inuse = current - (m->topsize + TOP_FOOT_SIZE);
-
-      for (seg = &m->seg; seg != 0; seg = seg->next) {
-        mchunkptr q;
-        for (q = align_as_chunk(seg->base);
-             segment_holds(seg, q) &&
-             q != m->top && q->head != FENCEPOST_HEAD;
-             q = next_chunk(q)) {
-          if (!is_inuse(q))
-            inuse -= chunksize(q);
-        }
-      }
-    }
-
-    fprintf(stderr, "max system bytes = %10lu\n", (unsigned long)(peak));
-    fprintf(stderr, "system bytes = %10lu\n", (unsigned long)(current));
-    fprintf(stderr, "in use bytes = %10lu\n", (unsigned long)(inuse));
-
-    POSTACTION(m);
-  }
-}
-
-/* ----------------------- Operations on smallbins ----------------------- */
-
-/*
- Various forms of linking and unlinking are defined as macros. Even
- the ones for trees, which are very long but have very short typical
- paths. This is ugly but reduces reliance on inlining support of
- compilers.
-*/
-
-/* Link a free chunk into a smallbin.
-   P (size S) is linked directly after the bin header B. If the bin's
-   smallmap bit was clear it is set and P's neighbours are both B;
-   otherwise B->fd is validated with ok_address first and
-   CORRUPTION_ERROR_ACTION(M) runs if that check fails. The four link
-   assignments at the end are executed on every path. */
-#define insert_small_chunk(M, P, S) {\
- bindex_t I = small_index(S);\
- mchunkptr B = smallbin_at(M, I);\
- mchunkptr F = B;\
- assert(S >= MIN_CHUNK_SIZE);\
- if (!smallmap_is_marked(M, I))\
- mark_smallmap(M, I);\
- else if (RTCHECK(ok_address(M, B->fd)))\
- F = B->fd;\
- else {\
- CORRUPTION_ERROR_ACTION(M);\
- }\
- B->fd = P;\
- F->bk = P;\
- P->fd = F;\
- P->bk = B;\
-}
-
-/* Unlink a chunk from a smallbin.
-   P must be linked in the smallbin for size S. When P's two neighbours
-   are the same node (F == B) only the smallmap bit is cleared and no
-   pointers are rewritten. Otherwise each neighbour must be the bin
-   header or pass ok_address before the links are joined around P; a
-   failed check runs CORRUPTION_ERROR_ACTION(M). */
-#define unlink_small_chunk(M, P, S) {\
- mchunkptr F = P->fd;\
- mchunkptr B = P->bk;\
- bindex_t I = small_index(S);\
- assert(P != B);\
- assert(P != F);\
- assert(chunksize(P) == small_index2size(I));\
- if (F == B)\
- clear_smallmap(M, I);\
- else if (RTCHECK((F == smallbin_at(M,I) || ok_address(M, F)) &&\
- (B == smallbin_at(M,I) || ok_address(M, B)))) {\
- F->bk = B;\
- B->fd = F;\
- }\
- else {\
- CORRUPTION_ERROR_ACTION(M);\
- }\
-}
-
-/* Unlink the first chunk from a smallbin.
-   B is the bin header, P the chunk being removed and I the bin index,
-   all supplied by the caller. If P's successor is B itself the smallmap
-   bit is cleared; otherwise the successor is validated with ok_address
-   and linked directly to B, or CORRUPTION_ERROR_ACTION(M) runs. */
-#define unlink_first_small_chunk(M, B, P, I) {\
- mchunkptr F = P->fd;\
- assert(P != B);\
- assert(P != F);\
- assert(chunksize(P) == small_index2size(I));\
- if (B == F)\
- clear_smallmap(M, I);\
- else if (RTCHECK(ok_address(M, F))) {\
- B->fd = F;\
- F->bk = B;\
- }\
- else {\
- CORRUPTION_ERROR_ACTION(M);\
- }\
-}
-
-
-
-/* Replace dv node, binning the old one.
-   If M currently has a dv chunk (dvsize != 0) it is inserted into its
-   smallbin first; then P of size S becomes the new dv. */
-/* Used only when dvsize known to be small */
-#define replace_dv(M, P, S) {\
- size_t DVS = M->dvsize;\
- if (DVS != 0) {\
- mchunkptr DV = M->dv;\
- assert(is_small(DVS));\
- insert_small_chunk(M, DV, DVS);\
- }\
- M->dvsize = S;\
- M->dv = P;\
-}
-
-/* ------------------------- Operations on trees ------------------------- */
-
-/* Insert chunk into tree.
-   X (a tchunkptr of size S) is placed in the treebin selected by
-   compute_tree_index. An unmarked bin gets X as its root, with the bin
-   slot recorded as X's parent. Otherwise the tree is descended one size
-   bit at a time: on reaching an empty child slot X is attached there as
-   a new node; on meeting a node of equal size X is linked into that
-   node's fd/bk ring with a null parent. Pointers are validated with
-   ok_address under RTCHECK and CORRUPTION_ERROR_ACTION(M) runs on
-   failure. */
-#define insert_large_chunk(M, X, S) {\
- tbinptr* H;\
- bindex_t I;\
- compute_tree_index(S, I);\
- H = treebin_at(M, I);\
- X->index = I;\
- X->child[0] = X->child[1] = 0;\
- if (!treemap_is_marked(M, I)) {\
- mark_treemap(M, I);\
- *H = X;\
- X->parent = (tchunkptr)H;\
- X->fd = X->bk = X;\
- }\
- else {\
- tchunkptr T = *H;\
- size_t K = S << leftshift_for_tree_index(I);\
- for (;;) {\
- if (chunksize(T) != S) {\
- tchunkptr* C = &(T->child[(K >> (SIZE_T_BITSIZE-SIZE_T_ONE)) & 1]);\
- K <<= 1;\
- if (*C != 0)\
- T = *C;\
- else if (RTCHECK(ok_address(M, C))) {\
- *C = X;\
- X->parent = T;\
- X->fd = X->bk = X;\
- break;\
- }\
- else {\
- CORRUPTION_ERROR_ACTION(M);\
- break;\
- }\
- }\
- else {\
- tchunkptr F = T->fd;\
- if (RTCHECK(ok_address(M, T) && ok_address(M, F))) {\
- T->fd = F->bk = X;\
- X->fd = F;\
- X->bk = T;\
- X->parent = 0;\
- break;\
- }\
- else {\
- CORRUPTION_ERROR_ACTION(M);\
- break;\
- }\
- }\
- }\
- }\
-}
-
-/*
- Unlink steps:
-
- 1. If x is a chained node, unlink it from its same-sized fd/bk links
- and choose its bk node as its replacement.
- 2. If x was the last node of its size, but not a leaf node, it must
- be replaced with a leaf node (not merely one with an open left or
- right), to make sure that lefts and rights of descendents
- correspond properly to bit masks. We use the rightmost descendent
- of x. We could use any other leaf, but this is easy to locate and
- tends to counteract removal of leftmosts elsewhere, and so keeps
- paths shorter than minimally guaranteed. This doesn't loop much
- because on average a node in a tree is near the bottom.
- 3. If x is the base of a chain (i.e., has parent links) relink
- x's parent and children to x's replacement (or null if none).
-
- In the macro, R is the replacement node and XP is X's parent. Every
- pointer that is written through is first validated with ok_address
- under RTCHECK; a failed check runs CORRUPTION_ERROR_ACTION(M). When X
- was the root of its bin and there is no replacement, the bin's
- treemap bit is cleared.
-*/
-
-#define unlink_large_chunk(M, X) {\
- tchunkptr XP = X->parent;\
- tchunkptr R;\
- if (X->bk != X) {\
- tchunkptr F = X->fd;\
- R = X->bk;\
- if (RTCHECK(ok_address(M, F))) {\
- F->bk = R;\
- R->fd = F;\
- }\
- else {\
- CORRUPTION_ERROR_ACTION(M);\
- }\
- }\
- else {\
- tchunkptr* RP;\
- if (((R = *(RP = &(X->child[1]))) != 0) ||\
- ((R = *(RP = &(X->child[0]))) != 0)) {\
- tchunkptr* CP;\
- while ((*(CP = &(R->child[1])) != 0) ||\
- (*(CP = &(R->child[0])) != 0)) {\
- R = *(RP = CP);\
- }\
- if (RTCHECK(ok_address(M, RP)))\
- *RP = 0;\
- else {\
- CORRUPTION_ERROR_ACTION(M);\
- }\
- }\
- }\
- if (XP != 0) {\
- tbinptr* H = treebin_at(M, X->index);\
- if (X == *H) {\
- if ((*H = R) == 0) \
- clear_treemap(M, X->index);\
- }\
- else if (RTCHECK(ok_address(M, XP))) {\
- if (XP->child[0] == X) \
- XP->child[0] = R;\
- else \
- XP->child[1] = R;\
- }\
- else\
- CORRUPTION_ERROR_ACTION(M);\
- if (R != 0) {\
- if (RTCHECK(ok_address(M, R))) {\
- tchunkptr C0, C1;\
- R->parent = XP;\
- if ((C0 = X->child[0]) != 0) {\
- if (RTCHECK(ok_address(M, C0))) {\
- R->child[0] = C0;\
- C0->parent = R;\
- }\
- else\
- CORRUPTION_ERROR_ACTION(M);\
- }\
- if ((C1 = X->child[1]) != 0) {\
- if (RTCHECK(ok_address(M, C1))) {\
- R->child[1] = C1;\
- C1->parent = R;\
- }\
- else\
- CORRUPTION_ERROR_ACTION(M);\
- }\
- }\
- else\
- CORRUPTION_ERROR_ACTION(M);\
- }\
- }\
-}
-
-/* Relays to large vs small bin operations.
-   Both macros pick the smallbin or tree variant from is_small(S).
-   NOTE(review): each expands to a bare if/else statement rather than a
-   single block, so use them only as full statements and not as the
-   unbraced body of another if. */
-
-#define insert_chunk(M, P, S)\
- if (is_small(S)) insert_small_chunk(M, P, S)\
- else { tchunkptr TP = (tchunkptr)(P); insert_large_chunk(M, TP, S); }
-
-#define unlink_chunk(M, P, S)\
- if (is_small(S)) unlink_small_chunk(M, P, S)\
- else { tchunkptr TP = (tchunkptr)(P); unlink_large_chunk(M, TP); }
-
-
-/* Relays to internal calls to malloc/free from realloc, memalign etc.
-   With ONLY_MSPACES everything goes to the mspace_* functions; with
-   MSPACES the choice depends on whether m is gm; otherwise the dl*
-   functions are always used.
-   NOTE(review): two internal_free variants carry their own trailing
-   semicolon and the MSPACES one is a bare if/else, so internal_free is
-   only safe as a complete statement. The MSPACES internal_malloc is an
-   unparenthesized conditional expression. */
-
-#if ONLY_MSPACES
-#define internal_malloc(m, b) mspace_malloc(m, b)
-#define internal_free(m, mem) mspace_free(m,mem);
-#else /* ONLY_MSPACES */
-#if MSPACES
-#define internal_malloc(m, b)\
- (m == gm)? dlmalloc(b) : mspace_malloc(m, b)
-#define internal_free(m, mem)\
- if (m == gm) dlfree(mem); else mspace_free(m,mem);
-#else /* MSPACES */
-#define internal_malloc(m, b) dlmalloc(b)
-#define internal_free(m, mem) dlfree(mem)
-#endif /* MSPACES */
-#endif /* ONLY_MSPACES */
-
-/* ----------------------- Direct-mmapping chunks ----------------------- */
-
-/*
- Directly mmapped chunks are set up with an offset to the start of
- the mmapped region stored in the prev_foot field of the chunk. This
- allows reconstruction of the required argument to MUNMAP when freed,
- and also allows adjustment of the returned chunk to meet alignment
- requirements (especially in memalign).
-*/
-
-/*
-  Malloc using mmap.
-  Maps a region large enough for nb plus bookkeeping, records the
-  alignment offset in prev_foot, writes the two trailing fencepost
-  heads, and updates least_addr and the footprint counters of m.
-  Returns the user pointer, or 0 when the size computation wraps or the
-  mapping fails.
-*/
-static void* mmap_alloc(mstate m, size_t nb) {
-  size_t mmsize = mmap_align(nb + SIX_SIZE_T_SIZES + CHUNK_ALIGN_MASK);
-  char* mm;
-  size_t offset;
-  size_t psize;
-  mchunkptr p;
-
-  if (mmsize <= nb) /* wrapped around 0 */
-    return 0;
-  mm = (char*)(CALL_DIRECT_MMAP(mmsize));
-  if (mm == CMFAIL)
-    return 0;
-
-  offset = align_offset(chunk2mem(mm));
-  psize = mmsize - offset - MMAP_FOOT_PAD;
-  p = (mchunkptr)(mm + offset);
-  p->prev_foot = offset;
-  p->head = psize;
-  mark_inuse_foot(m, p, psize);
-  chunk_plus_offset(p, psize)->head = FENCEPOST_HEAD;
-  chunk_plus_offset(p, psize+SIZE_T_SIZE)->head = 0;
-
-  if (m->least_addr == 0 || mm < m->least_addr)
-    m->least_addr = mm;
-  if ((m->footprint += mmsize) > m->max_footprint)
-    m->max_footprint = m->footprint;
-  assert(is_aligned(chunk2mem(p)));
-  check_mmapped_chunk(m, p);
-  return chunk2mem(p);
-}
-
-/*
-  Realloc using mmap.
-  Returns 0 for small requests (mmapped regions are never shrunk below
-  small size), oldp itself when the old chunk is big enough without
-  wasting more than twice the granularity, and otherwise the chunk
-  obtained by remapping the region, or 0 when the remap fails.
-*/
-static mchunkptr mmap_resize(mstate m, mchunkptr oldp, size_t nb) {
-  size_t oldsize = chunksize(oldp);
-  size_t offset;
-  size_t oldmmsize;
-  size_t newmmsize;
-  size_t psize;
-  char* cp;
-  mchunkptr newp;
-
-  if (is_small(nb)) /* Can't shrink mmap regions below small size */
-    return 0;
-  /* Keep old chunk if big enough but not too big */
-  if (oldsize >= nb + SIZE_T_SIZE &&
-      (oldsize - nb) <= (mparams.granularity << 1))
-    return oldp;
-
-  offset = oldp->prev_foot;
-  oldmmsize = oldsize + offset + MMAP_FOOT_PAD;
-  newmmsize = mmap_align(nb + SIX_SIZE_T_SIZES + CHUNK_ALIGN_MASK);
-  cp = (char*)CALL_MREMAP((char*)oldp - offset,
-                          oldmmsize, newmmsize, 1);
-  if (cp == CMFAIL)
-    return 0;
-
-  newp = (mchunkptr)(cp + offset);
-  psize = newmmsize - offset - MMAP_FOOT_PAD;
-  newp->head = psize;
-  mark_inuse_foot(m, newp, psize);
-  chunk_plus_offset(newp, psize)->head = FENCEPOST_HEAD;
-  chunk_plus_offset(newp, psize+SIZE_T_SIZE)->head = 0;
-
-  if (cp < m->least_addr)
-    m->least_addr = cp;
-  if ((m->footprint += newmmsize - oldmmsize) > m->max_footprint)
-    m->max_footprint = m->footprint;
-  check_mmapped_chunk(m, newp);
-  return newp;
-}
-
-/* -------------------------- mspace management -------------------------- */
-
-/*
-  Initialize top chunk and its size.
-  p is advanced to the next aligned chunk position and psize is reduced
-  by the same amount before being recorded in m.
-*/
-static void init_top(mstate m, mchunkptr p, size_t psize) {
-  /* bytes to skip so that the user pointer is aligned */
-  size_t shift = align_offset(chunk2mem(p));
-  mchunkptr top = (mchunkptr)((char*)p + shift);
-  size_t topsize = psize - shift;
-
-  m->top = top;
-  m->topsize = topsize;
-  top->head = topsize | PINUSE_BIT;
-  /* set size of fake trailing chunk holding overhead space only once */
-  chunk_plus_offset(top, topsize)->head = TOP_FOOT_SIZE;
-  m->trim_check = mparams.trim_threshold; /* reset on each update */
-}
-
-/*
-  Initialize bins for a new mstate that is otherwise zeroed out:
-  every smallbin header is made to point at itself in both directions.
-*/
-static void init_bins(mstate m) {
-  bindex_t idx = 0;
-  while (idx < NSMALLBINS) {
-    sbinptr head = smallbin_at(m, idx);
-    head->bk = head;
-    head->fd = head;
-    ++idx;
-  }
-}
-
-#if PROCEED_ON_ERROR
-
-/*
-  Default corruption action: count the error and make m forget about
-  all memory it manages, so later requests start from a clean state.
-
-  The bin occupancy bitmaps (smallmap/treemap) are cleared here; the bin
-  storage itself (smallbins/treebins) is re-initialized by the loop over
-  treebin_at and by init_bins, so it must not be assigned to directly.
-*/
-static void reset_on_error(mstate m) {
-  int i;
-  ++malloc_corruption_error_count;
-  /* Reinitialize fields to forget about all memory */
-  m->smallmap = m->treemap = 0;
-  m->dvsize = m->topsize = 0;
-  m->seg.base = 0;
-  m->seg.size = 0;
-  m->seg.next = 0;
-  m->top = m->dv = 0;
-  for (i = 0; i < NTREEBINS; ++i)
-    *treebin_at(m, i) = 0;
-  init_bins(m);
-}
-#endif /* PROCEED_ON_ERROR */
-
-/* Allocate chunk and prepend remainder with chunk in successor base. */
-static void* prepend_alloc(mstate m, char* newbase, char* oldbase,
- size_t nb) {
- mchunkptr p = align_as_chunk(newbase);
- mchunkptr oldfirst = align_as_chunk(oldbase);
- size_t psize = (char*)oldfirst - (char*)p;
- mchunkptr q = chunk_plus_offset(p, nb);
- size_t qsize = psize - nb;
- set_size_and_pinuse_of_inuse_chunk(m, p, nb);
-
- assert((char*)oldfirst > (char*)q);
- assert(pinuse(oldfirst));
- assert(qsize >= MIN_CHUNK_SIZE);
-
- /* consolidate remainder with first chunk of old base */
- if (oldfirst == m->top) {
- size_t tsize = m->topsize += qsize;
- m->top = q;
- q->head = tsize | PINUSE_BIT;
- check_top_chunk(m, q);
- }
- else if (oldfirst == m->dv) {
- size_t dsize = m->dvsize += qsize;
- m->dv = q;
- set_size_and_pinuse_of_free_chunk(q, dsize);
- }
- else {
- if (!is_inuse(oldfirst)) {
- size_t nsize = chunksize(oldfirst);
- unlink_chunk(m, oldfirst, nsize);
- oldfirst = chunk_plus_offset(oldfirst, nsize);
- qsize += nsize;
- }
- set_free_with_pinuse(q, qsize, oldfirst);
- insert_chunk(m, q, qsize);
- check_free_chunk(m, q);
- }
-
- check_malloced_chunk(m, chunk2mem(p), nb);
- return chunk2mem(p);
-}
-
-/* Add a segment to hold a new noncontiguous region */
-static void add_segment(mstate m, char* tbase, size_t tsize, flag_t mmapped) {
- /* Determine locations and sizes of segment, fenceposts, old top */
- char* old_top = (char*)m->top;
- msegmentptr oldsp = segment_holding(m, old_top);
- char* old_end = oldsp->base + oldsp->size;
- size_t ssize = pad_request(sizeof(struct malloc_segment));
- char* rawsp = old_end - (ssize + FOUR_SIZE_T_SIZES + CHUNK_ALIGN_MASK);
- size_t offset = align_offset(chunk2mem(rawsp));
- char* asp = rawsp + offset;
- char* csp = (asp < (old_top + MIN_CHUNK_SIZE))? old_top : asp;
- mchunkptr sp = (mchunkptr)csp;
- msegmentptr ss = (msegmentptr)(chunk2mem(sp));
- mchunkptr tnext = chunk_plus_offset(sp, ssize);
- mchunkptr p = tnext;
- int nfences = 0;
-
- /* reset top to new space */
- init_top(m, (mchunkptr)tbase, tsize - TOP_FOOT_SIZE);
-
- /* Set up segment record */
- assert(is_aligned(ss));
- set_size_and_pinuse_of_inuse_chunk(m, sp, ssize);
- *ss = m->seg; /* Push current record */
- m->seg.base = tbase;
- m->seg.size = tsize;
- m->seg.sflags = mmapped;
- m->seg.next = ss;
-
- /* Insert trailing fenceposts */
- for (;;) {
- mchunkptr nextp = chunk_plus_offset(p, SIZE_T_SIZE);
- p->head = FENCEPOST_HEAD;
- ++nfences;
- if ((char*)(&(nextp->head)) < old_end)
- p = nextp;
- else
- break;
- }
- assert(nfences >= 2);
-
- /* Insert the rest of old top into a bin as an ordinary free chunk */
- if (csp != old_top) {
- mchunkptr q = (mchunkptr)old_top;
- size_t psize = csp - old_top;
- mchunkptr tn = chunk_plus_offset(q, psize);
- set_free_with_pinuse(q, psize, tn);
- insert_chunk(m, q, psize);
- }
-
- check_top_chunk(m, m->top);
-}
-
-/* -------------------------- System allocation -------------------------- */
-
-/* Get memory from system using MORECORE or MMAP */
-static void* sys_alloc(mstate m, size_t nb) {
- char* tbase = CMFAIL;
- size_t tsize = 0;
- flag_t mmap_flag = 0;
-
- ensure_initialization();
-
- /* Directly map large chunks, but only if already initialized */
- if (use_mmap(m) && nb >= mparams.mmap_threshold && m->topsize != 0) {
- void* mem = mmap_alloc(m, nb);
- if (mem != 0)
- return mem;
- }
-
- /*
- Try getting memory in any of three ways (in most-preferred to
- least-preferred order):
- 1. A call to MORECORE that can normally contiguously extend memory.
- (disabled if not MORECORE_CONTIGUOUS or not HAVE_MORECORE or
- or main space is mmapped or a previous contiguous call failed)
- 2. A call to MMAP new space (disabled if not HAVE_MMAP).
- Note that under the default settings, if MORECORE is unable to
- fulfill a request, and HAVE_MMAP is true, then mmap is
- used as a noncontiguous system allocator. This is a useful backup
- strategy for systems with holes in address spaces -- in this case
- sbrk cannot contiguously expand the heap, but mmap may be able to
- find space.
- 3. A call to MORECORE that cannot usually contiguously extend memory.
- (disabled if not HAVE_MORECORE)
-
- In all cases, we need to request enough bytes from system to ensure
- we can malloc nb bytes upon success, so pad with enough space for
- top_foot, plus alignment-pad to make sure we don't lose bytes if
- not on boundary, and round this up to a granularity unit.
- */
-
- if (MORECORE_CONTIGUOUS && !use_noncontiguous(m)) {
- char* br = CMFAIL;
- msegmentptr ss = (m->top == 0)? 0 : segment_holding(m, (char*)m->top);
- size_t asize = 0;
- ACQUIRE_MALLOC_GLOBAL_LOCK();
-
- if (ss == 0) { /* First time through or recovery */
- char* base = (char*)CALL_MORECORE(0);
- if (base != CMFAIL) {
- asize = granularity_align(nb + SYS_ALLOC_PADDING);
- /* Adjust to end on a page boundary */
- if (!is_page_aligned(base))
- asize += (page_align((size_t)base) - (size_t)base);
- /* Can't call MORECORE if size is negative when treated as signed */
- if (asize < HALF_MAX_SIZE_T &&
- (br = (char*)(CALL_MORECORE(asize))) == base) {
- tbase = base;
- tsize = asize;
- }
- }
- }
- else {
- /* Subtract out existing available top space from MORECORE request. */
- asize = granularity_align(nb - m->topsize + SYS_ALLOC_PADDING);
- /* Use mem here only if it did continuously extend old space */
- if (asize < HALF_MAX_SIZE_T &&
- (br = (char*)(CALL_MORECORE(asize))) == ss->base+ss->size) {
- tbase = br;
- tsize = asize;
- }
- }
-
- if (tbase == CMFAIL) { /* Cope with partial failure */
- if (br != CMFAIL) { /* Try to use/extend the space we did get */
- if (asize < HALF_MAX_SIZE_T &&
- asize < nb + SYS_ALLOC_PADDING) {
- size_t esize = granularity_align(nb + SYS_ALLOC_PADDING - asize);
- if (esize < HALF_MAX_SIZE_T) {
- char* end = (char*)CALL_MORECORE(esize);
- if (end != CMFAIL)
- asize += esize;
- else { /* Can't use; try to release */
- (void) CALL_MORECORE(-asize);
- br = CMFAIL;
- }
- }
- }
- }
- if (br != CMFAIL) { /* Use the space we did get */
- tbase = br;
- tsize = asize;
- }
- else
- disable_contiguous(m); /* Don't try contiguous path in the future */
- }
-
- RELEASE_MALLOC_GLOBAL_LOCK();
- }
-
- if (HAVE_MMAP && tbase == CMFAIL) { /* Try MMAP */
- size_t rsize = granularity_align(nb + SYS_ALLOC_PADDING);
- if (rsize > nb) { /* Fail if wraps around zero */
- char* mp = (char*)(CALL_MMAP(rsize));
- if (mp != CMFAIL) {
- tbase = mp;
- tsize = rsize;
- mmap_flag = USE_MMAP_BIT;
- }
- }
- }
-
- if (HAVE_MORECORE && tbase == CMFAIL) { /* Try noncontiguous MORECORE */
- size_t asize = granularity_align(nb + SYS_ALLOC_PADDING);
- if (asize < HALF_MAX_SIZE_T) {
- char* br = CMFAIL;
- char* end = CMFAIL;
- ACQUIRE_MALLOC_GLOBAL_LOCK();
- br = (char*)(CALL_MORECORE(asize));
- end = (char*)(CALL_MORECORE(0));
- RELEASE_MALLOC_GLOBAL_LOCK();
- if (br != CMFAIL && end != CMFAIL && br < end) {
- size_t ssize = end - br;
- if (ssize > nb + TOP_FOOT_SIZE) {
- tbase = br;
- tsize = ssize;
- }
- }
- }
- }
-
- if (tbase != CMFAIL) {
-
- if ((m->footprint += tsize) > m->max_footprint)
- m->max_footprint = m->footprint;
-
- if (!is_initialized(m)) { /* first-time initialization */
- if (m->least_addr == 0 || tbase < m->least_addr)
- m->least_addr = tbase;
- m->seg.base = tbase;
- m->seg.size = tsize;
- m->seg.sflags = mmap_flag;
- m->magic = mparams.magic;
- m->release_checks = MAX_RELEASE_CHECK_RATE;
- init_bins(m);
-#if !ONLY_MSPACES
- if (is_global(m))
- init_top(m, (mchunkptr)tbase, tsize - TOP_FOOT_SIZE);
- else
-#endif
- {
- /* Offset top by embedded malloc_state */
- mchunkptr mn = next_chunk(mem2chunk(m));
- init_top(m, mn, (size_t)((tbase + tsize) - (char*)mn) -TOP_FOOT_SIZE);
- }
- }
-
- else {
- /* Try to merge with an existing segment */
- msegmentptr sp = &m->seg;
- /* Only consider most recent segment if traversal suppressed */
- while (sp != 0 && tbase != sp->base + sp->size)
- sp = (NO_SEGMENT_TRAVERSAL) ? 0 : sp->next;
- if (sp != 0 &&
- !is_extern_segment(sp) &&
- (sp->sflags & USE_MMAP_BIT) == mmap_flag &&
- segment_holds(sp, m->top)) { /* append */
- sp->size += tsize;
- init_top(m, m->top, m->topsize + tsize);
- }
- else {
- if (tbase < m->least_addr)
- m->least_addr = tbase;
- sp = &m->seg;
- while (sp != 0 && sp->base != tbase + tsize)
- sp = (NO_SEGMENT_TRAVERSAL) ? 0 : sp->next;
- if (sp != 0 &&
- !is_extern_segment(sp) &&
- (sp->sflags & USE_MMAP_BIT) == mmap_flag) {
- char* oldbase = sp->base;
- sp->base = tbase;
- sp->size += tsize;
- return prepend_alloc(m, tbase, oldbase, nb);
- }
- else
- add_segment(m, tbase, tsize, mmap_flag);
- }
- }
-
- if (nb < m->topsize) { /* Allocate from new or extended top space */
- size_t rsize = m->topsize -= nb;
- mchunkptr p = m->top;
- mchunkptr r = m->top = chunk_plus_offset(p, nb);
- r->head = rsize | PINUSE_BIT;
- set_size_and_pinuse_of_inuse_chunk(m, p, nb);
- check_top_chunk(m, m->top);
- check_malloced_chunk(m, chunk2mem(p), nb);
- return chunk2mem(p);
- }
- }
-
- MALLOC_FAILURE_ACTION;
- return 0;
-}
-
-/* ----------------------- system deallocation -------------------------- */
-
-/* Unmap and unlink any mmapped segments that don't contain used chunks */
-static size_t release_unused_segments(mstate m) {
- size_t released = 0;
- int nsegs = 0;
- msegmentptr pred = &m->seg;
- msegmentptr sp = pred->next;
- while (sp != 0) {
- char* base = sp->base;
- size_t size = sp->size;
- msegmentptr next = sp->next;
- ++nsegs;
- if (is_mmapped_segment(sp) && !is_extern_segment(sp)) {
- mchunkptr p = align_as_chunk(base);
- size_t psize = chunksize(p);
- /* Can unmap if first chunk holds entire segment and not pinned */
- if (!is_inuse(p) && (char*)p + psize >= base + size - TOP_FOOT_SIZE) {
- tchunkptr tp = (tchunkptr)p;
- assert(segment_holds(sp, (char*)sp));
- if (p == m->dv) {
- m->dv = 0;
- m->dvsize = 0;
- }
- else {
- unlink_large_chunk(m, tp);
- }
- if (CALL_MUNMAP(base, size) == 0) {
- released += size;
- m->footprint -= size;
- /* unlink obsoleted record */
- sp = pred;
- sp->next = next;
- }
- else { /* back out if cannot unmap */
- insert_large_chunk(m, tp, psize);
- }
- }
- }
- if (NO_SEGMENT_TRAVERSAL) /* scan only first segment */
- break;
- pred = sp;
- sp = next;
- }
- /* Reset check counter */
- m->release_checks = ((nsegs > MAX_RELEASE_CHECK_RATE)?
- nsegs : MAX_RELEASE_CHECK_RATE);
- return released;
-}
-
-static int sys_trim(mstate m, size_t pad) {
- size_t released = 0;
- ensure_initialization();
- if (pad < MAX_REQUEST && is_initialized(m)) {
- pad += TOP_FOOT_SIZE; /* ensure enough room for segment overhead */
-
- if (m->topsize > pad) {
- /* Shrink top space in granularity-size units, keeping at least one */
- size_t unit = mparams.granularity;
- size_t extra = ((m->topsize - pad + (unit - SIZE_T_ONE)) / unit -
- SIZE_T_ONE) * unit;
- msegmentptr sp = segment_holding(m, (char*)m->top);
-
- if (!is_extern_segment(sp)) {
- if (is_mmapped_segment(sp)) {
- if (HAVE_MMAP &&
- sp->size >= extra &&
- !has_segment_link(m, sp)) { /* can't shrink if pinned */
- size_t newsize = sp->size - extra;
- /* Prefer mremap, fall back to munmap */
- if ((CALL_MREMAP(sp->base, sp->size, newsize, 0) != MFAIL) ||
- (CALL_MUNMAP(sp->base + newsize, extra) == 0)) {
- released = extra;
- }
- }
- }
- else if (HAVE_MORECORE) {
- if (extra >= HALF_MAX_SIZE_T) /* Avoid wrapping negative */
- extra = (HALF_MAX_SIZE_T) + SIZE_T_ONE - unit;
- ACQUIRE_MALLOC_GLOBAL_LOCK();
- {
- /* Make sure end of memory is where we last set it. */
- char* old_br = (char*)(CALL_MORECORE(0));
- if (old_br == sp->base + sp->size) {
- char* rel_br = (char*)(CALL_MORECORE(-extra));
- char* new_br = (char*)(CALL_MORECORE(0));
- if (rel_br != CMFAIL && new_br < old_br)
- released = old_br - new_br;
- }
- }
- RELEASE_MALLOC_GLOBAL_LOCK();
- }
- }
-
- if (released != 0) {
- sp->size -= released;
- m->footprint -= released;
- init_top(m, m->top, m->topsize - released);
- check_top_chunk(m, m->top);
- }
- }
-
- /* Unmap any unused mmapped segments */
- if (HAVE_MMAP)
- released += release_unused_segments(m);
-
- /* On failure, disable autotrim to avoid repeated failed future calls */
- if (released == 0 && m->topsize > m->trim_check)
- m->trim_check = MAX_SIZE_T;
- }
-
- return (released != 0)? 1 : 0;
-}
-
-
-/* ---------------------------- malloc support --------------------------- */
-
-/* allocate a large request from the best fitting chunk in a treebin */
-static void* tmalloc_large(mstate m, size_t nb) {
- tchunkptr v = 0;
- size_t rsize = -nb; /* Unsigned negation */
- tchunkptr t;
- bindex_t idx;
- compute_tree_index(nb, idx);
- if ((t = *treebin_at(m, idx)) != 0) {
- /* Traverse tree for this bin looking for node with size == nb */
- size_t sizebits = nb << leftshift_for_tree_index(idx);
- tchunkptr rst = 0; /* The deepest untaken right subtree */
- for (;;) {
- tchunkptr rt;
- size_t trem = chunksize(t) - nb;
- if (trem < rsize) {
- v = t;
- if ((rsize = trem) == 0)
- break;
- }
- rt = t->child[1];
- t = t->child[(sizebits >> (SIZE_T_BITSIZE-SIZE_T_ONE)) & 1];
- if (rt != 0 && rt != t)
- rst = rt;
- if (t == 0) {
- t = rst; /* set t to least subtree holding sizes > nb */
- break;
- }
- sizebits <<= 1;
- }
- }
- if (t == 0 && v == 0) { /* set t to root of next non-empty treebin */
- binmap_t leftbits = left_bits(idx2bit(idx)) & m->treemap;
- if (leftbits != 0) {
- bindex_t i;
- binmap_t leastbit = least_bit(leftbits);
- compute_bit2idx(leastbit, i);
- t = *treebin_at(m, i);
- }
- }
-
- while (t != 0) { /* find smallest of tree or subtree */
- size_t trem = chunksize(t) - nb;
- if (trem < rsize) {
- rsize = trem;
- v = t;
- }
- t = leftmost_child(t);
- }
-
- /* If dv is a better fit, return 0 so malloc will use it */
- if (v != 0 && rsize < (size_t)(m->dvsize - nb)) {
- if (RTCHECK(ok_address(m, v))) { /* split */
- mchunkptr r = chunk_plus_offset(v, nb);
- assert(chunksize(v) == rsize + nb);
- if (RTCHECK(ok_next(v, r))) {
- unlink_large_chunk(m, v);
- if (rsize < MIN_CHUNK_SIZE)
- set_inuse_and_pinuse(m, v, (rsize + nb));
- else {
- set_size_and_pinuse_of_inuse_chunk(m, v, nb);
- set_size_and_pinuse_of_free_chunk(r, rsize);
- insert_chunk(m, r, rsize);
- }
- return chunk2mem(v);
- }
- }
- CORRUPTION_ERROR_ACTION(m);
- }
- return 0;
-}
-
-/* allocate a small request from the best fitting chunk in a treebin */
-static void* tmalloc_small(mstate m, size_t nb) {
- tchunkptr t, v;
- size_t rsize;
- bindex_t i;
- binmap_t leastbit = least_bit(m->treemap);
- compute_bit2idx(leastbit, i);
- v = t = *treebin_at(m, i);
- rsize = chunksize(t) - nb;
-
- while ((t = leftmost_child(t)) != 0) {
- size_t trem = chunksize(t) - nb;
- if (trem < rsize) {
- rsize = trem;
- v = t;
- }
- }
-
- if (RTCHECK(ok_address(m, v))) {
- mchunkptr r = chunk_plus_offset(v, nb);
- assert(chunksize(v) == rsize + nb);
- if (RTCHECK(ok_next(v, r))) {
- unlink_large_chunk(m, v);
- if (rsize < MIN_CHUNK_SIZE)
- set_inuse_and_pinuse(m, v, (rsize + nb));
- else {
- set_size_and_pinuse_of_inuse_chunk(m, v, nb);
- set_size_and_pinuse_of_free_chunk(r, rsize);
- replace_dv(m, r, rsize);
- }
- return chunk2mem(v);
- }
- }
-
- CORRUPTION_ERROR_ACTION(m);
- return 0;
-}
-
-/* --------------------------- realloc support --------------------------- */
-
-static void* internal_realloc(mstate m, void* oldmem, size_t bytes) {
- if (bytes >= MAX_REQUEST) {
- MALLOC_FAILURE_ACTION;
- return 0;
- }
- if (!PREACTION(m)) {
- mchunkptr oldp = mem2chunk(oldmem);
- size_t oldsize = chunksize(oldp);
- mchunkptr next = chunk_plus_offset(oldp, oldsize);
- mchunkptr newp = 0;
- void* extra = 0;
-
- /* Try to either shrink or extend into top. Else malloc-copy-free */
-
- if (RTCHECK(ok_address(m, oldp) && ok_inuse(oldp) &&
- ok_next(oldp, next) && ok_pinuse(next))) {
- size_t nb = request2size(bytes);
- if (is_mmapped(oldp))
- newp = mmap_resize(m, oldp, nb);
- else if (oldsize >= nb) { /* already big enough */
- size_t rsize = oldsize - nb;
- newp = oldp;
- if (rsize >= MIN_CHUNK_SIZE) {
- mchunkptr remainder = chunk_plus_offset(newp, nb);
- set_inuse(m, newp, nb);
- set_inuse_and_pinuse(m, remainder, rsize);
- extra = chunk2mem(remainder);
- }
- }
- else if (next == m->top && oldsize + m->topsize > nb) {
- /* Expand into top */
- size_t newsize = oldsize + m->topsize;
- size_t newtopsize = newsize - nb;
- mchunkptr newtop = chunk_plus_offset(oldp, nb);
- set_inuse(m, oldp, nb);
- newtop->head = newtopsize |PINUSE_BIT;
- m->top = newtop;
- m->topsize = newtopsize;
- newp = oldp;
- }
- }
- else {
- USAGE_ERROR_ACTION(m, oldmem);
- POSTACTION(m);
- return 0;
- }
-#if DEBUG
- if (newp != 0) {
- check_inuse_chunk(m, newp); /* Check requires lock */
- }
-#endif
-
- POSTACTION(m);
-
- if (newp != 0) {
- if (extra != 0) {
- internal_free(m, extra);
- }
- return chunk2mem(newp);
- }
- else {
- void* newmem = internal_malloc(m, bytes);
- if (newmem != 0) {
- size_t oc = oldsize - overhead_for(oldp);
- memcpy(newmem, oldmem, (oc < bytes)? oc : bytes);
- internal_free(m, oldmem);
- }
- return newmem;
- }
- }
- return 0;
-}
-
-/* --------------------------- memalign support -------------------------- */
-
-static void* internal_memalign(mstate m, size_t alignment, size_t bytes) {
- if (alignment <= MALLOC_ALIGNMENT) /* Can just use malloc */
- return internal_malloc(m, bytes);
- if (alignment < MIN_CHUNK_SIZE) /* must be at least a minimum chunk size */
- alignment = MIN_CHUNK_SIZE;
- if ((alignment & (alignment-SIZE_T_ONE)) != 0) {/* Ensure a power of 2 */
- size_t a = MALLOC_ALIGNMENT << 1;
- while (a < alignment) a <<= 1;
- alignment = a;
- }
-
- if (bytes >= MAX_REQUEST - alignment) {
- if (m != 0) { /* Test isn't needed but avoids compiler warning */
- MALLOC_FAILURE_ACTION;
- }
- }
- else {
- size_t nb = request2size(bytes);
- size_t req = nb + alignment + MIN_CHUNK_SIZE - CHUNK_OVERHEAD;
- char* mem = (char*)internal_malloc(m, req);
- if (mem != 0) {
- void* leader = 0;
- void* trailer = 0;
- mchunkptr p = mem2chunk(mem);
-
- if (PREACTION(m)) return 0;
- if ((((size_t)(mem)) % alignment) != 0) { /* misaligned */
- /*
- Find an aligned spot inside chunk. Since we need to give
- back leading space in a chunk of at least MIN_CHUNK_SIZE, if
- the first calculation places us at a spot with less than
- MIN_CHUNK_SIZE leader, we can move to the next aligned spot.
- We've allocated enough total room so that this is always
- possible.
- */
- char* br = (char*)mem2chunk((size_t)(((size_t)(mem +
- alignment -
- SIZE_T_ONE)) &
- -alignment));
- char* pos = ((size_t)(br - (char*)(p)) >= MIN_CHUNK_SIZE)?
- br : br+alignment;
- mchunkptr newp = (mchunkptr)pos;
- size_t leadsize = pos - (char*)(p);
- size_t newsize = chunksize(p) - leadsize;
-
- if (is_mmapped(p)) { /* For mmapped chunks, just adjust offset */
- newp->prev_foot = p->prev_foot + leadsize;
- newp->head = newsize;
- }
- else { /* Otherwise, give back leader, use the rest */
- set_inuse(m, newp, newsize);
- set_inuse(m, p, leadsize);
- leader = chunk2mem(p);
- }
- p = newp;
- }
-
- /* Give back spare room at the end */
- if (!is_mmapped(p)) {
- size_t size = chunksize(p);
- if (size > nb + MIN_CHUNK_SIZE) {
- size_t remainder_size = size - nb;
- mchunkptr remainder = chunk_plus_offset(p, nb);
- set_inuse(m, p, nb);
- set_inuse(m, remainder, remainder_size);
- trailer = chunk2mem(remainder);
- }
- }
-
- assert (chunksize(p) >= nb);
- assert((((size_t)(chunk2mem(p))) % alignment) == 0);
- check_inuse_chunk(m, p);
- POSTACTION(m);
- if (leader != 0) {
- internal_free(m, leader);
- }
- if (trailer != 0) {
- internal_free(m, trailer);
- }
- return chunk2mem(p);
- }
- }
- return 0;
-}
-
-/* ------------------------ comalloc/coalloc support --------------------- */
-
-static void** ialloc(mstate m,
- size_t n_elements,
- size_t* sizes,
- int opts,
- void* chunks[]) {
- /*
- This provides common support for independent_X routines, handling
- all of the combinations that can result.
-
- The opts arg has:
- bit 0 set if all elements are same size (using sizes[0])
- bit 1 set if elements should be zeroed
- */
-
- size_t element_size; /* chunksize of each element, if all same */
- size_t contents_size; /* total size of elements */
- size_t array_size; /* request size of pointer array */
- void* mem; /* malloced aggregate space */
- mchunkptr p; /* corresponding chunk */
- size_t remainder_size; /* remaining bytes while splitting */
- void** marray; /* either "chunks" or malloced ptr array */
- mchunkptr array_chunk; /* chunk for malloced ptr array */
- flag_t was_enabled; /* to disable mmap */
- size_t size;
- size_t i;
-
- ensure_initialization();
- /* compute array length, if needed */
- if (chunks != 0) {
- if (n_elements == 0)
- return chunks; /* nothing to do */
- marray = chunks;
- array_size = 0;
- }
- else {
- /* if empty req, must still return chunk representing empty array */
- if (n_elements == 0)
- return (void**)internal_malloc(m, 0);
- marray = 0;
- array_size = request2size(n_elements * (sizeof(void*)));
- }
-
- /* compute total element size */
- if (opts & 0x1) { /* all-same-size */
- element_size = request2size(*sizes);
- contents_size = n_elements * element_size;
- }
- else { /* add up all the sizes */
- element_size = 0;
- contents_size = 0;
- for (i = 0; i != n_elements; ++i)
- contents_size += request2size(sizes[i]);
- }
-
- size = contents_size + array_size;
-
- /*
- Allocate the aggregate chunk. First disable direct-mmapping so
- malloc won't use it, since we would not be able to later
- free/realloc space internal to a segregated mmap region.
- */
- was_enabled = use_mmap(m);
- disable_mmap(m);
- mem = internal_malloc(m, size - CHUNK_OVERHEAD);
- if (was_enabled)
- enable_mmap(m);
- if (mem == 0)
- return 0;
-
- if (PREACTION(m)) return 0;
- p = mem2chunk(mem);
- remainder_size = chunksize(p);
-
- assert(!is_mmapped(p));
-
- if (opts & 0x2) { /* optionally clear the elements */
- memset((size_t*)mem, 0, remainder_size - SIZE_T_SIZE - array_size);
- }
-
- /* If not provided, allocate the pointer array as final part of chunk */
- if (marray == 0) {
- size_t array_chunk_size;
- array_chunk = chunk_plus_offset(p, contents_size);
- array_chunk_size = remainder_size - contents_size;
- marray = (void**) (chunk2mem(array_chunk));
- set_size_and_pinuse_of_inuse_chunk(m, array_chunk, array_chunk_size);
- remainder_size = contents_size;
- }
-
- /* split out elements */
- for (i = 0; ; ++i) {
- marray[i] = chunk2mem(p);
- if (i != n_elements-1) {
- if (element_size != 0)
- size = element_size;
- else
- size = request2size(sizes[i]);
- remainder_size -= size;
- set_size_and_pinuse_of_inuse_chunk(m, p, size);
- p = chunk_plus_offset(p, size);
- }
- else { /* the final element absorbs any overallocation slop */
- set_size_and_pinuse_of_inuse_chunk(m, p, remainder_size);
- break;
- }
- }
-
-#if DEBUG
- if (marray != chunks) {
- /* final element must have exactly exhausted chunk */
- if (element_size != 0) {
- assert(remainder_size == element_size);
- }
- else {
- assert(remainder_size == request2size(sizes[i]));
- }
- check_inuse_chunk(m, mem2chunk(marray));
- }
- for (i = 0; i != n_elements; ++i)
- check_inuse_chunk(m, mem2chunk(marray[i]));
-
-#endif /* DEBUG */
-
- POSTACTION(m);
- return marray;
-}
-
-
-/* -------------------------- public routines ---------------------------- */
-
-#if !ONLY_MSPACES
-
-void* dlmalloc(size_t bytes) {
- /*
- Basic algorithm:
- If a small request (< 256 bytes minus per-chunk overhead):
- 1. If one exists, use a remainderless chunk in associated smallbin.
- (Remainderless means that there are too few excess bytes to
- represent as a chunk.)
- 2. If it is big enough, use the dv chunk, which is normally the
- chunk adjacent to the one used for the most recent small request.
- 3. If one exists, split the smallest available chunk in a bin,
- saving remainder in dv.
- 4. If it is big enough, use the top chunk.
- 5. If available, get memory from system and use it
- Otherwise, for a large request:
- 1. Find the smallest available binned chunk that fits, and use it
- if it is better fitting than dv chunk, splitting if necessary.
- 2. If better fitting than any binned chunk, use the dv chunk.
- 3. If it is big enough, use the top chunk.
- 4. If request size >= mmap threshold, try to directly mmap this chunk.
- 5. If available, get memory from system and use it
-
- The ugly goto's here ensure that postaction occurs along all paths.
- */
-
-#if USE_LOCKS
- ensure_initialization(); /* initialize in sys_alloc if not using locks */
-#endif
-
- if (!PREACTION(gm)) {
- void* mem;
- size_t nb;
- if (bytes <= MAX_SMALL_REQUEST) {
- bindex_t idx;
- binmap_t smallbits;
- nb = (bytes < MIN_REQUEST)? MIN_CHUNK_SIZE : pad_request(bytes);
- idx = small_index(nb);
- smallbits = gm->smallmap >> idx;
-
- if ((smallbits & 0x3U) != 0) { /* Remainderless fit to a smallbin. */
- mchunkptr b, p;
- idx += ~smallbits & 1; /* Uses next bin if idx empty */
- b = smallbin_at(gm, idx);
- p = b->fd;
- assert(chunksize(p) == small_index2size(idx));
- unlink_first_small_chunk(gm, b, p, idx);
- set_inuse_and_pinuse(gm, p, small_index2size(idx));
- mem = chunk2mem(p);
- check_malloced_chunk(gm, mem, nb);
- goto postaction;
- }
-
- else if (nb > gm->dvsize) {
- if (smallbits != 0) { /* Use chunk in next nonempty smallbin */
- mchunkptr b, p, r;
- size_t rsize;
- bindex_t i;
- binmap_t leftbits = (smallbits << idx) & left_bits(idx2bit(idx));
- binmap_t leastbit = least_bit(leftbits);
- compute_bit2idx(leastbit, i);
- b = smallbin_at(gm, i);
- p = b->fd;
- assert(chunksize(p) == small_index2size(i));
- unlink_first_small_chunk(gm, b, p, i);
- rsize = small_index2size(i) - nb;
- /* Fit here cannot be remainderless if 4byte sizes */
- if (SIZE_T_SIZE != 4 && rsize < MIN_CHUNK_SIZE)
- set_inuse_and_pinuse(gm, p, small_index2size(i));
- else {
- set_size_and_pinuse_of_inuse_chunk(gm, p, nb);
- r = chunk_plus_offset(p, nb);
- set_size_and_pinuse_of_free_chunk(r, rsize);
- replace_dv(gm, r, rsize);
- }
- mem = chunk2mem(p);
- check_malloced_chunk(gm, mem, nb);
- goto postaction;
- }
-
- else if (gm->treemap != 0 && (mem = tmalloc_small(gm, nb)) != 0) {
- check_malloced_chunk(gm, mem, nb);
- goto postaction;
- }
- }
- }
- else if (bytes >= MAX_REQUEST)
- nb = MAX_SIZE_T; /* Too big to allocate. Force failure (in sys alloc) */
- else {
- nb = pad_request(bytes);
- if (gm->treemap != 0 && (mem = tmalloc_large(gm, nb)) != 0) {
- check_malloced_chunk(gm, mem, nb);
- goto postaction;
- }
- }
-
- if (nb <= gm->dvsize) {
- size_t rsize = gm->dvsize - nb;
- mchunkptr p = gm->dv;
- if (rsize >= MIN_CHUNK_SIZE) { /* split dv */
- mchunkptr r = gm->dv = chunk_plus_offset(p, nb);
- gm->dvsize = rsize;
- set_size_and_pinuse_of_free_chunk(r, rsize);
- set_size_and_pinuse_of_inuse_chunk(gm, p, nb);
- }
- else { /* exhaust dv */
- size_t dvs = gm->dvsize;
- gm->dvsize = 0;
- gm->dv = 0;
- set_inuse_and_pinuse(gm, p, dvs);
- }
- mem = chunk2mem(p);
- check_malloced_chunk(gm, mem, nb);
- goto postaction;
- }
-
- else if (nb < gm->topsize) { /* Split top */
- size_t rsize = gm->topsize -= nb;
- mchunkptr p = gm->top;
- mchunkptr r = gm->top = chunk_plus_offset(p, nb);
- r->head = rsize | PINUSE_BIT;
- set_size_and_pinuse_of_inuse_chunk(gm, p, nb);
- mem = chunk2mem(p);
- check_top_chunk(gm, gm->top);
- check_malloced_chunk(gm, mem, nb);
- goto postaction;
- }
-
- mem = sys_alloc(gm, nb);
-
- postaction:
- POSTACTION(gm);
- return mem;
- }
-
- return 0;
-}
-
-void dlfree(void* mem) {
- /*
- Consolidate freed chunks with preceeding or succeeding bordering
- free chunks, if they exist, and then place in a bin. Intermixed
- with special cases for top, dv, mmapped chunks, and usage errors.
- */
-
- if (mem != 0) {
- mchunkptr p = mem2chunk(mem);
-#if FOOTERS
- mstate fm = get_mstate_for(p);
- if (!ok_magic(fm)) {
- USAGE_ERROR_ACTION(fm, p);
- return;
- }
-#else /* FOOTERS */
-#define fm gm
-#endif /* FOOTERS */
- if (!PREACTION(fm)) {
- check_inuse_chunk(fm, p);
- if (RTCHECK(ok_address(fm, p) && ok_inuse(p))) {
- size_t psize = chunksize(p);
- mchunkptr next = chunk_plus_offset(p, psize);
- if (!pinuse(p)) {
- size_t prevsize = p->prev_foot;
- if (is_mmapped(p)) {
- psize += prevsize + MMAP_FOOT_PAD;
- if (CALL_MUNMAP((char*)p - prevsize, psize) == 0)
- fm->footprint -= psize;
- goto postaction;
- }
- else {
- mchunkptr prev = chunk_minus_offset(p, prevsize);
- psize += prevsize;
- p = prev;
- if (RTCHECK(ok_address(fm, prev))) { /* consolidate backward */
- if (p != fm->dv) {
- unlink_chunk(fm, p, prevsize);
- }
- else if ((next->head & INUSE_BITS) == INUSE_BITS) {
- fm->dvsize = psize;
- set_free_with_pinuse(p, psize, next);
- goto postaction;
- }
- }
- else
- goto erroraction;
- }
- }
-
- if (RTCHECK(ok_next(p, next) && ok_pinuse(next))) {
- if (!cinuse(next)) { /* consolidate forward */
- if (next == fm->top) {
- size_t tsize = fm->topsize += psize;
- fm->top = p;
- p->head = tsize | PINUSE_BIT;
- if (p == fm->dv) {
- fm->dv = 0;
- fm->dvsize = 0;
- }
- if (should_trim(fm, tsize))
- sys_trim(fm, 0);
- goto postaction;
- }
- else if (next == fm->dv) {
- size_t dsize = fm->dvsize += psize;
- fm->dv = p;
- set_size_and_pinuse_of_free_chunk(p, dsize);
- goto postaction;
- }
- else {
- size_t nsize = chunksize(next);
- psize += nsize;
- unlink_chunk(fm, next, nsize);
- set_size_and_pinuse_of_free_chunk(p, psize);
- if (p == fm->dv) {
- fm->dvsize = psize;
- goto postaction;
- }
- }
- }
- else
- set_free_with_pinuse(p, psize, next);
-
- if (is_small(psize)) {
- insert_small_chunk(fm, p, psize);
- check_free_chunk(fm, p);
- }
- else {
- tchunkptr tp = (tchunkptr)p;
- insert_large_chunk(fm, tp, psize);
- check_free_chunk(fm, p);
- if (--fm->release_checks == 0)
- release_unused_segments(fm);
- }
- goto postaction;
- }
- }
- erroraction:
- USAGE_ERROR_ACTION(fm, p);
- postaction:
- POSTACTION(fm);
- }
- }
-#if !FOOTERS
-#undef fm
-#endif /* FOOTERS */
-}
-
-void* dlcalloc(size_t n_elements, size_t elem_size) {
- void* mem;
- size_t req = 0;
- if (n_elements != 0) {
- req = n_elements * elem_size;
- if (((n_elements | elem_size) & ~(size_t)0xffff) &&
- (req / n_elements != elem_size))
- req = MAX_SIZE_T; /* force downstream failure on overflow */
- }
- mem = dlmalloc(req);
- if (mem != 0 && calloc_must_clear(mem2chunk(mem)))
- memset(mem, 0, req);
- return mem;
-}
-
-void* dlrealloc(void* oldmem, size_t bytes) {
- if (oldmem == 0)
- return dlmalloc(bytes);
-#ifdef REALLOC_ZERO_BYTES_FREES
- if (bytes == 0) {
- dlfree(oldmem);
- return 0;
- }
-#endif /* REALLOC_ZERO_BYTES_FREES */
- else {
-#if ! FOOTERS
- mstate m = gm;
-#else /* FOOTERS */
- mstate m = get_mstate_for(mem2chunk(oldmem));
- if (!ok_magic(m)) {
- USAGE_ERROR_ACTION(m, oldmem);
- return 0;
- }
-#endif /* FOOTERS */
- return internal_realloc(m, oldmem, bytes);
- }
-}
-
-void* dlmemalign(size_t alignment, size_t bytes) {
- return internal_memalign(gm, alignment, bytes);
-}
-
-void** dlindependent_calloc(size_t n_elements, size_t elem_size,
- void* chunks[]) {
- size_t sz = elem_size; /* serves as 1-element array */
- return ialloc(gm, n_elements, &sz, 3, chunks);
-}
-
-void** dlindependent_comalloc(size_t n_elements, size_t sizes[],
- void* chunks[]) {
- return ialloc(gm, n_elements, sizes, 0, chunks);
-}
-
-void* dlvalloc(size_t bytes) {
- size_t pagesz;
- ensure_initialization();
- pagesz = mparams.page_size;
- return dlmemalign(pagesz, bytes);
-}
-
-void* dlpvalloc(size_t bytes) {
- size_t pagesz;
- ensure_initialization();
- pagesz = mparams.page_size;
- return dlmemalign(pagesz, (bytes + pagesz - SIZE_T_ONE) & ~(pagesz - SIZE_T_ONE));
-}
-
-int dlmalloc_trim(size_t pad) {
- int result = 0;
- ensure_initialization();
- if (!PREACTION(gm)) {
- result = sys_trim(gm, pad);
- POSTACTION(gm);
- }
- return result;
-}
-
-size_t dlmalloc_footprint(void) {
- return gm->footprint;
-}
-
-size_t dlmalloc_max_footprint(void) { /* Returns the max_footprint field of the global state gm. */
- return gm->max_footprint;
-}
-
-#if !NO_MALLINFO
-struct mallinfo dlmallinfo(void) { /* Returns internal_mallinfo for the global state gm; compiled only when NO_MALLINFO is false. */
- return internal_mallinfo(gm);
-}
-#endif /* NO_MALLINFO */
-
-void dlmalloc_stats() { /* Thin wrapper: calls internal_malloc_stats on the global state gm. */
- internal_malloc_stats(gm);
-}
-
-int dlmallopt(int param_number, int value) { /* Thin wrapper: forwards both arguments to change_mparam and returns its result. */
- return change_mparam(param_number, value);
-}
-
-#endif /* !ONLY_MSPACES */
-
-size_t dlmalloc_usable_size(void* mem) { /* Returns chunksize minus overhead for an in-use chunk; 0 for a null pointer or a chunk that is not in use. */
- if (mem != 0) {
- mchunkptr p = mem2chunk(mem); /* step back from the user pointer to its chunk header */
- if (is_inuse(p))
- return chunksize(p) - overhead_for(p);
- }
- return 0;
-}
-
-/* ----------------------------- user mspaces ---------------------------- */
-
-#if MSPACES
-
-static mstate init_user_mstate(char* tbase, size_t tsize) { /* Builds a malloc_state inside the region starting at tbase of length tsize, then sets up its bins and top chunk. */
- size_t msize = pad_request(sizeof(struct malloc_state));
- mchunkptr mn;
- mchunkptr msp = align_as_chunk(tbase);
- mstate m = (mstate)(chunk2mem(msp)); /* the state struct lives in the payload of the first chunk */
- memset(m, 0, msize);
- INITIAL_LOCK(&m->mutex);
- msp->head = (msize|INUSE_BITS); /* mark the chunk holding the state as in use */
- m->seg.base = m->least_addr = tbase;
- m->seg.size = m->footprint = m->max_footprint = tsize;
- m->magic = mparams.magic;
- m->release_checks = MAX_RELEASE_CHECK_RATE;
- m->mflags = mparams.default_mflags;
- m->extp = 0;
- m->exts = 0;
- disable_contiguous(m);
- init_bins(m);
- mn = next_chunk(mem2chunk(m)); /* chunk immediately after the one holding the state */
- init_top(m, mn, (size_t)((tbase + tsize) - (char*)mn) - TOP_FOOT_SIZE); /* top runs from mn to the end of the region, less TOP_FOOT_SIZE */
- check_top_chunk(m, m->top);
- return m;
-}
-
-mspace create_mspace(size_t capacity, int locked) { /* Obtains a region with CALL_MMAP and initializes an mspace in it; returns 0 if the capacity check or the mapping fails. */
- mstate m = 0;
- size_t msize;
- ensure_initialization();
- msize = pad_request(sizeof(struct malloc_state));
- if (capacity < (size_t) -(msize + TOP_FOOT_SIZE + mparams.page_size)) { /* unsigned negation: rejects capacities that would wrap once the overhead is added */
- size_t rs = ((capacity == 0)? mparams.granularity : /* capacity 0 asks for one granularity unit */
- (capacity + TOP_FOOT_SIZE + msize));
- size_t tsize = granularity_align(rs);
- char* tbase = (char*)(CALL_MMAP(tsize));
- if (tbase != CMFAIL) {
- m = init_user_mstate(tbase, tsize);
- m->seg.sflags = USE_MMAP_BIT; /* destroy_mspace only unmaps segments that carry this flag */
- set_lock(m, locked);
- }
- }
- return (mspace)m;
-}
-
-mspace create_mspace_with_base(void* base, size_t capacity, int locked) { /* Initializes an mspace inside caller-supplied memory at base; returns 0 when capacity fails the bounds check below. */
- mstate m = 0;
- size_t msize;
- ensure_initialization();
- msize = pad_request(sizeof(struct malloc_state));
- if (capacity > msize + TOP_FOOT_SIZE && /* must be strictly larger than the bookkeeping overhead */
- capacity < (size_t) -(msize + TOP_FOOT_SIZE + mparams.page_size)) { /* same upper bound as create_mspace */
- m = init_user_mstate((char*)base, capacity); /* NOTE(review): base is not checked against null here */
- m->seg.sflags = EXTERN_BIT; /* destroy_mspace skips segments that carry this flag */
- set_lock(m, locked);
- }
- return (mspace)m;
-}
-
-int mspace_track_large_chunks(mspace msp, int enable) { /* Toggles mmap use for this mspace and reports the prior setting. NOTE(review): unlike neighbouring mspace_* routines, no ok_magic check is made here. */
- int ret = 0; /* stays 0 if PREACTION(ms) is nonzero */
- mstate ms = (mstate)msp;
- if (!PREACTION(ms)) {
- if (!use_mmap(ms))
- ret = 1; /* 1 means mmap use was already off before this call */
- if (!enable)
- enable_mmap(ms); /* enable == 0 switches mmap use on */
- else
- disable_mmap(ms); /* nonzero enable switches mmap use off */
- POSTACTION(ms);
- }
- return ret;
-}
-
-size_t destroy_mspace(mspace msp) { /* Walks the segment list of msp, unmapping eligible segments; returns the total size successfully unmapped. */
- size_t freed = 0;
- mstate ms = (mstate)msp;
- if (ok_magic(ms)) {
- msegmentptr sp = &ms->seg;
- while (sp != 0) {
- char* base = sp->base; /* fields are copied out and sp advanced before any unmap call */
- size_t size = sp->size;
- flag_t flag = sp->sflags;
- sp = sp->next; /* presumably because the segment record may live in memory about to be unmapped -- TODO confirm */
- if ((flag & USE_MMAP_BIT) && !(flag & EXTERN_BIT) && /* only mmapped, non-external segments are released */
- CALL_MUNMAP(base, size) == 0)
- freed += size; /* counted only when CALL_MUNMAP returns 0 */
- }
- }
- else {
- USAGE_ERROR_ACTION(ms,ms);
- }
- return freed;
-}
-
-/*
- mspace versions of routines are near-clones of the global
- versions. This is not so nice but better than the alternatives.
-*/
-
-
-void* mspace_malloc(mspace msp, size_t bytes) { /* Allocates bytes from msp, trying in order: small bins, tree bins, the dv chunk, the top chunk, then sys_alloc. Returns 0 on bad magic or when PREACTION is nonzero. */
- mstate ms = (mstate)msp;
- if (!ok_magic(ms)) {
- USAGE_ERROR_ACTION(ms,ms);
- return 0;
- }
- if (!PREACTION(ms)) {
- void* mem;
- size_t nb; /* padded request size used for all subsequent fitting decisions */
- if (bytes <= MAX_SMALL_REQUEST) {
- bindex_t idx;
- binmap_t smallbits;
- nb = (bytes < MIN_REQUEST)? MIN_CHUNK_SIZE : pad_request(bytes);
- idx = small_index(nb);
- smallbits = ms->smallmap >> idx; /* bit 0 now corresponds to bin idx, bit 1 to bin idx + 1 */
-
- if ((smallbits & 0x3U) != 0) { /* Remainderless fit to a smallbin. */
- mchunkptr b, p;
- idx += ~smallbits & 1; /* Uses next bin if idx empty */
- b = smallbin_at(ms, idx);
- p = b->fd;
- assert(chunksize(p) == small_index2size(idx));
- unlink_first_small_chunk(ms, b, p, idx);
- set_inuse_and_pinuse(ms, p, small_index2size(idx));
- mem = chunk2mem(p);
- check_malloced_chunk(ms, mem, nb);
- goto postaction;
- }
-
- else if (nb > ms->dvsize) { /* dv cannot satisfy the request, so look at larger bins */
- if (smallbits != 0) { /* Use chunk in next nonempty smallbin */
- mchunkptr b, p, r;
- size_t rsize;
- bindex_t i;
- binmap_t leftbits = (smallbits << idx) & left_bits(idx2bit(idx));
- binmap_t leastbit = least_bit(leftbits);
- compute_bit2idx(leastbit, i);
- b = smallbin_at(ms, i);
- p = b->fd;
- assert(chunksize(p) == small_index2size(i));
- unlink_first_small_chunk(ms, b, p, i);
- rsize = small_index2size(i) - nb; /* bytes left over after carving nb out of the bin chunk */
- /* Fit here cannot be remainderless if 4byte sizes */
- if (SIZE_T_SIZE != 4 && rsize < MIN_CHUNK_SIZE)
- set_inuse_and_pinuse(ms, p, small_index2size(i)); /* remainder too small to be a chunk: hand out the whole thing */
- else {
- set_size_and_pinuse_of_inuse_chunk(ms, p, nb);
- r = chunk_plus_offset(p, nb);
- set_size_and_pinuse_of_free_chunk(r, rsize);
- replace_dv(ms, r, rsize); /* the remainder becomes the new dv chunk */
- }
- mem = chunk2mem(p);
- check_malloced_chunk(ms, mem, nb);
- goto postaction;
- }
-
- else if (ms->treemap != 0 && (mem = tmalloc_small(ms, nb)) != 0) {
- check_malloced_chunk(ms, mem, nb);
- goto postaction;
- }
- }
- }
- else if (bytes >= MAX_REQUEST)
- nb = MAX_SIZE_T; /* Too big to allocate. Force failure (in sys alloc) */
- else {
- nb = pad_request(bytes);
- if (ms->treemap != 0 && (mem = tmalloc_large(ms, nb)) != 0) {
- check_malloced_chunk(ms, mem, nb);
- goto postaction;
- }
- }
-
- if (nb <= ms->dvsize) { /* reached when no bin produced a chunk: try the dv chunk */
- size_t rsize = ms->dvsize - nb;
- mchunkptr p = ms->dv;
- if (rsize >= MIN_CHUNK_SIZE) { /* split dv */
- mchunkptr r = ms->dv = chunk_plus_offset(p, nb);
- ms->dvsize = rsize;
- set_size_and_pinuse_of_free_chunk(r, rsize);
- set_size_and_pinuse_of_inuse_chunk(ms, p, nb);
- }
- else { /* exhaust dv */
- size_t dvs = ms->dvsize;
- ms->dvsize = 0;
- ms->dv = 0;
- set_inuse_and_pinuse(ms, p, dvs);
- }
- mem = chunk2mem(p);
- check_malloced_chunk(ms, mem, nb);
- goto postaction;
- }
-
- else if (nb < ms->topsize) { /* Split top */
- size_t rsize = ms->topsize -= nb; /* strict < above keeps the remaining top size nonzero */
- mchunkptr p = ms->top;
- mchunkptr r = ms->top = chunk_plus_offset(p, nb);
- r->head = rsize | PINUSE_BIT;
- set_size_and_pinuse_of_inuse_chunk(ms, p, nb);
- mem = chunk2mem(p);
- check_top_chunk(ms, ms->top);
- check_malloced_chunk(ms, mem, nb);
- goto postaction;
- }
-
- mem = sys_alloc(ms, nb); /* last resort: ask sys_alloc */
-
- postaction:
- POSTACTION(ms); /* every path that entered PREACTION leaves through here */
- return mem;
- }
-
- return 0;
-}
-
-void mspace_free(mspace msp, void* mem) { /* Releases mem back to its mspace, merging with free neighbours, the dv chunk or the top chunk where adjacent. A null mem is a no-op. */
- if (mem != 0) {
- mchunkptr p = mem2chunk(mem);
-#if FOOTERS
- mstate fm = get_mstate_for(p); /* with FOOTERS the owning state comes from the chunk, not from msp */
- msp = msp; /* placate people compiling -Wunused */
-#else /* FOOTERS */
- mstate fm = (mstate)msp;
-#endif /* FOOTERS */
- if (!ok_magic(fm)) {
- USAGE_ERROR_ACTION(fm, p);
- return;
- }
- if (!PREACTION(fm)) {
- check_inuse_chunk(fm, p);
- if (RTCHECK(ok_address(fm, p) && ok_inuse(p))) {
- size_t psize = chunksize(p);
- mchunkptr next = chunk_plus_offset(p, psize);
- if (!pinuse(p)) { /* previous-in-use bit clear: either an mmapped chunk or a free predecessor */
- size_t prevsize = p->prev_foot;
- if (is_mmapped(p)) {
- psize += prevsize + MMAP_FOOT_PAD;
- if (CALL_MUNMAP((char*)p - prevsize, psize) == 0)
- fm->footprint -= psize; /* footprint shrinks only when the unmap call returns 0 */
- goto postaction;
- }
- else {
- mchunkptr prev = chunk_minus_offset(p, prevsize);
- psize += prevsize;
- p = prev; /* from here on p denotes the merged chunk */
- if (RTCHECK(ok_address(fm, prev))) { /* consolidate backward */
- if (p != fm->dv) {
- unlink_chunk(fm, p, prevsize);
- }
- else if ((next->head & INUSE_BITS) == INUSE_BITS) { /* predecessor is dv and next is in use: just grow dv */
- fm->dvsize = psize;
- set_free_with_pinuse(p, psize, next);
- goto postaction;
- }
- }
- else
- goto erroraction;
- }
- }
-
- if (RTCHECK(ok_next(p, next) && ok_pinuse(next))) {
- if (!cinuse(next)) { /* consolidate forward */
- if (next == fm->top) { /* merge into the top chunk */
- size_t tsize = fm->topsize += psize;
- fm->top = p;
- p->head = tsize | PINUSE_BIT;
- if (p == fm->dv) { /* dv was absorbed into top, so clear it */
- fm->dv = 0;
- fm->dvsize = 0;
- }
- if (should_trim(fm, tsize))
- sys_trim(fm, 0);
- goto postaction;
- }
- else if (next == fm->dv) { /* merge into the dv chunk */
- size_t dsize = fm->dvsize += psize;
- fm->dv = p;
- set_size_and_pinuse_of_free_chunk(p, dsize);
- goto postaction;
- }
- else { /* merge with an ordinary free chunk taken out of its bin */
- size_t nsize = chunksize(next);
- psize += nsize;
- unlink_chunk(fm, next, nsize);
- set_size_and_pinuse_of_free_chunk(p, psize);
- if (p == fm->dv) {
- fm->dvsize = psize;
- goto postaction;
- }
- }
- }
- else
- set_free_with_pinuse(p, psize, next);
-
- if (is_small(psize)) { /* file the resulting free chunk in a small bin or the tree */
- insert_small_chunk(fm, p, psize);
- check_free_chunk(fm, p);
- }
- else {
- tchunkptr tp = (tchunkptr)p;
- insert_large_chunk(fm, tp, psize);
- check_free_chunk(fm, p);
- if (--fm->release_checks == 0) /* countdown that triggers release_unused_segments when it reaches 0 */
- release_unused_segments(fm);
- }
- goto postaction;
- }
- }
- erroraction:
- USAGE_ERROR_ACTION(fm, p); /* reached by goto or when an RTCHECK above fails */
- postaction:
- POSTACTION(fm);
- }
- }
-}
-
-void* mspace_calloc(mspace msp, size_t n_elements, size_t elem_size) { /* Allocates n_elements * elem_size bytes from msp via internal_malloc and zero-fills them when calloc_must_clear() requires it. */
- void* mem;
- size_t req = 0; /* remains 0 when n_elements == 0 */
- mstate ms = (mstate)msp;
- if (!ok_magic(ms)) {
- USAGE_ERROR_ACTION(ms,ms);
- return 0;
- }
- if (n_elements != 0) {
- req = n_elements * elem_size;
- if (((n_elements | elem_size) & ~(size_t)0xffff) && /* the division below is only evaluated when an operand has bits above the low 16 */
- (req / n_elements != elem_size)) /* a quotient mismatch means the multiplication wrapped */
- req = MAX_SIZE_T; /* force downstream failure on overflow */
- }
- mem = internal_malloc(ms, req);
- if (mem != 0 && calloc_must_clear(mem2chunk(mem)))
- memset(mem, 0, req);
- return mem;
-}
-
-void* mspace_realloc(mspace msp, void* oldmem, size_t bytes) { /* Resizes oldmem to bytes via internal_realloc; a null oldmem is served as mspace_malloc(msp, bytes). */
- if (oldmem == 0)
- return mspace_malloc(msp, bytes);
-#ifdef REALLOC_ZERO_BYTES_FREES
- if (bytes == 0) { /* compiled only with REALLOC_ZERO_BYTES_FREES: a zero-size request frees oldmem and returns 0 */
- mspace_free(msp, oldmem);
- return 0;
- }
-#endif /* REALLOC_ZERO_BYTES_FREES */
- else { /* binds to whichever `if` precedes it, which depends on REALLOC_ZERO_BYTES_FREES */
-#if FOOTERS
- mchunkptr p = mem2chunk(oldmem);
- mstate ms = get_mstate_for(p); /* with FOOTERS the owning state comes from the chunk, not from msp */
-#else /* FOOTERS */
- mstate ms = (mstate)msp;
-#endif /* FOOTERS */
- if (!ok_magic(ms)) {
- USAGE_ERROR_ACTION(ms,ms);
- return 0;
- }
- return internal_realloc(ms, oldmem, bytes);
- }
-}
-
-void* mspace_memalign(mspace msp, size_t alignment, size_t bytes) { /* Checks the mspace magic, then forwards to internal_memalign; returns 0 on a failed check. */
- mstate ms = (mstate)msp;
- if (!ok_magic(ms)) {
- USAGE_ERROR_ACTION(ms,ms);
- return 0;
- }
- return internal_memalign(ms, alignment, bytes);
-}
-
-void** mspace_independent_calloc(mspace msp, size_t n_elements,
- size_t elem_size, void* chunks[]) { /* Checks the mspace magic, then forwards to ialloc; returns 0 on a failed check. */
- size_t sz = elem_size; /* serves as 1-element array */
- mstate ms = (mstate)msp;
- if (!ok_magic(ms)) {
- USAGE_ERROR_ACTION(ms,ms);
- return 0;
- }
- return ialloc(ms, n_elements, &sz, 3, chunks); /* fourth argument is the literal 3; its meaning is defined by ialloc, not visible here */
-}
-
-void** mspace_independent_comalloc(mspace msp, size_t n_elements,
- size_t sizes[], void* chunks[]) { /* Checks the mspace magic, then forwards to ialloc with the caller's sizes array; returns 0 on a failed check. */
- mstate ms = (mstate)msp;
- if (!ok_magic(ms)) {
- USAGE_ERROR_ACTION(ms,ms);
- return 0;
- }
- return ialloc(ms, n_elements, sizes, 0, chunks); /* fourth argument is the literal 0, unlike mspace_independent_calloc which passes 3 */
-}
-
-int mspace_trim(mspace msp, size_t pad) { /* Runs sys_trim(ms, pad) between PREACTION and POSTACTION and returns its result. */
- int result = 0; /* returned unchanged on bad magic or when PREACTION(ms) is nonzero */
- mstate ms = (mstate)msp;
- if (ok_magic(ms)) {
- if (!PREACTION(ms)) {
- result = sys_trim(ms, pad);
- POSTACTION(ms);
- }
- }
- else {
- USAGE_ERROR_ACTION(ms,ms);
- }
- return result;
-}
-
-void mspace_malloc_stats(mspace msp) { /* Calls internal_malloc_stats for msp when its magic checks out; otherwise invokes USAGE_ERROR_ACTION. */
- mstate ms = (mstate)msp;
- if (ok_magic(ms)) {
- internal_malloc_stats(ms);
- }
- else {
- USAGE_ERROR_ACTION(ms,ms);
- }
-}
-
-size_t mspace_footprint(mspace msp) { /* Returns the footprint field of msp, or 0 when the magic check fails. */
- size_t result = 0;
- mstate ms = (mstate)msp;
- if (ok_magic(ms)) {
- result = ms->footprint;
- }
- else {
- USAGE_ERROR_ACTION(ms,ms);
- }
- return result;
-}
-
-
-size_t mspace_max_footprint(mspace msp) { /* Returns the max_footprint field of msp, or 0 when the magic check fails. */
- size_t result = 0;
- mstate ms = (mstate)msp;
- if (ok_magic(ms)) {
- result = ms->max_footprint;
- }
- else {
- USAGE_ERROR_ACTION(ms,ms);
- }
- return result;
-}
-
-
-#if !NO_MALLINFO
-struct mallinfo mspace_mallinfo(mspace msp) { /* Returns internal_mallinfo for msp; compiled only when NO_MALLINFO is false. */
- mstate ms = (mstate)msp;
- if (!ok_magic(ms)) {
- USAGE_ERROR_ACTION(ms,ms); /* NOTE(review): no early return here, unlike sibling mspace_* routines -- if USAGE_ERROR_ACTION returns, the call below still runs on ms */
- }
- return internal_mallinfo(ms);
-}
-#endif /* NO_MALLINFO */
-
-size_t mspace_usable_size(void* mem) { /* Returns chunksize minus overhead for an in-use chunk; 0 for a null pointer or a chunk that is not in use. Takes no mspace argument. */
- if (mem != 0) {
- mchunkptr p = mem2chunk(mem); /* step back from the user pointer to its chunk header */
- if (is_inuse(p))
- return chunksize(p) - overhead_for(p);
- }
- return 0;
-}
-
-int mspace_mallopt(int param_number, int value) { /* Thin wrapper: forwards both arguments to change_mparam, the same call dlmallopt makes; takes no mspace argument. */
- return change_mparam(param_number, value);
-}
-
-#endif /* MSPACES */
-
-
-/* -------------------- Alternative MORECORE functions ------------------- */
-
-/*
- Guidelines for creating a custom version of MORECORE:
-
- * For best performance, MORECORE should allocate in multiples of pagesize.
- * MORECORE may allocate more memory than requested. (Or even less,
- but this will usually result in a malloc failure.)
- * MORECORE must not allocate memory when given argument zero, but
- instead return one past the end address of memory from previous
- nonzero call.
- * For best performance, consecutive calls to MORECORE with positive
- arguments should return increasing addresses, indicating that
- space has been contiguously extended.
- * Even though consecutive calls to MORECORE need not return contiguous
- addresses, it must be OK for malloc'ed chunks to span multiple
- regions in those cases where they do happen to be contiguous.
- * MORECORE need not handle negative arguments -- it may instead
- just return MFAIL when given negative arguments.
- Negative arguments are always multiples of pagesize. MORECORE
- must not misinterpret negative args as large positive unsigned
- args. You can suppress all such calls from even occurring by defining
- MORECORE_CANNOT_TRIM.
-
- As an example alternative MORECORE, here is a custom allocator
- kindly contributed for pre-OSX macOS. It uses virtually but not
- necessarily physically contiguous non-paged memory (locked in,
- present and won't get swapped out). You can use it by uncommenting
- this section, adding some #includes, and setting up the appropriate
- defines above:
-
- #define MORECORE osMoreCore
-
- There is also a shutdown routine that should somehow be called for
- cleanup upon program exit.
-
- #define MAX_POOL_ENTRIES 100
- #define MINIMUM_MORECORE_SIZE (64 * 1024U)
- static int next_os_pool;
- void *our_os_pools[MAX_POOL_ENTRIES];
-
- void *osMoreCore(int size)
- {
- void *ptr = 0;
- static void *sbrk_top = 0;
-
- if (size > 0)
- {
- if (size < MINIMUM_MORECORE_SIZE)
- size = MINIMUM_MORECORE_SIZE;
- if (CurrentExecutionLevel() == kTaskLevel)
- ptr = PoolAllocateResident(size + RM_PAGE_SIZE, 0);
- if (ptr == 0)
- {
- return (void *) MFAIL;
- }
- // save ptrs so they can be freed during cleanup
- our_os_pools[next_os_pool] = ptr;
- next_os_pool++;
- ptr = (void *) ((((size_t) ptr) + RM_PAGE_MASK) & ~RM_PAGE_MASK);
- sbrk_top = (char *) ptr + size;
- return ptr;
- }
- else if (size < 0)
- {
- // we don't currently support shrink behavior
- return (void *) MFAIL;
- }
- else
- {
- return sbrk_top;
- }
- }
-
- // cleanup any allocated memory pools
- // called as last thing before shutting down driver
-
- void osCleanupMem(void)
- {
- void **ptr;
-
- for (ptr = our_os_pools; ptr < &our_os_pools[MAX_POOL_ENTRIES]; ptr++)
- if (*ptr)
- {
- PoolDeallocate(*ptr);
- *ptr = 0;
- }
- }
-
-*/
-
-
-/* -----------------------------------------------------------------------
-History:
- V2.8.4 Wed May 27 09:56:23 2009 Doug Lea (dl at gee)
- * Use zeros instead of prev foot for is_mmapped
- * Add mspace_track_large_chunks; thanks to Jean Brouwers
- * Fix set_inuse in internal_realloc; thanks to Jean Brouwers
- * Fix insufficient sys_alloc padding when using 16byte alignment
- * Fix bad error check in mspace_footprint
- * Adaptations for ptmalloc; thanks to Wolfram Gloger.
- * Reentrant spin locks; thanks to Earl Chew and others
- * Win32 improvements; thanks to Niall Douglas and Earl Chew
- * Add NO_SEGMENT_TRAVERSAL and MAX_RELEASE_CHECK_RATE options
- * Extension hook in malloc_state
- * Various small adjustments to reduce warnings on some compilers
- * Various configuration extensions/changes for more platforms. Thanks
- to all who contributed these.
-
- V2.8.3 Thu Sep 22 11:16:32 2005 Doug Lea (dl at gee)
- * Add max_footprint functions
- * Ensure all appropriate literals are size_t
- * Fix conditional compilation problem for some #define settings
- * Avoid concatenating segments with the one provided
- in create_mspace_with_base
- * Rename some variables to avoid compiler shadowing warnings
- * Use explicit lock initialization.
- * Better handling of sbrk interference.
- * Simplify and fix segment insertion, trimming and mspace_destroy
- * Reinstate REALLOC_ZERO_BYTES_FREES option from 2.7.x
- * Thanks especially to Dennis Flanagan for help on these.
-
- V2.8.2 Sun Jun 12 16:01:10 2005 Doug Lea (dl at gee)
- * Fix memalign brace error.
-
- V2.8.1 Wed Jun 8 16:11:46 2005 Doug Lea (dl at gee)
- * Fix improper #endif nesting in C++
- * Add explicit casts needed for C++
-
- V2.8.0 Mon May 30 14:09:02 2005 Doug Lea (dl at gee)
- * Use trees for large bins
- * Support mspaces
- * Use segments to unify sbrk-based and mmap-based system allocation,
- removing need for emulation on most platforms without sbrk.
- * Default safety checks
- * Optional footer checks. Thanks to William Robertson for the idea.
- * Internal code refactoring
- * Incorporate suggestions and platform-specific changes.
- Thanks to Dennis Flanagan, Colin Plumb, Niall Douglas,
- Aaron Bachmann, Emery Berger, and others.
- * Speed up non-fastbin processing enough to remove fastbins.
- * Remove useless cfree() to avoid conflicts with other apps.
- * Remove internal memcpy, memset. Compilers handle builtins better.
- * Remove some options that no one ever used and rename others.
-
- V2.7.2 Sat Aug 17 09:07:30 2002 Doug Lea (dl at gee)
- * Fix malloc_state bitmap array misdeclaration
-
- V2.7.1 Thu Jul 25 10:58:03 2002 Doug Lea (dl at gee)
- * Allow tuning of FIRST_SORTED_BIN_SIZE
- * Use PTR_UINT as type for all ptr->int casts. Thanks to John Belmonte.
- * Better detection and support for non-contiguousness of MORECORE.
- Thanks to Andreas Mueller, Conal Walsh, and Wolfram Gloger
- * Bypass most of malloc if no frees. Thanks To Emery Berger.
- * Fix freeing of old top non-contiguous chunk in sysmalloc.
- * Raised default trim and map thresholds to 256K.
- * Fix mmap-related #defines. Thanks to Lubos Lunak.
- * Fix copy macros; added LACKS_FCNTL_H. Thanks to Neal Walfield.
- * Branch-free bin calculation
- * Default trim and mmap thresholds now 256K.
-
- V2.7.0 Sun Mar 11 14:14:06 2001 Doug Lea (dl at gee)
- * Introduce independent_comalloc and independent_calloc.
- Thanks to Michael Pachos for motivation and help.
- * Make optional .h file available
- * Allow > 2GB requests on 32bit systems.
- * new WIN32 sbrk, mmap, munmap, lock code from <Walter@GeNeSys-e.de>.
- Thanks also to Andreas Mueller <a.mueller at paradatec.de>,
- and Anonymous.
- * Allow override of MALLOC_ALIGNMENT (Thanks to Ruud Waij for
- helping test this.)
- * memalign: check alignment arg
- * realloc: don't try to shift chunks backwards, since this
- leads to more fragmentation in some programs and doesn't
- seem to help in any others.
- * Collect all cases in malloc requiring system memory into sysmalloc
- * Use mmap as backup to sbrk
- * Place all internal state in malloc_state
- * Introduce fastbins (although similar to 2.5.1)
- * Many minor tunings and cosmetic improvements
- * Introduce USE_PUBLIC_MALLOC_WRAPPERS, USE_MALLOC_LOCK
- * Introduce MALLOC_FAILURE_ACTION, MORECORE_CONTIGUOUS
- Thanks to Tony E. Bennett <tbennett@nvidia.com> and others.
- * Include errno.h to support default failure action.
-
- V2.6.6 Sun Dec 5 07:42:19 1999 Doug Lea (dl at gee)
- * return null for negative arguments
- * Added Several WIN32 cleanups from Martin C. Fong <mcfong at yahoo.com>
- * Add 'LACKS_SYS_PARAM_H' for those systems without 'sys/param.h'
- (e.g. WIN32 platforms)
- * Cleanup header file inclusion for WIN32 platforms
- * Cleanup code to avoid Microsoft Visual C++ compiler complaints
- * Add 'USE_DL_PREFIX' to quickly allow co-existence with existing
- memory allocation routines
- * Set 'malloc_getpagesize' for WIN32 platforms (needs more work)
- * Use 'assert' rather than 'ASSERT' in WIN32 code to conform to
- usage of 'assert' in non-WIN32 code
- * Improve WIN32 'sbrk()' emulation's 'findRegion()' routine to
- avoid infinite loop
- * Always call 'fREe()' rather than 'free()'
-
- V2.6.5 Wed Jun 17 15:57:31 1998 Doug Lea (dl at gee)
- * Fixed ordering problem with boundary-stamping
-
- V2.6.3 Sun May 19 08:17:58 1996 Doug Lea (dl at gee)
- * Added pvalloc, as recommended by H.J. Liu
- * Added 64bit pointer support mainly from Wolfram Gloger
- * Added anonymously donated WIN32 sbrk emulation
- * Malloc, calloc, getpagesize: add optimizations from Raymond Nijssen
- * malloc_extend_top: fix mask error that caused wastage after
- foreign sbrks
- * Add linux mremap support code from HJ Liu
-
- V2.6.2 Tue Dec 5 06:52:55 1995 Doug Lea (dl at gee)
- * Integrated most documentation with the code.
- * Add support for mmap, with help from
- Wolfram Gloger (Gloger@lrz.uni-muenchen.de).
- * Use last_remainder in more cases.
- * Pack bins using idea from colin@nyx10.cs.du.edu
- * Use ordered bins instead of best-fit threshold
- * Eliminate block-local decls to simplify tracing and debugging.
- * Support another case of realloc via move into top
- * Fix error occurring when initial sbrk_base not word-aligned.
- * Rely on page size for units instead of SBRK_UNIT to
- avoid surprises about sbrk alignment conventions.
- * Add mallinfo, mallopt. Thanks to Raymond Nijssen
- (raymond@es.ele.tue.nl) for the suggestion.
- * Add `pad' argument to malloc_trim and top_pad mallopt parameter.
- * More precautions for cases where other routines call sbrk,
- courtesy of Wolfram Gloger (Gloger@lrz.uni-muenchen.de).
- * Added macros etc., allowing use in linux libc from
- H.J. Lu (hjl@gnu.ai.mit.edu)
- * Inverted this history list
-
- V2.6.1 Sat Dec 2 14:10:57 1995 Doug Lea (dl at gee)
- * Re-tuned and fixed to behave more nicely with V2.6.0 changes.
- * Removed all preallocation code since under current scheme
- the work required to undo bad preallocations exceeds
- the work saved in good cases for most test programs.
- * No longer use return list or unconsolidated bins since
- no scheme using them consistently outperforms those that don't
- given above changes.
- * Use best fit for very large chunks to prevent some worst-cases.
- * Added some support for debugging
-
- V2.6.0 Sat Nov 4 07:05:23 1995 Doug Lea (dl at gee)
- * Removed footers when chunks are in use. Thanks to
- Paul Wilson (wilson@cs.texas.edu) for the suggestion.
-
- V2.5.4 Wed Nov 1 07:54:51 1995 Doug Lea (dl at gee)
- * Added malloc_trim, with help from Wolfram Gloger
- (wmglo@Dent.MED.Uni-Muenchen.DE).
-
- V2.5.3 Tue Apr 26 10:16:01 1994 Doug Lea (dl at g)
-
- V2.5.2 Tue Apr 5 16:20:40 1994 Doug Lea (dl at g)
- * realloc: try to expand in both directions
- * malloc: swap order of clean-bin strategy;
- * realloc: only conditionally expand backwards
- * Try not to scavenge used bins
- * Use bin counts as a guide to preallocation
- * Occasionally bin return list chunks in first scan
- * Add a few optimizations from colin@nyx10.cs.du.edu
-
- V2.5.1 Sat Aug 14 15:40:43 1993 Doug Lea (dl at g)
- * faster bin computation & slightly different binning
- * merged all consolidations to one part of malloc proper
- (eliminating old malloc_find_space & malloc_clean_bin)
- * Scan 2 returns chunks (not just 1)
- * Propagate failure in realloc if malloc returns 0
- * Add stuff to allow compilation on non-ANSI compilers
- from kpv@research.att.com
-
- V2.5 Sat Aug 7 07:41:59 1993 Doug Lea (dl at g.oswego.edu)
- * removed potential for odd address access in prev_chunk
- * removed dependency on getpagesize.h
- * misc cosmetics and a bit more internal documentation
- * anticosmetics: mangled names in macros to evade debugger strangeness
- * tested on sparc, hp-700, dec-mips, rs6000
- with gcc & native cc (hp, dec only) allowing
- Detlefs & Zorn comparison study (in SIGPLAN Notices.)
-
- Trial version Fri Aug 28 13:14:29 1992 Doug Lea (dl at g.oswego.edu)
- * Based loosely on libg++-1.2X malloc. (It retains some of the overall
- structure of old version, but most details differ.)
-
-*/
-
-#endif
+#ifdef NEDMALLOC_ENABLED +/* + This is a version (aka dlmalloc) of malloc/free/realloc written by + Doug Lea and released to the public domain, as explained at + http://creativecommons.org/licenses/publicdomain. Send questions, + comments, complaints, performance data, etc to dl@cs.oswego.edu + +* Version 2.8.4 Wed May 27 09:56:23 2009 Doug Lea (dl at gee) + + Note: There may be an updated version of this malloc obtainable at + ftp://gee.cs.oswego.edu/pub/misc/malloc.c + Check before installing! + +* Quickstart + + This library is all in one file to simplify the most common usage: + ftp it, compile it (-O3), and link it into another program. All of + the compile-time options default to reasonable values for use on + most platforms. You might later want to step through various + compile-time and dynamic tuning options. + + For convenience, an include file for code using this malloc is at: + ftp://gee.cs.oswego.edu/pub/misc/malloc-2.8.4.h + You don't really need this .h file unless you call functions not + defined in your system include files. The .h file contains only the + excerpts from this file needed for using this malloc on ANSI C/C++ + systems, so long as you haven't changed compile-time options about + naming and tuning parameters. If you do, then you can create your + own malloc.h that does include all settings by cutting at the point + indicated below. Note that you may already by default be using a C + library containing a malloc that is based on some version of this + malloc (for example in linux). You might still want to use the one + in this file to customize settings or to avoid overheads associated + with library versions. + +* Vital statistics: + + Supported pointer/size_t representation: 4 or 8 bytes + size_t MUST be an unsigned type of the same width as + pointers. (If you are using an ancient system that declares + size_t as a signed type, or need it to be a different width + than pointers, you can use a previous release of this malloc + (e.g. 
2.7.2) supporting these.) + + Alignment: 8 bytes (default) + This suffices for nearly all current machines and C compilers. + However, you can define MALLOC_ALIGNMENT to be wider than this + if necessary (up to 128bytes), at the expense of using more space. + + Minimum overhead per allocated chunk: 4 or 8 bytes (if 4byte sizes) + 8 or 16 bytes (if 8byte sizes) + Each malloced chunk has a hidden word of overhead holding size + and status information, and additional cross-check word + if FOOTERS is defined. + + Minimum allocated size: 4-byte ptrs: 16 bytes (including overhead) + 8-byte ptrs: 32 bytes (including overhead) + + Even a request for zero bytes (i.e., malloc(0)) returns a + pointer to something of the minimum allocatable size. + The maximum overhead wastage (i.e., number of extra bytes + allocated than were requested in malloc) is less than or equal + to the minimum size, except for requests >= mmap_threshold that + are serviced via mmap(), where the worst case wastage is about + 32 bytes plus the remainder from a system page (the minimal + mmap unit); typically 4096 or 8192 bytes. + + Security: static-safe; optionally more or less + The "security" of malloc refers to the ability of malicious + code to accentuate the effects of errors (for example, freeing + space that is not currently malloc'ed or overwriting past the + ends of chunks) in code that calls malloc. This malloc + guarantees not to modify any memory locations below the base of + heap, i.e., static variables, even in the presence of usage + errors. The routines additionally detect most improper frees + and reallocs. All this holds as long as the static bookkeeping + for malloc itself is not corrupted by some other means. This + is only one aspect of security -- these checks do not, and + cannot, detect all possible programming errors. + + If FOOTERS is defined nonzero, then each allocated chunk + carries an additional check word to verify that it was malloced + from its space. 
These check words are the same within each + execution of a program using malloc, but differ across + executions, so externally crafted fake chunks cannot be + freed. This improves security by rejecting frees/reallocs that + could corrupt heap memory, in addition to the checks preventing + writes to statics that are always on. This may further improve + security at the expense of time and space overhead. (Note that + FOOTERS may also be worth using with MSPACES.) + + By default detected errors cause the program to abort (calling + "abort()"). You can override this to instead proceed past + errors by defining PROCEED_ON_ERROR. In this case, a bad free + has no effect, and a malloc that encounters a bad address + caused by user overwrites will ignore the bad address by + dropping pointers and indices to all known memory. This may + be appropriate for programs that should continue if at all + possible in the face of programming errors, although they may + run out of memory because dropped memory is never reclaimed. + + If you don't like either of these options, you can define + CORRUPTION_ERROR_ACTION and USAGE_ERROR_ACTION to do anything + else. And if you are sure that your program using malloc has + no errors or vulnerabilities, you can define INSECURE to 1, + which might (or might not) provide a small performance improvement. + + Thread-safety: NOT thread-safe unless USE_LOCKS defined + When USE_LOCKS is defined, each public call to malloc, free, + etc is surrounded with either a pthread mutex or a win32 + spinlock (depending on WIN32). This is not especially fast, and + can be a major bottleneck. It is designed only to provide + minimal protection in concurrent environments, and to provide a + basis for extensions. If you are using malloc in a concurrent + program, consider instead using nedmalloc + (http://www.nedprod.com/programs/portable/nedmalloc/) or + ptmalloc (See http://www.malloc.de), which are derived + from versions of this malloc. 
+ + System requirements: Any combination of MORECORE and/or MMAP/MUNMAP + This malloc can use unix sbrk or any emulation (invoked using + the CALL_MORECORE macro) and/or mmap/munmap or any emulation + (invoked using CALL_MMAP/CALL_MUNMAP) to get and release system + memory. On most unix systems, it tends to work best if both + MORECORE and MMAP are enabled. On Win32, it uses emulations + based on VirtualAlloc. It also uses common C library functions + like memset. + + Compliance: I believe it is compliant with the Single Unix Specification + (See http://www.unix.org). Also SVID/XPG, ANSI C, and probably + others as well. + +* Overview of algorithms + + This is not the fastest, most space-conserving, most portable, or + most tunable malloc ever written. However it is among the fastest + while also being among the most space-conserving, portable and + tunable. Consistent balance across these factors results in a good + general-purpose allocator for malloc-intensive programs. + + In most ways, this malloc is a best-fit allocator. Generally, it + chooses the best-fitting existing chunk for a request, with ties + broken in approximately least-recently-used order. (This strategy + normally maintains low fragmentation.) However, for requests less + than 256bytes, it deviates from best-fit when there is not an + exactly fitting available chunk by preferring to use space adjacent + to that used for the previous small request, as well as by breaking + ties in approximately most-recently-used order. (These enhance + locality of series of small allocations.) And for very large requests + (>= 256Kb by default), it relies on system memory mapping + facilities, if supported. (This helps avoid carrying around and + possibly fragmenting memory used only for large chunks.) 
+ + All operations (except malloc_stats and mallinfo) have execution + times that are bounded by a constant factor of the number of bits in + a size_t, not counting any clearing in calloc or copying in realloc, + or actions surrounding MORECORE and MMAP that have times + proportional to the number of non-contiguous regions returned by + system allocation routines, which is often just 1. In real-time + applications, you can optionally suppress segment traversals using + NO_SEGMENT_TRAVERSAL, which assures bounded execution even when + system allocators return non-contiguous spaces, at the typical + expense of carrying around more memory and increased fragmentation. + + The implementation is not very modular and seriously overuses + macros. Perhaps someday all C compilers will do as good a job + inlining modular code as can now be done by brute-force expansion, + but now, enough of them seem not to. + + Some compilers issue a lot of warnings about code that is + dead/unreachable only on some platforms, and also about intentional + uses of negation on unsigned types. All known cases of each can be + ignored. + + For a longer but out of date high-level description, see + http://gee.cs.oswego.edu/dl/html/malloc.html + +* MSPACES + If MSPACES is defined, then in addition to malloc, free, etc., + this file also defines mspace_malloc, mspace_free, etc. These + are versions of malloc routines that take an "mspace" argument + obtained using create_mspace, to control all internal bookkeeping. + If ONLY_MSPACES is defined, only these versions are compiled. + So if you would like to use this allocator for only some allocations, + and your system malloc for others, you can compile with + ONLY_MSPACES and then do something like... + static mspace mymspace = create_mspace(0,0); // for example + #define mymalloc(bytes) mspace_malloc(mymspace, bytes) + + (Note: If you only need one instance of an mspace, you can instead + use "USE_DL_PREFIX" to relabel the global malloc.) 
+ + You can similarly create thread-local allocators by storing + mspaces as thread-locals. For example: + static __thread mspace tlms = 0; + void* tlmalloc(size_t bytes) { + if (tlms == 0) tlms = create_mspace(0, 0); + return mspace_malloc(tlms, bytes); + } + void tlfree(void* mem) { mspace_free(tlms, mem); } + + Unless FOOTERS is defined, each mspace is completely independent. + You cannot allocate from one and free to another (although + conformance is only weakly checked, so usage errors are not always + caught). If FOOTERS is defined, then each chunk carries around a tag + indicating its originating mspace, and frees are directed to their + originating spaces. + + ------------------------- Compile-time options --------------------------- + +Be careful in setting #define values for numerical constants of type +size_t. On some systems, literal values are not automatically extended +to size_t precision unless they are explicitly casted. You can also +use the symbolic values MAX_SIZE_T, SIZE_T_ONE, etc below. + +WIN32 default: defined if _WIN32 defined + Defining WIN32 sets up defaults for MS environment and compilers. + Otherwise defaults are for unix. Beware that there seem to be some + cases where this malloc might not be a pure drop-in replacement for + Win32 malloc: Random-looking failures from Win32 GDI API's (eg; + SetDIBits()) may be due to bugs in some video driver implementations + when pixel buffers are malloc()ed, and the region spans more than + one VirtualAlloc()ed region. Because dlmalloc uses a small (64Kb) + default granularity, pixel buffers may straddle virtual allocation + regions more often than when using the Microsoft allocator. You can + avoid this by using VirtualAlloc() and VirtualFree() for all pixel + buffers rather than using malloc(). If this is not possible, + recompile this malloc with a larger DEFAULT_GRANULARITY. + +MALLOC_ALIGNMENT default: (size_t)8 + Controls the minimum alignment for malloc'ed chunks. 
It must be a + power of two and at least 8, even on machines for which smaller + alignments would suffice. It may be defined as larger than this + though. Note however that code and data structures are optimized for + the case of 8-byte alignment. + +MSPACES default: 0 (false) + If true, compile in support for independent allocation spaces. + This is only supported if HAVE_MMAP is true. + +ONLY_MSPACES default: 0 (false) + If true, only compile in mspace versions, not regular versions. + +USE_LOCKS default: 0 (false) + Causes each call to each public routine to be surrounded with + pthread or WIN32 mutex lock/unlock. (If set true, this can be + overridden on a per-mspace basis for mspace versions.) If set to a + non-zero value other than 1, locks are used, but their + implementation is left out, so lock functions must be supplied manually, + as described below. + +USE_SPIN_LOCKS default: 1 iff USE_LOCKS and on x86 using gcc or MSC + If true, uses custom spin locks for locking. This is currently + supported only for x86 platforms using gcc or recent MS compilers. + Otherwise, posix locks or win32 critical sections are used. + +FOOTERS default: 0 + If true, provide extra checking and dispatching by placing + information in the footers of allocated chunks. This adds + space and time overhead. + +INSECURE default: 0 + If true, omit checks for usage errors and heap space overwrites. + +USE_DL_PREFIX default: NOT defined + Causes compiler to prefix all public routines with the string 'dl'. + This can be useful when you only want to use this malloc in one part + of a program, using your regular system malloc elsewhere. + +ABORT default: defined as abort() + Defines how to abort on failed checks. On most systems, a failed + check cannot die with an "assert" or even print an informative + message, because the underlying print routines in turn call malloc, + which will fail again. Generally, the best policy is to simply call + abort(). 
It's not very useful to do more than this because many + errors due to overwriting will show up as address faults (null, odd + addresses etc) rather than malloc-triggered checks, so will also + abort. Also, most compilers know that abort() does not return, so + can better optimize code conditionally calling it. + +PROCEED_ON_ERROR default: defined as 0 (false) + Controls whether detected bad addresses cause them to be bypassed + rather than aborting. If set, detected bad arguments to free and + realloc are ignored. And all bookkeeping information is zeroed out + upon a detected overwrite of freed heap space, thus losing the + ability to ever return it from malloc again, but enabling the + application to proceed. If PROCEED_ON_ERROR is defined, the + static variable malloc_corruption_error_count is compiled in + and can be examined to see if errors have occurred. This option + generates slower code than the default abort policy. + +DEBUG default: NOT defined + The DEBUG setting is mainly intended for people trying to modify + this code or diagnose problems when porting to new platforms. + However, it may also be able to better isolate user errors than just + using runtime checks. The assertions in the check routines spell + out in more detail the assumptions and invariants underlying the + algorithms. The checking is fairly extensive, and will slow down + execution noticeably. Calling malloc_stats or mallinfo with DEBUG + set will attempt to check every non-mmapped allocated and free chunk + in the course of computing the summaries. + +ABORT_ON_ASSERT_FAILURE default: defined as 1 (true) + Debugging assertion failures can be nearly impossible if your + version of the assert macro causes malloc to be called, which will + lead to a cascade of further failures, blowing the runtime stack. + ABORT_ON_ASSERT_FAILURE causes assertion failures to call abort(), + which will usually make debugging easier. 
+ +MALLOC_FAILURE_ACTION default: sets errno to ENOMEM, or no-op on win32 + The action to take before "return 0" when malloc fails to be able to + return memory because there is none available. + +HAVE_MORECORE default: 1 (true) unless win32 or ONLY_MSPACES + True if this system supports sbrk or an emulation of it. + +MORECORE default: sbrk + The name of the sbrk-style system routine to call to obtain more + memory. See below for guidance on writing custom MORECORE + functions. The type of the argument to sbrk/MORECORE varies across + systems. It cannot be size_t, because it supports negative + arguments, so it is normally the signed type of the same width as + size_t (sometimes declared as "intptr_t"). It doesn't much matter + though. Internally, we only call it with arguments less than half + the max value of a size_t, which should work across all reasonable + possibilities, although sometimes generating compiler warnings. + +MORECORE_CONTIGUOUS default: 1 (true) if HAVE_MORECORE + If true, take advantage of fact that consecutive calls to MORECORE + with positive arguments always return contiguous increasing + addresses. This is true of unix sbrk. It does not hurt too much to + set it true anyway, since malloc copes with non-contiguities. + Setting it false when definitely non-contiguous saves time + and possibly wasted space it would take to discover this though. + +MORECORE_CANNOT_TRIM default: NOT defined + True if MORECORE cannot release space back to the system when given + negative arguments. This is generally necessary only if you are + using a hand-crafted MORECORE function that cannot handle negative + arguments. + +NO_SEGMENT_TRAVERSAL default: 0 + If non-zero, suppresses traversals of memory segments + returned by either MORECORE or CALL_MMAP. This disables + merging of segments that are contiguous, and selectively + releasing them to the OS if unused, but bounds execution times. 
+ +HAVE_MMAP default: 1 (true) + True if this system supports mmap or an emulation of it. If so, and + HAVE_MORECORE is not true, MMAP is used for all system + allocation. If set and HAVE_MORECORE is true as well, MMAP is + primarily used to directly allocate very large blocks. It is also + used as a backup strategy in cases where MORECORE fails to provide + space from system. Note: A single call to MUNMAP is assumed to be + able to unmap memory that may have been allocated using multiple calls + to MMAP, so long as they are adjacent. + +HAVE_MREMAP default: 1 on linux, else 0 + If true, realloc() uses mremap() to re-allocate large blocks and + extend or shrink allocation spaces. + +MMAP_CLEARS default: 1 except on WINCE. + True if mmap clears memory so calloc doesn't need to. This is true + for standard unix mmap using /dev/zero and on WIN32 except for WINCE. + +USE_BUILTIN_FFS default: 0 (i.e., not used) + Causes malloc to use the builtin ffs() function to compute indices. + Some compilers may recognize and intrinsify ffs to be faster than the + supplied C version. Also, the case of x86 using gcc is special-cased + to an asm instruction, so is already as fast as it can be, and so + this setting has no effect. Similarly for Win32 under recent MS compilers. + (On most x86s, the asm version is only slightly faster than the C version.) + +malloc_getpagesize default: derive from system includes, or 4096. + The system page size. To the extent possible, this malloc manages + memory from the system in page-size units. This may be (and + usually is) a function rather than a constant. This is ignored + if WIN32, where page size is determined using GetSystemInfo during + initialization. This may be several megabytes if ENABLE_LARGE_PAGES + is enabled. + +ENABLE_LARGE_PAGES default: NOT defined + Causes the system page size to be the value of GetLargePageMinimum() + if that function is available (Windows Server 2003/Vista or later). 
+ This allows the use of large page entries in the MMU which can + significantly improve performance in large working set applications + as TLB cache load is reduced by a factor of three. Note that enabling + this option is equal to locking the process' memory in current + implementations of Windows and requires the SE_LOCK_MEMORY_PRIVILEGE + to be held by the process in order to succeed. + +USE_DEV_RANDOM default: 0 (i.e., not used) + Causes malloc to use /dev/random to initialize secure magic seed for + stamping footers. Otherwise, the current time is used. + +NO_MALLINFO default: 0 + If defined, don't compile "mallinfo". This can be a simple way + of dealing with mismatches between system declarations and + those in this file. + +MALLINFO_FIELD_TYPE default: size_t + The type of the fields in the mallinfo struct. This was originally + defined as "int" in SVID etc, but is more usefully defined as + size_t. The value is used only if HAVE_USR_INCLUDE_MALLOC_H is not set + +REALLOC_ZERO_BYTES_FREES default: not defined + This should be set if a call to realloc with zero bytes should + be the same as a call to free. Some people think it should. Otherwise, + since this malloc returns a unique pointer for malloc(0), so does + realloc(p, 0). + +LACKS_UNISTD_H, LACKS_FCNTL_H, LACKS_SYS_PARAM_H, LACKS_SYS_MMAN_H +LACKS_STRINGS_H, LACKS_STRING_H, LACKS_SYS_TYPES_H, LACKS_ERRNO_H +LACKS_STDLIB_H default: NOT defined unless on WIN32 + Define these if your system does not have these header files. + You might need to manually insert some of the declarations they provide. + +DEFAULT_GRANULARITY default: page size if MORECORE_CONTIGUOUS, + system_info.dwAllocationGranularity in WIN32, + GetLargePageMinimum() if ENABLE_LARGE_PAGES, + otherwise 64K. + Also settable using mallopt(M_GRANULARITY, x) + The unit for allocating and deallocating memory from the system. On + most systems with contiguous MORECORE, there is no reason to + make this more than a page. 
However, systems with MMAP tend to + either require or encourage larger granularities. You can increase + this value to prevent system allocation functions from being called so + often, especially if they are slow. The value must be at least one + page and must be a power of two. Setting to 0 causes initialization + to either page size or win32 region size. (Note: In previous + versions of malloc, the equivalent of this option was called + "TOP_PAD") + +DEFAULT_GRANULARITY_ALIGNED default: undefined (which means page size) + Whether to enforce alignment when allocating and deallocating memory + from the system, i.e., the base address of all allocations will be + aligned to DEFAULT_GRANULARITY if it is set. Note that enabling this carries + some overhead as multiple calls must now be made when probing for a valid + aligned value; however, it does greatly ease the checking for whether + a given memory pointer was allocated by this allocator rather than + some other. + +DEFAULT_TRIM_THRESHOLD default: 2MB + Also settable using mallopt(M_TRIM_THRESHOLD, x) + The maximum amount of unused top-most memory to keep before + releasing via malloc_trim in free(). Automatic trimming is mainly + useful in long-lived programs using contiguous MORECORE. Because + trimming via sbrk can be slow on some systems, and can sometimes be + wasteful (in cases where programs immediately afterward allocate + more large chunks) the value should be high enough so that your + overall system performance would improve by releasing this much + memory. As a rough guide, you might set to a value close to the + average size of a process (program) running on your system. + Releasing this much memory would allow such a process to run in + memory. Generally, it is worth tuning trim thresholds when a + program undergoes phases where several large chunks are allocated + and released in ways that can reuse each other's storage, perhaps + mixed with phases where there are no such chunks at all. 
The trim + value must be greater than page size to have any useful effect. To + disable trimming completely, you can set to MAX_SIZE_T. Note that the trick + some people use of mallocing a huge space and then freeing it at + program startup, in an attempt to reserve system memory, doesn't + have the intended effect under automatic trimming, since that memory + will immediately be returned to the system. + +DEFAULT_MMAP_THRESHOLD default: 256K + Also settable using mallopt(M_MMAP_THRESHOLD, x) + The request size threshold for using MMAP to directly service a + request. Requests of at least this size that cannot be allocated + using already-existing space will be serviced via mmap. (If enough + normal freed space already exists it is used instead.) Using mmap + segregates relatively large chunks of memory so that they can be + individually obtained and released from the host system. A request + serviced through mmap is never reused by any other request (at least + not directly; the system may just so happen to remap successive + requests to the same locations). Segregating space in this way has + the benefits that: Mmapped space can always be individually released + back to the system, which helps keep the system level memory demands + of a long-lived program low. Also, mapped memory doesn't become + `locked' between other chunks, as can happen with normally allocated + chunks, which means that even trimming via malloc_trim would not + release them. However, it has the disadvantage that the space + cannot be reclaimed, consolidated, and then used to service later + requests, as happens with normal chunks. The advantages of mmap + nearly always outweigh disadvantages for "large" chunks, but the + value of "large" may vary across systems. The default is an + empirically derived value that works well in most systems. You can + disable mmap by setting to MAX_SIZE_T. 
+ +MAX_RELEASE_CHECK_RATE default: 4095 unless not HAVE_MMAP + The number of consolidated frees between checks to release + unused segments when freeing. When using non-contiguous segments, + especially with multiple mspaces, checking only for topmost space + doesn't always suffice to trigger trimming. To compensate for this, + free() will, with a period of MAX_RELEASE_CHECK_RATE (or the + current number of segments, if greater) try to release unused + segments to the OS when freeing chunks that result in + consolidation. The best value for this parameter is a compromise + between slowing down frees with relatively costly checks that + rarely trigger versus holding on to unused memory. To effectively + disable, set to MAX_SIZE_T. This may lead to a very slight speed + improvement at the expense of carrying around more memory. +*/ + +/* Version identifier to allow people to support multiple versions */ +#ifndef DLMALLOC_VERSION +#define DLMALLOC_VERSION 20804 +#endif /* DLMALLOC_VERSION */ + +#ifndef WIN32 +#ifdef _WIN32 +#define WIN32 1 +#endif /* _WIN32 */ +#ifdef _WIN32_WCE +#define LACKS_FCNTL_H +#define WIN32 1 +#endif /* _WIN32_WCE */ +#endif /* WIN32 */ +#ifdef WIN32 +#define WIN32_LEAN_AND_MEAN +#include <windows.h> +#include <tchar.h> +#define HAVE_MMAP 1 +#define HAVE_MORECORE 0 +#define LACKS_UNISTD_H +#define LACKS_SYS_PARAM_H +#define LACKS_SYS_MMAN_H +#define LACKS_STRING_H +#define LACKS_STRINGS_H +#define LACKS_SYS_TYPES_H +#define LACKS_ERRNO_H +#ifndef MALLOC_FAILURE_ACTION +#define MALLOC_FAILURE_ACTION +#endif /* MALLOC_FAILURE_ACTION */ +#ifdef _WIN32_WCE /* WINCE reportedly does not clear */ +#define MMAP_CLEARS 0 +#else +#define MMAP_CLEARS 1 +#endif /* _WIN32_WCE */ +#endif /* WIN32 */ + +#if defined(DARWIN) || defined(_DARWIN) +/* Mac OSX docs advise not to use sbrk; it seems better to use mmap */ +#ifndef HAVE_MORECORE +#define HAVE_MORECORE 0 +#define HAVE_MMAP 1 +/* OSX allocators provide 16 byte alignment */ +#ifndef MALLOC_ALIGNMENT 
+#define MALLOC_ALIGNMENT ((size_t)16U) +#endif +#endif /* HAVE_MORECORE */ +#endif /* DARWIN */ + +#ifndef LACKS_SYS_TYPES_H +#include <sys/types.h> /* For size_t */ +#endif /* LACKS_SYS_TYPES_H */ + +#if (defined(__GNUC__) && ((defined(__i386__) || defined(__x86_64__)))) || (defined(_MSC_VER) && _MSC_VER>=1310) +#define SPIN_LOCKS_AVAILABLE 1 +#else +#define SPIN_LOCKS_AVAILABLE 0 +#endif + +/* The maximum possible size_t value has all bits set */ +#define MAX_SIZE_T (~(size_t)0) + +#ifndef ONLY_MSPACES +#define ONLY_MSPACES 0 /* define to a value */ +#else +#define ONLY_MSPACES 1 +#endif /* ONLY_MSPACES */ +#ifndef MSPACES +#if ONLY_MSPACES +#define MSPACES 1 +#else /* ONLY_MSPACES */ +#define MSPACES 0 +#endif /* ONLY_MSPACES */ +#endif /* MSPACES */ +#ifndef MALLOC_ALIGNMENT +#define MALLOC_ALIGNMENT ((size_t)8U) +#endif /* MALLOC_ALIGNMENT */ +#ifndef FOOTERS +#define FOOTERS 0 +#endif /* FOOTERS */ +#ifndef ABORT +#define ABORT abort() +#endif /* ABORT */ +#ifndef ABORT_ON_ASSERT_FAILURE +#define ABORT_ON_ASSERT_FAILURE 1 +#endif /* ABORT_ON_ASSERT_FAILURE */ +#ifndef PROCEED_ON_ERROR +#define PROCEED_ON_ERROR 0 +#endif /* PROCEED_ON_ERROR */ +#ifndef USE_LOCKS +#define USE_LOCKS 0 +#endif /* USE_LOCKS */ +#ifndef USE_SPIN_LOCKS +#if USE_LOCKS && SPIN_LOCKS_AVAILABLE +#define USE_SPIN_LOCKS 1 +#else +#define USE_SPIN_LOCKS 0 +#endif /* USE_LOCKS && SPIN_LOCKS_AVAILABLE. 
*/ +#endif /* USE_SPIN_LOCKS */ +#ifndef INSECURE +#define INSECURE 0 +#endif /* INSECURE */ +#ifndef HAVE_MMAP +#define HAVE_MMAP 1 +#endif /* HAVE_MMAP */ +#ifndef MMAP_CLEARS +#define MMAP_CLEARS 1 +#endif /* MMAP_CLEARS */ +#ifndef HAVE_MREMAP +#ifdef linux +#define HAVE_MREMAP 1 +#else /* linux */ +#define HAVE_MREMAP 0 +#endif /* linux */ +#endif /* HAVE_MREMAP */ +#ifndef MALLOC_FAILURE_ACTION +#define MALLOC_FAILURE_ACTION errno = ENOMEM; +#endif /* MALLOC_FAILURE_ACTION */ +#ifndef HAVE_MORECORE +#if ONLY_MSPACES +#define HAVE_MORECORE 0 +#else /* ONLY_MSPACES */ +#define HAVE_MORECORE 1 +#endif /* ONLY_MSPACES */ +#endif /* HAVE_MORECORE */ +#if !HAVE_MORECORE +#define MORECORE_CONTIGUOUS 0 +#else /* !HAVE_MORECORE */ +#define MORECORE_DEFAULT sbrk +#ifndef MORECORE_CONTIGUOUS +#define MORECORE_CONTIGUOUS 1 +#endif /* MORECORE_CONTIGUOUS */ +#endif /* HAVE_MORECORE */ +#ifndef DEFAULT_GRANULARITY +#if (MORECORE_CONTIGUOUS || defined(WIN32)) +#define DEFAULT_GRANULARITY (0) /* 0 means to compute in init_mparams */ +#else /* MORECORE_CONTIGUOUS */ +#define DEFAULT_GRANULARITY ((size_t)64U * (size_t)1024U) +#endif /* MORECORE_CONTIGUOUS */ +#endif /* DEFAULT_GRANULARITY */ +#ifndef DEFAULT_TRIM_THRESHOLD +#ifndef MORECORE_CANNOT_TRIM +#define DEFAULT_TRIM_THRESHOLD ((size_t)2U * (size_t)1024U * (size_t)1024U) +#else /* MORECORE_CANNOT_TRIM */ +#define DEFAULT_TRIM_THRESHOLD MAX_SIZE_T +#endif /* MORECORE_CANNOT_TRIM */ +#endif /* DEFAULT_TRIM_THRESHOLD */ +#ifndef DEFAULT_MMAP_THRESHOLD +#if HAVE_MMAP +#define DEFAULT_MMAP_THRESHOLD ((size_t)256U * (size_t)1024U) +#else /* HAVE_MMAP */ +#define DEFAULT_MMAP_THRESHOLD MAX_SIZE_T +#endif /* HAVE_MMAP */ +#endif /* DEFAULT_MMAP_THRESHOLD */ +#ifndef MAX_RELEASE_CHECK_RATE +#if HAVE_MMAP +#define MAX_RELEASE_CHECK_RATE 4095 +#else +#define MAX_RELEASE_CHECK_RATE MAX_SIZE_T +#endif /* HAVE_MMAP */ +#endif /* MAX_RELEASE_CHECK_RATE */ +#ifndef USE_BUILTIN_FFS +#define USE_BUILTIN_FFS 0 +#endif /* USE_BUILTIN_FFS 
*/ +#ifndef USE_DEV_RANDOM +#define USE_DEV_RANDOM 0 +#endif /* USE_DEV_RANDOM */ +#ifndef NO_MALLINFO +#define NO_MALLINFO 0 +#endif /* NO_MALLINFO */ +#ifndef MALLINFO_FIELD_TYPE +#define MALLINFO_FIELD_TYPE size_t +#endif /* MALLINFO_FIELD_TYPE */ +#ifndef NO_SEGMENT_TRAVERSAL +#define NO_SEGMENT_TRAVERSAL 0 +#endif /* NO_SEGMENT_TRAVERSAL */ + +/* + mallopt tuning options. SVID/XPG defines four standard parameter + numbers for mallopt, normally defined in malloc.h. None of these + are used in this malloc, so setting them has no effect. But this + malloc does support the following options. +*/ + +#define M_TRIM_THRESHOLD (-1) +#define M_GRANULARITY (-2) +#define M_MMAP_THRESHOLD (-3) + +/* ------------------------ Mallinfo declarations ------------------------ */ + +#if !NO_MALLINFO +/* + This version of malloc supports the standard SVID/XPG mallinfo + routine that returns a struct containing usage properties and + statistics. It should work on any system that has a + /usr/include/malloc.h defining struct mallinfo. The main + declaration needed is the mallinfo struct that is returned (by-copy) + by mallinfo(). The mallinfo struct contains a bunch of fields that + are not even meaningful in this version of malloc. These fields are + instead filled by mallinfo() with other numbers that might be of + interest. + + HAVE_USR_INCLUDE_MALLOC_H should be set if you have a + /usr/include/malloc.h file that includes a declaration of struct + mallinfo. If so, it is included; else a compliant version is + declared below. These must be precisely the same for mallinfo() to + work. The original SVID version of this struct, defined on most + systems with mallinfo, declares all fields as ints. But some others + define as unsigned long. If your system defines the fields using a + type of different width than listed here, you MUST #include your + system version and #define HAVE_USR_INCLUDE_MALLOC_H. 
+*/ + +/* #define HAVE_USR_INCLUDE_MALLOC_H */ + +#ifdef HAVE_USR_INCLUDE_MALLOC_H +#include "/usr/include/malloc.h" +#else /* HAVE_USR_INCLUDE_MALLOC_H */ +#ifndef STRUCT_MALLINFO_DECLARED +#define STRUCT_MALLINFO_DECLARED 1 +struct mallinfo { + MALLINFO_FIELD_TYPE arena; /* non-mmapped space allocated from system */ + MALLINFO_FIELD_TYPE ordblks; /* number of free chunks */ + MALLINFO_FIELD_TYPE smblks; /* always 0 */ + MALLINFO_FIELD_TYPE hblks; /* always 0 */ + MALLINFO_FIELD_TYPE hblkhd; /* space in mmapped regions */ + MALLINFO_FIELD_TYPE usmblks; /* maximum total allocated space */ + MALLINFO_FIELD_TYPE fsmblks; /* always 0 */ + MALLINFO_FIELD_TYPE uordblks; /* total allocated space */ + MALLINFO_FIELD_TYPE fordblks; /* total free space */ + MALLINFO_FIELD_TYPE keepcost; /* releasable (via malloc_trim) space */ +}; +#endif /* STRUCT_MALLINFO_DECLARED */ +#endif /* HAVE_USR_INCLUDE_MALLOC_H */ +#endif /* NO_MALLINFO */ + +/* + Try to persuade compilers to inline. The most critical functions for + inlining are defined as macros, so these aren't used for them. 
+*/ + +#ifndef FORCEINLINE + #if defined(__GNUC__) +#define FORCEINLINE __inline __attribute__ ((always_inline)) + #elif defined(_MSC_VER) + #define FORCEINLINE __forceinline + #endif +#endif +#ifndef NOINLINE + #if defined(__GNUC__) + #define NOINLINE __attribute__ ((noinline)) + #elif defined(_MSC_VER) + #define NOINLINE __declspec(noinline) + #else + #define NOINLINE + #endif +#endif + +#ifdef __cplusplus +extern "C" { +#ifndef FORCEINLINE + #define FORCEINLINE inline +#endif +#endif /* __cplusplus */ +#ifndef FORCEINLINE + #define FORCEINLINE +#endif + +#if !ONLY_MSPACES + +/* ------------------- Declarations of public routines ------------------- */ + +#ifndef USE_DL_PREFIX +#define dlcalloc calloc +#define dlfree free +#define dlmalloc malloc +#define dlmemalign memalign +#define dlrealloc realloc +#define dlvalloc valloc +#define dlpvalloc pvalloc +#define dlmallinfo mallinfo +#define dlmallopt mallopt +#define dlmalloc_trim malloc_trim +#define dlmalloc_stats malloc_stats +#define dlmalloc_usable_size malloc_usable_size +#define dlmalloc_footprint malloc_footprint +#define dlmalloc_max_footprint malloc_max_footprint +#define dlindependent_calloc independent_calloc +#define dlindependent_comalloc independent_comalloc +#endif /* USE_DL_PREFIX */ + + +/* + malloc(size_t n) + Returns a pointer to a newly allocated chunk of at least n bytes, or + null if no space is available, in which case errno is set to ENOMEM + on ANSI C systems. + + If n is zero, malloc returns a minimum-sized chunk. (The minimum + size is 16 bytes on most 32bit systems, and 32 bytes on 64bit + systems.) Note that size_t is an unsigned type, so calls with + arguments that would be negative if signed are interpreted as + requests for huge amounts of space, which will often fail. The + maximum supported value of n differs across systems, but is in all + cases less than the maximum representable value of a size_t. 
+*/ +void* dlmalloc(size_t); + +/* + free(void* p) + Releases the chunk of memory pointed to by p, that had been previously + allocated using malloc or a related routine such as realloc. + It has no effect if p is null. If p was not malloced or already + freed, free(p) will by default cause the current program to abort. +*/ +void dlfree(void*); + +/* + calloc(size_t n_elements, size_t element_size); + Returns a pointer to n_elements * element_size bytes, with all locations + set to zero. +*/ +void* dlcalloc(size_t, size_t); + +/* + realloc(void* p, size_t n) + Returns a pointer to a chunk of size n that contains the same data + as does chunk p up to the minimum of (n, p's size) bytes, or null + if no space is available. + + The returned pointer may or may not be the same as p. The algorithm + prefers extending p in most cases when possible, otherwise it + employs the equivalent of a malloc-copy-free sequence. + + If p is null, realloc is equivalent to malloc. + + If space is not available, realloc returns null, errno is set (if on + ANSI) and p is NOT freed. + + if n is for fewer bytes than already held by p, the newly unused + space is lopped off and freed if possible. realloc with a size + argument of zero (re)allocates a minimum-sized chunk. + + The old unix realloc convention of allowing the last-free'd chunk + to be used as an argument to realloc is not supported. +*/ + +void* dlrealloc(void*, size_t); + +/* + memalign(size_t alignment, size_t n); + Returns a pointer to a newly allocated chunk of n bytes, aligned + in accord with the alignment argument. + + The alignment argument should be a power of two. If the argument is + not a power of two, the nearest greater power is used. + 8-byte alignment is guaranteed by normal malloc calls, so don't + bother calling memalign with an argument of 8 or less. + + Overreliance on memalign is a sure way to fragment space. 
+*/ +void* dlmemalign(size_t, size_t); + +/* + valloc(size_t n); + Equivalent to memalign(pagesize, n), where pagesize is the page + size of the system. If the pagesize is unknown, 4096 is used. +*/ +void* dlvalloc(size_t); + +/* + mallopt(int parameter_number, int parameter_value) + Sets tunable parameters. The format is to provide a + (parameter-number, parameter-value) pair. mallopt then sets the + corresponding parameter to the argument value if it can (i.e., so + long as the value is meaningful), and returns 1 if successful else + 0. To work around the fact that mallopt is specified to use int, + not size_t parameters, the value -1 is specially treated as the + maximum unsigned size_t value. + + SVID/XPG/ANSI defines four standard param numbers for mallopt, + normally defined in malloc.h. None of these are used in this malloc, + so setting them has no effect. But this malloc also supports other + options in mallopt. See below for details. Briefly, supported + parameters are as follows (listed defaults are for "typical" + configurations). + + Symbol param # default allowed param values + M_TRIM_THRESHOLD -1 2*1024*1024 any (-1 disables) + M_GRANULARITY -2 page size any power of 2 >= page size + M_MMAP_THRESHOLD -3 256*1024 any (or 0 if no MMAP support) +*/ +int dlmallopt(int, int); + +/* + malloc_footprint(); + Returns the number of bytes obtained from the system. The total + number of bytes allocated by malloc, realloc etc., is less than this + value. Unlike mallinfo, this function returns only a precomputed + result, so can be called frequently to monitor memory consumption. + Even if locks are otherwise defined, this function does not use them, + so results might not be up to date. +*/ +size_t dlmalloc_footprint(void); + +/* + malloc_max_footprint(); + Returns the maximum number of bytes obtained from the system. This + value will be greater than current footprint if deallocated space + has been reclaimed by the system. 
The peak number of bytes allocated + by malloc, realloc etc., is less than this value. Unlike mallinfo, + this function returns only a precomputed result, so can be called + frequently to monitor memory consumption. Even if locks are + otherwise defined, this function does not use them, so results might + not be up to date. +*/ +size_t dlmalloc_max_footprint(void); + +#if !NO_MALLINFO +/* + mallinfo() + Returns (by copy) a struct containing various summary statistics: + + arena: current total non-mmapped bytes allocated from system + ordblks: the number of free chunks + smblks: always zero. + hblks: current number of mmapped regions + hblkhd: total bytes held in mmapped regions + usmblks: the maximum total allocated space. This will be greater + than current total if trimming has occurred. + fsmblks: always zero + uordblks: current total allocated space (normal or mmapped) + fordblks: total free space + keepcost: the maximum number of bytes that could ideally be released + back to system via malloc_trim. ("ideally" means that + it ignores page restrictions etc.) + + Because these fields are ints, but internal bookkeeping may + be kept as longs, the reported values may wrap around zero and + thus be inaccurate. +*/ +struct mallinfo dlmallinfo(void); +#endif /* NO_MALLINFO */ + +/* + independent_calloc(size_t n_elements, size_t element_size, void* chunks[]); + + independent_calloc is similar to calloc, but instead of returning a + single cleared space, it returns an array of pointers to n_elements + independent elements that can hold contents of size elem_size, each + of which starts out cleared, and can be independently freed, + realloc'ed etc. The elements are guaranteed to be adjacently + allocated (this is not guaranteed to occur with multiple callocs or + mallocs), which may also improve cache locality in some + applications. + + The "chunks" argument is optional (i.e., may be null, which is + probably the most typical usage). 
If it is null, the returned array + is itself dynamically allocated and should also be freed when it is + no longer needed. Otherwise, the chunks array must be at least + n_elements in length. It is filled in with the pointers to the + chunks. + + In either case, independent_calloc returns this pointer array, or + null if the allocation failed. If n_elements is zero and "chunks" + is null, it returns a chunk representing an array with zero elements + (which should be freed if not wanted). + + Each element must be individually freed when it is no longer + needed. If you'd like to instead be able to free all at once, you + should instead use regular calloc and assign pointers into this + space to represent elements. (In this case though, you cannot + independently free elements.) + + independent_calloc simplifies and speeds up implementations of many + kinds of pools. It may also be useful when constructing large data + structures that initially have a fixed number of fixed-sized nodes, + but the number is not known at compile time, and some of the nodes + may later need to be freed. For example: + + struct Node { int item; struct Node* next; }; + + struct Node* build_list() { + struct Node** pool; + int n = read_number_of_nodes_needed(); + if (n <= 0) return 0; + pool = (struct Node**)(independent_calloc(n, sizeof(struct Node), 0)); + if (pool == 0) die(); + // organize into a linked list... + struct Node* first = pool[0]; + for (int i = 0; i < n-1; ++i) + pool[i]->next = pool[i+1]; + free(pool); // Can now free the array (or not, if it is needed later) + return first; + } +*/ +void** dlindependent_calloc(size_t, size_t, void**); + +/* + independent_comalloc(size_t n_elements, size_t sizes[], void* chunks[]); + + independent_comalloc allocates, all at once, a set of n_elements + chunks with sizes indicated in the "sizes" array. It returns + an array of pointers to these elements, each of which can be + independently freed, realloc'ed etc. 
The elements are guaranteed to + be adjacently allocated (this is not guaranteed to occur with + multiple callocs or mallocs), which may also improve cache locality + in some applications. + + The "chunks" argument is optional (i.e., may be null). If it is null + the returned array is itself dynamically allocated and should also + be freed when it is no longer needed. Otherwise, the chunks array + must be of at least n_elements in length. It is filled in with the + pointers to the chunks. + + In either case, independent_comalloc returns this pointer array, or + null if the allocation failed. If n_elements is zero and chunks is + null, it returns a chunk representing an array with zero elements + (which should be freed if not wanted). + + Each element must be individually freed when it is no longer + needed. If you'd like to instead be able to free all at once, you + should instead use a single regular malloc, and assign pointers at + particular offsets in the aggregate space. (In this case though, you + cannot independently free elements.) + + independent_comalloc differs from independent_calloc in that each + element may have a different size, and also that it does not + automatically clear elements. + + independent_comalloc can be used to speed up allocation in cases + where several structs or objects must always be allocated at the + same time. For example: + + struct Head { ... }; + struct Foot { ... }; + + void send_message(char* msg) { + int msglen = strlen(msg); + size_t sizes[3] = { sizeof(struct Head), msglen, sizeof(struct Foot) }; + void* chunks[3]; + if (independent_comalloc(3, sizes, chunks) == 0) + die(); + struct Head* head = (struct Head*)(chunks[0]); + char* body = (char*)(chunks[1]); + struct Foot* foot = (struct Foot*)(chunks[2]); + // ... + } + + In general though, independent_comalloc is worth using only for + larger values of n_elements. For small values, you probably won't + detect enough difference from series of malloc calls to bother.
+ + Overuse of independent_comalloc can increase overall memory usage, + since it cannot reuse existing noncontiguous small chunks that + might be available for some of the elements. +*/ +void** dlindependent_comalloc(size_t, size_t*, void**); + + +/* + pvalloc(size_t n); + Equivalent to valloc(minimum-page-that-holds(n)), that is, + round up n to nearest pagesize. + */ +void* dlpvalloc(size_t); + +/* + malloc_trim(size_t pad); + + If possible, gives memory back to the system (via negative arguments + to sbrk) if there is unused memory at the `high' end of the malloc + pool or in unused MMAP segments. You can call this after freeing + large blocks of memory to potentially reduce the system-level memory + requirements of a program. However, it cannot guarantee to reduce + memory. Under some allocation patterns, some large free blocks of + memory will be locked between two used chunks, so they cannot be + given back to the system. + + The `pad' argument to malloc_trim represents the amount of free + trailing space to leave untrimmed. If this argument is zero, only + the minimum amount of memory to maintain internal data structures + will be left. Non-zero arguments can be supplied to maintain enough + trailing space to service future expected allocations without having + to re-obtain memory from the system. + + Malloc_trim returns 1 if it actually released any memory, else 0. +*/ +int dlmalloc_trim(size_t); + +/* + malloc_stats(); + Prints on stderr the amount of space obtained from the system (both + via sbrk and mmap), the maximum amount (which may be more than + current if malloc_trim and/or munmap got called), and the current + number of bytes allocated via malloc (or realloc, etc) but not yet + freed. Note that this is the number of bytes allocated, not the + number requested. It will be larger than the number requested + because of alignment and bookkeeping overhead. 
Because it includes + alignment wastage as being in use, this figure may be greater than + zero even when no user-level chunks are allocated. + + The reported current and maximum system memory can be inaccurate if + a program makes other calls to system memory allocation functions + (normally sbrk) outside of malloc. + + malloc_stats prints only the most commonly interesting statistics. + More information can be obtained by calling mallinfo. +*/ +void dlmalloc_stats(void); + +#endif /* ONLY_MSPACES */ + +/* + malloc_usable_size(void* p); + + Returns the number of bytes you can actually use in + an allocated chunk, which may be more than you requested (although + often not) due to alignment and minimum size constraints. + You can use this many bytes without worrying about + overwriting other allocated objects. This is not a particularly great + programming practice. malloc_usable_size can be more useful in + debugging and assertions, for example: + + p = malloc(n); + assert(malloc_usable_size(p) >= 256); +*/ +size_t dlmalloc_usable_size(void*); + + +#if MSPACES + +/* + mspace is an opaque type representing an independent + region of space that supports mspace_malloc, etc. +*/ +typedef void* mspace; + +/* + create_mspace creates and returns a new independent space with the + given initial capacity, or, if 0, the default granularity size. It + returns null if there is no system memory available to create the + space. If argument locked is non-zero, the space uses a separate + lock to control access. The capacity of the space will grow + dynamically as needed to service mspace_malloc requests. You can + control the sizes of incremental increases of this space by + compiling with a different DEFAULT_GRANULARITY or dynamically + setting with mallopt(M_GRANULARITY, value). 
+*/ +mspace create_mspace(size_t capacity, int locked); + +/* + destroy_mspace destroys the given space, and attempts to return all + of its memory back to the system, returning the total number of + bytes freed. After destruction, the results of access to all memory + used by the space become undefined. +*/ +size_t destroy_mspace(mspace msp); + +/* + create_mspace_with_base uses the memory supplied as the initial base + of a new mspace. Part (less than 128*sizeof(size_t) bytes) of this + space is used for bookkeeping, so the capacity must be at least this + large. (Otherwise 0 is returned.) When this initial space is + exhausted, additional memory will be obtained from the system. + Destroying this space will deallocate all additionally allocated + space (if possible) but not the initial base. +*/ +mspace create_mspace_with_base(void* base, size_t capacity, int locked); + +/* + mspace_track_large_chunks controls whether requests for large chunks + are allocated in their own untracked mmapped regions, separate from + others in this mspace. By default large chunks are not tracked, + which reduces fragmentation. However, such chunks are not + necessarily released to the system upon destroy_mspace. Enabling + tracking by setting to true may increase fragmentation, but avoids + leakage when relying on destroy_mspace to release all memory + allocated using this space. The function returns the previous + setting. +*/ +int mspace_track_large_chunks(mspace msp, int enable); + + +/* + mspace_malloc behaves as malloc, but operates within + the given space. +*/ +void* mspace_malloc(mspace msp, size_t bytes); + +/* + mspace_free behaves as free, but operates within + the given space. + + If compiled with FOOTERS==1, mspace_free is not actually needed. + free may be called instead of mspace_free because freed chunks from + any space are handled by their originating spaces. 
+*/ +void mspace_free(mspace msp, void* mem); + +/* + mspace_realloc behaves as realloc, but operates within + the given space. + + If compiled with FOOTERS==1, mspace_realloc is not actually + needed. realloc may be called instead of mspace_realloc because + realloced chunks from any space are handled by their originating + spaces. +*/ +void* mspace_realloc(mspace msp, void* mem, size_t newsize); + +/* + mspace_calloc behaves as calloc, but operates within + the given space. +*/ +void* mspace_calloc(mspace msp, size_t n_elements, size_t elem_size); + +/* + mspace_memalign behaves as memalign, but operates within + the given space. +*/ +void* mspace_memalign(mspace msp, size_t alignment, size_t bytes); + +/* + mspace_independent_calloc behaves as independent_calloc, but + operates within the given space. +*/ +void** mspace_independent_calloc(mspace msp, size_t n_elements, + size_t elem_size, void* chunks[]); + +/* + mspace_independent_comalloc behaves as independent_comalloc, but + operates within the given space. +*/ +void** mspace_independent_comalloc(mspace msp, size_t n_elements, + size_t sizes[], void* chunks[]); + +/* + mspace_footprint() returns the number of bytes obtained from the + system for this space. +*/ +size_t mspace_footprint(mspace msp); + +/* + mspace_max_footprint() returns the peak number of bytes obtained from the + system for this space. +*/ +size_t mspace_max_footprint(mspace msp); + + +#if !NO_MALLINFO +/* + mspace_mallinfo behaves as mallinfo, but reports properties of + the given space. +*/ +struct mallinfo mspace_mallinfo(mspace msp); +#endif /* NO_MALLINFO */ + +/* + mspace_usable_size(void* mem) behaves the same as malloc_usable_size. +*/ + size_t mspace_usable_size(void* mem); + +/* + mspace_malloc_stats behaves as malloc_stats, but reports + properties of the given space. +*/ +void mspace_malloc_stats(mspace msp); + +/* + mspace_trim behaves as malloc_trim, but + operates within the given space.
+*/ +int mspace_trim(mspace msp, size_t pad); + +/* + An alias for mallopt. +*/ +int mspace_mallopt(int, int); + +#endif /* MSPACES */ + +#ifdef __cplusplus +} /* end of extern "C" */ +#endif /* __cplusplus */ + +/* + ======================================================================== + To make a fully customizable malloc.h header file, cut everything + above this line, put into file malloc.h, edit to suit, and #include it + on the next line, as well as in programs that use this malloc. + ======================================================================== +*/ + +/* #include "malloc.h" */ + +/*------------------------------ internal #includes ---------------------- */ + +#ifdef WIN32 +#pragma warning( disable : 4146 ) /* no "unsigned" warnings */ +#endif /* WIN32 */ + +#include <stdio.h> /* for printing in malloc_stats */ + +#ifndef LACKS_ERRNO_H +#include <errno.h> /* for MALLOC_FAILURE_ACTION */ +#endif /* LACKS_ERRNO_H */ +#if FOOTERS || DEBUG +#include <time.h> /* for magic initialization */ +#endif /* FOOTERS */ +#ifndef LACKS_STDLIB_H +#include <stdlib.h> /* for abort() */ +#endif /* LACKS_STDLIB_H */ +#ifdef DEBUG +#if ABORT_ON_ASSERT_FAILURE +#undef assert +#define assert(x) if(!(x)) ABORT +#else /* ABORT_ON_ASSERT_FAILURE */ +#include <assert.h> +#endif /* ABORT_ON_ASSERT_FAILURE */ +#else /* DEBUG */ +#ifndef assert +#define assert(x) +#endif +#define DEBUG 0 +#endif /* DEBUG */ +#ifndef LACKS_STRING_H +#include <string.h> /* for memset etc */ +#endif /* LACKS_STRING_H */ +#if USE_BUILTIN_FFS +#ifndef LACKS_STRINGS_H +#include <strings.h> /* for ffs */ +#endif /* LACKS_STRINGS_H */ +#endif /* USE_BUILTIN_FFS */ +#if HAVE_MMAP +#ifndef LACKS_SYS_MMAN_H +/* On some versions of linux, mremap decl in mman.h needs __USE_GNU set */ +#if (defined(linux) && !defined(__USE_GNU)) +#define __USE_GNU 1 +#include <sys/mman.h> /* for mmap */ +#undef __USE_GNU +#else +#include <sys/mman.h> /* for mmap */ +#endif /* linux */ +#endif /* LACKS_SYS_MMAN_H */ 
+#ifndef LACKS_FCNTL_H +#include <fcntl.h> +#endif /* LACKS_FCNTL_H */ +#endif /* HAVE_MMAP */ +#ifndef LACKS_UNISTD_H +#include <unistd.h> /* for sbrk, sysconf */ +#else /* LACKS_UNISTD_H */ +#if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__NetBSD__) +extern void* sbrk(ptrdiff_t); +#endif /* FreeBSD etc */ +#endif /* LACKS_UNISTD_H */ + +/* Declarations for locking */ +#if USE_LOCKS +#ifndef WIN32 +#include <pthread.h> +#if defined (__SVR4) && defined (__sun) /* solaris */ +#include <thread.h> +#endif /* solaris */ +#else +#ifndef _M_AMD64 +/* These are already defined on AMD64 builds */ +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ +LONG __cdecl _InterlockedCompareExchange(LONG volatile *Dest, LONG Exchange, LONG Comp); +LONG __cdecl _InterlockedExchange(LONG volatile *Target, LONG Value); +#ifdef __cplusplus +} +#endif /* __cplusplus */ +#endif /* _M_AMD64 */ +#pragma intrinsic (_InterlockedCompareExchange) +#pragma intrinsic (_InterlockedExchange) +#define interlockedcompareexchange _InterlockedCompareExchange +#define interlockedexchange _InterlockedExchange +#endif /* Win32 */ +#endif /* USE_LOCKS */ + +/* Declarations for bit scanning on win32 */ +#if defined(_MSC_VER) && _MSC_VER>=1300 +#ifndef BitScanForward /* Try to avoid pulling in WinNT.h */ +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ +unsigned char _BitScanForward(unsigned long *index, unsigned long mask); +unsigned char _BitScanReverse(unsigned long *index, unsigned long mask); +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#define BitScanForward _BitScanForward +#define BitScanReverse _BitScanReverse +#pragma intrinsic(_BitScanForward) +#pragma intrinsic(_BitScanReverse) +#endif /* BitScanForward */ +#endif /* defined(_MSC_VER) && _MSC_VER>=1300 */ + +#ifndef WIN32 +#ifndef malloc_getpagesize +# ifdef _SC_PAGESIZE /* some SVR4 systems omit an underscore */ +# ifndef _SC_PAGE_SIZE +# define _SC_PAGE_SIZE _SC_PAGESIZE +# endif +# endif +# ifdef 
_SC_PAGE_SIZE +# define malloc_getpagesize sysconf(_SC_PAGE_SIZE) +# else +# if defined(BSD) || defined(DGUX) || defined(HAVE_GETPAGESIZE) + extern size_t getpagesize(); +# define malloc_getpagesize getpagesize() +# else +# ifdef WIN32 /* use supplied emulation of getpagesize */ +# define malloc_getpagesize getpagesize() +# else +# ifndef LACKS_SYS_PARAM_H +# include <sys/param.h> +# endif +# ifdef EXEC_PAGESIZE +# define malloc_getpagesize EXEC_PAGESIZE +# else +# ifdef NBPG +# ifndef CLSIZE +# define malloc_getpagesize NBPG +# else +# define malloc_getpagesize (NBPG * CLSIZE) +# endif +# else +# ifdef NBPC +# define malloc_getpagesize NBPC +# else +# ifdef PAGESIZE +# define malloc_getpagesize PAGESIZE +# else /* just guess */ +# define malloc_getpagesize ((size_t)4096U) +# endif +# endif +# endif +# endif +# endif +# endif +# endif +#endif +#endif + + + +/* ------------------- size_t and alignment properties -------------------- */ + +/* The byte and bit size of a size_t */ +#define SIZE_T_SIZE (sizeof(size_t)) +#define SIZE_T_BITSIZE (sizeof(size_t) << 3) + +/* Some constants coerced to size_t */ +/* Annoying but necessary to avoid errors on some platforms */ +#define SIZE_T_ZERO ((size_t)0) +#define SIZE_T_ONE ((size_t)1) +#define SIZE_T_TWO ((size_t)2) +#define SIZE_T_FOUR ((size_t)4) +#define TWO_SIZE_T_SIZES (SIZE_T_SIZE<<1) +#define FOUR_SIZE_T_SIZES (SIZE_T_SIZE<<2) +#define SIX_SIZE_T_SIZES (FOUR_SIZE_T_SIZES+TWO_SIZE_T_SIZES) +#define HALF_MAX_SIZE_T (MAX_SIZE_T / 2U) + +/* The bit mask value corresponding to MALLOC_ALIGNMENT */ +#define CHUNK_ALIGN_MASK (MALLOC_ALIGNMENT - SIZE_T_ONE) + +/* True if address a has acceptable alignment */ +#define is_aligned(A) (((size_t)((A)) & (CHUNK_ALIGN_MASK)) == 0) + +/* the number of bytes to offset an address to align it */ +#define align_offset(A)\ + ((((size_t)(A) & CHUNK_ALIGN_MASK) == 0)? 
0 :\ + ((MALLOC_ALIGNMENT - ((size_t)(A) & CHUNK_ALIGN_MASK)) & CHUNK_ALIGN_MASK)) + +/* + malloc_params holds global properties, including those that can be + dynamically set using mallopt. There is a single instance, mparams, + initialized in init_mparams. Note that the non-zeroness of "magic" + also serves as an initialization flag. +*/ +typedef unsigned int flag_t; +struct malloc_params { + volatile size_t magic; + size_t page_size; + size_t granularity; + size_t mmap_threshold; + size_t trim_threshold; + flag_t default_mflags; +}; + +static struct malloc_params mparams; + +/* Ensure mparams initialized */ +#define ensure_initialization() (void)(mparams.magic != 0 || init_mparams()) + +/* -------------------------- MMAP preliminaries ------------------------- */ + +/* + If HAVE_MORECORE or HAVE_MMAP are false, we just define calls and + checks to fail so compiler optimizer can delete code rather than + using so many "#if"s. +*/ + + +/* MORECORE and MMAP must return MFAIL on failure */ +#define MFAIL ((void*)(MAX_SIZE_T)) +#define CMFAIL ((char*)(MFAIL)) /* defined for convenience */ + +#if HAVE_MMAP + +#ifndef WIN32 +#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON) +#define MAP_ANONYMOUS MAP_ANON +#endif /* MAP_ANON */ +#ifdef DEFAULT_GRANULARITY_ALIGNED +#define MMAP_IMPL mmap_aligned +static void* lastAlignedmmap; /* Used as a hint */ +static void* mmap_aligned(void *start, size_t length, int prot, int flags, int fd, off_t offset) { + void* baseaddress = 0; + void* ptr = 0; + if(!start) { + baseaddress = lastAlignedmmap; + for(;;) { + if(baseaddress) flags|=MAP_FIXED; + ptr = mmap(baseaddress, length, prot, flags, fd, offset); + if(!ptr) + baseaddress = (void*)((size_t)baseaddress + mparams.granularity); + else if((size_t)ptr & (mparams.granularity - SIZE_T_ONE)) { + munmap(ptr, length); + baseaddress = (void*)(((size_t)ptr + mparams.granularity) & ~(mparams.granularity - SIZE_T_ONE)); + } + else break; + } + } + else ptr = mmap(start, length, prot, flags, 
fd, offset); + if(ptr) lastAlignedmmap = (void*)((size_t) ptr + mparams.granularity); + return ptr; +} +#else +#define MMAP_IMPL mmap +#endif /* DEFAULT_GRANULARITY_ALIGNED */ +#define MUNMAP_DEFAULT(a, s) munmap((a), (s)) +#define MMAP_PROT (PROT_READ|PROT_WRITE) +#ifdef MAP_ANONYMOUS +#define MMAP_FLAGS (MAP_PRIVATE|MAP_ANONYMOUS) +#define MMAP_DEFAULT(s) MMAP_IMPL(0, (s), MMAP_PROT, MMAP_FLAGS, -1, 0) +#else /* MAP_ANONYMOUS */ +/* + Nearly all versions of mmap support MAP_ANONYMOUS, so the following + is unlikely to be needed, but is supplied just in case. +*/ +#define MMAP_FLAGS (MAP_PRIVATE) +static int dev_zero_fd = -1; /* Cached file descriptor for /dev/zero. */ +#define MMAP_DEFAULT(s) ((dev_zero_fd < 0) ? \ + (dev_zero_fd = open("/dev/zero", O_RDWR), \ + MMAP_IMPL(0, (s), MMAP_PROT, MMAP_FLAGS, dev_zero_fd, 0)) : \ + MMAP_IMPL(0, (s), MMAP_PROT, MMAP_FLAGS, dev_zero_fd, 0)) +#endif /* MAP_ANONYMOUS */ + +#define DIRECT_MMAP_DEFAULT(s) MMAP_DEFAULT(s) + +#else /* WIN32 */ + +/* Win32 MMAP via VirtualAlloc */ +#ifdef DEFAULT_GRANULARITY_ALIGNED +static void* lastWin32mmap; /* Used as a hint */ +#endif /* DEFAULT_GRANULARITY_ALIGNED */ +#ifdef ENABLE_LARGE_PAGES +static int largepagesavailable = 1; +#endif /* ENABLE_LARGE_PAGES */ +static FORCEINLINE void* win32mmap(size_t size) { + void* baseaddress = 0; + void* ptr = 0; +#ifdef ENABLE_LARGE_PAGES + /* Note that large pages are *always* allocated on a large page boundary. 
+ If however granularity is small then don't waste a kernel call if size + isn't around the size of a large page */ + if(largepagesavailable && size >= 1*1024*1024) { + ptr = VirtualAlloc(baseaddress, size, MEM_RESERVE|MEM_COMMIT|MEM_LARGE_PAGES, PAGE_READWRITE); + if(!ptr && ERROR_PRIVILEGE_NOT_HELD==GetLastError()) largepagesavailable=0; + } +#endif + if(!ptr) { +#ifdef DEFAULT_GRANULARITY_ALIGNED + /* We try to avoid overhead by speculatively reserving at aligned + addresses until we succeed */ + baseaddress = lastWin32mmap; + for(;;) { + void* reserveaddr = VirtualAlloc(baseaddress, size, MEM_RESERVE, PAGE_READWRITE); + if(!reserveaddr) + baseaddress = (void*)((size_t)baseaddress + mparams.granularity); + else if((size_t)reserveaddr & (mparams.granularity - SIZE_T_ONE)) { + VirtualFree(reserveaddr, 0, MEM_RELEASE); + baseaddress = (void*)(((size_t)reserveaddr + mparams.granularity) & ~(mparams.granularity - SIZE_T_ONE)); + } + else break; + } +#endif + if(!ptr) ptr = VirtualAlloc(baseaddress, size, baseaddress ? MEM_COMMIT : MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE); +#if DEBUG + if(lastWin32mmap && ptr!=lastWin32mmap) printf("Non-contiguous VirtualAlloc between %p and %p\n", ptr, lastWin32mmap); +#endif +#ifdef DEFAULT_GRANULARITY_ALIGNED + if(ptr) lastWin32mmap = (void*)((size_t) ptr + mparams.granularity); +#endif + } +#if DEBUG +#ifdef ENABLE_LARGE_PAGES + printf("VirtualAlloc returns %p size %u. LargePagesAvailable=%d\n", ptr, size, largepagesavailable); +#else + printf("VirtualAlloc returns %p size %u\n", ptr, size); +#endif +#endif + return (ptr != 0)? ptr: MFAIL; +} + +/* For direct MMAP, use MEM_TOP_DOWN to minimize interference */ +static FORCEINLINE void* win32direct_mmap(size_t size) { + void* ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, + PAGE_READWRITE); + return (ptr != 0)? 
ptr: MFAIL; +} + +/* This function supports releasing coalesced segments */ +static FORCEINLINE int win32munmap(void* ptr, size_t size) { + MEMORY_BASIC_INFORMATION minfo; + char* cptr = (char*)ptr; + while (size) { + if (VirtualQuery(cptr, &minfo, sizeof(minfo)) == 0) + return -1; + if (minfo.BaseAddress != cptr || minfo.AllocationBase != cptr || + minfo.State != MEM_COMMIT || minfo.RegionSize > size) + return -1; + if (VirtualFree(cptr, 0, MEM_RELEASE) == 0) + return -1; + cptr += minfo.RegionSize; + size -= minfo.RegionSize; + } + return 0; +} + +#define MMAP_DEFAULT(s) win32mmap(s) +#define MUNMAP_DEFAULT(a, s) win32munmap((a), (s)) +#define DIRECT_MMAP_DEFAULT(s) win32direct_mmap(s) +#endif /* WIN32 */ +#endif /* HAVE_MMAP */ + +#if HAVE_MREMAP +#ifndef WIN32 +#define MREMAP_DEFAULT(addr, osz, nsz, mv) mremap((addr), (osz), (nsz), (mv)) +#endif /* WIN32 */ +#endif /* HAVE_MREMAP */ + + +/** + * Define CALL_MORECORE + */ +#if HAVE_MORECORE + #ifdef MORECORE + #define CALL_MORECORE(S) MORECORE(S) + #else /* MORECORE */ + #define CALL_MORECORE(S) MORECORE_DEFAULT(S) + #endif /* MORECORE */ +#else /* HAVE_MORECORE */ + #define CALL_MORECORE(S) MFAIL +#endif /* HAVE_MORECORE */ + +/** + * Define CALL_MMAP/CALL_MUNMAP/CALL_DIRECT_MMAP + */ +#if HAVE_MMAP + #define USE_MMAP_BIT (SIZE_T_ONE) + + #ifdef MMAP + #define CALL_MMAP(s) MMAP(s) + #else /* MMAP */ + #define CALL_MMAP(s) MMAP_DEFAULT(s) + #endif /* MMAP */ + #ifdef MUNMAP + #define CALL_MUNMAP(a, s) MUNMAP((a), (s)) + #else /* MUNMAP */ + #define CALL_MUNMAP(a, s) MUNMAP_DEFAULT((a), (s)) + #endif /* MUNMAP */ + #ifdef DIRECT_MMAP + #define CALL_DIRECT_MMAP(s) DIRECT_MMAP(s) + #else /* DIRECT_MMAP */ + #define CALL_DIRECT_MMAP(s) DIRECT_MMAP_DEFAULT(s) + #endif /* DIRECT_MMAP */ +#else /* HAVE_MMAP */ + #define USE_MMAP_BIT (SIZE_T_ZERO) + + #define MMAP(s) MFAIL + #define MUNMAP(a, s) (-1) + #define DIRECT_MMAP(s) MFAIL + #define CALL_DIRECT_MMAP(s) DIRECT_MMAP(s) + #define CALL_MMAP(s) MMAP(s) + #define
CALL_MUNMAP(a, s) MUNMAP((a), (s)) +#endif /* HAVE_MMAP */ + +/** + * Define CALL_MREMAP + */ +#if HAVE_MMAP && HAVE_MREMAP + #ifdef MREMAP + #define CALL_MREMAP(addr, osz, nsz, mv) MREMAP((addr), (osz), (nsz), (mv)) + #else /* MREMAP */ + #define CALL_MREMAP(addr, osz, nsz, mv) MREMAP_DEFAULT((addr), (osz), (nsz), (mv)) + #endif /* MREMAP */ +#else /* HAVE_MMAP && HAVE_MREMAP */ + #define CALL_MREMAP(addr, osz, nsz, mv) MFAIL +#endif /* HAVE_MMAP && HAVE_MREMAP */ + +/* mstate bit set if contiguous morecore disabled or failed */ +#define USE_NONCONTIGUOUS_BIT (4U) + +/* segment bit set in create_mspace_with_base */ +#define EXTERN_BIT (8U) + + +/* --------------------------- Lock preliminaries ------------------------ */ + +/* + When locks are defined, there is one global lock, plus + one per-mspace lock. + + The global lock ensures that mparams.magic and other unique + mparams values are initialized only once. It also protects + sequences of calls to MORECORE. In many cases sys_alloc requires + two calls, that should not be interleaved with calls by other + threads. This does not protect against direct calls to MORECORE + by other threads not using this lock, so there is still code to + cope as best we can with interference. + + Per-mspace locks surround calls to malloc, free, etc. To enable use + in layered extensions, per-mspace locks are reentrant. + + Because lock-protected regions generally have bounded times, it is + OK to use the supplied simple spinlocks in the custom versions for + x86. Spinlocks are likely to improve performance for lightly + contended applications, but worsen performance under heavy + contention. + + If USE_LOCKS is > 1, the definitions of lock routines here are + bypassed, in which case you will need to define the type MLOCK_T, + and at least INITIAL_LOCK, ACQUIRE_LOCK, RELEASE_LOCK and possibly + TRY_LOCK (which is not used in this malloc, but commonly needed in + extensions.)
You must also declare a + static MLOCK_T malloc_global_mutex = { initialization values };. + +*/ + +#if USE_LOCKS == 1 + +#if USE_SPIN_LOCKS && SPIN_LOCKS_AVAILABLE +#ifndef WIN32 + +/* Custom pthread-style spin locks on x86 and x64 for gcc */ +struct pthread_mlock_t { + volatile unsigned int l; + char cachelinepadding[64]; + unsigned int c; + pthread_t threadid; +}; +#define MLOCK_T struct pthread_mlock_t +#define CURRENT_THREAD pthread_self() +#define INITIAL_LOCK(sl) ((sl)->threadid = 0, (sl)->l = (sl)->c = 0, 0) +#define ACQUIRE_LOCK(sl) pthread_acquire_lock(sl) +#define RELEASE_LOCK(sl) pthread_release_lock(sl) +#define TRY_LOCK(sl) pthread_try_lock(sl) +#define SPINS_PER_YIELD 63 + +static MLOCK_T malloc_global_mutex = { 0, "", 0, 0}; + +static FORCEINLINE int pthread_acquire_lock (MLOCK_T *sl) { + int spins = 0; + volatile unsigned int* lp = &sl->l; + for (;;) { + if (*lp != 0) { + if (sl->threadid == CURRENT_THREAD) { + ++sl->c; + return 0; + } + } + else { + /* place args to cmpxchgl in locals to evade oddities in some gccs */ + int cmp = 0; + int val = 1; + int ret; + __asm__ __volatile__ ("lock; cmpxchgl %1, %2" + : "=a" (ret) + : "r" (val), "m" (*(lp)), "0"(cmp) + : "memory", "cc"); + if (!ret) { + assert(!sl->threadid); + sl->threadid = CURRENT_THREAD; + sl->c = 1; + return 0; + } + } + if ((++spins & SPINS_PER_YIELD) == 0) { +#if defined (__SVR4) && defined (__sun) /* solaris */ + thr_yield(); +#else +#if defined(__linux__) || defined(__FreeBSD__) || defined(__APPLE__) + sched_yield(); +#else /* no-op yield on unknown systems */ + ; +#endif /* __linux__ || __FreeBSD__ || __APPLE__ */ +#endif /* solaris */ + } + } +} + +static FORCEINLINE void pthread_release_lock (MLOCK_T *sl) { + volatile unsigned int* lp = &sl->l; + assert(*lp != 0); + assert(sl->threadid == CURRENT_THREAD); + if (--sl->c == 0) { + sl->threadid = 0; + int prev = 0; + int ret; + __asm__ __volatile__ ("lock; xchgl %0, %1" + : "=r" (ret) + : "m" (*(lp)), "0"(prev) + : "memory"); + } +} 
+ +static FORCEINLINE int pthread_try_lock (MLOCK_T *sl) { + volatile unsigned int* lp = &sl->l; + if (*lp != 0) { + if (sl->threadid == CURRENT_THREAD) { + ++sl->c; + return 1; + } + } + else { + int cmp = 0; + int val = 1; + int ret; + __asm__ __volatile__ ("lock; cmpxchgl %1, %2" + : "=a" (ret) + : "r" (val), "m" (*(lp)), "0"(cmp) + : "memory", "cc"); + if (!ret) { + assert(!sl->threadid); + sl->threadid = CURRENT_THREAD; + sl->c = 1; + return 1; + } + } + return 0; +} + + +#else /* WIN32 */ +/* Custom win32-style spin locks on x86 and x64 for MSC */ +struct win32_mlock_t { + volatile long l; + char cachelinepadding[64]; + unsigned int c; + long threadid; +}; + +#define MLOCK_T struct win32_mlock_t +#define CURRENT_THREAD ((long)GetCurrentThreadId()) +#define INITIAL_LOCK(sl) ((sl)->threadid = 0, (sl)->l = (sl)->c = 0, 0) +#define ACQUIRE_LOCK(sl) win32_acquire_lock(sl) +#define RELEASE_LOCK(sl) win32_release_lock(sl) +#define TRY_LOCK(sl) win32_try_lock(sl) +#define SPINS_PER_YIELD 63 + +static MLOCK_T malloc_global_mutex = { 0, 0, 0}; + +static FORCEINLINE int win32_acquire_lock (MLOCK_T *sl) { + int spins = 0; + for (;;) { + if (sl->l != 0) { + if (sl->threadid == CURRENT_THREAD) { + ++sl->c; + return 0; + } + } + else { + if (!interlockedexchange(&sl->l, 1)) { + assert(!sl->threadid); + sl->threadid = CURRENT_THREAD; + sl->c = 1; + return 0; + } + } + if ((++spins & SPINS_PER_YIELD) == 0) + SleepEx(0, FALSE); + } +} + +static FORCEINLINE void win32_release_lock (MLOCK_T *sl) { + assert(sl->threadid == CURRENT_THREAD); + assert(sl->l != 0); + if (--sl->c == 0) { + sl->threadid = 0; + interlockedexchange (&sl->l, 0); + } +} + +static FORCEINLINE int win32_try_lock (MLOCK_T *sl) { + if (sl->l != 0) { + if (sl->threadid == CURRENT_THREAD) { + ++sl->c; + return 1; + } + } + else { + if (!interlockedexchange(&sl->l, 1)){ + assert(!sl->threadid); + sl->threadid = CURRENT_THREAD; + sl->c = 1; + return 1; + } + } + return 0; +} + +#endif /* WIN32 */ +#else /* 
USE_SPIN_LOCKS */ + +#ifndef WIN32 +/* pthreads-based locks */ + +#define MLOCK_T pthread_mutex_t +#define CURRENT_THREAD pthread_self() +#define INITIAL_LOCK(sl) pthread_init_lock(sl) +#define ACQUIRE_LOCK(sl) pthread_mutex_lock(sl) +#define RELEASE_LOCK(sl) pthread_mutex_unlock(sl) +#define TRY_LOCK(sl) (!pthread_mutex_trylock(sl)) + +static MLOCK_T malloc_global_mutex = PTHREAD_MUTEX_INITIALIZER; + +/* Cope with old-style linux recursive lock initialization by adding */ +/* skipped internal declaration from pthread.h */ +#ifdef linux +#ifndef PTHREAD_MUTEX_RECURSIVE +extern int pthread_mutexattr_setkind_np __P ((pthread_mutexattr_t *__attr, + int __kind)); +#define PTHREAD_MUTEX_RECURSIVE PTHREAD_MUTEX_RECURSIVE_NP +#define pthread_mutexattr_settype(x,y) pthread_mutexattr_setkind_np(x,y) +#endif +#endif + +/* Initialize *sl as a recursive mutex. Returns 0 on success, 1 on failure. */ +static int pthread_init_lock (MLOCK_T *sl) { + pthread_mutexattr_t attr; + int failed; + if (pthread_mutexattr_init(&attr)) return 1; + /* Always destroy attr, including when a later step fails. */ + failed = (pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE) != 0 || + pthread_mutex_init(sl, &attr) != 0); + if (pthread_mutexattr_destroy(&attr)) failed = 1; + return failed; +} + +#else /* WIN32 */ +/* Win32 critical sections. Each lock macro expands its own parameter (s), */ +/* so it works for any lock expression, e.g. &malloc_global_mutex. */ +#define MLOCK_T CRITICAL_SECTION +#define CURRENT_THREAD GetCurrentThreadId() +#define INITIAL_LOCK(s) (!InitializeCriticalSectionAndSpinCount((s), 0x80000000|4000)) +#define ACQUIRE_LOCK(s) (EnterCriticalSection(s), 0) +#define RELEASE_LOCK(s) LeaveCriticalSection(s) +#define TRY_LOCK(s) TryEnterCriticalSection(s) +#define NEED_GLOBAL_LOCK_INIT + +static MLOCK_T malloc_global_mutex; +static volatile long malloc_global_mutex_status; + +/* Use spin loop to initialize global lock */ +static void init_malloc_global_mutex() { + for (;;) { + long stat = malloc_global_mutex_status; + if (stat > 0) + return; + /* transition to < 0 while initializing, then to > 0 */ + if (stat == 0 && + interlockedcompareexchange(&malloc_global_mutex_status, -1, 0) == 0) { + InitializeCriticalSection(&malloc_global_mutex); +
interlockedexchange(&malloc_global_mutex_status,1); + return; + } + SleepEx(0, FALSE); + } +} + +#endif /* WIN32 */ +#endif /* USE_SPIN_LOCKS */ +#endif /* USE_LOCKS == 1 */ + +/* ----------------------- User-defined locks ------------------------ */ + +#if USE_LOCKS > 1 +/* Define your own lock implementation here */ +/* #define INITIAL_LOCK(sl) ... */ +/* #define ACQUIRE_LOCK(sl) ... */ +/* #define RELEASE_LOCK(sl) ... */ +/* #define TRY_LOCK(sl) ... */ +/* static MLOCK_T malloc_global_mutex = ... */ +#endif /* USE_LOCKS > 1 */ + +/* ----------------------- Lock-based state ------------------------ */ + +#if USE_LOCKS +#define USE_LOCK_BIT (2U) +#else /* USE_LOCKS */ +#define USE_LOCK_BIT (0U) +#define INITIAL_LOCK(l) +#endif /* USE_LOCKS */ + +#if USE_LOCKS +#ifndef ACQUIRE_MALLOC_GLOBAL_LOCK +#define ACQUIRE_MALLOC_GLOBAL_LOCK() ACQUIRE_LOCK(&malloc_global_mutex); +#endif +#ifndef RELEASE_MALLOC_GLOBAL_LOCK +#define RELEASE_MALLOC_GLOBAL_LOCK() RELEASE_LOCK(&malloc_global_mutex); +#endif +#else /* USE_LOCKS */ +#define ACQUIRE_MALLOC_GLOBAL_LOCK() +#define RELEASE_MALLOC_GLOBAL_LOCK() +#endif /* USE_LOCKS */ + + +/* ----------------------- Chunk representations ------------------------ */ + +/* + (The following includes lightly edited explanations by Colin Plumb.) + + The malloc_chunk declaration below is misleading (but accurate and + necessary). It declares a "view" into memory allowing access to + necessary fields at known offsets from a given base. + + Chunks of memory are maintained using a `boundary tag' method as + originally described by Knuth. (See the paper by Paul Wilson + ftp://ftp.cs.utexas.edu/pub/garbage/allocsrv.ps for a survey of such + techniques.) Sizes of free chunks are stored both in the front of + each chunk and at the end. This makes consolidating fragmented + chunks into bigger chunks fast. The head fields also hold bits + representing whether chunks are free or in use. + + Here are some pictures to make it clearer. 
They are "exploded" to + show that the state of a chunk can be thought of as extending from + the high 31 bits of the head field of its header through the + prev_foot and PINUSE_BIT bit of the following chunk header. + + A chunk that's in use looks like: + + chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Size of previous chunk (if P = 0) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |P| + | Size of this chunk 1| +-+ + mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | | + +- -+ + | | + +- -+ + | : + +- size - sizeof(size_t) available payload bytes -+ + : | + chunk-> +- -+ + | | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |1| + | Size of next chunk (may or may not be in use) | +-+ + mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + And if it's free, it looks like this: + + chunk-> +- -+ + | User payload (must be in use, or we would have merged!) 
| + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |P| + | Size of this chunk 0| +-+ + mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Next pointer | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Prev pointer | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | : + +- size - sizeof(struct chunk) unused bytes -+ + : | + chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Size of this chunk | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |0| + | Size of next chunk (must be in use, or we would have merged)| +-+ + mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | : + +- User payload -+ + : | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + |0| + +-+ + Note that since we always merge adjacent free chunks, the chunks + adjacent to a free chunk must be in use. + + Given a pointer to a chunk (which can be derived trivially from the + payload pointer) we can, in O(1) time, find out whether the adjacent + chunks are free, and if so, unlink them from the lists that they + are on and merge them with the current chunk. + + Chunks always begin on even word boundaries, so the mem portion + (which is returned to the user) is also on an even word boundary, and + thus at least double-word aligned. + + The P (PINUSE_BIT) bit, stored in the unused low-order bit of the + chunk size (which is always a multiple of two words), is an in-use + bit for the *previous* chunk. If that bit is *clear*, then the + word before the current chunk size contains the previous chunk + size, and can be used to find the front of the previous chunk. + The very first chunk allocated always has this bit set, preventing + access to non-existent (or non-owned) memory. 
If pinuse is set for + any given chunk, then you CANNOT determine the size of the + previous chunk, and might even get a memory addressing fault when + trying to do so. + + The C (CINUSE_BIT) bit, stored in the unused second-lowest bit of + the chunk size, redundantly records whether the current chunk is + inuse (unless the chunk is mmapped). This redundancy enables usage + checks within free and realloc, and reduces indirection when freeing + and consolidating chunks. + + Each freshly allocated chunk must have both cinuse and pinuse set. + That is, each allocated chunk borders either a previously allocated + and still in-use chunk, or the base of its memory arena. This is + ensured by making all allocations from the `lowest' part of any + found chunk. Further, no free chunk physically borders another one, + so each free chunk is known to be preceded and followed by either + inuse chunks or the ends of memory. + + Note that the `foot' of the current chunk is actually represented + as the prev_foot of the NEXT chunk. This makes it easier to + deal with alignments etc but can be very confusing when trying + to extend or adapt this code. + + The exceptions to all this are + + 1. The special chunk `top' is the top-most available chunk (i.e., + the one bordering the end of available memory). It is treated + specially. Top is never included in any bin, is used only if + no other chunk is available, and is released back to the + system if it is very large (see M_TRIM_THRESHOLD). In effect, + the top chunk is treated as larger (and thus less well + fitting) than any other available chunk. The top chunk + doesn't update its trailing size field since there is no next + contiguous chunk that would have to index off it. However, + space is still allocated for it (TOP_FOOT_SIZE) to enable + separation or merging when space is extended. + + 2. Chunks allocated via mmap have both cinuse and pinuse bits + cleared in their head fields. 
Because they are allocated + one-by-one, each must carry its own prev_foot field, which is + also used to hold the offset this chunk has within its mmapped + region, which is needed to preserve alignment. Each mmapped + chunk is trailed by the first two fields of a fake next-chunk + for sake of usage checks. + +*/ + +struct malloc_chunk { + size_t prev_foot; /* Size of previous chunk (if free). */ + size_t head; /* Size and inuse bits. */ + struct malloc_chunk* fd; /* double links -- used only if free. */ + struct malloc_chunk* bk; /* second half of the double link (see fd). */ +}; + +typedef struct malloc_chunk mchunk; +typedef struct malloc_chunk* mchunkptr; +typedef struct malloc_chunk* sbinptr; /* The type of bins of chunks */ +typedef unsigned int bindex_t; /* Described below */ +typedef unsigned int binmap_t; /* Described below */ + +/* ------------------- Chunks sizes and alignments ----------------------- */ + +#define MCHUNK_SIZE (sizeof(mchunk)) + +#if FOOTERS +#define CHUNK_OVERHEAD (TWO_SIZE_T_SIZES) +#else /* FOOTERS */ +#define CHUNK_OVERHEAD (SIZE_T_SIZE) +#endif /* FOOTERS */ + +/* MMapped chunks need a second word of overhead ... */ +#define MMAP_CHUNK_OVERHEAD (TWO_SIZE_T_SIZES) +/* ... and additional padding for fake next-chunk at foot */ +#define MMAP_FOOT_PAD (FOUR_SIZE_T_SIZES) + +/* The smallest size we can malloc is an aligned minimal chunk */ +#define MIN_CHUNK_SIZE\ + ((MCHUNK_SIZE + CHUNK_ALIGN_MASK) & ~CHUNK_ALIGN_MASK) + +/* conversion from malloc headers to user pointers, and back */ +#define chunk2mem(p) ((void*)((char*)(p) + TWO_SIZE_T_SIZES)) +#define mem2chunk(mem) ((mchunkptr)((char*)(mem) - TWO_SIZE_T_SIZES)) +/* chunk associated with aligned address A; the whole expansion is */ +/* parenthesized so that it can be used as an operand, e.g. of -> */ +#define align_as_chunk(A) ((mchunkptr)((A) + align_offset(chunk2mem(A)))) + +/* Bounds on request (not chunk) sizes. 
*/ +#define MAX_REQUEST ((-MIN_CHUNK_SIZE) << 2) +#define MIN_REQUEST (MIN_CHUNK_SIZE - CHUNK_OVERHEAD - SIZE_T_ONE) + +/* pad request bytes into a usable size */ +#define pad_request(req) \ + (((req) + CHUNK_OVERHEAD + CHUNK_ALIGN_MASK) & ~CHUNK_ALIGN_MASK) + +/* pad request, checking for minimum (but not maximum) */ +#define request2size(req) \ + (((req) < MIN_REQUEST)? MIN_CHUNK_SIZE : pad_request(req)) + + +/* ------------------ Operations on head and foot fields ----------------- */ + +/* + The head field of a chunk is or'ed with PINUSE_BIT when previous + adjacent chunk in use, and or'ed with CINUSE_BIT if this chunk is in + use, unless mmapped, in which case both bits are cleared. + + FLAG4_BIT is not used by this malloc, but might be useful in extensions. +*/ + +#define PINUSE_BIT (SIZE_T_ONE) +#define CINUSE_BIT (SIZE_T_TWO) +#define FLAG4_BIT (SIZE_T_FOUR) +#define INUSE_BITS (PINUSE_BIT|CINUSE_BIT) +#define FLAG_BITS (PINUSE_BIT|CINUSE_BIT|FLAG4_BIT) + +/* Head value for fenceposts */ +#define FENCEPOST_HEAD (INUSE_BITS|SIZE_T_SIZE) + +/* extraction of fields from head words */ +#define cinuse(p) ((p)->head & CINUSE_BIT) +#define pinuse(p) ((p)->head & PINUSE_BIT) +#define is_inuse(p) (((p)->head & INUSE_BITS) != PINUSE_BIT) +#define is_mmapped(p) (((p)->head & INUSE_BITS) == 0) + +#define chunksize(p) ((p)->head & ~(FLAG_BITS)) + +#define clear_pinuse(p) ((p)->head &= ~PINUSE_BIT) + +/* Treat space at ptr +/- offset as a chunk */ +#define chunk_plus_offset(p, s) ((mchunkptr)(((char*)(p)) + (s))) +#define chunk_minus_offset(p, s) ((mchunkptr)(((char*)(p)) - (s))) + +/* Ptr to next or previous physical malloc_chunk. 
*/ +#define next_chunk(p) ((mchunkptr)( ((char*)(p)) + ((p)->head & ~FLAG_BITS))) +#define prev_chunk(p) ((mchunkptr)( ((char*)(p)) - ((p)->prev_foot) )) + +/* extract next chunk's pinuse bit */ +#define next_pinuse(p) ((next_chunk(p)->head) & PINUSE_BIT) + +/* Get/set size at footer */ +#define get_foot(p, s) (((mchunkptr)((char*)(p) + (s)))->prev_foot) +#define set_foot(p, s) (((mchunkptr)((char*)(p) + (s)))->prev_foot = (s)) + +/* Set size, pinuse bit, and foot; s is parenthesized so that any */ +/* expression may be passed as the size */ +#define set_size_and_pinuse_of_free_chunk(p, s)\ + ((p)->head = ((s)|PINUSE_BIT), set_foot(p, s)) + +/* Set size, pinuse bit, foot, and clear next pinuse */ +#define set_free_with_pinuse(p, s, n)\ + (clear_pinuse(n), set_size_and_pinuse_of_free_chunk(p, s)) + +/* Get the internal overhead associated with chunk p */ +#define overhead_for(p)\ + (is_mmapped(p)? MMAP_CHUNK_OVERHEAD : CHUNK_OVERHEAD) + +/* Return true if malloced space is not necessarily cleared */ +#if MMAP_CLEARS +#define calloc_must_clear(p) (!is_mmapped(p)) +#else /* MMAP_CLEARS */ +#define calloc_must_clear(p) (1) +#endif /* MMAP_CLEARS */ + +/* ---------------------- Overlaid data structures ----------------------- */ + +/* + When chunks are not in use, they are treated as nodes of either + lists or trees. + + "Small" chunks are stored in circular doubly-linked lists, and look + like this: + + chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Size of previous chunk | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + `head:' | Size of chunk, in bytes |P| + mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Forward pointer to next chunk in list | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Back pointer to previous chunk in list | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Unused space (may be 0 bytes long) . + . . + . 
| +nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + `foot:' | Size of chunk, in bytes | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Larger chunks are kept in a form of bitwise digital trees (aka + tries) keyed on chunksizes. Because malloc_tree_chunks are only for + free chunks greater than 256 bytes, their size doesn't impose any + constraints on user chunk sizes. Each node looks like: + + chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Size of previous chunk | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + `head:' | Size of chunk, in bytes |P| + mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Forward pointer to next chunk of same size | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Back pointer to previous chunk of same size | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Pointer to left child (child[0]) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Pointer to right child (child[1]) | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Pointer to parent | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | bin index of this chunk | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Unused space . + . | +nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + `foot:' | Size of chunk, in bytes | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + Each tree holding treenodes is a tree of unique chunk sizes. Chunks + of the same size are arranged in a circularly-linked list, with only + the oldest chunk (the next to be used, in our FIFO ordering) + actually in the tree. (Tree members are distinguished by a non-null + parent pointer.) 
If a chunk with the same size as an existing node + is inserted, it is linked off the existing node using pointers that + work in the same way as fd/bk pointers of small chunks. + + Each tree contains a power of 2 sized range of chunk sizes (the + smallest is 0x100 <= x < 0x180), which is divided in half at each + tree level, with the chunks in the smaller half of the range (0x100 + <= x < 0x140 for the top node) in the left subtree and the larger + half (0x140 <= x < 0x180) in the right subtree. This is, of course, + done by inspecting individual bits. + + Using these rules, each node's left subtree contains all smaller + sizes than its right subtree. However, the node at the root of each + subtree has no particular ordering relationship to either. (The + dividing line between the subtree sizes is based on trie relation.) + If we remove the last chunk of a given size from the interior of the + tree, we need to replace it with a leaf node. The tree ordering + rules permit a node to be replaced by any leaf below it. + + The smallest chunk in a tree (a common operation in a best-fit + allocator) can be found by walking a path to the leftmost leaf in + the tree. Unlike a usual binary tree, where we follow left child + pointers until we reach a null, here we follow the right child + pointer any time the left one is null, until we reach a leaf with + both child pointers null. The smallest chunk in the tree will be + somewhere along that path. + + The worst case number of steps to add, find, or remove a node is + bounded by the number of bits differentiating chunks within + bins. Under current bin calculations, this ranges from 6 up to 21 + (for 32 bit sizes) or up to 53 (for 64 bit sizes). The typical case + is of course much better. 
+*/ + +struct malloc_tree_chunk { + /* The first four fields must be compatible with malloc_chunk */ + size_t prev_foot; + size_t head; + struct malloc_tree_chunk* fd; /* next chunk of same size */ + struct malloc_tree_chunk* bk; /* previous chunk of same size */ + + struct malloc_tree_chunk* child[2]; /* child[0] is the left child, child[1] the right */ + struct malloc_tree_chunk* parent; /* non-null only for chunks actually in the tree */ + bindex_t index; /* bin index of this chunk */ +}; + +typedef struct malloc_tree_chunk tchunk; +typedef struct malloc_tree_chunk* tchunkptr; +typedef struct malloc_tree_chunk* tbinptr; /* The type of bins of trees */ + +/* A little helper macro for trees: yields child[0] when it is non-null, else child[1] */ +#define leftmost_child(t) ((t)->child[0] != 0? (t)->child[0] : (t)->child[1]) + +/* ----------------------------- Segments -------------------------------- */ + +/* + Each malloc space may include non-contiguous segments, held in a + list headed by an embedded malloc_segment record representing the + top-most space. Segments also include flags holding properties of + the space. Large chunks that are directly allocated by mmap are not + included in this list. They are instead independently created and + destroyed without otherwise keeping track of them. + + Segment management mainly comes into play for spaces allocated by + MMAP. Any call to MMAP might or might not return memory that is + adjacent to an existing segment. MORECORE normally contiguously + extends the current space, so this space is almost always adjacent, + which is simpler and faster to deal with. (This is why MORECORE is + used preferentially to MMAP when both are available -- see + sys_alloc.) When allocating using MMAP, we don't use any of the + hinting mechanisms (inconsistently) supported in various + implementations of unix mmap, or distinguish reserving from + committing memory. Instead, we just ask for space, and exploit + contiguity when we get it. It is probably possible to do + better than this on some systems, but no general scheme seems + to be significantly better. 
+ + Management entails a simpler variant of the consolidation scheme + used for chunks to reduce fragmentation -- new adjacent memory is + normally prepended or appended to an existing segment. However, + there are limitations compared to chunk consolidation that mostly + reflect the fact that segment processing is relatively infrequent + (occurring only when getting memory from system) and that we + don't expect to have huge numbers of segments: + + * Segments are not indexed, so traversal requires linear scans. (It + would be possible to index these, but is not worth the extra + overhead and complexity for most programs on most platforms.) + * New segments are only appended to old ones when holding top-most + memory; if they cannot be prepended to others, they are held in + different segments. + + Except for the top-most segment of an mstate, each segment record + is kept at the tail of its segment. Segments are added by pushing + segment records onto the list headed by &mstate.seg for the + containing mstate. + + Segment flags control allocation/merge/deallocation policies: + * If EXTERN_BIT set, then we did not allocate this segment, + and so should not try to deallocate or merge with others. + (This currently holds only for the initial segment passed + into create_mspace_with_base.) + * If USE_MMAP_BIT set, the segment may be merged with + other surrounding mmapped segments and trimmed/de-allocated + using munmap. + * If neither bit is set, then the segment was obtained using + MORECORE so can be merged with surrounding MORECORE'd segments + and deallocated/trimmed using MORECORE with negative arguments. 
+*/ + +struct malloc_segment { + char* base; /* base address */ + size_t size; /* allocated size */ + struct malloc_segment* next; /* ptr to next segment */ + flag_t sflags; /* mmap and extern flag */ +}; + +#define is_mmapped_segment(S) ((S)->sflags & USE_MMAP_BIT) /* nonzero when USE_MMAP_BIT is set in sflags */ +#define is_extern_segment(S) ((S)->sflags & EXTERN_BIT) /* nonzero when EXTERN_BIT is set in sflags */ + +typedef struct malloc_segment msegment; +typedef struct malloc_segment* msegmentptr; + +/* ---------------------------- malloc_state ----------------------------- */ + +/* + A malloc_state holds all of the bookkeeping for a space. + The main fields are: + + Top + The topmost chunk of the currently active segment. Its size is + cached in topsize. The actual size of topmost space is + topsize+TOP_FOOT_SIZE, which includes space reserved for adding + fenceposts and segment records if necessary when getting more + space from the system. The size at which to autotrim top is + cached from mparams in trim_check, except that it is disabled if + an autotrim fails. + + Designated victim (dv) + This is the preferred chunk for servicing small requests that + don't have exact fits. It is normally the chunk split off most + recently to service another small request. Its size is cached in + dvsize. The link fields of this chunk are not maintained since it + is not kept in a bin. + + SmallBins + An array of bin headers for free chunks. These bins hold chunks + with sizes less than MIN_LARGE_SIZE bytes. Each bin contains + chunks of all the same size, spaced 8 bytes apart. To simplify + use in double-linked lists, each bin header acts as a malloc_chunk + pointing to the real first node, if it exists (else pointing to + itself). This avoids special-casing for headers. But to avoid + waste, we allocate only the fd/bk pointers of bins, and then use + repositioning tricks to treat these as the fields of a chunk. + + TreeBins + Treebins are pointers to the roots of trees holding a range of + sizes. 
There are 2 equally spaced treebins for each power of two + from TREEBIN_SHIFT to TREEBIN_SHIFT+16. The last bin holds anything + larger. + + Bin maps + There is one bit map for small bins ("smallmap") and one for + treebins ("treemap"). Each bin sets its bit when non-empty, and + clears the bit when empty. Bit operations are then used to avoid + bin-by-bin searching -- nearly all "search" is done without ever + looking at bins that won't be selected. The bit maps + conservatively use 32 bits per map word, even on 64-bit systems. + For a good description of some of the bit-based techniques used + here, see Henry S. Warren Jr's book "Hacker's Delight" (and + supplement at http://hackersdelight.org/). Many of these are + intended to reduce the branchiness of paths through malloc etc, as + well as to reduce the number of memory locations read or written. + + Segments + A list of segments headed by an embedded malloc_segment record + representing the initial space. + + Address check support + The least_addr field is the least address ever obtained from + MORECORE or MMAP. Attempted frees and reallocs of any address less + than this are trapped (unless INSECURE is defined). + + Magic tag + A cross-check field that should always hold the same value as mparams.magic. + + Flags + Bits recording whether to use MMAP, locks, or contiguous MORECORE + + Statistics + Each space keeps track of current and maximum system memory + obtained via MORECORE or MMAP. + + Trim support + Fields holding the amount of unused topmost memory that should trigger + trimming, and a counter to force periodic scanning to release unused + non-topmost segments. + + Locking + If USE_LOCKS is defined, the "mutex" lock is acquired and released + around every public call using this mspace. + + Extension support + A void* pointer and a size_t field that can be used to help implement + extensions to this malloc. 
+*/ + +/* Bin types, widths and sizes */ +#define NSMALLBINS (32U) +#define NTREEBINS (32U) +#define SMALLBIN_SHIFT (3U) +#define SMALLBIN_WIDTH (SIZE_T_ONE << SMALLBIN_SHIFT) +#define TREEBIN_SHIFT (8U) +#define MIN_LARGE_SIZE (SIZE_T_ONE << TREEBIN_SHIFT) +#define MAX_SMALL_SIZE (MIN_LARGE_SIZE - SIZE_T_ONE) +#define MAX_SMALL_REQUEST (MAX_SMALL_SIZE - CHUNK_ALIGN_MASK - CHUNK_OVERHEAD) + +struct malloc_state { + binmap_t smallmap; /* bit map of non-empty small bins */ + binmap_t treemap; /* bit map of non-empty tree bins */ + size_t dvsize; /* cached size of dv */ + size_t topsize; /* cached size of top */ + char* least_addr; /* least address ever obtained from MORECORE or MMAP */ + mchunkptr dv; /* designated victim chunk */ + mchunkptr top; /* topmost chunk of the currently active segment */ + size_t trim_check; /* size at which to autotrim top */ + size_t release_checks; /* counter forcing periodic scans for unused segments */ + size_t magic; /* cross-check; should equal mparams.magic */ + mchunkptr smallbins[(NSMALLBINS+1)*2]; /* only the fd/bk pointers of each bin header */ + tbinptr treebins[NTREEBINS]; /* roots of the trees of larger chunks */ + size_t footprint; /* current system memory obtained */ + size_t max_footprint; /* maximum system memory obtained */ + flag_t mflags; /* MMAP, lock and noncontiguous-MORECORE bits */ + msegment seg; /* embedded head of the segment list */ +#if USE_LOCKS + MLOCK_T mutex; /* locate lock among fields that rarely change */ +#endif /* USE_LOCKS */ + void* extp; /* Unused but available for extensions */ + size_t exts; +}; + +typedef struct malloc_state* mstate; + +/* ------------- Global malloc_state and malloc_params ------------------- */ + +#if !ONLY_MSPACES + +/* The global malloc_state used for all non-"mspace" calls */ +static struct malloc_state _gm_; +#define gm (&_gm_) +#define is_global(M) ((M) == &_gm_) + +#endif /* !ONLY_MSPACES */ + +#define is_initialized(M) ((M)->top != 0) + +/* -------------------------- system alloc setup ------------------------- */ + +/* Operations on mflags */ + +#define use_lock(M) ((M)->mflags & USE_LOCK_BIT) +#define enable_lock(M) ((M)->mflags |= USE_LOCK_BIT) +#define disable_lock(M) ((M)->mflags &= ~USE_LOCK_BIT) + +#define use_mmap(M) ((M)->mflags & USE_MMAP_BIT) +#define enable_mmap(M) ((M)->mflags |= USE_MMAP_BIT) +#define disable_mmap(M) ((M)->mflags &= ~USE_MMAP_BIT) + +#define use_noncontiguous(M) ((M)->mflags & USE_NONCONTIGUOUS_BIT) +#define disable_contiguous(M) ((M)->mflags |= USE_NONCONTIGUOUS_BIT) + +#define set_lock(M,L)\ + ((M)->mflags = (L)?\ + ((M)->mflags | USE_LOCK_BIT) :\ + ((M)->mflags & 
~USE_LOCK_BIT)) + +/* page-align a size */ +#define page_align(S)\ + (((S) + (mparams.page_size - SIZE_T_ONE)) & ~(mparams.page_size - SIZE_T_ONE)) + +/* granularity-align a size */ +#define granularity_align(S)\ + (((S) + (mparams.granularity - SIZE_T_ONE))\ + & ~(mparams.granularity - SIZE_T_ONE)) + + +/* For mmap, use granularity alignment on windows, else page-align */ +#ifdef WIN32 +#define mmap_align(S) granularity_align(S) +#else +#define mmap_align(S) page_align(S) +#endif + +/* For sys_alloc, enough padding to ensure can malloc request on success */ +#define SYS_ALLOC_PADDING (TOP_FOOT_SIZE + MALLOC_ALIGNMENT) + +#define is_page_aligned(S)\ + (((size_t)(S) & (mparams.page_size - SIZE_T_ONE)) == 0) +#define is_granularity_aligned(S)\ + (((size_t)(S) & (mparams.granularity - SIZE_T_ONE)) == 0) + +/* True if segment S holds address A, i.e. base <= A < base + size. */ +/* S is parenthesized so that any pointer expression may be passed. */ +#define segment_holds(S, A)\ + ((char*)(A) >= (S)->base && (char*)(A) < (S)->base + (S)->size) + +/* Return segment holding given address, or 0 if no segment of m holds it */ +static msegmentptr segment_holding(mstate m, char* addr) { + msegmentptr sp = &m->seg; + for (;;) { + if (addr >= sp->base && addr < sp->base + sp->size) + return sp; + if ((sp = sp->next) == 0) + return 0; + } +} + +/* Return true if segment contains a segment link, i.e. if any segment */ +/* record on m's list is itself stored inside the address range of ss */ +static int has_segment_link(mstate m, msegmentptr ss) { + msegmentptr sp = &m->seg; + for (;;) { + if ((char*)sp >= ss->base && (char*)sp < ss->base + ss->size) + return 1; + if ((sp = sp->next) == 0) + return 0; + } +} + +#ifndef MORECORE_CANNOT_TRIM +#define should_trim(M,s) ((s) > (M)->trim_check) +#else /* MORECORE_CANNOT_TRIM */ +#define should_trim(M,s) (0) +#endif /* MORECORE_CANNOT_TRIM */ + +/* + TOP_FOOT_SIZE is padding at the end of a segment, including space + that may be needed to place segment records and fenceposts when new + noncontiguous segments are added. 
+*/ +#define TOP_FOOT_SIZE\ + (align_offset(chunk2mem(0))+pad_request(sizeof(struct malloc_segment))+MIN_CHUNK_SIZE) + + +/* ------------------------------- Hooks -------------------------------- */ + +/* + PREACTION should be defined to return 0 on success, and nonzero on + failure. If you are not using locking, you can redefine these to do + anything you like. +*/ + +#if USE_LOCKS + +#define PREACTION(M) ((use_lock(M))? ACQUIRE_LOCK(&(M)->mutex) : 0) +#define POSTACTION(M) { if (use_lock(M)) RELEASE_LOCK(&(M)->mutex); } +#else /* USE_LOCKS */ + +#ifndef PREACTION +#define PREACTION(M) (0) +#endif /* PREACTION */ + +#ifndef POSTACTION +#define POSTACTION(M) +#endif /* POSTACTION */ + +#endif /* USE_LOCKS */ + +/* + CORRUPTION_ERROR_ACTION is triggered upon detected bad addresses. + USAGE_ERROR_ACTION is triggered on detected bad frees and + reallocs. The argument p is an address that might have triggered the + fault. It is ignored by the two predefined actions, but might be + useful in custom actions that try to help diagnose errors. +*/ + +#if PROCEED_ON_ERROR + +/* A count of the number of corruption errors causing resets */ +int malloc_corruption_error_count; + +/* default corruption action */ +static void reset_on_error(mstate m); + +#define CORRUPTION_ERROR_ACTION(m) reset_on_error(m) +#define USAGE_ERROR_ACTION(m, p) + +#else /* PROCEED_ON_ERROR */ + +#ifndef CORRUPTION_ERROR_ACTION +#define CORRUPTION_ERROR_ACTION(m) ABORT +#endif /* CORRUPTION_ERROR_ACTION */ + +#ifndef USAGE_ERROR_ACTION +#define USAGE_ERROR_ACTION(m,p) ABORT +#endif /* USAGE_ERROR_ACTION */ + +#endif /* PROCEED_ON_ERROR */ + +/* -------------------------- Debugging setup ---------------------------- */ + +#if ! 
DEBUG + +#define check_free_chunk(M,P) +#define check_inuse_chunk(M,P) +#define check_malloced_chunk(M,P,N) +#define check_mmapped_chunk(M,P) +#define check_malloc_state(M) +#define check_top_chunk(M,P) + +#else /* DEBUG */ +#define check_free_chunk(M,P) do_check_free_chunk(M,P) +#define check_inuse_chunk(M,P) do_check_inuse_chunk(M,P) +#define check_top_chunk(M,P) do_check_top_chunk(M,P) +#define check_malloced_chunk(M,P,N) do_check_malloced_chunk(M,P,N) +#define check_mmapped_chunk(M,P) do_check_mmapped_chunk(M,P) +#define check_malloc_state(M) do_check_malloc_state(M) + +static void do_check_any_chunk(mstate m, mchunkptr p); +static void do_check_top_chunk(mstate m, mchunkptr p); +static void do_check_mmapped_chunk(mstate m, mchunkptr p); +static void do_check_inuse_chunk(mstate m, mchunkptr p); +static void do_check_free_chunk(mstate m, mchunkptr p); +static void do_check_malloced_chunk(mstate m, void* mem, size_t s); +static void do_check_tree(mstate m, tchunkptr t); +static void do_check_treebin(mstate m, bindex_t i); +static void do_check_smallbin(mstate m, bindex_t i); +static void do_check_malloc_state(mstate m); +static int bin_find(mstate m, mchunkptr x); +static size_t traverse_and_check(mstate m); +#endif /* DEBUG */ + +/* ---------------------------- Indexing Bins ---------------------------- */ + +/* small_index is fully parenthesized so that its result can be used */ +/* as an operand of any operator */ +#define is_small(s) (((s) >> SMALLBIN_SHIFT) < NSMALLBINS) +#define small_index(s) ((bindex_t)((s) >> SMALLBIN_SHIFT)) +#define small_index2size(i) ((i) << SMALLBIN_SHIFT) +#define MIN_SMALL_INDEX (small_index(MIN_CHUNK_SIZE)) + +/* addressing by index. See above about smallbin repositioning */ +#define smallbin_at(M, i) ((sbinptr)((char*)&((M)->smallbins[(i)<<1]))) +#define treebin_at(M,i) (&((M)->treebins[i])) + +/* assign tree index for size S to variable I. 
Use x86 asm if possible */ +/* In every variant X is a size_t, so that sizes of 2^40 bytes and above */ +/* are not truncated before the X > 0xFFFF test, and S is parenthesized */ +/* so that any expression may be passed as the size. */ +#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) +#define compute_tree_index(S, I)\ +{\ + size_t X = (S) >> TREEBIN_SHIFT;\ + if (X == 0)\ + I = 0;\ + else if (X > 0xFFFF)\ + I = NTREEBINS-1;\ + else {\ + unsigned int Y = (unsigned int)X;\ + unsigned int K;\ + __asm__("bsrl\t%1, %0\n\t" : "=r" (K) : "g" (Y));\ + I = (bindex_t)((K << 1) + (((S) >> (K + (TREEBIN_SHIFT-1)) & 1)));\ + }\ +} + +#elif defined (__INTEL_COMPILER) +#define compute_tree_index(S, I)\ +{\ + size_t X = (S) >> TREEBIN_SHIFT;\ + if (X == 0)\ + I = 0;\ + else if (X > 0xFFFF)\ + I = NTREEBINS-1;\ + else {\ + unsigned int K = _bit_scan_reverse (X); \ + I = (bindex_t)((K << 1) + (((S) >> (K + (TREEBIN_SHIFT-1)) & 1)));\ + }\ +} + +#elif defined(_MSC_VER) && _MSC_VER>=1300 +#define compute_tree_index(S, I)\ +{\ + size_t X = (S) >> TREEBIN_SHIFT;\ + if (X == 0)\ + I = 0;\ + else if (X > 0xFFFF)\ + I = NTREEBINS-1;\ + else {\ + unsigned int K;\ + _BitScanReverse((DWORD *) &K, (DWORD) X);\ + I = (bindex_t)((K << 1) + (((S) >> (K + (TREEBIN_SHIFT-1)) & 1)));\ + }\ +} + +#else /* GNUC */ +#define compute_tree_index(S, I)\ +{\ + size_t X = (S) >> TREEBIN_SHIFT;\ + if (X == 0)\ + I = 0;\ + else if (X > 0xFFFF)\ + I = NTREEBINS-1;\ + else {\ + unsigned int Y = (unsigned int)X;\ + unsigned int N = ((Y - 0x100) >> 16) & 8;\ + unsigned int K = (((Y <<= N) - 0x1000) >> 16) & 4;\ + N += K;\ + N += K = (((Y <<= K) - 0x4000) >> 16) & 2;\ + K = 14 - N + ((Y <<= K) >> 15);\ + I = (K << 1) + (((S) >> (K + (TREEBIN_SHIFT-1)) & 1));\ + }\ +} +#endif /* GNUC */ + +/* Bit representing maximum resolved size in a treebin at i */ +#define bit_for_tree_index(i) \ + (((i) == NTREEBINS-1)? (SIZE_T_BITSIZE-1) : (((i) >> 1) + TREEBIN_SHIFT - 2)) + +/* Shift placing maximum resolved bit in a treebin at i as sign bit */ +#define leftshift_for_tree_index(i) \ + (((i) == NTREEBINS-1)? 
0 : \ + ((SIZE_T_BITSIZE-SIZE_T_ONE) - (((i) >> 1) + TREEBIN_SHIFT - 2))) + +/* The size of the smallest chunk held in bin with index i */ +#define minsize_for_tree_index(i) \ + ((SIZE_T_ONE << (((i) >> 1) + TREEBIN_SHIFT)) | \ + (((size_t)((i) & SIZE_T_ONE)) << (((i) >> 1) + TREEBIN_SHIFT - 1))) + + +/* ------------------------ Operations on bin maps ----------------------- */ + +/* bit corresponding to given index */ +#define idx2bit(i) ((binmap_t)(1) << (i)) + +/* Mark/Clear bits with given index */ +#define mark_smallmap(M,i) ((M)->smallmap |= idx2bit(i)) +#define clear_smallmap(M,i) ((M)->smallmap &= ~idx2bit(i)) +#define smallmap_is_marked(M,i) ((M)->smallmap & idx2bit(i)) + +#define mark_treemap(M,i) ((M)->treemap |= idx2bit(i)) +#define clear_treemap(M,i) ((M)->treemap &= ~idx2bit(i)) +#define treemap_is_marked(M,i) ((M)->treemap & idx2bit(i)) + +/* isolate the least set bit of a bitmap */ +#define least_bit(x) ((x) & -(x)) + +/* mask with all bits to left of least bit of x on */ +#define left_bits(x) (((x)<<1) | -((x)<<1)) + +/* mask with all bits to left of or equal to least bit of x on */ +#define same_or_left_bits(x) ((x) | -(x)) + +/* index corresponding to given bit. 
Use x86 asm if possible */ + +#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) +#define compute_bit2idx(X, I)\ +{\ + unsigned int J;\ + __asm__("bsfl\t%1, %0\n\t" : "=r" (J) : "g" (X));\ + I = (bindex_t)J;\ +} + +#elif defined (__INTEL_COMPILER) +#define compute_bit2idx(X, I)\ +{\ + unsigned int J;\ + J = _bit_scan_forward (X); \ + I = (bindex_t)J;\ +} + +#elif defined(_MSC_VER) && _MSC_VER>=1300 +#define compute_bit2idx(X, I)\ +{\ + unsigned int J;\ + _BitScanForward((DWORD *) &J, X);\ + I = (bindex_t)J;\ +} + +#elif USE_BUILTIN_FFS +#define compute_bit2idx(X, I) I = ffs(X)-1 /* ffs() numbers bits from 1, hence the -1 */ + +#else /* portable fallback: no asm, intrinsic or ffs */ +#define compute_bit2idx(X, I)\ +{\ + unsigned int Y = X - 1;\ + unsigned int K = Y >> (16-4) & 16;\ + unsigned int N = K; Y >>= K;\ + N += K = Y >> (8-3) & 8; Y >>= K;\ + N += K = Y >> (4-2) & 4; Y >>= K;\ + N += K = Y >> (2-1) & 2; Y >>= K;\ + N += K = Y >> (1-0) & 1; Y >>= K;\ + I = (bindex_t)(N + Y);\ +} +#endif /* GNUC */ + + +/* ----------------------- Runtime Check Support ------------------------- */ + +/* + For security, the main invariant is that malloc/free/etc never + writes to a static address other than malloc_state, unless static + malloc_state itself has been corrupted, which cannot occur via + malloc (because of these checks). In essence this means that we + believe all pointers, sizes, maps etc held in malloc_state, but + check all of those linked or offsetted from other embedded data + structures. These checks are interspersed with main code in a way + that tends to minimize their run-time cost. + + When FOOTERS is defined, in addition to range checking, we also + verify footer fields of inuse chunks, which can be used to guarantee + that the mstate controlling malloc/free is intact. 
This is a + streamlined version of the approach described by William Robertson + et al in "Run-time Detection of Heap-based Overflows" LISA'03 + http://www.usenix.org/events/lisa03/tech/robertson.html The footer + of an inuse chunk holds the xor of its mstate and a random seed, + that is checked upon calls to free() and realloc(). This is + (probabilistically) unguessable from outside the program, but can be + computed by any code successfully malloc'ing any chunk, so does not + itself provide protection against code that has already broken + security through some other means. Unlike Robertson et al, we + always dynamically check addresses of all offset chunks (previous, + next, etc). This turns out to be cheaper than relying on hashes. +*/ + +#if !INSECURE +/* Check if address a is at least as high as any from MORECORE or MMAP */ +#define ok_address(M, a) ((char*)(a) >= (M)->least_addr) +/* Check if address of next chunk n is higher than base chunk p */ +#define ok_next(p, n) ((char*)(p) < (char*)(n)) +/* Check if p has inuse status */ +#define ok_inuse(p) is_inuse(p) +/* Check if p has its pinuse bit on */ +#define ok_pinuse(p) pinuse(p) + +#else /* !INSECURE */ +#define ok_address(M, a) (1) +#define ok_next(b, n) (1) +#define ok_inuse(p) (1) +#define ok_pinuse(p) (1) +#endif /* !INSECURE */ + +#if (FOOTERS && !INSECURE) +/* Check if (alleged) mstate m has expected magic field */ +#define ok_magic(M) ((M)->magic == mparams.magic) +#else /* (FOOTERS && !INSECURE) */ +#define ok_magic(M) (1) +#endif /* (FOOTERS && !INSECURE) */ + + +/* In gcc, use __builtin_expect to minimize impact of checks */ +#if !INSECURE +#if defined(__GNUC__) && __GNUC__ >= 3 +#define RTCHECK(e) __builtin_expect(e, 1) +#else /* GNUC */ +#define RTCHECK(e) (e) +#endif /* GNUC */ +#else /* !INSECURE */ +#define RTCHECK(e) (1) +#endif /* !INSECURE */ + +/* macros to set up inuse chunks with or without footers */ + +#if !FOOTERS + +#define mark_inuse_foot(M,p,s) /* expands to nothing when FOOTERS is off */ + +/* Macros for setting head/foot
of non-mmapped chunks */ + +/* Set cinuse bit and pinuse bit of next chunk */ +#define set_inuse(M,p,s)\ + ((p)->head = (((p)->head & PINUSE_BIT)|s|CINUSE_BIT),\ + ((mchunkptr)(((char*)(p)) + (s)))->head |= PINUSE_BIT) + +/* Set cinuse and pinuse of this chunk and pinuse of next chunk */ +#define set_inuse_and_pinuse(M,p,s)\ + ((p)->head = (s|PINUSE_BIT|CINUSE_BIT),\ + ((mchunkptr)(((char*)(p)) + (s)))->head |= PINUSE_BIT) + +/* Set size, cinuse and pinuse bit of this chunk */ +#define set_size_and_pinuse_of_inuse_chunk(M, p, s)\ + ((p)->head = (s|PINUSE_BIT|CINUSE_BIT)) + +#else /* FOOTERS */ + +/* Set foot of inuse chunk to be xor of mstate and seed */ +#define mark_inuse_foot(M,p,s)\ + (((mchunkptr)((char*)(p) + (s)))->prev_foot = ((size_t)(M) ^ mparams.magic)) + +#define get_mstate_for(p)\ + ((mstate)(((mchunkptr)((char*)(p) +\ + (chunksize(p))))->prev_foot ^ mparams.magic)) + +#define set_inuse(M,p,s)\ + ((p)->head = (((p)->head & PINUSE_BIT)|s|CINUSE_BIT),\ + (((mchunkptr)(((char*)(p)) + (s)))->head |= PINUSE_BIT), \ + mark_inuse_foot(M,p,s)) + +#define set_inuse_and_pinuse(M,p,s)\ + ((p)->head = (s|PINUSE_BIT|CINUSE_BIT),\ + (((mchunkptr)(((char*)(p)) + (s)))->head |= PINUSE_BIT),\ + mark_inuse_foot(M,p,s)) + +#define set_size_and_pinuse_of_inuse_chunk(M, p, s)\ + ((p)->head = (s|PINUSE_BIT|CINUSE_BIT),\ + mark_inuse_foot(M, p, s)) + +#endif /* !FOOTERS */ + +/* ---------------------------- setting mparams -------------------------- */ + +#ifdef ENABLE_LARGE_PAGES +typedef size_t (WINAPI *GetLargePageMinimum_t)(void); +#endif + +/* Initialize mparams */ +static int init_mparams(void) { +#ifdef NEED_GLOBAL_LOCK_INIT + if (malloc_global_mutex_status <= 0) + init_malloc_global_mutex(); +#endif + + ACQUIRE_MALLOC_GLOBAL_LOCK(); + if (mparams.magic == 0) { + size_t magic; + size_t psize; + size_t gsize; + +#ifndef WIN32 + psize = malloc_getpagesize; + gsize = ((DEFAULT_GRANULARITY != 0)? 
DEFAULT_GRANULARITY : psize); +#else /* WIN32 */ + { + SYSTEM_INFO system_info; + GetSystemInfo(&system_info); + psize = system_info.dwPageSize; + gsize = ((DEFAULT_GRANULARITY != 0)? + DEFAULT_GRANULARITY : system_info.dwAllocationGranularity); +#ifdef ENABLE_LARGE_PAGES + { + GetLargePageMinimum_t GetLargePageMinimum_ = (GetLargePageMinimum_t) GetProcAddress(GetModuleHandle(__T("kernel32.dll")), "GetLargePageMinimum"); + if(GetLargePageMinimum_) { + size_t largepagesize = GetLargePageMinimum_(); + if(largepagesize) { + psize = largepagesize; + gsize = ((DEFAULT_GRANULARITY != 0)? + DEFAULT_GRANULARITY : largepagesize); + if(gsize < largepagesize) gsize = largepagesize; + } + } + } +#endif + } +#endif /* WIN32 */ + + /* Sanity-check configuration: + size_t must be unsigned and as wide as pointer type. + ints must be at least 4 bytes. + alignment must be at least 8. + Alignment, min chunk size, and page size must all be powers of 2. + */ + if ((sizeof(size_t) != sizeof(char*)) || + (MAX_SIZE_T < MIN_CHUNK_SIZE) || + (sizeof(int) < 4) || + (MALLOC_ALIGNMENT < (size_t)8U) || + ((MALLOC_ALIGNMENT & (MALLOC_ALIGNMENT-SIZE_T_ONE)) != 0) || + ((MCHUNK_SIZE & (MCHUNK_SIZE-SIZE_T_ONE)) != 0) || + ((gsize & (gsize-SIZE_T_ONE)) != 0) || + ((psize & (psize-SIZE_T_ONE)) != 0)) + ABORT; + + mparams.granularity = gsize; + mparams.page_size = psize; + mparams.mmap_threshold = DEFAULT_MMAP_THRESHOLD; + mparams.trim_threshold = DEFAULT_TRIM_THRESHOLD; +#if MORECORE_CONTIGUOUS + mparams.default_mflags = USE_LOCK_BIT|USE_MMAP_BIT; +#else /* MORECORE_CONTIGUOUS */ + mparams.default_mflags = USE_LOCK_BIT|USE_MMAP_BIT|USE_NONCONTIGUOUS_BIT; +#endif /* MORECORE_CONTIGUOUS */ + +#if !ONLY_MSPACES + /* Set up lock for main malloc area */ + gm->mflags = mparams.default_mflags; + INITIAL_LOCK(&gm->mutex); +#endif + + { +#if USE_DEV_RANDOM + int fd; + unsigned char buf[sizeof(size_t)]; + /* Try to use /dev/urandom, else fall back on using time */ + if ((fd = open("/dev/urandom", O_RDONLY)) >= 
0 && + read(fd, buf, sizeof(buf)) == sizeof(buf)) { + magic = *((size_t *) buf); + close(fd); + } + else +#endif /* USE_DEV_RANDOM */ +#ifdef WIN32 + magic = (size_t)(GetTickCount() ^ (size_t)0x55555555U); +#else + magic = (size_t)(time(0) ^ (size_t)0x55555555U); +#endif + magic |= (size_t)8U; /* ensure nonzero */ + magic &= ~(size_t)7U; /* improve chances of fault for bad values */ + mparams.magic = magic; + } + } + + RELEASE_MALLOC_GLOBAL_LOCK(); + return 1; +} + +/* support for mallopt */ +static int change_mparam(int param_number, int value) { + size_t val; + ensure_initialization(); + val = (value == -1)? MAX_SIZE_T : (size_t)value; + switch(param_number) { + case M_TRIM_THRESHOLD: + mparams.trim_threshold = val; + return 1; + case M_GRANULARITY: + if (val >= mparams.page_size && ((val & (val-1)) == 0)) { + mparams.granularity = val; + return 1; + } + else + return 0; + case M_MMAP_THRESHOLD: + mparams.mmap_threshold = val; + return 1; + default: + return 0; + } +} + +#if DEBUG +/* ------------------------- Debugging Support --------------------------- */ + +/* Check properties of any chunk, whether free, inuse, mmapped etc */ +static void do_check_any_chunk(mstate m, mchunkptr p) { + assert((is_aligned(chunk2mem(p))) || (p->head == FENCEPOST_HEAD)); + assert(ok_address(m, p)); +} + +/* Check properties of top chunk */ +static void do_check_top_chunk(mstate m, mchunkptr p) { + msegmentptr sp = segment_holding(m, (char*)p); + size_t sz = p->head & ~INUSE_BITS; /* third-lowest bit can be set! 
*/ + assert(sp != 0); + assert((is_aligned(chunk2mem(p))) || (p->head == FENCEPOST_HEAD)); + assert(ok_address(m, p)); + assert(sz == m->topsize); + assert(sz > 0); + assert(sz == ((sp->base + sp->size) - (char*)p) - TOP_FOOT_SIZE); + assert(pinuse(p)); + assert(!pinuse(chunk_plus_offset(p, sz))); +} + +/* Check properties of (inuse) mmapped chunks */ +static void do_check_mmapped_chunk(mstate m, mchunkptr p) { + size_t sz = chunksize(p); + size_t len = (sz + (p->prev_foot) + MMAP_FOOT_PAD); + assert(is_mmapped(p)); + assert(use_mmap(m)); + assert((is_aligned(chunk2mem(p))) || (p->head == FENCEPOST_HEAD)); + assert(ok_address(m, p)); + assert(!is_small(sz)); + assert((len & (mparams.page_size-SIZE_T_ONE)) == 0); + assert(chunk_plus_offset(p, sz)->head == FENCEPOST_HEAD); + assert(chunk_plus_offset(p, sz+SIZE_T_SIZE)->head == 0); +} + +/* Check properties of inuse chunks */ +static void do_check_inuse_chunk(mstate m, mchunkptr p) { + do_check_any_chunk(m, p); + assert(is_inuse(p)); + assert(next_pinuse(p)); + /* If not pinuse and not mmapped, previous chunk has OK offset */ + assert(is_mmapped(p) || pinuse(p) || next_chunk(prev_chunk(p)) == p); + if (is_mmapped(p)) + do_check_mmapped_chunk(m, p); +} + +/* Check properties of free chunks */ +static void do_check_free_chunk(mstate m, mchunkptr p) { + size_t sz = chunksize(p); + mchunkptr next = chunk_plus_offset(p, sz); + do_check_any_chunk(m, p); + assert(!is_inuse(p)); + assert(!next_pinuse(p)); + assert (!is_mmapped(p)); + if (p != m->dv && p != m->top) { + if (sz >= MIN_CHUNK_SIZE) { + assert((sz & CHUNK_ALIGN_MASK) == 0); + assert(is_aligned(chunk2mem(p))); + assert(next->prev_foot == sz); + assert(pinuse(p)); + assert (next == m->top || is_inuse(next)); + assert(p->fd->bk == p); + assert(p->bk->fd == p); + } + else /* markers are always of size SIZE_T_SIZE */ + assert(sz == SIZE_T_SIZE); + } +} + +/* Check properties of malloced chunks at the point they are malloced */ +static void do_check_malloced_chunk(mstate 
m, void* mem, size_t s) { + if (mem != 0) { + mchunkptr p = mem2chunk(mem); + size_t sz = p->head & ~INUSE_BITS; + do_check_inuse_chunk(m, p); + assert((sz & CHUNK_ALIGN_MASK) == 0); + assert(sz >= MIN_CHUNK_SIZE); + assert(sz >= s); + /* unless mmapped, size is less than MIN_CHUNK_SIZE more than request */ + assert(is_mmapped(p) || sz < (s + MIN_CHUNK_SIZE)); + } +} + +/* Check a tree and its subtrees. */ +static void do_check_tree(mstate m, tchunkptr t) { + tchunkptr head = 0; + tchunkptr u = t; + bindex_t tindex = t->index; + size_t tsize = chunksize(t); + bindex_t idx; + compute_tree_index(tsize, idx); + assert(tindex == idx); + assert(tsize >= MIN_LARGE_SIZE); + assert(tsize >= minsize_for_tree_index(idx)); + assert((idx == NTREEBINS-1) || (tsize < minsize_for_tree_index((idx+1)))); + + do { /* traverse through chain of same-sized nodes */ + do_check_any_chunk(m, ((mchunkptr)u)); + assert(u->index == tindex); + assert(chunksize(u) == tsize); + assert(!is_inuse(u)); + assert(!next_pinuse(u)); + assert(u->fd->bk == u); + assert(u->bk->fd == u); + if (u->parent == 0) { + assert(u->child[0] == 0); + assert(u->child[1] == 0); + } + else { + assert(head == 0); /* only one node on chain has parent */ + head = u; + assert(u->parent != u); + assert (u->parent->child[0] == u || + u->parent->child[1] == u || + *((tbinptr*)(u->parent)) == u); + if (u->child[0] != 0) { + assert(u->child[0]->parent == u); + assert(u->child[0] != u); + do_check_tree(m, u->child[0]); + } + if (u->child[1] != 0) { + assert(u->child[1]->parent == u); + assert(u->child[1] != u); + do_check_tree(m, u->child[1]); + } + if (u->child[0] != 0 && u->child[1] != 0) { + assert(chunksize(u->child[0]) < chunksize(u->child[1])); + } + } + u = u->fd; + } while (u != t); + assert(head != 0); +} + +/* Check all the chunks in a treebin. 
*/
+static void do_check_treebin(mstate m, bindex_t i) {
+  tbinptr* tb = treebin_at(m, i);
+  tchunkptr t = *tb;
+  int empty = (m->treemap & (1U << i)) == 0; /* map bit for bin i is clear */
+  if (t == 0)
+    assert(empty);
+  if (!empty)
+    do_check_tree(m, t);
+}
+
+/* Check all the chunks in a smallbin. The bin is walked through its bk
+   links, starting from the bin header. */
+static void do_check_smallbin(mstate m, bindex_t i) {
+  sbinptr b = smallbin_at(m, i);
+  mchunkptr p = b->bk;
+  unsigned int empty = (m->smallmap & (1U << i)) == 0;
+  if (p == b)
+    assert(empty);
+  if (!empty) {
+    for (; p != b; p = p->bk) {
+      size_t size = chunksize(p);
+      mchunkptr q;
+      /* each chunk claims to be free */
+      do_check_free_chunk(m, p);
+      /* chunk belongs in bin */
+      assert(small_index(size) == i);
+      assert(p->bk == b || chunksize(p->bk) == chunksize(p));
+      /* chunk is followed by an inuse chunk */
+      q = next_chunk(p);
+      if (q->head != FENCEPOST_HEAD)
+        do_check_inuse_chunk(m, q);
+    }
+  }
+}
+
+/* Find x in a bin. Used in other check functions. Returns 1 when x is
+   linked into the smallbin or treebin selected by its size, else 0. */
+static int bin_find(mstate m, mchunkptr x) {
+  size_t size = chunksize(x);
+  if (is_small(size)) {
+    bindex_t sidx = small_index(size);
+    sbinptr b = smallbin_at(m, sidx);
+    if (smallmap_is_marked(m, sidx)) {
+      mchunkptr p = b;
+      do {
+        if (p == x)
+          return 1;
+      } while ((p = p->fd) != b);
+    }
+  }
+  else {
+    bindex_t tidx;
+    compute_tree_index(size, tidx);
+    if (treemap_is_marked(m, tidx)) {
+      tchunkptr t = *treebin_at(m, tidx);
+      size_t sizebits = size << leftshift_for_tree_index(tidx);
+      /* descend one level per top bit of sizebits until the size matches */
+      while (t != 0 && chunksize(t) != size) {
+        t = t->child[(sizebits >> (SIZE_T_BITSIZE-SIZE_T_ONE)) & 1];
+        sizebits <<= 1;
+      }
+      if (t != 0) {
+        /* scan the fd ring of same-sized nodes for x */
+        tchunkptr u = t;
+        do {
+          if (u == (tchunkptr)x)
+            return 1;
+        } while ((u = u->fd) != t);
+      }
+    }
+  }
+  return 0;
+}
+
+/* Traverse each chunk and check it; return total */
+static size_t traverse_and_check(mstate m) {
+  size_t sum = 0;
+  if (is_initialized(m)) {
+    msegmentptr s = &m->seg;
+    sum += m->topsize + TOP_FOOT_SIZE;
+    while (s != 0) {
+      mchunkptr q = align_as_chunk(s->base);
+      mchunkptr lastq = 0;
+      assert(pinuse(q));
+      while (segment_holds(s, q) &&
+             q != m->top && q->head != FENCEPOST_HEAD) {
+        sum += chunksize(q);
+        if (is_inuse(q)) {
+          assert(!bin_find(m, q));
+          do_check_inuse_chunk(m, q);
+        }
+        else {
+          assert(q == m->dv || bin_find(m, q));
+          assert(lastq == 0 || is_inuse(lastq)); /* Not 2 consecutive free */
+          do_check_free_chunk(m, q);
+        }
+        lastq = q;
+        q = next_chunk(q);
+      }
+      s = s->next;
+    }
+  }
+  return sum;
+}
+
+/* Check all properties of malloc_state. */
+static void do_check_malloc_state(mstate m) {
+  bindex_t i;
+  size_t total;
+  /* check bins */
+  for (i = 0; i < NSMALLBINS; ++i)
+    do_check_smallbin(m, i);
+  for (i = 0; i < NTREEBINS; ++i)
+    do_check_treebin(m, i);
+
+  if (m->dvsize != 0) { /* check dv chunk */
+    do_check_any_chunk(m, m->dv);
+    assert(m->dvsize == chunksize(m->dv));
+    assert(m->dvsize >= MIN_CHUNK_SIZE);
+    assert(bin_find(m, m->dv) == 0);
+  }
+
+  if (m->top != 0) { /* check top chunk */
+    do_check_top_chunk(m, m->top);
+    /*assert(m->topsize == chunksize(m->top)); redundant */
+    assert(m->topsize > 0);
+    assert(bin_find(m, m->top) == 0);
+  }
+
+  total = traverse_and_check(m);
+  assert(total <= m->footprint);
+  assert(m->footprint <= m->max_footprint);
+}
+#endif /* DEBUG */
+
+/* ----------------------------- statistics ------------------------------ */
+/* internal_mallinfo walks every segment of m under PREACTION/POSTACTION
+   and returns the totals in a struct mallinfo; the struct stays all-zero
+   if PREACTION reports failure or m is not initialized. */
+#if !NO_MALLINFO
+static struct mallinfo internal_mallinfo(mstate m) {
+  struct mallinfo nm = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+  ensure_initialization();
+  if (!PREACTION(m)) {
+    check_malloc_state(m);
+    if (is_initialized(m)) {
+      size_t nfree = SIZE_T_ONE; /* top always free */
+      size_t mfree = m->topsize + TOP_FOOT_SIZE;
+      size_t sum = mfree;
+      msegmentptr s = &m->seg;
+      while (s != 0) {
+        mchunkptr q = align_as_chunk(s->base);
+        while (segment_holds(s, q) &&
+               q != m->top && q->head != FENCEPOST_HEAD) {
+          size_t sz = chunksize(q);
+          sum += sz;
+          if (!is_inuse(q)) {
+            mfree += sz;
+            ++nfree;
+          }
+          q = next_chunk(q);
+        }
+        s = s->next;
+      }
+
+      nm.arena = sum;
+      nm.ordblks = nfree;
+      nm.hblkhd = m->footprint - sum;
+      nm.usmblks = m->max_footprint;
+      nm.uordblks = m->footprint - mfree;
+      nm.fordblks = mfree;
+      nm.keepcost = m->topsize;
+    }
+
+    POSTACTION(m);
+  }
+  return nm;
+}
+#endif /* !NO_MALLINFO */
+/* Print the max footprint, current footprint and in-use byte count of m
+   to stderr; all three print as 0 when m is not initialized. */
+static void internal_malloc_stats(mstate m) {
+  ensure_initialization();
+  if (!PREACTION(m)) {
+    size_t maxfp = 0;
+    size_t fp = 0;
+    size_t used = 0;
+    check_malloc_state(m);
+    if (is_initialized(m)) {
+      msegmentptr s = &m->seg;
+      maxfp = m->max_footprint;
+      fp = m->footprint;
+      used = fp - (m->topsize + TOP_FOOT_SIZE);
+
+      while (s != 0) {
+        mchunkptr q = align_as_chunk(s->base);
+        while (segment_holds(s, q) &&
+               q != m->top && q->head != FENCEPOST_HEAD) {
+          if (!is_inuse(q))
+            used -= chunksize(q);
+          q = next_chunk(q);
+        }
+        s = s->next;
+      }
+    }
+
+    fprintf(stderr, "max system bytes = %10lu\n", (unsigned long)(maxfp));
+    fprintf(stderr, "system bytes = %10lu\n", (unsigned long)(fp));
+    fprintf(stderr, "in use bytes = %10lu\n", (unsigned long)(used));
+
+    POSTACTION(m);
+  }
+}
+
+/* ----------------------- Operations on smallbins ----------------------- */
+
+/*
+  Various forms of linking and unlinking are defined as macros. Even
+  the ones for trees, which are very long but have very short typical
+  paths. This is ugly but reduces reliance on inlining support of
+  compilers.
+*/
+
+/* Link a free chunk into a smallbin. P is linked in directly after the
+   bin header B; the bin's smallmap bit is set first if it was clear. */
+#define insert_small_chunk(M, P, S) {\
+  bindex_t I = small_index(S);\
+  mchunkptr B = smallbin_at(M, I);\
+  mchunkptr F = B;\
+  assert(S >= MIN_CHUNK_SIZE);\
+  if (!smallmap_is_marked(M, I))\
+    mark_smallmap(M, I);\
+  else if (RTCHECK(ok_address(M, B->fd)))\
+    F = B->fd;\
+  else {\
+    CORRUPTION_ERROR_ACTION(M);\
+  }\
+  B->fd = P;\
+  F->bk = P;\
+  P->fd = F;\
+  P->bk = B;\
+}
+
+/* Unlink a chunk from a smallbin. When P's two neighbours are the same
+   node (F == B) the bin's smallmap bit is cleared instead of relinking. */
+#define unlink_small_chunk(M, P, S) {\
+  mchunkptr F = P->fd;\
+  mchunkptr B = P->bk;\
+  bindex_t I = small_index(S);\
+  assert(P != B);\
+  assert(P != F);\
+  assert(chunksize(P) == small_index2size(I));\
+  if (F == B)\
+    clear_smallmap(M, I);\
+  else if (RTCHECK((F == smallbin_at(M,I) || ok_address(M, F)) &&\
+                   (B == smallbin_at(M,I) || ok_address(M, B)))) {\
+    F->bk = B;\
+    B->fd = F;\
+  }\
+  else {\
+    CORRUPTION_ERROR_ACTION(M);\
+  }\
+}
+
+/* Unlink the first chunk from a smallbin. B is the bin header, P its
+   first chunk and I the bin index, all supplied by the caller. */
+#define unlink_first_small_chunk(M, B, P, I) {\
+  mchunkptr F = P->fd;\
+  assert(P != B);\
+  assert(P != F);\
+  assert(chunksize(P) == small_index2size(I));\
+  if (B == F)\
+    clear_smallmap(M, I);\
+  else if (RTCHECK(ok_address(M, F))) {\
+    B->fd = F;\
+    F->bk = B;\
+  }\
+  else {\
+    CORRUPTION_ERROR_ACTION(M);\
+  }\
+}
+
+
+
+/* Replace dv node, binning the old one */
+/* Used only when dvsize known to be small */
+#define replace_dv(M, P, S) {\
+  size_t DVS = M->dvsize;\
+  if (DVS != 0) {\
+    mchunkptr DV = M->dv;\
+    assert(is_small(DVS));\
+    insert_small_chunk(M, DV, DVS);\
+  }\
+  M->dvsize = S;\
+  M->dv = P;\
+}
+
+/* ------------------------- Operations on trees ------------------------- */
+
+/* Insert chunk into tree. An empty bin takes X as its root. Otherwise the
+   tree is descended one bit of S per level: X fills the first empty child
+   slot met, or, if a node of the same size is found, X is chained into
+   that node's fd/bk ring with its parent set to 0. */
+#define insert_large_chunk(M, X, S) {\
+  tbinptr* H;\
+  bindex_t I;\
+  compute_tree_index(S, I);\
+  H = treebin_at(M, I);\
+  X->index = I;\
+  X->child[0] = X->child[1] = 0;\
+  if (!treemap_is_marked(M, I)) {\
+    mark_treemap(M, I);\
+    *H = X;\
+    X->parent = (tchunkptr)H;\
+    X->fd = X->bk = X;\
+  }\
+  else {\
+    tchunkptr T = *H;\
+    size_t K = S << leftshift_for_tree_index(I);\
+    for (;;) {\
+      if (chunksize(T) != S) {\
+        tchunkptr* C = &(T->child[(K >> (SIZE_T_BITSIZE-SIZE_T_ONE)) & 1]);\
+        K <<= 1;\
+        if (*C != 0)\
+          T = *C;\
+        else if (RTCHECK(ok_address(M, C))) {\
+          *C = X;\
+          X->parent = T;\
+          X->fd = X->bk = X;\
+          break;\
+        }\
+        else {\
+          CORRUPTION_ERROR_ACTION(M);\
+          break;\
+        }\
+      }\
+      else {\
+        tchunkptr F = T->fd;\
+        if (RTCHECK(ok_address(M, T) && ok_address(M, F))) {\
+          T->fd = F->bk = X;\
+          X->fd = F;\
+          X->bk = T;\
+          X->parent = 0;\
+          break;\
+        }\
+        else {\
+          CORRUPTION_ERROR_ACTION(M);\
+          break;\
+        }\
+      }\
+    }\
+  }\
+}
+
+/*
+  Unlink steps:
+
+  1. If x is a chained node, unlink it from its same-sized fd/bk links
+     and choose its bk node as its replacement.
+  2. If x was the last node of its size, but not a leaf node, it must
+     be replaced with a leaf node (not merely one with an open left or
+     right), to make sure that lefts and rights of descendents
+     correspond properly to bit masks. We use the rightmost descendent
+     of x. We could use any other leaf, but this is easy to locate and
+     tends to counteract removal of leftmosts elsewhere, and so keeps
+     paths shorter than minimally guaranteed. This doesn't loop much
+     because on average a node in a tree is near the bottom.
+  3. If x is the base of a chain (i.e., has parent links) relink
+     x's parent and children to x's replacement (or null if none).
+*/ + +#define unlink_large_chunk(M, X) {\ + tchunkptr XP = X->parent;\ + tchunkptr R;\ + if (X->bk != X) {\ + tchunkptr F = X->fd;\ + R = X->bk;\ + if (RTCHECK(ok_address(M, F))) {\ + F->bk = R;\ + R->fd = F;\ + }\ + else {\ + CORRUPTION_ERROR_ACTION(M);\ + }\ + }\ + else {\ + tchunkptr* RP;\ + if (((R = *(RP = &(X->child[1]))) != 0) ||\ + ((R = *(RP = &(X->child[0]))) != 0)) {\ + tchunkptr* CP;\ + while ((*(CP = &(R->child[1])) != 0) ||\ + (*(CP = &(R->child[0])) != 0)) {\ + R = *(RP = CP);\ + }\ + if (RTCHECK(ok_address(M, RP)))\ + *RP = 0;\ + else {\ + CORRUPTION_ERROR_ACTION(M);\ + }\ + }\ + }\ + if (XP != 0) {\ + tbinptr* H = treebin_at(M, X->index);\ + if (X == *H) {\ + if ((*H = R) == 0) \ + clear_treemap(M, X->index);\ + }\ + else if (RTCHECK(ok_address(M, XP))) {\ + if (XP->child[0] == X) \ + XP->child[0] = R;\ + else \ + XP->child[1] = R;\ + }\ + else\ + CORRUPTION_ERROR_ACTION(M);\ + if (R != 0) {\ + if (RTCHECK(ok_address(M, R))) {\ + tchunkptr C0, C1;\ + R->parent = XP;\ + if ((C0 = X->child[0]) != 0) {\ + if (RTCHECK(ok_address(M, C0))) {\ + R->child[0] = C0;\ + C0->parent = R;\ + }\ + else\ + CORRUPTION_ERROR_ACTION(M);\ + }\ + if ((C1 = X->child[1]) != 0) {\ + if (RTCHECK(ok_address(M, C1))) {\ + R->child[1] = C1;\ + C1->parent = R;\ + }\ + else\ + CORRUPTION_ERROR_ACTION(M);\ + }\ + }\ + else\ + CORRUPTION_ERROR_ACTION(M);\ + }\ + }\ +} + +/* Relays to large vs small bin operations */ + +#define insert_chunk(M, P, S)\ + if (is_small(S)) insert_small_chunk(M, P, S)\ + else { tchunkptr TP = (tchunkptr)(P); insert_large_chunk(M, TP, S); } + +#define unlink_chunk(M, P, S)\ + if (is_small(S)) unlink_small_chunk(M, P, S)\ + else { tchunkptr TP = (tchunkptr)(P); unlink_large_chunk(M, TP); } + + +/* Relays to internal calls to malloc/free from realloc, memalign etc */ + +#if ONLY_MSPACES +#define internal_malloc(m, b) mspace_malloc(m, b) +#define internal_free(m, mem) mspace_free(m,mem); +#else /* ONLY_MSPACES */ +#if MSPACES +#define internal_malloc(m, 
b)\ + (m == gm)? dlmalloc(b) : mspace_malloc(m, b) +#define internal_free(m, mem)\ + if (m == gm) dlfree(mem); else mspace_free(m,mem); +#else /* MSPACES */ +#define internal_malloc(m, b) dlmalloc(b) +#define internal_free(m, mem) dlfree(mem) +#endif /* MSPACES */ +#endif /* ONLY_MSPACES */ + +/* ----------------------- Direct-mmapping chunks ----------------------- */ + +/* + Directly mmapped chunks are set up with an offset to the start of + the mmapped region stored in the prev_foot field of the chunk. This + allows reconstruction of the required argument to MUNMAP when freed, + and also allows adjustment of the returned chunk to meet alignment + requirements (especially in memalign). +*/ + +/* Malloc using mmap */ +static void* mmap_alloc(mstate m, size_t nb) { + size_t mmsize = mmap_align(nb + SIX_SIZE_T_SIZES + CHUNK_ALIGN_MASK); + if (mmsize > nb) { /* Check for wrap around 0 */ + char* mm = (char*)(CALL_DIRECT_MMAP(mmsize)); + if (mm != CMFAIL) { + size_t offset = align_offset(chunk2mem(mm)); + size_t psize = mmsize - offset - MMAP_FOOT_PAD; + mchunkptr p = (mchunkptr)(mm + offset); + p->prev_foot = offset; + p->head = psize; + mark_inuse_foot(m, p, psize); + chunk_plus_offset(p, psize)->head = FENCEPOST_HEAD; + chunk_plus_offset(p, psize+SIZE_T_SIZE)->head = 0; + + if (m->least_addr == 0 || mm < m->least_addr) + m->least_addr = mm; + if ((m->footprint += mmsize) > m->max_footprint) + m->max_footprint = m->footprint; + assert(is_aligned(chunk2mem(p))); + check_mmapped_chunk(m, p); + return chunk2mem(p); + } + } + return 0; +} + +/* Realloc using mmap */ +static mchunkptr mmap_resize(mstate m, mchunkptr oldp, size_t nb) { + size_t oldsize = chunksize(oldp); + if (is_small(nb)) /* Can't shrink mmap regions below small size */ + return 0; + /* Keep old chunk if big enough but not too big */ + if (oldsize >= nb + SIZE_T_SIZE && + (oldsize - nb) <= (mparams.granularity << 1)) + return oldp; + else { + size_t offset = oldp->prev_foot; + size_t oldmmsize = oldsize 
+ offset + MMAP_FOOT_PAD; + size_t newmmsize = mmap_align(nb + SIX_SIZE_T_SIZES + CHUNK_ALIGN_MASK); + char* cp = (char*)CALL_MREMAP((char*)oldp - offset, + oldmmsize, newmmsize, 1); + if (cp != CMFAIL) { + mchunkptr newp = (mchunkptr)(cp + offset); + size_t psize = newmmsize - offset - MMAP_FOOT_PAD; + newp->head = psize; + mark_inuse_foot(m, newp, psize); + chunk_plus_offset(newp, psize)->head = FENCEPOST_HEAD; + chunk_plus_offset(newp, psize+SIZE_T_SIZE)->head = 0; + + if (cp < m->least_addr) + m->least_addr = cp; + if ((m->footprint += newmmsize - oldmmsize) > m->max_footprint) + m->max_footprint = m->footprint; + check_mmapped_chunk(m, newp); + return newp; + } + } + return 0; +} + +/* -------------------------- mspace management -------------------------- */ + +/* Initialize top chunk and its size */ +static void init_top(mstate m, mchunkptr p, size_t psize) { + /* Ensure alignment */ + size_t offset = align_offset(chunk2mem(p)); + p = (mchunkptr)((char*)p + offset); + psize -= offset; + + m->top = p; + m->topsize = psize; + p->head = psize | PINUSE_BIT; + /* set size of fake trailing chunk holding overhead space only once */ + chunk_plus_offset(p, psize)->head = TOP_FOOT_SIZE; + m->trim_check = mparams.trim_threshold; /* reset on each update */ +} + +/* Initialize bins for a new mstate that is otherwise zeroed out */ +static void init_bins(mstate m) { + /* Establish circular links for smallbins */ + bindex_t i; + for (i = 0; i < NSMALLBINS; ++i) { + sbinptr bin = smallbin_at(m,i); + bin->fd = bin->bk = bin; + } +} + +#if PROCEED_ON_ERROR + +/* default corruption action */ +static void reset_on_error(mstate m) { + int i; + ++malloc_corruption_error_count; + /* Reinitialize fields to forget about all memory */ + m->smallbins = m->treebins = 0; + m->dvsize = m->topsize = 0; + m->seg.base = 0; + m->seg.size = 0; + m->seg.next = 0; + m->top = m->dv = 0; + for (i = 0; i < NTREEBINS; ++i) + *treebin_at(m, i) = 0; + init_bins(m); +} +#endif /* PROCEED_ON_ERROR */ + 
+/* Allocate chunk and prepend remainder with chunk in successor base.
+   The nb-byte chunk is carved at newbase and its user pointer returned;
+   the space left before the old first chunk is merged with that chunk
+   when it is top, dv or free, and otherwise binned as a free chunk. */
+static void* prepend_alloc(mstate m, char* newbase, char* oldbase,
+                           size_t nb) {
+  mchunkptr p = align_as_chunk(newbase);
+  mchunkptr oldfirst = align_as_chunk(oldbase);
+  size_t psize = (char*)oldfirst - (char*)p;
+  mchunkptr q = chunk_plus_offset(p, nb);
+  size_t qsize = psize - nb;
+  set_size_and_pinuse_of_inuse_chunk(m, p, nb);
+
+  assert((char*)oldfirst > (char*)q);
+  assert(pinuse(oldfirst));
+  assert(qsize >= MIN_CHUNK_SIZE);
+
+  /* consolidate remainder with first chunk of old base */
+  if (oldfirst == m->top) {
+    size_t tsize = m->topsize += qsize;
+    m->top = q;
+    q->head = tsize | PINUSE_BIT;
+    check_top_chunk(m, q);
+  }
+  else if (oldfirst == m->dv) {
+    size_t dsize = m->dvsize += qsize;
+    m->dv = q;
+    set_size_and_pinuse_of_free_chunk(q, dsize);
+  }
+  else {
+    if (!is_inuse(oldfirst)) {
+      size_t nsize = chunksize(oldfirst);
+      unlink_chunk(m, oldfirst, nsize);
+      oldfirst = chunk_plus_offset(oldfirst, nsize);
+      qsize += nsize;
+    }
+    set_free_with_pinuse(q, qsize, oldfirst);
+    insert_chunk(m, q, qsize);
+    check_free_chunk(m, q);
+  }
+
+  check_malloced_chunk(m, chunk2mem(p), nb);
+  return chunk2mem(p);
+}
+
+/* Add a segment to hold a new noncontiguous region. The previous segment
+   record is copied into a chunk placed near the end of the old top's
+   segment, top is reset to the new region, and whatever is left of the
+   old top is binned. */
+static void add_segment(mstate m, char* tbase, size_t tsize, flag_t mmapped) {
+  /* Determine locations and sizes of segment, fenceposts, old top */
+  char* old_top = (char*)m->top;
+  msegmentptr oldsp = segment_holding(m, old_top);
+  char* old_end = oldsp->base + oldsp->size;
+  size_t ssize = pad_request(sizeof(struct malloc_segment));
+  char* rawsp = old_end - (ssize + FOUR_SIZE_T_SIZES + CHUNK_ALIGN_MASK);
+  size_t offset = align_offset(chunk2mem(rawsp));
+  char* asp = rawsp + offset;
+  char* csp = (asp < (old_top + MIN_CHUNK_SIZE))? old_top : asp;
+  mchunkptr sp = (mchunkptr)csp;
+  msegmentptr ss = (msegmentptr)(chunk2mem(sp));
+  mchunkptr tnext = chunk_plus_offset(sp, ssize);
+  mchunkptr p = tnext;
+  int nfences = 0;
+
+  /* reset top to new space */
+  init_top(m, (mchunkptr)tbase, tsize - TOP_FOOT_SIZE);
+
+  /* Set up segment record */
+  assert(is_aligned(ss));
+  set_size_and_pinuse_of_inuse_chunk(m, sp, ssize);
+  *ss = m->seg; /* Push current record */
+  m->seg.base = tbase;
+  m->seg.size = tsize;
+  m->seg.sflags = mmapped;
+  m->seg.next = ss;
+
+  /* Insert trailing fenceposts */
+  for (;;) {
+    mchunkptr nextp = chunk_plus_offset(p, SIZE_T_SIZE);
+    p->head = FENCEPOST_HEAD;
+    ++nfences;
+    if ((char*)(&(nextp->head)) < old_end)
+      p = nextp;
+    else
+      break;
+  }
+  assert(nfences >= 2);
+
+  /* Insert the rest of old top into a bin as an ordinary free chunk */
+  if (csp != old_top) {
+    mchunkptr q = (mchunkptr)old_top;
+    size_t psize = csp - old_top;
+    mchunkptr tn = chunk_plus_offset(q, psize);
+    set_free_with_pinuse(q, psize, tn);
+    insert_chunk(m, q, psize);
+  }
+
+  check_top_chunk(m, m->top);
+}
+
+/* -------------------------- System allocation -------------------------- */
+
+/* Get memory from system using MORECORE or MMAP. Returns the user pointer
+   of an nb-byte chunk, or 0 after running MALLOC_FAILURE_ACTION. */
+static void* sys_alloc(mstate m, size_t nb) {
+  char* tbase = CMFAIL;
+  size_t tsize = 0;
+  flag_t mmap_flag = 0;
+
+  ensure_initialization();
+
+  /* Directly map large chunks, but only if already initialized */
+  if (use_mmap(m) && nb >= mparams.mmap_threshold && m->topsize != 0) {
+    void* mem = mmap_alloc(m, nb);
+    if (mem != 0)
+      return mem;
+  }
+
+  /*
+    Try getting memory in any of three ways (in most-preferred to
+    least-preferred order):
+    1. A call to MORECORE that can normally contiguously extend memory.
+       (disabled if not MORECORE_CONTIGUOUS or not HAVE_MORECORE
+       or main space is mmapped or a previous contiguous call failed)
+    2. A call to MMAP new space (disabled if not HAVE_MMAP).
+       Note that under the default settings, if MORECORE is unable to
+       fulfill a request, and HAVE_MMAP is true, then mmap is
+       used as a noncontiguous system allocator. This is a useful backup
+       strategy for systems with holes in address spaces -- in this case
+       sbrk cannot contiguously expand the heap, but mmap may be able to
+       find space.
+    3. A call to MORECORE that cannot usually contiguously extend memory.
+       (disabled if not HAVE_MORECORE)
+
+   In all cases, we need to request enough bytes from system to ensure
+   we can malloc nb bytes upon success, so pad with enough space for
+   top_foot, plus alignment-pad to make sure we don't lose bytes if
+   not on boundary, and round this up to a granularity unit.
+  */
+
+  if (MORECORE_CONTIGUOUS && !use_noncontiguous(m)) {
+    char* br = CMFAIL;
+    msegmentptr ss = (m->top == 0)? 0 : segment_holding(m, (char*)m->top);
+    size_t asize = 0;
+    ACQUIRE_MALLOC_GLOBAL_LOCK();
+
+    if (ss == 0) {  /* First time through or recovery */
+      char* base = (char*)CALL_MORECORE(0);
+      if (base != CMFAIL) {
+        asize = granularity_align(nb + SYS_ALLOC_PADDING);
+        /* Adjust to end on a page boundary */
+        if (!is_page_aligned(base))
+          asize += (page_align((size_t)base) - (size_t)base);
+        /* Can't call MORECORE if size is negative when treated as signed */
+        if (asize < HALF_MAX_SIZE_T &&
+            (br = (char*)(CALL_MORECORE(asize))) == base) {
+          tbase = base;
+          tsize = asize;
+        }
+      }
+    }
+    else {
+      /* Subtract out existing available top space from MORECORE request. */
+      asize = granularity_align(nb - m->topsize + SYS_ALLOC_PADDING);
+      /* Use mem here only if it did continuously extend old space */
+      if (asize < HALF_MAX_SIZE_T &&
+          (br = (char*)(CALL_MORECORE(asize))) == ss->base+ss->size) {
+        tbase = br;
+        tsize = asize;
+      }
+    }
+
+    if (tbase == CMFAIL) {    /* Cope with partial failure */
+      if (br != CMFAIL) {    /* Try to use/extend the space we did get */
+        if (asize < HALF_MAX_SIZE_T &&
+            asize < nb + SYS_ALLOC_PADDING) {
+          size_t esize = granularity_align(nb + SYS_ALLOC_PADDING - asize);
+          if (esize < HALF_MAX_SIZE_T) {
+            char* end = (char*)CALL_MORECORE(esize);
+            if (end != CMFAIL)
+              asize += esize;
+            else {            /* Can't use; try to release */
+              (void) CALL_MORECORE(-asize);
+              br = CMFAIL;
+            }
+          }
+        }
+      }
+      if (br != CMFAIL) {    /* Use the space we did get */
+        tbase = br;
+        tsize = asize;
+      }
+      else
+        disable_contiguous(m); /* Don't try contiguous path in the future */
+    }
+
+    RELEASE_MALLOC_GLOBAL_LOCK();
+  }
+
+  if (HAVE_MMAP && tbase == CMFAIL) {  /* Try MMAP */
+    size_t rsize = granularity_align(nb + SYS_ALLOC_PADDING);
+    if (rsize > nb) { /* Fail if wraps around zero */
+      char* mp = (char*)(CALL_MMAP(rsize));
+      if (mp != CMFAIL) {
+        tbase = mp;
+        tsize = rsize;
+        mmap_flag = USE_MMAP_BIT;
+      }
+    }
+  }
+
+  if (HAVE_MORECORE && tbase == CMFAIL) { /* Try noncontiguous MORECORE */
+    size_t asize = granularity_align(nb + SYS_ALLOC_PADDING);
+    if (asize < HALF_MAX_SIZE_T) {
+      char* br = CMFAIL;
+      char* end = CMFAIL;
+      ACQUIRE_MALLOC_GLOBAL_LOCK();
+      br = (char*)(CALL_MORECORE(asize));
+      end = (char*)(CALL_MORECORE(0));
+      RELEASE_MALLOC_GLOBAL_LOCK();
+      if (br != CMFAIL && end != CMFAIL && br < end) {
+        size_t ssize = end - br;
+        if (ssize > nb + TOP_FOOT_SIZE) {
+          tbase = br;
+          tsize = ssize;
+        }
+      }
+    }
+  }
+
+  if (tbase != CMFAIL) {
+
+    if ((m->footprint += tsize) > m->max_footprint)
+      m->max_footprint = m->footprint;
+
+    if (!is_initialized(m)) { /* first-time initialization */
+      if (m->least_addr == 0 || tbase < m->least_addr)
+        m->least_addr = tbase;
+      m->seg.base = tbase;
+      m->seg.size = tsize;
+      m->seg.sflags = mmap_flag;
+      m->magic = mparams.magic;
+      m->release_checks = MAX_RELEASE_CHECK_RATE;
+      init_bins(m);
+#if !ONLY_MSPACES
+      if (is_global(m))
+        init_top(m, (mchunkptr)tbase, tsize - TOP_FOOT_SIZE);
+      else
+#endif
+      {
+        /* Offset top by embedded malloc_state */
+        mchunkptr mn = next_chunk(mem2chunk(m));
+        init_top(m, mn, (size_t)((tbase + tsize) - (char*)mn) -TOP_FOOT_SIZE);
+      }
+    }
+
+    else {
+      /* Try to merge with an existing segment */
+      msegmentptr sp = &m->seg;
+      /* Only consider most recent segment if traversal suppressed */
+      while (sp != 0 && tbase != sp->base + sp->size)
+        sp = (NO_SEGMENT_TRAVERSAL) ? 0 : sp->next;
+      if (sp != 0 &&
+          !is_extern_segment(sp) &&
+          (sp->sflags & USE_MMAP_BIT) == mmap_flag &&
+          segment_holds(sp, m->top)) { /* append */
+        sp->size += tsize;
+        init_top(m, m->top, m->topsize + tsize);
+      }
+      else {
+        if (tbase < m->least_addr)
+          m->least_addr = tbase;
+        sp = &m->seg;
+        /* look for a segment that starts where the new space ends */
+        while (sp != 0 && sp->base != tbase + tsize)
+          sp = (NO_SEGMENT_TRAVERSAL) ? 0 : sp->next;
+        if (sp != 0 &&
+            !is_extern_segment(sp) &&
+            (sp->sflags & USE_MMAP_BIT) == mmap_flag) {
+          char* oldbase = sp->base;
+          sp->base = tbase;
+          sp->size += tsize;
+          return prepend_alloc(m, tbase, oldbase, nb);
+        }
+        else
+          add_segment(m, tbase, tsize, mmap_flag);
+      }
+    }
+
+    if (nb < m->topsize) { /* Allocate from new or extended top space */
+      size_t rsize = m->topsize -= nb;
+      mchunkptr p = m->top;
+      mchunkptr r = m->top = chunk_plus_offset(p, nb);
+      r->head = rsize | PINUSE_BIT;
+      set_size_and_pinuse_of_inuse_chunk(m, p, nb);
+      check_top_chunk(m, m->top);
+      check_malloced_chunk(m, chunk2mem(p), nb);
+      return chunk2mem(p);
+    }
+  }
+
+  MALLOC_FAILURE_ACTION;
+  return 0;
+}
+
+/* ----------------------- system deallocation -------------------------- */
+
+/* Unmap and unlink any mmapped segments that don't contain used chunks */
+static size_t release_unused_segments(mstate m) {
+  size_t released = 0;
+  int nsegs = 0;
+  msegmentptr pred = &m->seg;
+  msegmentptr sp = pred->next;
+  while (sp != 0) {
+    char* base = sp->base;
+    size_t size = sp->size;
+    msegmentptr next = sp->next;
+    ++nsegs;
+    if (is_mmapped_segment(sp) && !is_extern_segment(sp)) {
+      mchunkptr p = align_as_chunk(base);
+      size_t psize = chunksize(p);
+      /* Can unmap if first chunk holds entire segment and not pinned */
+      if (!is_inuse(p) && (char*)p + psize >= base + size - TOP_FOOT_SIZE) {
+        tchunkptr tp = (tchunkptr)p;
+        assert(segment_holds(sp, (char*)sp));
+        if (p == m->dv) {
+          m->dv = 0;
+          m->dvsize = 0;
+        }
+        else {
+          unlink_large_chunk(m, tp);
+        }
+        if (CALL_MUNMAP(base, size) == 0) {
+          released += size;
+          m->footprint -= size;
+          /* unlink obsoleted record */
+          sp = pred;
+          sp->next = next;
+        }
+        else { /* back out if cannot unmap */
+          insert_large_chunk(m, tp, psize);
+        }
+      }
+    }
+    if (NO_SEGMENT_TRAVERSAL) /* scan only first segment */
+      break;
+    pred = sp;
+    sp = next;
+  }
+  /* Reset check counter */
+  m->release_checks = ((nsegs > MAX_RELEASE_CHECK_RATE)?
+ nsegs : MAX_RELEASE_CHECK_RATE); + return released; +} + +static int sys_trim(mstate m, size_t pad) { + size_t released = 0; + ensure_initialization(); + if (pad < MAX_REQUEST && is_initialized(m)) { + pad += TOP_FOOT_SIZE; /* ensure enough room for segment overhead */ + + if (m->topsize > pad) { + /* Shrink top space in granularity-size units, keeping at least one */ + size_t unit = mparams.granularity; + size_t extra = ((m->topsize - pad + (unit - SIZE_T_ONE)) / unit - + SIZE_T_ONE) * unit; + msegmentptr sp = segment_holding(m, (char*)m->top); + + if (!is_extern_segment(sp)) { + if (is_mmapped_segment(sp)) { + if (HAVE_MMAP && + sp->size >= extra && + !has_segment_link(m, sp)) { /* can't shrink if pinned */ + size_t newsize = sp->size - extra; + /* Prefer mremap, fall back to munmap */ + if ((CALL_MREMAP(sp->base, sp->size, newsize, 0) != MFAIL) || + (CALL_MUNMAP(sp->base + newsize, extra) == 0)) { + released = extra; + } + } + } + else if (HAVE_MORECORE) { + if (extra >= HALF_MAX_SIZE_T) /* Avoid wrapping negative */ + extra = (HALF_MAX_SIZE_T) + SIZE_T_ONE - unit; + ACQUIRE_MALLOC_GLOBAL_LOCK(); + { + /* Make sure end of memory is where we last set it. */ + char* old_br = (char*)(CALL_MORECORE(0)); + if (old_br == sp->base + sp->size) { + char* rel_br = (char*)(CALL_MORECORE(-extra)); + char* new_br = (char*)(CALL_MORECORE(0)); + if (rel_br != CMFAIL && new_br < old_br) + released = old_br - new_br; + } + } + RELEASE_MALLOC_GLOBAL_LOCK(); + } + } + + if (released != 0) { + sp->size -= released; + m->footprint -= released; + init_top(m, m->top, m->topsize - released); + check_top_chunk(m, m->top); + } + } + + /* Unmap any unused mmapped segments */ + if (HAVE_MMAP) + released += release_unused_segments(m); + + /* On failure, disable autotrim to avoid repeated failed future calls */ + if (released == 0 && m->topsize > m->trim_check) + m->trim_check = MAX_SIZE_T; + } + + return (released != 0)? 
1 : 0; +} + + +/* ---------------------------- malloc support --------------------------- */ + +/* allocate a large request from the best fitting chunk in a treebin */ +static void* tmalloc_large(mstate m, size_t nb) { + tchunkptr v = 0; + size_t rsize = -nb; /* Unsigned negation */ + tchunkptr t; + bindex_t idx; + compute_tree_index(nb, idx); + if ((t = *treebin_at(m, idx)) != 0) { + /* Traverse tree for this bin looking for node with size == nb */ + size_t sizebits = nb << leftshift_for_tree_index(idx); + tchunkptr rst = 0; /* The deepest untaken right subtree */ + for (;;) { + tchunkptr rt; + size_t trem = chunksize(t) - nb; + if (trem < rsize) { + v = t; + if ((rsize = trem) == 0) + break; + } + rt = t->child[1]; + t = t->child[(sizebits >> (SIZE_T_BITSIZE-SIZE_T_ONE)) & 1]; + if (rt != 0 && rt != t) + rst = rt; + if (t == 0) { + t = rst; /* set t to least subtree holding sizes > nb */ + break; + } + sizebits <<= 1; + } + } + if (t == 0 && v == 0) { /* set t to root of next non-empty treebin */ + binmap_t leftbits = left_bits(idx2bit(idx)) & m->treemap; + if (leftbits != 0) { + bindex_t i; + binmap_t leastbit = least_bit(leftbits); + compute_bit2idx(leastbit, i); + t = *treebin_at(m, i); + } + } + + while (t != 0) { /* find smallest of tree or subtree */ + size_t trem = chunksize(t) - nb; + if (trem < rsize) { + rsize = trem; + v = t; + } + t = leftmost_child(t); + } + + /* If dv is a better fit, return 0 so malloc will use it */ + if (v != 0 && rsize < (size_t)(m->dvsize - nb)) { + if (RTCHECK(ok_address(m, v))) { /* split */ + mchunkptr r = chunk_plus_offset(v, nb); + assert(chunksize(v) == rsize + nb); + if (RTCHECK(ok_next(v, r))) { + unlink_large_chunk(m, v); + if (rsize < MIN_CHUNK_SIZE) + set_inuse_and_pinuse(m, v, (rsize + nb)); + else { + set_size_and_pinuse_of_inuse_chunk(m, v, nb); + set_size_and_pinuse_of_free_chunk(r, rsize); + insert_chunk(m, r, rsize); + } + return chunk2mem(v); + } + } + CORRUPTION_ERROR_ACTION(m); + } + return 0; +} + +/* 
allocate a small request from the best fitting chunk in a treebin */ +static void* tmalloc_small(mstate m, size_t nb) { + tchunkptr t, v; + size_t rsize; + bindex_t i; + binmap_t leastbit = least_bit(m->treemap); + compute_bit2idx(leastbit, i); + v = t = *treebin_at(m, i); + rsize = chunksize(t) - nb; + + while ((t = leftmost_child(t)) != 0) { + size_t trem = chunksize(t) - nb; + if (trem < rsize) { + rsize = trem; + v = t; + } + } + + if (RTCHECK(ok_address(m, v))) { + mchunkptr r = chunk_plus_offset(v, nb); + assert(chunksize(v) == rsize + nb); + if (RTCHECK(ok_next(v, r))) { + unlink_large_chunk(m, v); + if (rsize < MIN_CHUNK_SIZE) + set_inuse_and_pinuse(m, v, (rsize + nb)); + else { + set_size_and_pinuse_of_inuse_chunk(m, v, nb); + set_size_and_pinuse_of_free_chunk(r, rsize); + replace_dv(m, r, rsize); + } + return chunk2mem(v); + } + } + + CORRUPTION_ERROR_ACTION(m); + return 0; +} + +/* --------------------------- realloc support --------------------------- */ + +static void* internal_realloc(mstate m, void* oldmem, size_t bytes) { + if (bytes >= MAX_REQUEST) { + MALLOC_FAILURE_ACTION; + return 0; + } + if (!PREACTION(m)) { + mchunkptr oldp = mem2chunk(oldmem); + size_t oldsize = chunksize(oldp); + mchunkptr next = chunk_plus_offset(oldp, oldsize); + mchunkptr newp = 0; + void* extra = 0; + + /* Try to either shrink or extend into top. 
Else malloc-copy-free */ + + if (RTCHECK(ok_address(m, oldp) && ok_inuse(oldp) && + ok_next(oldp, next) && ok_pinuse(next))) { + size_t nb = request2size(bytes); + if (is_mmapped(oldp)) + newp = mmap_resize(m, oldp, nb); + else if (oldsize >= nb) { /* already big enough */ + size_t rsize = oldsize - nb; + newp = oldp; + if (rsize >= MIN_CHUNK_SIZE) { + mchunkptr remainder = chunk_plus_offset(newp, nb); + set_inuse(m, newp, nb); + set_inuse_and_pinuse(m, remainder, rsize); + extra = chunk2mem(remainder); + } + } + else if (next == m->top && oldsize + m->topsize > nb) { + /* Expand into top */ + size_t newsize = oldsize + m->topsize; + size_t newtopsize = newsize - nb; + mchunkptr newtop = chunk_plus_offset(oldp, nb); + set_inuse(m, oldp, nb); + newtop->head = newtopsize |PINUSE_BIT; + m->top = newtop; + m->topsize = newtopsize; + newp = oldp; + } + } + else { + USAGE_ERROR_ACTION(m, oldmem); + POSTACTION(m); + return 0; + } +#if DEBUG + if (newp != 0) { + check_inuse_chunk(m, newp); /* Check requires lock */ + } +#endif + + POSTACTION(m); + + if (newp != 0) { + if (extra != 0) { + internal_free(m, extra); + } + return chunk2mem(newp); + } + else { + void* newmem = internal_malloc(m, bytes); + if (newmem != 0) { + size_t oc = oldsize - overhead_for(oldp); + memcpy(newmem, oldmem, (oc < bytes)? 
oc : bytes); + internal_free(m, oldmem); + } + return newmem; + } + } + return 0; +} + +/* --------------------------- memalign support -------------------------- */ + +static void* internal_memalign(mstate m, size_t alignment, size_t bytes) { + if (alignment <= MALLOC_ALIGNMENT) /* Can just use malloc */ + return internal_malloc(m, bytes); + if (alignment < MIN_CHUNK_SIZE) /* must be at least a minimum chunk size */ + alignment = MIN_CHUNK_SIZE; + if ((alignment & (alignment-SIZE_T_ONE)) != 0) {/* Ensure a power of 2 */ + size_t a = MALLOC_ALIGNMENT << 1; + while (a < alignment) a <<= 1; + alignment = a; + } + + if (bytes >= MAX_REQUEST - alignment) { + if (m != 0) { /* Test isn't needed but avoids compiler warning */ + MALLOC_FAILURE_ACTION; + } + } + else { + size_t nb = request2size(bytes); + size_t req = nb + alignment + MIN_CHUNK_SIZE - CHUNK_OVERHEAD; + char* mem = (char*)internal_malloc(m, req); + if (mem != 0) { + void* leader = 0; + void* trailer = 0; + mchunkptr p = mem2chunk(mem); + + if (PREACTION(m)) return 0; + if ((((size_t)(mem)) % alignment) != 0) { /* misaligned */ + /* + Find an aligned spot inside chunk. Since we need to give + back leading space in a chunk of at least MIN_CHUNK_SIZE, if + the first calculation places us at a spot with less than + MIN_CHUNK_SIZE leader, we can move to the next aligned spot. + We've allocated enough total room so that this is always + possible. + */ + char* br = (char*)mem2chunk((size_t)(((size_t)(mem + + alignment - + SIZE_T_ONE)) & + -alignment)); + char* pos = ((size_t)(br - (char*)(p)) >= MIN_CHUNK_SIZE)? 
+ br : br+alignment; + mchunkptr newp = (mchunkptr)pos; + size_t leadsize = pos - (char*)(p); + size_t newsize = chunksize(p) - leadsize; + + if (is_mmapped(p)) { /* For mmapped chunks, just adjust offset */ + newp->prev_foot = p->prev_foot + leadsize; + newp->head = newsize; + } + else { /* Otherwise, give back leader, use the rest */ + set_inuse(m, newp, newsize); + set_inuse(m, p, leadsize); + leader = chunk2mem(p); + } + p = newp; + } + + /* Give back spare room at the end */ + if (!is_mmapped(p)) { + size_t size = chunksize(p); + if (size > nb + MIN_CHUNK_SIZE) { + size_t remainder_size = size - nb; + mchunkptr remainder = chunk_plus_offset(p, nb); + set_inuse(m, p, nb); + set_inuse(m, remainder, remainder_size); + trailer = chunk2mem(remainder); + } + } + + assert (chunksize(p) >= nb); + assert((((size_t)(chunk2mem(p))) % alignment) == 0); + check_inuse_chunk(m, p); + POSTACTION(m); + if (leader != 0) { + internal_free(m, leader); + } + if (trailer != 0) { + internal_free(m, trailer); + } + return chunk2mem(p); + } + } + return 0; +} + +/* ------------------------ comalloc/coalloc support --------------------- */ + +static void** ialloc(mstate m, + size_t n_elements, + size_t* sizes, + int opts, + void* chunks[]) { + /* + This provides common support for independent_X routines, handling + all of the combinations that can result. 
+ + The opts arg has: + bit 0 set if all elements are same size (using sizes[0]) + bit 1 set if elements should be zeroed + */ + + size_t element_size; /* chunksize of each element, if all same */ + size_t contents_size; /* total size of elements */ + size_t array_size; /* request size of pointer array */ + void* mem; /* malloced aggregate space */ + mchunkptr p; /* corresponding chunk */ + size_t remainder_size; /* remaining bytes while splitting */ + void** marray; /* either "chunks" or malloced ptr array */ + mchunkptr array_chunk; /* chunk for malloced ptr array */ + flag_t was_enabled; /* to disable mmap */ + size_t size; + size_t i; + + ensure_initialization(); + /* compute array length, if needed */ + if (chunks != 0) { + if (n_elements == 0) + return chunks; /* nothing to do */ + marray = chunks; + array_size = 0; + } + else { + /* if empty req, must still return chunk representing empty array */ + if (n_elements == 0) + return (void**)internal_malloc(m, 0); + marray = 0; + array_size = request2size(n_elements * (sizeof(void*))); + } + + /* compute total element size */ + if (opts & 0x1) { /* all-same-size */ + element_size = request2size(*sizes); + contents_size = n_elements * element_size; + } + else { /* add up all the sizes */ + element_size = 0; + contents_size = 0; + for (i = 0; i != n_elements; ++i) + contents_size += request2size(sizes[i]); + } + + size = contents_size + array_size; + + /* + Allocate the aggregate chunk. First disable direct-mmapping so + malloc won't use it, since we would not be able to later + free/realloc space internal to a segregated mmap region. 
+ */ + was_enabled = use_mmap(m); + disable_mmap(m); + mem = internal_malloc(m, size - CHUNK_OVERHEAD); + if (was_enabled) + enable_mmap(m); + if (mem == 0) + return 0; + + if (PREACTION(m)) return 0; + p = mem2chunk(mem); + remainder_size = chunksize(p); + + assert(!is_mmapped(p)); + + if (opts & 0x2) { /* optionally clear the elements */ + memset((size_t*)mem, 0, remainder_size - SIZE_T_SIZE - array_size); + } + + /* If not provided, allocate the pointer array as final part of chunk */ + if (marray == 0) { + size_t array_chunk_size; + array_chunk = chunk_plus_offset(p, contents_size); + array_chunk_size = remainder_size - contents_size; + marray = (void**) (chunk2mem(array_chunk)); + set_size_and_pinuse_of_inuse_chunk(m, array_chunk, array_chunk_size); + remainder_size = contents_size; + } + + /* split out elements */ + for (i = 0; ; ++i) { + marray[i] = chunk2mem(p); + if (i != n_elements-1) { + if (element_size != 0) + size = element_size; + else + size = request2size(sizes[i]); + remainder_size -= size; + set_size_and_pinuse_of_inuse_chunk(m, p, size); + p = chunk_plus_offset(p, size); + } + else { /* the final element absorbs any overallocation slop */ + set_size_and_pinuse_of_inuse_chunk(m, p, remainder_size); + break; + } + } + +#if DEBUG + if (marray != chunks) { + /* final element must have exactly exhausted chunk */ + if (element_size != 0) { + assert(remainder_size == element_size); + } + else { + assert(remainder_size == request2size(sizes[i])); + } + check_inuse_chunk(m, mem2chunk(marray)); + } + for (i = 0; i != n_elements; ++i) + check_inuse_chunk(m, mem2chunk(marray[i])); + +#endif /* DEBUG */ + + POSTACTION(m); + return marray; +} + + +/* -------------------------- public routines ---------------------------- */ + +#if !ONLY_MSPACES + +void* dlmalloc(size_t bytes) { + /* + Basic algorithm: + If a small request (< 256 bytes minus per-chunk overhead): + 1. If one exists, use a remainderless chunk in associated smallbin. 
+ (Remainderless means that there are too few excess bytes to + represent as a chunk.) + 2. If it is big enough, use the dv chunk, which is normally the + chunk adjacent to the one used for the most recent small request. + 3. If one exists, split the smallest available chunk in a bin, + saving remainder in dv. + 4. If it is big enough, use the top chunk. + 5. If available, get memory from system and use it + Otherwise, for a large request: + 1. Find the smallest available binned chunk that fits, and use it + if it is better fitting than dv chunk, splitting if necessary. + 2. If better fitting than any binned chunk, use the dv chunk. + 3. If it is big enough, use the top chunk. + 4. If request size >= mmap threshold, try to directly mmap this chunk. + 5. If available, get memory from system and use it + + The ugly goto's here ensure that postaction occurs along all paths. + */ + +#if USE_LOCKS + ensure_initialization(); /* initialize in sys_alloc if not using locks */ +#endif + + if (!PREACTION(gm)) { + void* mem; + size_t nb; + if (bytes <= MAX_SMALL_REQUEST) { + bindex_t idx; + binmap_t smallbits; + nb = (bytes < MIN_REQUEST)? MIN_CHUNK_SIZE : pad_request(bytes); + idx = small_index(nb); + smallbits = gm->smallmap >> idx; + + if ((smallbits & 0x3U) != 0) { /* Remainderless fit to a smallbin. 
*/ + mchunkptr b, p; + idx += ~smallbits & 1; /* Uses next bin if idx empty */ + b = smallbin_at(gm, idx); + p = b->fd; + assert(chunksize(p) == small_index2size(idx)); + unlink_first_small_chunk(gm, b, p, idx); + set_inuse_and_pinuse(gm, p, small_index2size(idx)); + mem = chunk2mem(p); + check_malloced_chunk(gm, mem, nb); + goto postaction; + } + + else if (nb > gm->dvsize) { + if (smallbits != 0) { /* Use chunk in next nonempty smallbin */ + mchunkptr b, p, r; + size_t rsize; + bindex_t i; + binmap_t leftbits = (smallbits << idx) & left_bits(idx2bit(idx)); + binmap_t leastbit = least_bit(leftbits); + compute_bit2idx(leastbit, i); + b = smallbin_at(gm, i); + p = b->fd; + assert(chunksize(p) == small_index2size(i)); + unlink_first_small_chunk(gm, b, p, i); + rsize = small_index2size(i) - nb; + /* Fit here cannot be remainderless if 4byte sizes */ + if (SIZE_T_SIZE != 4 && rsize < MIN_CHUNK_SIZE) + set_inuse_and_pinuse(gm, p, small_index2size(i)); + else { + set_size_and_pinuse_of_inuse_chunk(gm, p, nb); + r = chunk_plus_offset(p, nb); + set_size_and_pinuse_of_free_chunk(r, rsize); + replace_dv(gm, r, rsize); + } + mem = chunk2mem(p); + check_malloced_chunk(gm, mem, nb); + goto postaction; + } + + else if (gm->treemap != 0 && (mem = tmalloc_small(gm, nb)) != 0) { + check_malloced_chunk(gm, mem, nb); + goto postaction; + } + } + } + else if (bytes >= MAX_REQUEST) + nb = MAX_SIZE_T; /* Too big to allocate. 
Force failure (in sys alloc) */ + else { + nb = pad_request(bytes); + if (gm->treemap != 0 && (mem = tmalloc_large(gm, nb)) != 0) { + check_malloced_chunk(gm, mem, nb); + goto postaction; + } + } + + if (nb <= gm->dvsize) { + size_t rsize = gm->dvsize - nb; + mchunkptr p = gm->dv; + if (rsize >= MIN_CHUNK_SIZE) { /* split dv */ + mchunkptr r = gm->dv = chunk_plus_offset(p, nb); + gm->dvsize = rsize; + set_size_and_pinuse_of_free_chunk(r, rsize); + set_size_and_pinuse_of_inuse_chunk(gm, p, nb); + } + else { /* exhaust dv */ + size_t dvs = gm->dvsize; + gm->dvsize = 0; + gm->dv = 0; + set_inuse_and_pinuse(gm, p, dvs); + } + mem = chunk2mem(p); + check_malloced_chunk(gm, mem, nb); + goto postaction; + } + + else if (nb < gm->topsize) { /* Split top */ + size_t rsize = gm->topsize -= nb; + mchunkptr p = gm->top; + mchunkptr r = gm->top = chunk_plus_offset(p, nb); + r->head = rsize | PINUSE_BIT; + set_size_and_pinuse_of_inuse_chunk(gm, p, nb); + mem = chunk2mem(p); + check_top_chunk(gm, gm->top); + check_malloced_chunk(gm, mem, nb); + goto postaction; + } + + mem = sys_alloc(gm, nb); + + postaction: + POSTACTION(gm); + return mem; + } + + return 0; +} + +void dlfree(void* mem) { + /* + Consolidate freed chunks with preceeding or succeeding bordering + free chunks, if they exist, and then place in a bin. Intermixed + with special cases for top, dv, mmapped chunks, and usage errors. 
+ */ + + if (mem != 0) { + mchunkptr p = mem2chunk(mem); +#if FOOTERS + mstate fm = get_mstate_for(p); + if (!ok_magic(fm)) { + USAGE_ERROR_ACTION(fm, p); + return; + } +#else /* FOOTERS */ +#define fm gm +#endif /* FOOTERS */ + if (!PREACTION(fm)) { + check_inuse_chunk(fm, p); + if (RTCHECK(ok_address(fm, p) && ok_inuse(p))) { + size_t psize = chunksize(p); + mchunkptr next = chunk_plus_offset(p, psize); + if (!pinuse(p)) { + size_t prevsize = p->prev_foot; + if (is_mmapped(p)) { + psize += prevsize + MMAP_FOOT_PAD; + if (CALL_MUNMAP((char*)p - prevsize, psize) == 0) + fm->footprint -= psize; + goto postaction; + } + else { + mchunkptr prev = chunk_minus_offset(p, prevsize); + psize += prevsize; + p = prev; + if (RTCHECK(ok_address(fm, prev))) { /* consolidate backward */ + if (p != fm->dv) { + unlink_chunk(fm, p, prevsize); + } + else if ((next->head & INUSE_BITS) == INUSE_BITS) { + fm->dvsize = psize; + set_free_with_pinuse(p, psize, next); + goto postaction; + } + } + else + goto erroraction; + } + } + + if (RTCHECK(ok_next(p, next) && ok_pinuse(next))) { + if (!cinuse(next)) { /* consolidate forward */ + if (next == fm->top) { + size_t tsize = fm->topsize += psize; + fm->top = p; + p->head = tsize | PINUSE_BIT; + if (p == fm->dv) { + fm->dv = 0; + fm->dvsize = 0; + } + if (should_trim(fm, tsize)) + sys_trim(fm, 0); + goto postaction; + } + else if (next == fm->dv) { + size_t dsize = fm->dvsize += psize; + fm->dv = p; + set_size_and_pinuse_of_free_chunk(p, dsize); + goto postaction; + } + else { + size_t nsize = chunksize(next); + psize += nsize; + unlink_chunk(fm, next, nsize); + set_size_and_pinuse_of_free_chunk(p, psize); + if (p == fm->dv) { + fm->dvsize = psize; + goto postaction; + } + } + } + else + set_free_with_pinuse(p, psize, next); + + if (is_small(psize)) { + insert_small_chunk(fm, p, psize); + check_free_chunk(fm, p); + } + else { + tchunkptr tp = (tchunkptr)p; + insert_large_chunk(fm, tp, psize); + check_free_chunk(fm, p); + if 
(--fm->release_checks == 0) + release_unused_segments(fm); + } + goto postaction; + } + } + erroraction: + USAGE_ERROR_ACTION(fm, p); + postaction: + POSTACTION(fm); + } + } +#if !FOOTERS +#undef fm +#endif /* FOOTERS */ +} + +void* dlcalloc(size_t n_elements, size_t elem_size) { + void* mem; + size_t req = 0; + if (n_elements != 0) { + req = n_elements * elem_size; + if (((n_elements | elem_size) & ~(size_t)0xffff) && + (req / n_elements != elem_size)) + req = MAX_SIZE_T; /* force downstream failure on overflow */ + } + mem = dlmalloc(req); + if (mem != 0 && calloc_must_clear(mem2chunk(mem))) + memset(mem, 0, req); + return mem; +} + +void* dlrealloc(void* oldmem, size_t bytes) { + if (oldmem == 0) + return dlmalloc(bytes); +#ifdef REALLOC_ZERO_BYTES_FREES + if (bytes == 0) { + dlfree(oldmem); + return 0; + } +#endif /* REALLOC_ZERO_BYTES_FREES */ + else { +#if ! FOOTERS + mstate m = gm; +#else /* FOOTERS */ + mstate m = get_mstate_for(mem2chunk(oldmem)); + if (!ok_magic(m)) { + USAGE_ERROR_ACTION(m, oldmem); + return 0; + } +#endif /* FOOTERS */ + return internal_realloc(m, oldmem, bytes); + } +} + +void* dlmemalign(size_t alignment, size_t bytes) { + return internal_memalign(gm, alignment, bytes); +} + +void** dlindependent_calloc(size_t n_elements, size_t elem_size, + void* chunks[]) { + size_t sz = elem_size; /* serves as 1-element array */ + return ialloc(gm, n_elements, &sz, 3, chunks); +} + +void** dlindependent_comalloc(size_t n_elements, size_t sizes[], + void* chunks[]) { + return ialloc(gm, n_elements, sizes, 0, chunks); +} + +void* dlvalloc(size_t bytes) { + size_t pagesz; + ensure_initialization(); + pagesz = mparams.page_size; + return dlmemalign(pagesz, bytes); +} + +void* dlpvalloc(size_t bytes) { + size_t pagesz; + ensure_initialization(); + pagesz = mparams.page_size; + return dlmemalign(pagesz, (bytes + pagesz - SIZE_T_ONE) & ~(pagesz - SIZE_T_ONE)); +} + +int dlmalloc_trim(size_t pad) { + int result = 0; + ensure_initialization(); + if 
(!PREACTION(gm)) { + result = sys_trim(gm, pad); + POSTACTION(gm); + } + return result; +} + +size_t dlmalloc_footprint(void) { + return gm->footprint; +} + +size_t dlmalloc_max_footprint(void) { + return gm->max_footprint; +} + +#if !NO_MALLINFO +struct mallinfo dlmallinfo(void) { + return internal_mallinfo(gm); +} +#endif /* NO_MALLINFO */ + +void dlmalloc_stats() { + internal_malloc_stats(gm); +} + +int dlmallopt(int param_number, int value) { + return change_mparam(param_number, value); +} + +#endif /* !ONLY_MSPACES */ + +size_t dlmalloc_usable_size(void* mem) { + if (mem != 0) { + mchunkptr p = mem2chunk(mem); + if (is_inuse(p)) + return chunksize(p) - overhead_for(p); + } + return 0; +} + +/* ----------------------------- user mspaces ---------------------------- */ + +#if MSPACES + +static mstate init_user_mstate(char* tbase, size_t tsize) { + size_t msize = pad_request(sizeof(struct malloc_state)); + mchunkptr mn; + mchunkptr msp = align_as_chunk(tbase); + mstate m = (mstate)(chunk2mem(msp)); + memset(m, 0, msize); + INITIAL_LOCK(&m->mutex); + msp->head = (msize|INUSE_BITS); + m->seg.base = m->least_addr = tbase; + m->seg.size = m->footprint = m->max_footprint = tsize; + m->magic = mparams.magic; + m->release_checks = MAX_RELEASE_CHECK_RATE; + m->mflags = mparams.default_mflags; + m->extp = 0; + m->exts = 0; + disable_contiguous(m); + init_bins(m); + mn = next_chunk(mem2chunk(m)); + init_top(m, mn, (size_t)((tbase + tsize) - (char*)mn) - TOP_FOOT_SIZE); + check_top_chunk(m, m->top); + return m; +} + +mspace create_mspace(size_t capacity, int locked) { + mstate m = 0; + size_t msize; + ensure_initialization(); + msize = pad_request(sizeof(struct malloc_state)); + if (capacity < (size_t) -(msize + TOP_FOOT_SIZE + mparams.page_size)) { + size_t rs = ((capacity == 0)? 
mparams.granularity : + (capacity + TOP_FOOT_SIZE + msize)); + size_t tsize = granularity_align(rs); + char* tbase = (char*)(CALL_MMAP(tsize)); + if (tbase != CMFAIL) { + m = init_user_mstate(tbase, tsize); + m->seg.sflags = USE_MMAP_BIT; + set_lock(m, locked); + } + } + return (mspace)m; +} + +mspace create_mspace_with_base(void* base, size_t capacity, int locked) { + mstate m = 0; + size_t msize; + ensure_initialization(); + msize = pad_request(sizeof(struct malloc_state)); + if (capacity > msize + TOP_FOOT_SIZE && + capacity < (size_t) -(msize + TOP_FOOT_SIZE + mparams.page_size)) { + m = init_user_mstate((char*)base, capacity); + m->seg.sflags = EXTERN_BIT; + set_lock(m, locked); + } + return (mspace)m; +} + +int mspace_track_large_chunks(mspace msp, int enable) { + int ret = 0; + mstate ms = (mstate)msp; + if (!PREACTION(ms)) { + if (!use_mmap(ms)) + ret = 1; + if (!enable) + enable_mmap(ms); + else + disable_mmap(ms); + POSTACTION(ms); + } + return ret; +} + +size_t destroy_mspace(mspace msp) { + size_t freed = 0; + mstate ms = (mstate)msp; + if (ok_magic(ms)) { + msegmentptr sp = &ms->seg; + while (sp != 0) { + char* base = sp->base; + size_t size = sp->size; + flag_t flag = sp->sflags; + sp = sp->next; + if ((flag & USE_MMAP_BIT) && !(flag & EXTERN_BIT) && + CALL_MUNMAP(base, size) == 0) + freed += size; + } + } + else { + USAGE_ERROR_ACTION(ms,ms); + } + return freed; +} + +/* + mspace versions of routines are near-clones of the global + versions. This is not so nice but better than the alternatives. +*/ + + +void* mspace_malloc(mspace msp, size_t bytes) { + mstate ms = (mstate)msp; + if (!ok_magic(ms)) { + USAGE_ERROR_ACTION(ms,ms); + return 0; + } + if (!PREACTION(ms)) { + void* mem; + size_t nb; + if (bytes <= MAX_SMALL_REQUEST) { + bindex_t idx; + binmap_t smallbits; + nb = (bytes < MIN_REQUEST)? 
MIN_CHUNK_SIZE : pad_request(bytes); + idx = small_index(nb); + smallbits = ms->smallmap >> idx; + + if ((smallbits & 0x3U) != 0) { /* Remainderless fit to a smallbin. */ + mchunkptr b, p; + idx += ~smallbits & 1; /* Uses next bin if idx empty */ + b = smallbin_at(ms, idx); + p = b->fd; + assert(chunksize(p) == small_index2size(idx)); + unlink_first_small_chunk(ms, b, p, idx); + set_inuse_and_pinuse(ms, p, small_index2size(idx)); + mem = chunk2mem(p); + check_malloced_chunk(ms, mem, nb); + goto postaction; + } + + else if (nb > ms->dvsize) { + if (smallbits != 0) { /* Use chunk in next nonempty smallbin */ + mchunkptr b, p, r; + size_t rsize; + bindex_t i; + binmap_t leftbits = (smallbits << idx) & left_bits(idx2bit(idx)); + binmap_t leastbit = least_bit(leftbits); + compute_bit2idx(leastbit, i); + b = smallbin_at(ms, i); + p = b->fd; + assert(chunksize(p) == small_index2size(i)); + unlink_first_small_chunk(ms, b, p, i); + rsize = small_index2size(i) - nb; + /* Fit here cannot be remainderless if 4byte sizes */ + if (SIZE_T_SIZE != 4 && rsize < MIN_CHUNK_SIZE) + set_inuse_and_pinuse(ms, p, small_index2size(i)); + else { + set_size_and_pinuse_of_inuse_chunk(ms, p, nb); + r = chunk_plus_offset(p, nb); + set_size_and_pinuse_of_free_chunk(r, rsize); + replace_dv(ms, r, rsize); + } + mem = chunk2mem(p); + check_malloced_chunk(ms, mem, nb); + goto postaction; + } + + else if (ms->treemap != 0 && (mem = tmalloc_small(ms, nb)) != 0) { + check_malloced_chunk(ms, mem, nb); + goto postaction; + } + } + } + else if (bytes >= MAX_REQUEST) + nb = MAX_SIZE_T; /* Too big to allocate. 
Force failure (in sys alloc) */ + else { + nb = pad_request(bytes); + if (ms->treemap != 0 && (mem = tmalloc_large(ms, nb)) != 0) { + check_malloced_chunk(ms, mem, nb); + goto postaction; + } + } + + if (nb <= ms->dvsize) { + size_t rsize = ms->dvsize - nb; + mchunkptr p = ms->dv; + if (rsize >= MIN_CHUNK_SIZE) { /* split dv */ + mchunkptr r = ms->dv = chunk_plus_offset(p, nb); + ms->dvsize = rsize; + set_size_and_pinuse_of_free_chunk(r, rsize); + set_size_and_pinuse_of_inuse_chunk(ms, p, nb); + } + else { /* exhaust dv */ + size_t dvs = ms->dvsize; + ms->dvsize = 0; + ms->dv = 0; + set_inuse_and_pinuse(ms, p, dvs); + } + mem = chunk2mem(p); + check_malloced_chunk(ms, mem, nb); + goto postaction; + } + + else if (nb < ms->topsize) { /* Split top */ + size_t rsize = ms->topsize -= nb; + mchunkptr p = ms->top; + mchunkptr r = ms->top = chunk_plus_offset(p, nb); + r->head = rsize | PINUSE_BIT; + set_size_and_pinuse_of_inuse_chunk(ms, p, nb); + mem = chunk2mem(p); + check_top_chunk(ms, ms->top); + check_malloced_chunk(ms, mem, nb); + goto postaction; + } + + mem = sys_alloc(ms, nb); + + postaction: + POSTACTION(ms); + return mem; + } + + return 0; +} + +void mspace_free(mspace msp, void* mem) { + if (mem != 0) { + mchunkptr p = mem2chunk(mem); +#if FOOTERS + mstate fm = get_mstate_for(p); + msp = msp; /* placate people compiling -Wunused */ +#else /* FOOTERS */ + mstate fm = (mstate)msp; +#endif /* FOOTERS */ + if (!ok_magic(fm)) { + USAGE_ERROR_ACTION(fm, p); + return; + } + if (!PREACTION(fm)) { + check_inuse_chunk(fm, p); + if (RTCHECK(ok_address(fm, p) && ok_inuse(p))) { + size_t psize = chunksize(p); + mchunkptr next = chunk_plus_offset(p, psize); + if (!pinuse(p)) { + size_t prevsize = p->prev_foot; + if (is_mmapped(p)) { + psize += prevsize + MMAP_FOOT_PAD; + if (CALL_MUNMAP((char*)p - prevsize, psize) == 0) + fm->footprint -= psize; + goto postaction; + } + else { + mchunkptr prev = chunk_minus_offset(p, prevsize); + psize += prevsize; + p = prev; + if 
(RTCHECK(ok_address(fm, prev))) { /* consolidate backward */ + if (p != fm->dv) { + unlink_chunk(fm, p, prevsize); + } + else if ((next->head & INUSE_BITS) == INUSE_BITS) { + fm->dvsize = psize; + set_free_with_pinuse(p, psize, next); + goto postaction; + } + } + else + goto erroraction; + } + } + + if (RTCHECK(ok_next(p, next) && ok_pinuse(next))) { + if (!cinuse(next)) { /* consolidate forward */ + if (next == fm->top) { + size_t tsize = fm->topsize += psize; + fm->top = p; + p->head = tsize | PINUSE_BIT; + if (p == fm->dv) { + fm->dv = 0; + fm->dvsize = 0; + } + if (should_trim(fm, tsize)) + sys_trim(fm, 0); + goto postaction; + } + else if (next == fm->dv) { + size_t dsize = fm->dvsize += psize; + fm->dv = p; + set_size_and_pinuse_of_free_chunk(p, dsize); + goto postaction; + } + else { + size_t nsize = chunksize(next); + psize += nsize; + unlink_chunk(fm, next, nsize); + set_size_and_pinuse_of_free_chunk(p, psize); + if (p == fm->dv) { + fm->dvsize = psize; + goto postaction; + } + } + } + else + set_free_with_pinuse(p, psize, next); + + if (is_small(psize)) { + insert_small_chunk(fm, p, psize); + check_free_chunk(fm, p); + } + else { + tchunkptr tp = (tchunkptr)p; + insert_large_chunk(fm, tp, psize); + check_free_chunk(fm, p); + if (--fm->release_checks == 0) + release_unused_segments(fm); + } + goto postaction; + } + } + erroraction: + USAGE_ERROR_ACTION(fm, p); + postaction: + POSTACTION(fm); + } + } +} + +void* mspace_calloc(mspace msp, size_t n_elements, size_t elem_size) { + void* mem; + size_t req = 0; + mstate ms = (mstate)msp; + if (!ok_magic(ms)) { + USAGE_ERROR_ACTION(ms,ms); + return 0; + } + if (n_elements != 0) { + req = n_elements * elem_size; + if (((n_elements | elem_size) & ~(size_t)0xffff) && + (req / n_elements != elem_size)) + req = MAX_SIZE_T; /* force downstream failure on overflow */ + } + mem = internal_malloc(ms, req); + if (mem != 0 && calloc_must_clear(mem2chunk(mem))) + memset(mem, 0, req); + return mem; +} + +void* 
mspace_realloc(mspace msp, void* oldmem, size_t bytes) { + if (oldmem == 0) + return mspace_malloc(msp, bytes); +#ifdef REALLOC_ZERO_BYTES_FREES + if (bytes == 0) { + mspace_free(msp, oldmem); + return 0; + } +#endif /* REALLOC_ZERO_BYTES_FREES */ + else { +#if FOOTERS + mchunkptr p = mem2chunk(oldmem); + mstate ms = get_mstate_for(p); +#else /* FOOTERS */ + mstate ms = (mstate)msp; +#endif /* FOOTERS */ + if (!ok_magic(ms)) { + USAGE_ERROR_ACTION(ms,ms); + return 0; + } + return internal_realloc(ms, oldmem, bytes); + } +} + +void* mspace_memalign(mspace msp, size_t alignment, size_t bytes) { + mstate ms = (mstate)msp; + if (!ok_magic(ms)) { + USAGE_ERROR_ACTION(ms,ms); + return 0; + } + return internal_memalign(ms, alignment, bytes); +} + +void** mspace_independent_calloc(mspace msp, size_t n_elements, + size_t elem_size, void* chunks[]) { + size_t sz = elem_size; /* serves as 1-element array */ + mstate ms = (mstate)msp; + if (!ok_magic(ms)) { + USAGE_ERROR_ACTION(ms,ms); + return 0; + } + return ialloc(ms, n_elements, &sz, 3, chunks); +} + +void** mspace_independent_comalloc(mspace msp, size_t n_elements, + size_t sizes[], void* chunks[]) { + mstate ms = (mstate)msp; + if (!ok_magic(ms)) { + USAGE_ERROR_ACTION(ms,ms); + return 0; + } + return ialloc(ms, n_elements, sizes, 0, chunks); +} + +int mspace_trim(mspace msp, size_t pad) { + int result = 0; + mstate ms = (mstate)msp; + if (ok_magic(ms)) { + if (!PREACTION(ms)) { + result = sys_trim(ms, pad); + POSTACTION(ms); + } + } + else { + USAGE_ERROR_ACTION(ms,ms); + } + return result; +} + +void mspace_malloc_stats(mspace msp) { + mstate ms = (mstate)msp; + if (ok_magic(ms)) { + internal_malloc_stats(ms); + } + else { + USAGE_ERROR_ACTION(ms,ms); + } +} + +size_t mspace_footprint(mspace msp) { + size_t result = 0; + mstate ms = (mstate)msp; + if (ok_magic(ms)) { + result = ms->footprint; + } + else { + USAGE_ERROR_ACTION(ms,ms); + } + return result; +} + + +size_t mspace_max_footprint(mspace msp) { + size_t result 
= 0; + mstate ms = (mstate)msp; + if (ok_magic(ms)) { + result = ms->max_footprint; + } + else { + USAGE_ERROR_ACTION(ms,ms); + } + return result; +} + + +#if !NO_MALLINFO +struct mallinfo mspace_mallinfo(mspace msp) { + mstate ms = (mstate)msp; + if (!ok_magic(ms)) { + USAGE_ERROR_ACTION(ms,ms); + } + return internal_mallinfo(ms); +} +#endif /* NO_MALLINFO */ + +size_t mspace_usable_size(void* mem) { + if (mem != 0) { + mchunkptr p = mem2chunk(mem); + if (is_inuse(p)) + return chunksize(p) - overhead_for(p); + } + return 0; +} + +int mspace_mallopt(int param_number, int value) { + return change_mparam(param_number, value); +} + +#endif /* MSPACES */ + + +/* -------------------- Alternative MORECORE functions ------------------- */ + +/* + Guidelines for creating a custom version of MORECORE: + + * For best performance, MORECORE should allocate in multiples of pagesize. + * MORECORE may allocate more memory than requested. (Or even less, + but this will usually result in a malloc failure.) + * MORECORE must not allocate memory when given argument zero, but + instead return one past the end address of memory from previous + nonzero call. + * For best performance, consecutive calls to MORECORE with positive + arguments should return increasing addresses, indicating that + space has been contiguously extended. + * Even though consecutive calls to MORECORE need not return contiguous + addresses, it must be OK for malloc'ed chunks to span multiple + regions in those cases where they do happen to be contiguous. + * MORECORE need not handle negative arguments -- it may instead + just return MFAIL when given negative arguments. + Negative arguments are always multiples of pagesize. MORECORE + must not misinterpret negative args as large positive unsigned + args. You can suppress all such calls from even occurring by defining + MORECORE_CANNOT_TRIM, + + As an example alternative MORECORE, here is a custom allocator + kindly contributed for pre-OSX macOS. 
It uses virtually but not + necessarily physically contiguous non-paged memory (locked in, + present and won't get swapped out). You can use it by uncommenting + this section, adding some #includes, and setting up the appropriate + defines above: + + #define MORECORE osMoreCore + + There is also a shutdown routine that should somehow be called for + cleanup upon program exit. + + #define MAX_POOL_ENTRIES 100 + #define MINIMUM_MORECORE_SIZE (64 * 1024U) + static int next_os_pool; + void *our_os_pools[MAX_POOL_ENTRIES]; + + void *osMoreCore(int size) + { + void *ptr = 0; + static void *sbrk_top = 0; + + if (size > 0) + { + if (size < MINIMUM_MORECORE_SIZE) + size = MINIMUM_MORECORE_SIZE; + if (CurrentExecutionLevel() == kTaskLevel) + ptr = PoolAllocateResident(size + RM_PAGE_SIZE, 0); + if (ptr == 0) + { + return (void *) MFAIL; + } + // save ptrs so they can be freed during cleanup + our_os_pools[next_os_pool] = ptr; + next_os_pool++; + ptr = (void *) ((((size_t) ptr) + RM_PAGE_MASK) & ~RM_PAGE_MASK); + sbrk_top = (char *) ptr + size; + return ptr; + } + else if (size < 0) + { + // we don't currently support shrink behavior + return (void *) MFAIL; + } + else + { + return sbrk_top; + } + } + + // cleanup any allocated memory pools + // called as last thing before shutting down driver + + void osCleanupMem(void) + { + void **ptr; + + for (ptr = our_os_pools; ptr < &our_os_pools[MAX_POOL_ENTRIES]; ptr++) + if (*ptr) + { + PoolDeallocate(*ptr); + *ptr = 0; + } + } + +*/ + + +/* ----------------------------------------------------------------------- +History: + V2.8.4 Wed May 27 09:56:23 2009 Doug Lea (dl at gee) + * Use zeros instead of prev foot for is_mmapped + * Add mspace_track_large_chunks; thanks to Jean Brouwers + * Fix set_inuse in internal_realloc; thanks to Jean Brouwers + * Fix insufficient sys_alloc padding when using 16byte alignment + * Fix bad error check in mspace_footprint + * Adaptations for ptmalloc; thanks to Wolfram Gloger. 
+ * Reentrant spin locks; thanks to Earl Chew and others + * Win32 improvements; thanks to Niall Douglas and Earl Chew + * Add NO_SEGMENT_TRAVERSAL and MAX_RELEASE_CHECK_RATE options + * Extension hook in malloc_state + * Various small adjustments to reduce warnings on some compilers + * Various configuration extensions/changes for more platforms. Thanks + to all who contributed these. + + V2.8.3 Thu Sep 22 11:16:32 2005 Doug Lea (dl at gee) + * Add max_footprint functions + * Ensure all appropriate literals are size_t + * Fix conditional compilation problem for some #define settings + * Avoid concatenating segments with the one provided + in create_mspace_with_base + * Rename some variables to avoid compiler shadowing warnings + * Use explicit lock initialization. + * Better handling of sbrk interference. + * Simplify and fix segment insertion, trimming and mspace_destroy + * Reinstate REALLOC_ZERO_BYTES_FREES option from 2.7.x + * Thanks especially to Dennis Flanagan for help on these. + + V2.8.2 Sun Jun 12 16:01:10 2005 Doug Lea (dl at gee) + * Fix memalign brace error. + + V2.8.1 Wed Jun 8 16:11:46 2005 Doug Lea (dl at gee) + * Fix improper #endif nesting in C++ + * Add explicit casts needed for C++ + + V2.8.0 Mon May 30 14:09:02 2005 Doug Lea (dl at gee) + * Use trees for large bins + * Support mspaces + * Use segments to unify sbrk-based and mmap-based system allocation, + removing need for emulation on most platforms without sbrk. + * Default safety checks + * Optional footer checks. Thanks to William Robertson for the idea. + * Internal code refactoring + * Incorporate suggestions and platform-specific changes. + Thanks to Dennis Flanagan, Colin Plumb, Niall Douglas, + Aaron Bachmann, Emery Berger, and others. + * Speed up non-fastbin processing enough to remove fastbins. + * Remove useless cfree() to avoid conflicts with other apps. + * Remove internal memcpy, memset. Compilers handle builtins better. 
+ * Remove some options that no one ever used and rename others. + + V2.7.2 Sat Aug 17 09:07:30 2002 Doug Lea (dl at gee) + * Fix malloc_state bitmap array misdeclaration + + V2.7.1 Thu Jul 25 10:58:03 2002 Doug Lea (dl at gee) + * Allow tuning of FIRST_SORTED_BIN_SIZE + * Use PTR_UINT as type for all ptr->int casts. Thanks to John Belmonte. + * Better detection and support for non-contiguousness of MORECORE. + Thanks to Andreas Mueller, Conal Walsh, and Wolfram Gloger + * Bypass most of malloc if no frees. Thanks To Emery Berger. + * Fix freeing of old top non-contiguous chunk im sysmalloc. + * Raised default trim and map thresholds to 256K. + * Fix mmap-related #defines. Thanks to Lubos Lunak. + * Fix copy macros; added LACKS_FCNTL_H. Thanks to Neal Walfield. + * Branch-free bin calculation + * Default trim and mmap thresholds now 256K. + + V2.7.0 Sun Mar 11 14:14:06 2001 Doug Lea (dl at gee) + * Introduce independent_comalloc and independent_calloc. + Thanks to Michael Pachos for motivation and help. + * Make optional .h file available + * Allow > 2GB requests on 32bit systems. + * new WIN32 sbrk, mmap, munmap, lock code from <Walter@GeNeSys-e.de>. + Thanks also to Andreas Mueller <a.mueller at paradatec.de>, + and Anonymous. + * Allow override of MALLOC_ALIGNMENT (Thanks to Ruud Waij for + helping test this.) + * memalign: check alignment arg + * realloc: don't try to shift chunks backwards, since this + leads to more fragmentation in some programs and doesn't + seem to help in any others. + * Collect all cases in malloc requiring system memory into sysmalloc + * Use mmap as backup to sbrk + * Place all internal state in malloc_state + * Introduce fastbins (although similar to 2.5.1) + * Many minor tunings and cosmetic improvements + * Introduce USE_PUBLIC_MALLOC_WRAPPERS, USE_MALLOC_LOCK + * Introduce MALLOC_FAILURE_ACTION, MORECORE_CONTIGUOUS + Thanks to Tony E. Bennett <tbennett@nvidia.com> and others. + * Include errno.h to support default failure action. 
+ + V2.6.6 Sun Dec 5 07:42:19 1999 Doug Lea (dl at gee) + * return null for negative arguments + * Added Several WIN32 cleanups from Martin C. Fong <mcfong at yahoo.com> + * Add 'LACKS_SYS_PARAM_H' for those systems without 'sys/param.h' + (e.g. WIN32 platforms) + * Cleanup header file inclusion for WIN32 platforms + * Cleanup code to avoid Microsoft Visual C++ compiler complaints + * Add 'USE_DL_PREFIX' to quickly allow co-existence with existing + memory allocation routines + * Set 'malloc_getpagesize' for WIN32 platforms (needs more work) + * Use 'assert' rather than 'ASSERT' in WIN32 code to conform to + usage of 'assert' in non-WIN32 code + * Improve WIN32 'sbrk()' emulation's 'findRegion()' routine to + avoid infinite loop + * Always call 'fREe()' rather than 'free()' + + V2.6.5 Wed Jun 17 15:57:31 1998 Doug Lea (dl at gee) + * Fixed ordering problem with boundary-stamping + + V2.6.3 Sun May 19 08:17:58 1996 Doug Lea (dl at gee) + * Added pvalloc, as recommended by H.J. Liu + * Added 64bit pointer support mainly from Wolfram Gloger + * Added anonymously donated WIN32 sbrk emulation + * Malloc, calloc, getpagesize: add optimizations from Raymond Nijssen + * malloc_extend_top: fix mask error that caused wastage after + foreign sbrks + * Add linux mremap support code from HJ Liu + + V2.6.2 Tue Dec 5 06:52:55 1995 Doug Lea (dl at gee) + * Integrated most documentation with the code. + * Add support for mmap, with help from + Wolfram Gloger (Gloger@lrz.uni-muenchen.de). + * Use last_remainder in more cases. + * Pack bins using idea from colin@nyx10.cs.du.edu + * Use ordered bins instead of best-fit threshhold + * Eliminate block-local decls to simplify tracing and debugging. + * Support another case of realloc via move into top + * Fix error occuring when initial sbrk_base not word-aligned. + * Rely on page size for units instead of SBRK_UNIT to + avoid surprises about sbrk alignment conventions. + * Add mallinfo, mallopt. 
Thanks to Raymond Nijssen + (raymond@es.ele.tue.nl) for the suggestion. + * Add `pad' argument to malloc_trim and top_pad mallopt parameter. + * More precautions for cases where other routines call sbrk, + courtesy of Wolfram Gloger (Gloger@lrz.uni-muenchen.de). + * Added macros etc., allowing use in linux libc from + H.J. Lu (hjl@gnu.ai.mit.edu) + * Inverted this history list + + V2.6.1 Sat Dec 2 14:10:57 1995 Doug Lea (dl at gee) + * Re-tuned and fixed to behave more nicely with V2.6.0 changes. + * Removed all preallocation code since under current scheme + the work required to undo bad preallocations exceeds + the work saved in good cases for most test programs. + * No longer use return list or unconsolidated bins since + no scheme using them consistently outperforms those that don't + given above changes. + * Use best fit for very large chunks to prevent some worst-cases. + * Added some support for debugging + + V2.6.0 Sat Nov 4 07:05:23 1995 Doug Lea (dl at gee) + * Removed footers when chunks are in use. Thanks to + Paul Wilson (wilson@cs.texas.edu) for the suggestion. + + V2.5.4 Wed Nov 1 07:54:51 1995 Doug Lea (dl at gee) + * Added malloc_trim, with help from Wolfram Gloger + (wmglo@Dent.MED.Uni-Muenchen.DE). 
+ + V2.5.3 Tue Apr 26 10:16:01 1994 Doug Lea (dl at g) + + V2.5.2 Tue Apr 5 16:20:40 1994 Doug Lea (dl at g) + * realloc: try to expand in both directions + * malloc: swap order of clean-bin strategy; + * realloc: only conditionally expand backwards + * Try not to scavenge used bins + * Use bin counts as a guide to preallocation + * Occasionally bin return list chunks in first scan + * Add a few optimizations from colin@nyx10.cs.du.edu + + V2.5.1 Sat Aug 14 15:40:43 1993 Doug Lea (dl at g) + * faster bin computation & slightly different binning + * merged all consolidations to one part of malloc proper + (eliminating old malloc_find_space & malloc_clean_bin) + * Scan 2 returns chunks (not just 1) + * Propagate failure in realloc if malloc returns 0 + * Add stuff to allow compilation on non-ANSI compilers + from kpv@research.att.com + + V2.5 Sat Aug 7 07:41:59 1993 Doug Lea (dl at g.oswego.edu) + * removed potential for odd address access in prev_chunk + * removed dependency on getpagesize.h + * misc cosmetics and a bit more internal documentation + * anticosmetics: mangled names in macros to evade debugger strangeness + * tested on sparc, hp-700, dec-mips, rs6000 + with gcc & native cc (hp, dec only) allowing + Detlefs & Zorn comparison study (in SIGPLAN Notices.) + + Trial version Fri Aug 28 13:14:29 1992 Doug Lea (dl at g.oswego.edu) + * Based loosely on libg++-1.2X malloc. (It retains some of the overall + structure of old version, but most details differ.) + +*/ + +#endif diff --git a/drivers/nedmalloc/nedmalloc.cpp b/drivers/nedmalloc/nedmalloc.cpp index 8845d96549..9aac277a2a 100644 --- a/drivers/nedmalloc/nedmalloc.cpp +++ b/drivers/nedmalloc/nedmalloc.cpp @@ -1,1467 +1,1467 @@ -#ifdef NEDMALLOC_ENABLED
-/* Alternative malloc implementation for multiple threads without
-lock contention based on dlmalloc. (C) 2005-2009 Niall Douglas
-
-Boost Software License - Version 1.0 - August 17th, 2003
-
-Permission is hereby granted, free of charge, to any person or organization
-obtaining a copy of the software and accompanying documentation covered by
-this license (the "Software") to use, reproduce, display, distribute,
-execute, and transmit the Software, and to prepare derivative works of the
-Software, and to permit third-parties to whom the Software is furnished to
-do so, all subject to the following:
-
-The copyright notices in the Software and this entire statement, including
-the above license grant, this restriction and the following disclaimer,
-must be included in all copies of the Software, in whole or in part, and
-all derivative works of the Software, unless such copies or derivative
-works are solely in the form of machine-executable object code generated by
-a source language processor.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
-SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
-FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
-ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-DEALINGS IN THE SOFTWARE.
-*/
-
-#ifdef _MSC_VER
-/* Enable full aliasing on MSVC */
-/*#pragma optimize("a", on)*/
-#pragma warning(push)
-#pragma warning(disable:4100) /* unreferenced formal parameter */
-#pragma warning(disable:4127) /* conditional expression is constant */
-#pragma warning(disable:4706) /* assignment within conditional expression */
-#endif
-
-/*#define ENABLE_TOLERANT_NEDMALLOC 1*/
-/*#define ENABLE_FAST_HEAP_DETECTION 1*/
-/*#define NEDMALLOC_DEBUG 1*/
-
-/*#define FULLSANITYCHECKS*/
-/* If link time code generation is on, don't force or prevent inlining */
-#if defined(_MSC_VER) && defined(NEDMALLOC_DLL_EXPORTS)
-#define FORCEINLINE
-#define NOINLINE
-#endif
-
-
-#include "nedmalloc.h"
-#ifdef WIN32
- #include <malloc.h>
- #include <stddef.h>
-#endif
-#if USE_ALLOCATOR==1
- #define MSPACES 1
- #define ONLY_MSPACES 1
-#endif
-#define USE_DL_PREFIX 1
-#ifndef USE_LOCKS
- #define USE_LOCKS 1
-#endif
-#define FOOTERS 1 /* Need to enable footers so frees lock the right mspace */
-#ifndef NEDMALLOC_DEBUG
- #if defined(DEBUG) || defined(_DEBUG)
- #define NEDMALLOC_DEBUG 1
- #else
- #define NEDMALLOC_DEBUG 0
- #endif
-#endif
-/* We need to consistently define DEBUG=0|1, _DEBUG and NDEBUG for dlmalloc */
-#undef DEBUG
-#undef _DEBUG
-#if NEDMALLOC_DEBUG
- #define _DEBUG
- #define DEBUG 1
-#else
- #define DEBUG 0
-#endif
-#ifdef NDEBUG /* Disable assert checking on release builds */
- #undef DEBUG
- #undef _DEBUG
-#endif
-/* The default of 64Kb means we spend too much time kernel-side */
-#ifndef DEFAULT_GRANULARITY
-#define DEFAULT_GRANULARITY (1*1024*1024)
-#if DEBUG
-#define DEFAULT_GRANULARITY_ALIGNED
-#endif
-#endif
-/*#define USE_SPIN_LOCKS 0*/
-
-
-#include "malloc.c.h"
-#ifdef NDEBUG /* Disable assert checking on release builds */
- #undef DEBUG
-#elif !NEDMALLOC_DEBUG
- #ifdef __GNUC__
- #warning DEBUG is defined so allocator will run with assert checking! Define NDEBUG to run at full speed.
- #elif defined(_MSC_VER)
- #pragma message(__FILE__ ": WARNING: DEBUG is defined so allocator will run with assert checking! Define NDEBUG to run at full speed.")
- #endif
-#endif
-
-/* The maximum concurrent threads in a pool possible */
-#ifndef MAXTHREADSINPOOL
-#define MAXTHREADSINPOOL 16
-#endif
-/* The maximum number of threadcaches which can be allocated */
-#ifndef THREADCACHEMAXCACHES
-#define THREADCACHEMAXCACHES 256
-#endif
-/* The maximum size to be allocated from the thread cache */
-#ifndef THREADCACHEMAX
-#define THREADCACHEMAX 8192
-#endif
-#if 0
-/* The number of cache entries for finer grained bins. This is (topbitpos(THREADCACHEMAX)-4)*2 */
-#define THREADCACHEMAXBINS ((13-4)*2)
-#else
-/* The number of cache entries. This is (topbitpos(THREADCACHEMAX)-4) */
-#define THREADCACHEMAXBINS (13-4)
-#endif
-/* Point at which the free space in a thread cache is garbage collected */
-#ifndef THREADCACHEMAXFREESPACE
-#define THREADCACHEMAXFREESPACE (512*1024)
-#endif
-
-
-#ifdef WIN32
- #define TLSVAR DWORD
- #define TLSALLOC(k) (*(k)=TlsAlloc(), TLS_OUT_OF_INDEXES==*(k))
- #define TLSFREE(k) (!TlsFree(k))
- #define TLSGET(k) TlsGetValue(k)
- #define TLSSET(k, a) (!TlsSetValue(k, a))
- #ifdef DEBUG
-static LPVOID ChkedTlsGetValue(DWORD idx)
-{
- LPVOID ret=TlsGetValue(idx);
- assert(S_OK==GetLastError());
- return ret;
-}
- #undef TLSGET
- #define TLSGET(k) ChkedTlsGetValue(k)
- #endif
-#else
- #define TLSVAR pthread_key_t
- #define TLSALLOC(k) pthread_key_create(k, 0)
- #define TLSFREE(k) pthread_key_delete(k)
- #define TLSGET(k) pthread_getspecific(k)
- #define TLSSET(k, a) pthread_setspecific(k, a)
-#endif
-
-#if defined(__cplusplus)
-#if !defined(NO_NED_NAMESPACE)
-namespace nedalloc {
-#else
-extern "C" {
-#endif
-#endif
-
-#if USE_ALLOCATOR==0
-static void *unsupported_operation(const char *opname) THROWSPEC
-{
- fprintf(stderr, "nedmalloc: The operation %s is not supported under this build configuration\n", opname);
- abort();
- return 0;
-}
-static size_t mspacecounter=(size_t) 0xdeadbeef;
-#endif
-#ifndef ENABLE_FAST_HEAP_DETECTION
-static void *RESTRICT leastusedaddress;
-static size_t largestusedblock;
-#endif
-
-static FORCEINLINE void *CallMalloc(void *RESTRICT mspace, size_t size, size_t alignment) THROWSPEC
-{
- void *RESTRICT ret=0;
- size_t _alignment=alignment;
-#if USE_MAGIC_HEADERS
- size_t *_ret=0;
- size+=alignment+3*sizeof(size_t);
- _alignment=0;
-#endif
-#if USE_ALLOCATOR==0
- ret=_alignment ?
-#ifdef _MSC_VER
- /* This is the MSVCRT equivalent */
- _aligned_malloc(size, _alignment)
-#elif defined(__linux__) || defined(__FreeBSD__) || defined(__APPLE__)
- /* This is the glibc/ptmalloc2/dlmalloc/BSD libc equivalent. */
- memalign(_alignment, size)
-#else
-#error Cannot aligned allocate with the memory allocator of an unknown system!
-#endif
- : malloc(size);
-#elif USE_ALLOCATOR==1
- ret=_alignment ? mspace_memalign((mstate) mspace, _alignment, size) : mspace_malloc((mstate) mspace, size);
-#ifndef ENABLE_FAST_HEAP_DETECTION
- if(ret)
- {
- size_t truesize=chunksize(mem2chunk(ret));
- if(!leastusedaddress || (void *)((mstate) mspace)->least_addr<leastusedaddress) leastusedaddress=(void *)((mstate) mspace)->least_addr;
- if(!largestusedblock || truesize>largestusedblock) largestusedblock=(truesize+mparams.page_size) & ~(mparams.page_size-1);
- }
-#endif
-#endif
- if(!ret) return 0;
-#if USE_MAGIC_HEADERS
- _ret=(size_t *) ret;
- ret=(void *)(_ret+3);
- if(alignment) ret=(void *)(((size_t) ret+alignment-1)&~(alignment-1));
- for(; _ret<(size_t *)ret-2; _ret++) *_ret=*(size_t *)"NEDMALOC";
- _ret[0]=(size_t) mspace;
- _ret[1]=size-3*sizeof(size_t);
-#endif
- return ret;
-}
-
-static FORCEINLINE void *CallCalloc(void *RESTRICT mspace, size_t size, size_t alignment) THROWSPEC
-{
- void *RESTRICT ret=0;
-#if USE_MAGIC_HEADERS
- size_t *_ret=0;
- size+=alignment+3*sizeof(size_t);
-#endif
-#if USE_ALLOCATOR==0
- ret=calloc(1, size);
-#elif USE_ALLOCATOR==1
- ret=mspace_calloc((mstate) mspace, 1, size);
-#ifndef ENABLE_FAST_HEAP_DETECTION
- if(ret)
- {
- size_t truesize=chunksize(mem2chunk(ret));
- if(!leastusedaddress || (void *)((mstate) mspace)->least_addr<leastusedaddress) leastusedaddress=(void *)((mstate) mspace)->least_addr;
- if(!largestusedblock || truesize>largestusedblock) largestusedblock=(truesize+mparams.page_size) & ~(mparams.page_size-1);
- }
-#endif
-#endif
- if(!ret) return 0;
-#if USE_MAGIC_HEADERS
- _ret=(size_t *) ret;
- ret=(void *)(_ret+3);
- if(alignment) ret=(void *)(((size_t) ret+alignment-1)&~(alignment-1));
- for(; _ret<(size_t *)ret-2; _ret++) *_ret=*(size_t *) "NEDMALOC";
- _ret[0]=(size_t) mspace;
- _ret[1]=size-3*sizeof(size_t);
-#endif
- return ret;
-}
-
-static FORCEINLINE void *CallRealloc(void *RESTRICT mspace, void *RESTRICT mem, int isforeign, size_t oldsize, size_t newsize) THROWSPEC
-{
- void *RESTRICT ret=0;
-#if USE_MAGIC_HEADERS
- mstate oldmspace=0;
- size_t *_ret=0, *_mem=(size_t *) mem-3;
-#endif
- if(isforeign)
- { /* Transfer */
-#if USE_MAGIC_HEADERS
- assert(_mem[0]!=*(size_t *) "NEDMALOC");
-#endif
- if((ret=CallMalloc(mspace, newsize, 0)))
- {
-#if defined(DEBUG)
- printf("*** nedmalloc frees system allocated block %p\n", mem);
-#endif
- memcpy(ret, mem, oldsize<newsize ? oldsize : newsize);
- free(mem);
- }
- return ret;
- }
-#if USE_MAGIC_HEADERS
- assert(_mem[0]==*(size_t *) "NEDMALOC");
- newsize+=3*sizeof(size_t);
- oldmspace=(mstate) _mem[1];
- assert(oldsize>=_mem[2]);
- for(; *_mem==*(size_t *) "NEDMALOC"; *_mem--=*(size_t *) "nedmaloc");
- mem=(void *)(++_mem);
-#endif
-#if USE_ALLOCATOR==0
- ret=realloc(mem, newsize);
-#elif USE_ALLOCATOR==1
- ret=mspace_realloc((mstate) mspace, mem, newsize);
-#ifndef ENABLE_FAST_HEAP_DETECTION
- if(ret)
- {
- size_t truesize=chunksize(mem2chunk(ret));
- if(!largestusedblock || truesize>largestusedblock) largestusedblock=(truesize+mparams.page_size) & ~(mparams.page_size-1);
- }
-#endif
-#endif
- if(!ret)
- { /* Put it back the way it was */
-#if USE_MAGIC_HEADERS
- for(; *_mem==0; *_mem++=*(size_t *) "NEDMALOC");
-#endif
- return 0;
- }
-#if USE_MAGIC_HEADERS
- _ret=(size_t *) ret;
- ret=(void *)(_ret+3);
- for(; _ret<(size_t *)ret-2; _ret++) *_ret=*(size_t *) "NEDMALOC";
- _ret[0]=(size_t) mspace;
- _ret[1]=newsize-3*sizeof(size_t);
-#endif
- return ret;
-}
-
-static FORCEINLINE void CallFree(void *RESTRICT mspace, void *RESTRICT mem, int isforeign) THROWSPEC
-{
-#if USE_MAGIC_HEADERS
- mstate oldmspace=0;
- size_t *_mem=(size_t *) mem-3, oldsize=0;
-#endif
- if(isforeign)
- {
-#if USE_MAGIC_HEADERS
- assert(_mem[0]!=*(size_t *) "NEDMALOC");
-#endif
-#if defined(DEBUG)
- printf("*** nedmalloc frees system allocated block %p\n", mem);
-#endif
- free(mem);
- return;
- }
-#if USE_MAGIC_HEADERS
- assert(_mem[0]==*(size_t *) "NEDMALOC");
- oldmspace=(mstate) _mem[1];
- oldsize=_mem[2];
- for(; *_mem==*(size_t *) "NEDMALOC"; *_mem--=*(size_t *) "nedmaloc");
- mem=(void *)(++_mem);
-#endif
-#if USE_ALLOCATOR==0
- free(mem);
-#elif USE_ALLOCATOR==1
- mspace_free((mstate) mspace, mem);
-#endif
-}
-
-static NEDMALLOCNOALIASATTR mstate nedblkmstate(void *RESTRICT mem) THROWSPEC
-{
- if(mem)
- {
-#if USE_MAGIC_HEADERS
- size_t *_mem=(size_t *) mem-3;
- if(_mem[0]==*(size_t *) "NEDMALOC")
- {
- return (mstate) _mem[1];
- }
- else return 0;
-#else
-#if USE_ALLOCATOR==0
- /* Fail everything */
- return 0;
-#elif USE_ALLOCATOR==1
-#ifdef ENABLE_FAST_HEAP_DETECTION
-#ifdef WIN32
- /* On Windows for RELEASE both x86 and x64 the NT heap precedes each block with an eight byte header
- which looks like:
- normal: 4 bytes of size, 4 bytes of [char < 64, char < 64, char < 64 bit 0 always set, char random ]
- mmaped: 4 bytes of size 4 bytes of [zero, zero, 0xb, zero ]
-
- On Windows for DEBUG both x86 and x64 the preceding four bytes is always 0xfdfdfdfd (no man's land).
- */
-#pragma pack(push, 1)
- struct _HEAP_ENTRY
- {
- USHORT Size;
- USHORT PreviousSize;
- UCHAR Cookie; /* SegmentIndex */
- UCHAR Flags; /* always bit 0 (HEAP_ENTRY_BUSY). bit 1=(HEAP_ENTRY_EXTRA_PRESENT), bit 2=normal block (HEAP_ENTRY_FILL_PATTERN), bit 3=mmap block (HEAP_ENTRY_VIRTUAL_ALLOC). Bit 4 (HEAP_ENTRY_LAST_ENTRY) could be set */
- UCHAR UnusedBytes;
- UCHAR SmallTagIndex; /* fastbin index. Always one of 0x02, 0x03, 0x04 < 0x80 */
- } *RESTRICT he=((struct _HEAP_ENTRY *) mem)-1;
-#pragma pack(pop)
- unsigned int header=((unsigned int *)mem)[-1], mask1=0x8080E100, result1, mask2=0xFFFFFF06, result2;
- result1=header & mask1; /* Positive testing for NT heap */
- result2=header & mask2; /* Positive testing for dlmalloc */
- if(result1==0x00000100 && result2!=0x00000102)
- { /* This is likely a NT heap block */
- return 0;
- }
-#endif
-#ifdef __linux__
- /* On Linux glibc uses ptmalloc2 (really dlmalloc) just as we do, but prev_foot contains rubbish
- when the preceding block is allocated because ptmalloc2 finds the local mstate by rounding the ptr
- down to the nearest megabyte. It's like dlmalloc with FOOTERS disabled. */
- mchunkptr p=mem2chunk(mem);
- mstate fm=get_mstate_for(p);
- /* If it's a ptmalloc2 block, fm is likely to be some crazy value */
- if(!is_aligned(fm)) return 0;
- if((size_t)mem-(size_t)fm>=(size_t)1<<(SIZE_T_BITSIZE-1)) return 0;
- if(ok_magic(fm))
- return fm;
- else
- return 0;
- if(1) { }
-#endif
- else
- {
- mchunkptr p=mem2chunk(mem);
- mstate fm=get_mstate_for(p);
- assert(ok_magic(fm)); /* If this fails, someone tried to free a block twice */
- if(ok_magic(fm))
- return fm;
- }
-#else
-//#ifdef WIN32
-// __try
-//#endif
- {
- /* We try to return zero here if it isn't one of our own blocks, however
- the current block annotation scheme used by dlmalloc makes it impossible
- to be absolutely sure of avoiding a segfault.
-
- mchunkptr->prev_foot = mem-(2*size_t) = mstate ^ mparams.magic for PRECEDING block;
- mchunkptr->head = mem-(1*size_t) = 8 multiple size of this block with bottom three bits = FLAG_BITS
- FLAG_BITS = bit 0 is CINUSE (currently in use unless is mmap), bit 1 is PINUSE (previous block currently
- in use unless mmap), bit 2 is UNUSED and currently is always zero.
- */
- register void *RESTRICT leastusedaddress_=leastusedaddress; /* Cache these to avoid register reloading */
- register size_t largestusedblock_=largestusedblock;
- if(!is_aligned(mem)) return 0; /* Would fail very rarely as all allocators return aligned blocks */
- if(mem<leastusedaddress_) return 0; /* Simple but effective */
- {
- mchunkptr p=mem2chunk(mem);
- mstate fm=0;
- int ismmapped=is_mmapped(p);
- if((!ismmapped && !is_inuse(p)) || (p->head & FLAG4_BIT)) return 0;
- /* Reduced uncertainty by 0.5^2 = 25.0% */
- /* size should never exceed largestusedblock */
- if(chunksize(p)>largestusedblock_) return 0;
- /* Reduced uncertainty by a minimum of 0.5^3 = 12.5%, maximum 0.5^16 = 0.0015% */
- /* Having sanity checked prev_foot and head, check next block */
- if(!ismmapped && (!next_pinuse(p) || (next_chunk(p)->head & FLAG4_BIT))) return 0;
- /* Reduced uncertainty by 0.5^5 = 3.13% or 0.5^18 = 0.00038% */
- #if 0
- /* If previous block is free, check that its next block pointer equals us */
- if(!ismmapped && !pinuse(p))
- if(next_chunk(prev_chunk(p))!=p) return 0;
- /* We could start comparing prev_foot's for similarity but it starts getting slow. */
- #endif
- fm = get_mstate_for(p);
- if(!is_aligned(fm) || (void *)fm<leastusedaddress_) return 0;
- if((size_t)mem-(size_t)fm>=(size_t)1<<(SIZE_T_BITSIZE-1)) return 0;
- assert(ok_magic(fm)); /* If this fails, someone tried to free a block twice */
- if(ok_magic(fm))
- return fm;
- }
- }
-//#ifdef WIN32
-// __except(1) { }
-//#endif
-#endif
-#endif
-#endif
- }
- return 0;
-}
-NEDMALLOCNOALIASATTR size_t nedblksize(int *RESTRICT isforeign, void *RESTRICT mem) THROWSPEC
-{
- if(mem)
- {
- if(isforeign) *isforeign=1;
-#if USE_MAGIC_HEADERS
- {
- size_t *_mem=(size_t *) mem-3;
- if(_mem[0]==*(size_t *) "NEDMALOC")
- {
- mstate mspace=(mstate) _mem[1];
- size_t size=_mem[2];
- if(isforeign) *isforeign=0;
- return size;
- }
- }
-#elif USE_ALLOCATOR==1
- if(nedblkmstate(mem))
- {
- mchunkptr p=mem2chunk(mem);
- if(isforeign) *isforeign=0;
- return chunksize(p)-overhead_for(p);
- }
-#ifdef DEBUG
- else
- {
- int a=1; /* Set breakpoints here if needed */
- }
-#endif
-#endif
-#if defined(ENABLE_TOLERANT_NEDMALLOC) || USE_ALLOCATOR==0
-#ifdef _MSC_VER
- /* This is the MSVCRT equivalent */
- return _msize(mem);
-#elif defined(__linux__)
- /* This is the glibc/ptmalloc2/dlmalloc equivalent. */
- return malloc_usable_size(mem);
-#elif defined(__FreeBSD__) || defined(__APPLE__)
- /* This is the BSD libc equivalent. */
- return malloc_size(mem);
-#else
-#error Cannot tolerate the memory allocator of an unknown system!
-#endif
-#endif
- }
- return 0;
-}
-
-NEDMALLOCNOALIASATTR void nedsetvalue(void *v) THROWSPEC { nedpsetvalue((nedpool *) 0, v); }
-NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedmalloc(size_t size) THROWSPEC { return nedpmalloc((nedpool *) 0, size); }
-NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedcalloc(size_t no, size_t size) THROWSPEC { return nedpcalloc((nedpool *) 0, no, size); }
-NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedrealloc(void *mem, size_t size) THROWSPEC { return nedprealloc((nedpool *) 0, mem, size); }
-NEDMALLOCNOALIASATTR void nedfree(void *mem) THROWSPEC { nedpfree((nedpool *) 0, mem); }
-NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedmemalign(size_t alignment, size_t bytes) THROWSPEC { return nedpmemalign((nedpool *) 0, alignment, bytes); }
-NEDMALLOCNOALIASATTR struct nedmallinfo nedmallinfo(void) THROWSPEC { return nedpmallinfo((nedpool *) 0); }
-NEDMALLOCNOALIASATTR int nedmallopt(int parno, int value) THROWSPEC { return nedpmallopt((nedpool *) 0, parno, value); }
-NEDMALLOCNOALIASATTR int nedmalloc_trim(size_t pad) THROWSPEC { return nedpmalloc_trim((nedpool *) 0, pad); }
-void nedmalloc_stats() THROWSPEC { nedpmalloc_stats((nedpool *) 0); }
-NEDMALLOCNOALIASATTR size_t nedmalloc_footprint() THROWSPEC { return nedpmalloc_footprint((nedpool *) 0); }
-NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void **nedindependent_calloc(size_t elemsno, size_t elemsize, void **chunks) THROWSPEC { return nedpindependent_calloc((nedpool *) 0, elemsno, elemsize, chunks); }
-NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void **nedindependent_comalloc(size_t elems, size_t *sizes, void **chunks) THROWSPEC { return nedpindependent_comalloc((nedpool *) 0, elems, sizes, chunks); }
-
-struct threadcacheblk_t;
-typedef struct threadcacheblk_t threadcacheblk;
-struct threadcacheblk_t
-{ /* Keep less than 16 bytes on 32 bit systems and 32 bytes on 64 bit systems */
-#ifdef FULLSANITYCHECKS
- unsigned int magic;
-#endif
- unsigned int lastUsed, size;
- threadcacheblk *next, *prev;
-};
-typedef struct threadcache_t
-{
-#ifdef FULLSANITYCHECKS
- unsigned int magic1;
-#endif
- int mymspace; /* Last mspace entry this thread used */
- long threadid;
- unsigned int mallocs, frees, successes;
- size_t freeInCache; /* How much free space is stored in this cache */
- threadcacheblk *bins[(THREADCACHEMAXBINS+1)*2];
-#ifdef FULLSANITYCHECKS
- unsigned int magic2;
-#endif
-} threadcache;
-struct nedpool_t
-{
- MLOCK_T mutex;
- void *uservalue;
- int threads; /* Max entries in m to use */
- threadcache *caches[THREADCACHEMAXCACHES];
- TLSVAR mycache; /* Thread cache for this thread. 0 for unset, negative for use mspace-1 directly, otherwise is cache-1 */
- mstate m[MAXTHREADSINPOOL+1]; /* mspace entries for this pool */
-};
-static nedpool syspool;
-
-static FORCEINLINE NEDMALLOCNOALIASATTR unsigned int size2binidx(size_t _size) THROWSPEC
-{ /* 8=1000 16=10000 20=10100 24=11000 32=100000 48=110000 4096=1000000000000 */
- unsigned int topbit, size=(unsigned int)(_size>>4);
- /* 16=1 20=1 24=1 32=10 48=11 64=100 96=110 128=1000 4096=100000000 */
-
-#if defined(__GNUC__)
- topbit = sizeof(size)*__CHAR_BIT__ - 1 - __builtin_clz(size);
-#elif defined(_MSC_VER) && _MSC_VER>=1300
- {
- unsigned long bsrTopBit;
-
- _BitScanReverse(&bsrTopBit, size);
-
- topbit = bsrTopBit;
- }
-#else
-#if 0
- union {
- unsigned asInt[2];
- double asDouble;
- };
- int n;
-
- asDouble = (double)size + 0.5;
- topbit = (asInt[!FOX_BIGENDIAN] >> 20) - 1023;
-#else
- {
- unsigned int x=size;
- x = x | (x >> 1);
- x = x | (x >> 2);
- x = x | (x >> 4);
- x = x | (x >> 8);
- x = x | (x >>16);
- x = ~x;
- x = x - ((x >> 1) & 0x55555555);
- x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
- x = (x + (x >> 4)) & 0x0F0F0F0F;
- x = x + (x << 8);
- x = x + (x << 16);
- topbit=31 - (x >> 24);
- }
-#endif
-#endif
- return topbit;
-}
-
-
-#ifdef FULLSANITYCHECKS
-static void tcsanitycheck(threadcacheblk **ptr) THROWSPEC
-{
- assert((ptr[0] && ptr[1]) || (!ptr[0] && !ptr[1]));
- if(ptr[0] && ptr[1])
- {
- assert(nedblksize(ptr[0])>=sizeof(threadcacheblk));
- assert(nedblksize(ptr[1])>=sizeof(threadcacheblk));
- assert(*(unsigned int *) "NEDN"==ptr[0]->magic);
- assert(*(unsigned int *) "NEDN"==ptr[1]->magic);
- assert(!ptr[0]->prev);
- assert(!ptr[1]->next);
- if(ptr[0]==ptr[1])
- {
- assert(!ptr[0]->next);
- assert(!ptr[1]->prev);
- }
- }
-}
-static void tcfullsanitycheck(threadcache *tc) THROWSPEC
-{
- threadcacheblk **tcbptr=tc->bins;
- int n;
- for(n=0; n<=THREADCACHEMAXBINS; n++, tcbptr+=2)
- {
- threadcacheblk *b, *ob=0;
- tcsanitycheck(tcbptr);
- for(b=tcbptr[0]; b; ob=b, b=b->next)
- {
- assert(*(unsigned int *) "NEDN"==b->magic);
- assert(!ob || ob->next==b);
- assert(!ob || b->prev==ob);
- }
- }
-}
-#endif
-
-static NOINLINE void RemoveCacheEntries(nedpool *RESTRICT p, threadcache *RESTRICT tc, unsigned int age) THROWSPEC
-{
-#ifdef FULLSANITYCHECKS
- tcfullsanitycheck(tc);
-#endif
- if(tc->freeInCache)
- {
- threadcacheblk **tcbptr=tc->bins;
- int n;
- for(n=0; n<=THREADCACHEMAXBINS; n++, tcbptr+=2)
- {
- threadcacheblk **tcb=tcbptr+1; /* come from oldest end of list */
- /*tcsanitycheck(tcbptr);*/
- for(; *tcb && tc->frees-(*tcb)->lastUsed>=age; )
- {
- threadcacheblk *f=*tcb;
- size_t blksize=f->size; /*nedblksize(f);*/
- assert(blksize<=nedblksize(0, f));
- assert(blksize);
-#ifdef FULLSANITYCHECKS
- assert(*(unsigned int *) "NEDN"==(*tcb)->magic);
-#endif
- *tcb=(*tcb)->prev;
- if(*tcb)
- (*tcb)->next=0;
- else
- *tcbptr=0;
- tc->freeInCache-=blksize;
- assert((long) tc->freeInCache>=0);
- CallFree(0, f, 0);
- /*tcsanitycheck(tcbptr);*/
- }
- }
- }
-#ifdef FULLSANITYCHECKS
- tcfullsanitycheck(tc);
-#endif
-}
-static void DestroyCaches(nedpool *RESTRICT p) THROWSPEC
-{
- if(p->caches)
- {
- threadcache *tc;
- int n;
- for(n=0; n<THREADCACHEMAXCACHES; n++)
- {
- if((tc=p->caches[n]))
- {
- tc->frees++;
- RemoveCacheEntries(p, tc, 0);
- assert(!tc->freeInCache);
- tc->mymspace=-1;
- tc->threadid=0;
- CallFree(0, tc, 0);
- p->caches[n]=0;
- }
- }
- }
-}
-
-static NOINLINE threadcache *AllocCache(nedpool *RESTRICT p) THROWSPEC
-{
- threadcache *tc=0;
- int n, end;
- ACQUIRE_LOCK(&p->mutex);
- for(n=0; n<THREADCACHEMAXCACHES && p->caches[n]; n++);
- if(THREADCACHEMAXCACHES==n)
- { /* List exhausted, so disable for this thread */
- RELEASE_LOCK(&p->mutex);
- return 0;
- }
- tc=p->caches[n]=(threadcache *) CallCalloc(p->m[0], sizeof(threadcache), 0);
- if(!tc)
- {
- RELEASE_LOCK(&p->mutex);
- return 0;
- }
-#ifdef FULLSANITYCHECKS
- tc->magic1=*(unsigned int *)"NEDMALC1";
- tc->magic2=*(unsigned int *)"NEDMALC2";
-#endif
- tc->threadid=(long)(size_t)CURRENT_THREAD;
- for(end=0; p->m[end]; end++);
- tc->mymspace=abs(tc->threadid) % end;
- RELEASE_LOCK(&p->mutex);
- if(TLSSET(p->mycache, (void *)(size_t)(n+1))) abort();
- return tc;
-}
-
-static void *threadcache_malloc(nedpool *RESTRICT p, threadcache *RESTRICT tc, size_t *RESTRICT _size) THROWSPEC
-{
- void *RESTRICT ret=0;
- size_t size=*_size, blksize=0;
- unsigned int bestsize;
- unsigned int idx=size2binidx(size);
- threadcacheblk *RESTRICT blk, **RESTRICT binsptr;
-#ifdef FULLSANITYCHECKS
- tcfullsanitycheck(tc);
-#endif
- /* Calculate best fit bin size */
- bestsize=1<<(idx+4);
-#if 0
- /* Finer grained bin fit */
- idx<<=1;
- if(size>bestsize)
- {
- idx++;
- bestsize+=bestsize>>1;
- }
- if(size>bestsize)
- {
- idx++;
- bestsize=1<<(4+(idx>>1));
- }
-#else
- if(size>bestsize)
- {
- idx++;
- bestsize<<=1;
- }
-#endif
- assert(bestsize>=size);
- if(size<bestsize) size=bestsize;
- assert(size<=THREADCACHEMAX);
- assert(idx<=THREADCACHEMAXBINS);
- binsptr=&tc->bins[idx*2];
- /* Try to match close, but move up a bin if necessary */
- blk=*binsptr;
- if(!blk || blk->size<size)
- { /* Bump it up a bin */
- if(idx<THREADCACHEMAXBINS)
- {
- idx++;
- binsptr+=2;
- blk=*binsptr;
- }
- }
- if(blk)
- {
- blksize=blk->size; /*nedblksize(blk);*/
- assert(nedblksize(0, blk)>=blksize);
- assert(blksize>=size);
- if(blk->next)
- blk->next->prev=0;
- *binsptr=blk->next;
- if(!*binsptr)
- binsptr[1]=0;
-#ifdef FULLSANITYCHECKS
- blk->magic=0;
-#endif
- assert(binsptr[0]!=blk && binsptr[1]!=blk);
- assert(nedblksize(0, blk)>=sizeof(threadcacheblk) && nedblksize(0, blk)<=THREADCACHEMAX+CHUNK_OVERHEAD);
- /*printf("malloc: %p, %p, %p, %lu\n", p, tc, blk, (long) _size);*/
- ret=(void *) blk;
- }
- ++tc->mallocs;
- if(ret)
- {
- assert(blksize>=size);
- ++tc->successes;
- tc->freeInCache-=blksize;
- assert((long) tc->freeInCache>=0);
- }
-#if defined(DEBUG) && 0
- if(!(tc->mallocs & 0xfff))
- {
- printf("*** threadcache=%u, mallocs=%u (%f), free=%u (%f), freeInCache=%u\n", (unsigned int) tc->threadid, tc->mallocs,
- (float) tc->successes/tc->mallocs, tc->frees, (float) tc->successes/tc->frees, (unsigned int) tc->freeInCache);
- }
-#endif
-#ifdef FULLSANITYCHECKS
- tcfullsanitycheck(tc);
-#endif
- *_size=size;
- return ret;
-}
-static NOINLINE void ReleaseFreeInCache(nedpool *RESTRICT p, threadcache *RESTRICT tc, int mymspace) THROWSPEC
-{
- unsigned int age=THREADCACHEMAXFREESPACE/8192;
- /*ACQUIRE_LOCK(&p->m[mymspace]->mutex);*/
- while(age && tc->freeInCache>=THREADCACHEMAXFREESPACE)
- {
- RemoveCacheEntries(p, tc, age);
- /*printf("*** Removing cache entries older than %u (%u)\n", age, (unsigned int) tc->freeInCache);*/
- age>>=1;
- }
- /*RELEASE_LOCK(&p->m[mymspace]->mutex);*/
-}
-static void threadcache_free(nedpool *RESTRICT p, threadcache *RESTRICT tc, int mymspace, void *RESTRICT mem, size_t size) THROWSPEC
-{
- unsigned int bestsize;
- unsigned int idx=size2binidx(size);
- threadcacheblk **RESTRICT binsptr, *RESTRICT tck=(threadcacheblk *) mem;
- assert(size>=sizeof(threadcacheblk) && size<=THREADCACHEMAX+CHUNK_OVERHEAD);
-#ifdef DEBUG
- /* Make sure this is a valid memory block */
- assert(nedblksize(0, mem));
-#endif
-#ifdef FULLSANITYCHECKS
- tcfullsanitycheck(tc);
-#endif
- /* Calculate best fit bin size */
- bestsize=1<<(idx+4);
-#if 0
- /* Finer grained bin fit */
- idx<<=1;
- if(size>bestsize)
- {
- unsigned int biggerbestsize=bestsize+bestsize<<1;
- if(size>=biggerbestsize)
- {
- idx++;
- bestsize=biggerbestsize;
- }
- }
-#endif
- if(bestsize!=size) /* dlmalloc can round up, so we round down to preserve indexing */
- size=bestsize;
- binsptr=&tc->bins[idx*2];
- assert(idx<=THREADCACHEMAXBINS);
- if(tck==*binsptr)
- {
- fprintf(stderr, "nedmalloc: Attempt to free already freed memory block %p - aborting!\n", tck);
- abort();
- }
-#ifdef FULLSANITYCHECKS
- tck->magic=*(unsigned int *) "NEDN";
-#endif
- tck->lastUsed=++tc->frees;
- tck->size=(unsigned int) size;
- tck->next=*binsptr;
- tck->prev=0;
- if(tck->next)
- tck->next->prev=tck;
- else
- binsptr[1]=tck;
- assert(!*binsptr || (*binsptr)->size==tck->size);
- *binsptr=tck;
- assert(tck==tc->bins[idx*2]);
- assert(tc->bins[idx*2+1]==tck || binsptr[0]->next->prev==tck);
- /*printf("free: %p, %p, %p, %lu\n", p, tc, mem, (long) size);*/
- tc->freeInCache+=size;
-#ifdef FULLSANITYCHECKS
- tcfullsanitycheck(tc);
-#endif
-#if 1
- if(tc->freeInCache>=THREADCACHEMAXFREESPACE)
- ReleaseFreeInCache(p, tc, mymspace);
-#endif
-}
-
-
-
-
-static NOINLINE int InitPool(nedpool *RESTRICT p, size_t capacity, int threads) THROWSPEC
-{ /* threads is -1 for system pool */
- ensure_initialization();
- ACQUIRE_MALLOC_GLOBAL_LOCK();
- if(p->threads) goto done;
- if(INITIAL_LOCK(&p->mutex)) goto err;
- if(TLSALLOC(&p->mycache)) goto err;
-#if USE_ALLOCATOR==0
- p->m[0]=(mstate) mspacecounter++;
-#elif USE_ALLOCATOR==1
- if(!(p->m[0]=(mstate) create_mspace(capacity, 1))) goto err;
- p->m[0]->extp=p;
-#endif
- p->threads=(threads<1 || threads>MAXTHREADSINPOOL) ? MAXTHREADSINPOOL : threads;
-done:
- RELEASE_MALLOC_GLOBAL_LOCK();
- return 1;
-err:
- if(threads<0)
- abort(); /* If you can't allocate for system pool, we're screwed */
- DestroyCaches(p);
- if(p->m[0])
- {
-#if USE_ALLOCATOR==1
- destroy_mspace(p->m[0]);
-#endif
- p->m[0]=0;
- }
- if(p->mycache)
- {
- if(TLSFREE(p->mycache)) abort();
- p->mycache=0;
- }
- RELEASE_MALLOC_GLOBAL_LOCK();
- return 0;
-}
-static NOINLINE mstate FindMSpace(nedpool *RESTRICT p, threadcache *RESTRICT tc, int *RESTRICT lastUsed, size_t size) THROWSPEC
-{ /* Gets called when thread's last used mspace is in use. The strategy
- is to run through the list of all available mspaces looking for an
- unlocked one and if we fail, we create a new one so long as we don't
- exceed p->threads */
- int n, end;
- for(n=end=*lastUsed+1; p->m[n]; end=++n)
- {
- if(TRY_LOCK(&p->m[n]->mutex)) goto found;
- }
- for(n=0; n<*lastUsed && p->m[n]; n++)
- {
- if(TRY_LOCK(&p->m[n]->mutex)) goto found;
- }
- if(end<p->threads)
- {
- mstate temp;
-#if USE_ALLOCATOR==0
- temp=(mstate) mspacecounter++;
-#elif USE_ALLOCATOR==1
- if(!(temp=(mstate) create_mspace(size, 1)))
- goto badexit;
-#endif
- /* Now we're ready to modify the lists, we lock */
- ACQUIRE_LOCK(&p->mutex);
- while(p->m[end] && end<p->threads)
- end++;
- if(end>=p->threads)
- { /* Drat, must destroy it now */
- RELEASE_LOCK(&p->mutex);
-#if USE_ALLOCATOR==1
- destroy_mspace((mstate) temp);
-#endif
- goto badexit;
- }
- /* We really want to make sure this goes into memory now but we
- have to be careful of breaking aliasing rules, so write it twice */
- *((volatile struct malloc_state **) &p->m[end])=p->m[end]=temp;
- ACQUIRE_LOCK(&p->m[end]->mutex);
- /*printf("Created mspace idx %d\n", end);*/
- RELEASE_LOCK(&p->mutex);
- n=end;
- goto found;
- }
- /* Let it lock on the last one it used */
-badexit:
- ACQUIRE_LOCK(&p->m[*lastUsed]->mutex);
- return p->m[*lastUsed];
-found:
- *lastUsed=n;
- if(tc)
- tc->mymspace=n;
- else
- {
- if(TLSSET(p->mycache, (void *)(size_t)(-(n+1)))) abort();
- }
- return p->m[n];
-}
-
-typedef struct PoolList_t
-{
- size_t size; /* Size of list */
- size_t length; /* Actual entries in list */
-#ifdef DEBUG
- nedpool *list[1]; /* Force testing of list expansion */
-#else
- nedpool *list[16];
-#endif
-} PoolList;
-static MLOCK_T poollistlock;
-static PoolList *poollist;
-NEDMALLOCPTRATTR nedpool *nedcreatepool(size_t capacity, int threads) THROWSPEC
-{
- nedpool *ret=0;
- if(!poollist)
- {
- PoolList *newpoollist=0;
- if(!(newpoollist=(PoolList *) nedpcalloc(0, 1, sizeof(PoolList)+sizeof(nedpool *)))) return 0;
- INITIAL_LOCK(&poollistlock);
- ACQUIRE_LOCK(&poollistlock);
- poollist=newpoollist;
- poollist->size=sizeof(poollist->list)/sizeof(nedpool *);
- }
- else
- ACQUIRE_LOCK(&poollistlock);
- if(poollist->length==poollist->size)
- {
- PoolList *newpoollist=0;
- size_t newsize=0;
- newsize=sizeof(PoolList)+(poollist->size+1)*sizeof(nedpool *);
- if(!(newpoollist=(PoolList *) nedprealloc(0, poollist, newsize))) goto badexit;
- poollist=newpoollist;
- memset(&poollist->list[poollist->size], 0, newsize-((size_t)&poollist->list[poollist->size]-(size_t)&poollist->list[0]));
- poollist->size=((newsize-((char *)&poollist->list[0]-(char *)poollist))/sizeof(nedpool *))-1;
- assert(poollist->size>poollist->length);
- }
- if(!(ret=(nedpool *) nedpcalloc(0, 1, sizeof(nedpool)))) goto badexit;
- if(!InitPool(ret, capacity, threads))
- {
- nedpfree(0, ret);
- goto badexit;
- }
- poollist->list[poollist->length++]=ret;
-badexit:
- RELEASE_LOCK(&poollistlock);
- return ret;
-}
-void neddestroypool(nedpool *p) THROWSPEC
-{
- unsigned int n;
- ACQUIRE_LOCK(&p->mutex);
- DestroyCaches(p);
- for(n=0; p->m[n]; n++)
- {
-#if USE_ALLOCATOR==1
- destroy_mspace(p->m[n]);
-#endif
- p->m[n]=0;
- }
- RELEASE_LOCK(&p->mutex);
- if(TLSFREE(p->mycache)) abort();
- nedpfree(0, p);
- ACQUIRE_LOCK(&poollistlock);
- assert(poollist);
- for(n=0; n<poollist->length && poollist->list[n]!=p; n++);
- assert(n!=poollist->length);
- memmove(&poollist->list[n], &poollist->list[n+1], (size_t)&poollist->list[poollist->length]-(size_t)&poollist->list[n]);
- if(!--poollist->length)
- {
- assert(!poollist->list[0]);
- nedpfree(0, poollist);
- poollist=0;
- }
- RELEASE_LOCK(&poollistlock);
-}
-void neddestroysyspool() THROWSPEC
-{
- nedpool *p=&syspool;
- int n;
- ACQUIRE_LOCK(&p->mutex);
- DestroyCaches(p);
- for(n=0; p->m[n]; n++)
- {
-#if USE_ALLOCATOR==1
- destroy_mspace(p->m[n]);
-#endif
- p->m[n]=0;
- }
- /* Render syspool unusable */
- for(n=0; n<THREADCACHEMAXCACHES; n++)
- p->caches[n]=(threadcache *)(size_t)(sizeof(size_t)>4 ? 0xdeadbeefdeadbeefULL : 0xdeadbeefUL);
- for(n=0; n<MAXTHREADSINPOOL+1; n++)
- p->m[n]=(mstate)(size_t)(sizeof(size_t)>4 ? 0xdeadbeefdeadbeefULL : 0xdeadbeefUL);
- if(TLSFREE(p->mycache)) abort();
- RELEASE_LOCK(&p->mutex);
-}
-nedpool **nedpoollist() THROWSPEC
-{
- nedpool **ret=0;
- if(poollist)
- {
- ACQUIRE_LOCK(&poollistlock);
- if(!(ret=(nedpool **) nedmalloc((poollist->length+1)*sizeof(nedpool *)))) goto badexit;
- memcpy(ret, poollist->list, (poollist->length+1)*sizeof(nedpool *));
-badexit:
- RELEASE_LOCK(&poollistlock);
- }
- return ret;
-}
-
-void nedpsetvalue(nedpool *p, void *v) THROWSPEC
-{
- if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); }
- p->uservalue=v;
-}
-void *nedgetvalue(nedpool **p, void *mem) THROWSPEC
-{
- nedpool *np=0;
- mstate fm=nedblkmstate(mem);
- if(!fm || !fm->extp) return 0;
- np=(nedpool *) fm->extp;
- if(p) *p=np;
- return np->uservalue;
-}
-
-void nedtrimthreadcache(nedpool *p, int disable) THROWSPEC
-{
- int mycache;
- if(!p)
- {
- p=&syspool;
- if(!syspool.threads) InitPool(&syspool, 0, -1);
- }
- mycache=(int)(size_t) TLSGET(p->mycache);
- if(!mycache)
- { /* Set to mspace 0 */
- if(disable && TLSSET(p->mycache, (void *)(size_t)-1)) abort();
- }
- else if(mycache>0)
- { /* Set to last used mspace */
- threadcache *tc=p->caches[mycache-1];
-#if defined(DEBUG)
- printf("Threadcache utilisation: %lf%% in cache with %lf%% lost to other threads\n",
- 100.0*tc->successes/tc->mallocs, 100.0*((double) tc->mallocs-tc->frees)/tc->mallocs);
-#endif
- if(disable && TLSSET(p->mycache, (void *)(size_t)(-tc->mymspace))) abort();
- tc->frees++;
- RemoveCacheEntries(p, tc, 0);
- assert(!tc->freeInCache);
- if(disable)
- {
- tc->mymspace=-1;
- tc->threadid=0;
- CallFree(0, p->caches[mycache-1], 0);
- p->caches[mycache-1]=0;
- }
- }
-}
-void neddisablethreadcache(nedpool *p) THROWSPEC
-{
- nedtrimthreadcache(p, 1);
-}
-
-#define GETMSPACE(m,p,tc,ms,s,action) \
- do \
- { \
- mstate m = GetMSpace((p),(tc),(ms),(s)); \
- action; \
- if(USE_ALLOCATOR==1) { RELEASE_LOCK(&m->mutex); } \
- } while (0)
-
-static FORCEINLINE mstate GetMSpace(nedpool *RESTRICT p, threadcache *RESTRICT tc, int mymspace, size_t size) THROWSPEC
-{ /* Returns a locked and ready for use mspace */
- mstate m=p->m[mymspace];
- assert(m);
-#if USE_ALLOCATOR==1
- if(!TRY_LOCK(&p->m[mymspace]->mutex)) m=FindMSpace(p, tc, &mymspace, size);
- /*assert(IS_LOCKED(&p->m[mymspace]->mutex));*/
-#endif
- return m;
-}
-static NOINLINE void GetThreadCache_cold1(nedpool *RESTRICT *RESTRICT p) THROWSPEC
-{
- *p=&syspool;
- if(!syspool.threads) InitPool(&syspool, 0, -1);
-}
-static NOINLINE void GetThreadCache_cold2(nedpool *RESTRICT *RESTRICT p, threadcache *RESTRICT *RESTRICT tc, int *RESTRICT mymspace, int mycache) THROWSPEC
-{
- if(!mycache)
- { /* Need to allocate a new cache */
- *tc=AllocCache(*p);
- if(!*tc)
- { /* Disable */
- if(TLSSET((*p)->mycache, (void *)(size_t)-1)) abort();
- *mymspace=0;
- }
- else
- *mymspace=(*tc)->mymspace;
- }
- else
- { /* Cache disabled, but we do have an assigned thread pool */
- *tc=0;
- *mymspace=-mycache-1;
- }
-}
-static FORCEINLINE void GetThreadCache(nedpool *RESTRICT *RESTRICT p, threadcache *RESTRICT *RESTRICT tc, int *RESTRICT mymspace, size_t *RESTRICT size) THROWSPEC
-{
- int mycache;
- if(size && *size<sizeof(threadcacheblk)) *size=sizeof(threadcacheblk);
- if(!*p)
- GetThreadCache_cold1(p);
- mycache=(int)(size_t) TLSGET((*p)->mycache);
- if(mycache>0)
- { /* Already have a cache */
- *tc=(*p)->caches[mycache-1];
- *mymspace=(*tc)->mymspace;
- }
- else GetThreadCache_cold2(p, tc, mymspace, mycache);
- assert(*mymspace>=0);
- assert(!(*tc) || (long)(size_t)CURRENT_THREAD==(*tc)->threadid);
-#ifdef FULLSANITYCHECKS
- if(*tc)
- {
- if(*(unsigned int *)"NEDMALC1"!=(*tc)->magic1 || *(unsigned int *)"NEDMALC2"!=(*tc)->magic2)
- {
- abort();
- }
- }
-#endif
-}
-
-NEDMALLOCPTRATTR void * nedpmalloc(nedpool *p, size_t size) THROWSPEC
-{
- void *ret=0;
- threadcache *tc;
- int mymspace;
- GetThreadCache(&p, &tc, &mymspace, &size);
-#if THREADCACHEMAX
- if(tc && size<=THREADCACHEMAX)
- { /* Use the thread cache */
- ret=threadcache_malloc(p, tc, &size);
- }
-#endif
- if(!ret)
- { /* Use this thread's mspace */
- GETMSPACE(m, p, tc, mymspace, size,
- ret=CallMalloc(m, size, 0));
- }
- return ret;
-}
-NEDMALLOCPTRATTR void * nedpcalloc(nedpool *p, size_t no, size_t size) THROWSPEC
-{
- size_t rsize=size*no;
- void *ret=0;
- threadcache *tc;
- int mymspace;
- GetThreadCache(&p, &tc, &mymspace, &rsize);
-#if THREADCACHEMAX
- if(tc && rsize<=THREADCACHEMAX)
- { /* Use the thread cache */
- if((ret=threadcache_malloc(p, tc, &rsize)))
- memset(ret, 0, rsize);
- }
-#endif
- if(!ret)
- { /* Use this thread's mspace */
- GETMSPACE(m, p, tc, mymspace, rsize,
- ret=CallCalloc(m, rsize, 0));
- }
- return ret;
-}
-NEDMALLOCPTRATTR void * nedprealloc(nedpool *p, void *mem, size_t size) THROWSPEC
-{
- void *ret=0;
- threadcache *tc;
- int mymspace, isforeign=1;
- size_t memsize;
- if(!mem) return nedpmalloc(p, size);
- memsize=nedblksize(&isforeign, mem);
- assert(memsize);
- if(!memsize)
- {
- fprintf(stderr, "nedmalloc: nedprealloc() called with a block not created by nedmalloc!\n");
- abort();
- }
- else if(size<=memsize && memsize-size<
-#ifdef DEBUG
- 32
-#else
- 1024
-#endif
- ) /* If realloc size is within 1Kb smaller than existing, noop it */
- return mem;
- GetThreadCache(&p, &tc, &mymspace, &size);
-#if THREADCACHEMAX
- if(tc && size && size<=THREADCACHEMAX)
- { /* Use the thread cache */
- if((ret=threadcache_malloc(p, tc, &size)))
- {
- memcpy(ret, mem, memsize<size ? memsize : size);
- if(memsize>=sizeof(threadcacheblk) && memsize<=(THREADCACHEMAX+CHUNK_OVERHEAD))
- threadcache_free(p, tc, mymspace, mem, memsize);
- else
- CallFree(0, mem, isforeign);
- }
- }
-#endif
- if(!ret)
- { /* Reallocs always happen in the mspace they happened in, so skip
- locking the preferred mspace for this thread */
- ret=CallRealloc(p->m[mymspace], mem, isforeign, memsize, size);
- }
- return ret;
-}
-void nedpfree(nedpool *p, void *mem) THROWSPEC
-{ /* Frees always happen in the mspace they happened in, so skip
- locking the preferred mspace for this thread */
- threadcache *tc;
- int mymspace, isforeign=1;
- size_t memsize;
- if(!mem)
- { /* If you tried this on FreeBSD you'd be sorry! */
-#ifdef DEBUG
- fprintf(stderr, "nedmalloc: WARNING nedpfree() called with zero. This is not portable behaviour!\n");
-#endif
- return;
- }
- memsize=nedblksize(&isforeign, mem);
- assert(memsize);
- if(!memsize)
- {
- fprintf(stderr, "nedmalloc: nedpfree() called with a block not created by nedmalloc!\n");
- abort();
- }
- GetThreadCache(&p, &tc, &mymspace, 0);
-#if THREADCACHEMAX
- if(mem && tc && memsize>=sizeof(threadcacheblk) && memsize<=(THREADCACHEMAX+CHUNK_OVERHEAD))
- threadcache_free(p, tc, mymspace, mem, memsize);
- else
-#endif
- CallFree(0, mem, isforeign);
-}
-NEDMALLOCPTRATTR void * nedpmemalign(nedpool *p, size_t alignment, size_t bytes) THROWSPEC
-{
- void *ret;
- threadcache *tc;
- int mymspace;
- GetThreadCache(&p, &tc, &mymspace, &bytes);
- { /* Use this thread's mspace */
- GETMSPACE(m, p, tc, mymspace, bytes,
- ret=CallMalloc(m, bytes, alignment));
- }
- return ret;
-}
-struct nedmallinfo nedpmallinfo(nedpool *p) THROWSPEC
-{
- int n;
- struct nedmallinfo ret={0};
- if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); }
- for(n=0; p->m[n]; n++)
- {
-#if USE_ALLOCATOR==1 && !NO_MALLINFO
- struct mallinfo t=mspace_mallinfo(p->m[n]);
- ret.arena+=t.arena;
- ret.ordblks+=t.ordblks;
- ret.hblkhd+=t.hblkhd;
- ret.usmblks+=t.usmblks;
- ret.uordblks+=t.uordblks;
- ret.fordblks+=t.fordblks;
- ret.keepcost+=t.keepcost;
-#endif
- }
- return ret;
-}
-int nedpmallopt(nedpool *p, int parno, int value) THROWSPEC
-{
-#if USE_ALLOCATOR==1
- return mspace_mallopt(parno, value);
-#else
- return 0;
-#endif
-}
-NEDMALLOCNOALIASATTR void* nedmalloc_internals(size_t *granularity, size_t *magic) THROWSPEC
-{
-#if USE_ALLOCATOR==1
- if(granularity) *granularity=mparams.granularity;
- if(magic) *magic=mparams.magic;
- return (void *) &syspool;
-#else
- if(granularity) *granularity=0;
- if(magic) *magic=0;
- return 0;
-#endif
-}
-int nedpmalloc_trim(nedpool *p, size_t pad) THROWSPEC
-{
- int n, ret=0;
- if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); }
- for(n=0; p->m[n]; n++)
- {
-#if USE_ALLOCATOR==1
- ret+=mspace_trim(p->m[n], pad);
-#endif
- }
- return ret;
-}
-void nedpmalloc_stats(nedpool *p) THROWSPEC
-{
- int n;
- if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); }
- for(n=0; p->m[n]; n++)
- {
-#if USE_ALLOCATOR==1
- mspace_malloc_stats(p->m[n]);
-#endif
- }
-}
-size_t nedpmalloc_footprint(nedpool *p) THROWSPEC
-{
- size_t ret=0;
- int n;
- if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); }
- for(n=0; p->m[n]; n++)
- {
-#if USE_ALLOCATOR==1
- ret+=mspace_footprint(p->m[n]);
-#endif
- }
- return ret;
-}
-NEDMALLOCPTRATTR void **nedpindependent_calloc(nedpool *p, size_t elemsno, size_t elemsize, void **chunks) THROWSPEC
-{
- void **ret;
- threadcache *tc;
- int mymspace;
- GetThreadCache(&p, &tc, &mymspace, &elemsize);
-#if USE_ALLOCATOR==0
- GETMSPACE(m, p, tc, mymspace, elemsno*elemsize,
- ret=unsupported_operation("independent_calloc"));
-#elif USE_ALLOCATOR==1
- GETMSPACE(m, p, tc, mymspace, elemsno*elemsize,
- ret=mspace_independent_calloc(m, elemsno, elemsize, chunks));
-#endif
- return ret;
-}
-NEDMALLOCPTRATTR void **nedpindependent_comalloc(nedpool *p, size_t elems, size_t *sizes, void **chunks) THROWSPEC
-{
- void **ret;
- threadcache *tc;
- int mymspace;
- size_t i, *adjustedsizes=(size_t *) alloca(elems*sizeof(size_t));
- if(!adjustedsizes) return 0;
- for(i=0; i<elems; i++)
- adjustedsizes[i]=sizes[i]<sizeof(threadcacheblk) ? sizeof(threadcacheblk) : sizes[i];
- GetThreadCache(&p, &tc, &mymspace, 0);
-#if USE_ALLOCATOR==0
- GETMSPACE(m, p, tc, mymspace, 0,
- ret=unsupported_operation("independent_comalloc"));
-#elif USE_ALLOCATOR==1
- GETMSPACE(m, p, tc, mymspace, 0,
- ret=mspace_independent_comalloc(m, elems, adjustedsizes, chunks));
-#endif
- return ret;
-}
-
-#if defined(__cplusplus)
-}
-#endif
-
-#ifdef _MSC_VER
-#pragma warning(pop)
-#endif
-
-#endif
+#ifdef NEDMALLOC_ENABLED +/* Alternative malloc implementation for multiple threads without +lock contention based on dlmalloc. (C) 2005-2009 Niall Douglas + +Boost Software License - Version 1.0 - August 17th, 2003 + +Permission is hereby granted, free of charge, to any person or organization +obtaining a copy of the software and accompanying documentation covered by +this license (the "Software") to use, reproduce, display, distribute, +execute, and transmit the Software, and to prepare derivative works of the +Software, and to permit third-parties to whom the Software is furnished to +do so, all subject to the following: + +The copyright notices in the Software and this entire statement, including +the above license grant, this restriction and the following disclaimer, +must be included in all copies of the Software, in whole or in part, and +all derivative works of the Software, unless such copies or derivative +works are solely in the form of machine-executable object code generated by +a source language processor. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. 
+*/ + +#ifdef _MSC_VER +/* Enable full aliasing on MSVC */ +/*#pragma optimize("a", on)*/ +#pragma warning(push) +#pragma warning(disable:4100) /* unreferenced formal parameter */ +#pragma warning(disable:4127) /* conditional expression is constant */ +#pragma warning(disable:4706) /* assignment within conditional expression */ +#endif + +/*#define ENABLE_TOLERANT_NEDMALLOC 1*/ +/*#define ENABLE_FAST_HEAP_DETECTION 1*/ +/*#define NEDMALLOC_DEBUG 1*/ + +/*#define FULLSANITYCHECKS*/ +/* If link time code generation is on, don't force or prevent inlining */ +#if defined(_MSC_VER) && defined(NEDMALLOC_DLL_EXPORTS) +#define FORCEINLINE +#define NOINLINE +#endif + + +#include "nedmalloc.h" +#ifdef WIN32 + #include <malloc.h> + #include <stddef.h> +#endif +#if USE_ALLOCATOR==1 + #define MSPACES 1 + #define ONLY_MSPACES 1 +#endif +#define USE_DL_PREFIX 1 +#ifndef USE_LOCKS + #define USE_LOCKS 1 +#endif +#define FOOTERS 1 /* Need to enable footers so frees lock the right mspace */ +#ifndef NEDMALLOC_DEBUG + #if defined(DEBUG) || defined(_DEBUG) + #define NEDMALLOC_DEBUG 1 + #else + #define NEDMALLOC_DEBUG 0 + #endif +#endif +/* We need to consistently define DEBUG=0|1, _DEBUG and NDEBUG for dlmalloc */ +#undef DEBUG +#undef _DEBUG +#if NEDMALLOC_DEBUG + #define _DEBUG + #define DEBUG 1 +#else + #define DEBUG 0 +#endif +#ifdef NDEBUG /* Disable assert checking on release builds */ + #undef DEBUG + #undef _DEBUG +#endif +/* The default of 64Kb means we spend too much time kernel-side */ +#ifndef DEFAULT_GRANULARITY +#define DEFAULT_GRANULARITY (1*1024*1024) +#if DEBUG +#define DEFAULT_GRANULARITY_ALIGNED +#endif +#endif +/*#define USE_SPIN_LOCKS 0*/ + + +#include "malloc.c.h" +#ifdef NDEBUG /* Disable assert checking on release builds */ + #undef DEBUG +#elif !NEDMALLOC_DEBUG + #ifdef __GNUC__ + #warning DEBUG is defined so allocator will run with assert checking! Define NDEBUG to run at full speed. 
+ #elif defined(_MSC_VER) + #pragma message(__FILE__ ": WARNING: DEBUG is defined so allocator will run with assert checking! Define NDEBUG to run at full speed.") + #endif +#endif + +/* The maximum concurrent threads in a pool possible */ +#ifndef MAXTHREADSINPOOL +#define MAXTHREADSINPOOL 16 +#endif +/* The maximum number of threadcaches which can be allocated */ +#ifndef THREADCACHEMAXCACHES +#define THREADCACHEMAXCACHES 256 +#endif +/* The maximum size to be allocated from the thread cache */ +#ifndef THREADCACHEMAX +#define THREADCACHEMAX 8192 +#endif +#if 0 +/* The number of cache entries for finer grained bins. This is (topbitpos(THREADCACHEMAX)-4)*2 */ +#define THREADCACHEMAXBINS ((13-4)*2) +#else +/* The number of cache entries. This is (topbitpos(THREADCACHEMAX)-4) */ +#define THREADCACHEMAXBINS (13-4) +#endif +/* Point at which the free space in a thread cache is garbage collected */ +#ifndef THREADCACHEMAXFREESPACE +#define THREADCACHEMAXFREESPACE (512*1024) +#endif + + +#ifdef WIN32 + #define TLSVAR DWORD + #define TLSALLOC(k) (*(k)=TlsAlloc(), TLS_OUT_OF_INDEXES==*(k)) + #define TLSFREE(k) (!TlsFree(k)) + #define TLSGET(k) TlsGetValue(k) + #define TLSSET(k, a) (!TlsSetValue(k, a)) + #ifdef DEBUG +static LPVOID ChkedTlsGetValue(DWORD idx) +{ + LPVOID ret=TlsGetValue(idx); + assert(S_OK==GetLastError()); + return ret; +} + #undef TLSGET + #define TLSGET(k) ChkedTlsGetValue(k) + #endif +#else + #define TLSVAR pthread_key_t + #define TLSALLOC(k) pthread_key_create(k, 0) + #define TLSFREE(k) pthread_key_delete(k) + #define TLSGET(k) pthread_getspecific(k) + #define TLSSET(k, a) pthread_setspecific(k, a) +#endif + +#if defined(__cplusplus) +#if !defined(NO_NED_NAMESPACE) +namespace nedalloc { +#else +extern "C" { +#endif +#endif + +#if USE_ALLOCATOR==0 +static void *unsupported_operation(const char *opname) THROWSPEC +{ + fprintf(stderr, "nedmalloc: The operation %s is not supported under this build configuration\n", opname); + abort(); + return 0; +} 
+static size_t mspacecounter=(size_t) 0xdeadbeef; +#endif +#ifndef ENABLE_FAST_HEAP_DETECTION +static void *RESTRICT leastusedaddress; +static size_t largestusedblock; +#endif + +static FORCEINLINE void *CallMalloc(void *RESTRICT mspace, size_t size, size_t alignment) THROWSPEC +{ + void *RESTRICT ret=0; + size_t _alignment=alignment; +#if USE_MAGIC_HEADERS + size_t *_ret=0; + size+=alignment+3*sizeof(size_t); + _alignment=0; +#endif +#if USE_ALLOCATOR==0 + ret=_alignment ? +#ifdef _MSC_VER + /* This is the MSVCRT equivalent */ + _aligned_malloc(size, _alignment) +#elif defined(__linux__) || defined(__FreeBSD__) || defined(__APPLE__) + /* This is the glibc/ptmalloc2/dlmalloc/BSD libc equivalent. */ + memalign(_alignment, size) +#else +#error Cannot aligned allocate with the memory allocator of an unknown system! +#endif + : malloc(size); +#elif USE_ALLOCATOR==1 + ret=_alignment ? mspace_memalign((mstate) mspace, _alignment, size) : mspace_malloc((mstate) mspace, size); +#ifndef ENABLE_FAST_HEAP_DETECTION + if(ret) + { + size_t truesize=chunksize(mem2chunk(ret)); + if(!leastusedaddress || (void *)((mstate) mspace)->least_addr<leastusedaddress) leastusedaddress=(void *)((mstate) mspace)->least_addr; + if(!largestusedblock || truesize>largestusedblock) largestusedblock=(truesize+mparams.page_size) & ~(mparams.page_size-1); + } +#endif +#endif + if(!ret) return 0; +#if USE_MAGIC_HEADERS + _ret=(size_t *) ret; + ret=(void *)(_ret+3); + if(alignment) ret=(void *)(((size_t) ret+alignment-1)&~(alignment-1)); + for(; _ret<(size_t *)ret-2; _ret++) *_ret=*(size_t *)"NEDMALOC"; + _ret[0]=(size_t) mspace; + _ret[1]=size-3*sizeof(size_t); +#endif + return ret; +} + +static FORCEINLINE void *CallCalloc(void *RESTRICT mspace, size_t size, size_t alignment) THROWSPEC +{ + void *RESTRICT ret=0; +#if USE_MAGIC_HEADERS + size_t *_ret=0; + size+=alignment+3*sizeof(size_t); +#endif +#if USE_ALLOCATOR==0 + ret=calloc(1, size); +#elif USE_ALLOCATOR==1 + ret=mspace_calloc((mstate) mspace, 
1, size); +#ifndef ENABLE_FAST_HEAP_DETECTION + if(ret) + { + size_t truesize=chunksize(mem2chunk(ret)); + if(!leastusedaddress || (void *)((mstate) mspace)->least_addr<leastusedaddress) leastusedaddress=(void *)((mstate) mspace)->least_addr; + if(!largestusedblock || truesize>largestusedblock) largestusedblock=(truesize+mparams.page_size) & ~(mparams.page_size-1); + } +#endif +#endif + if(!ret) return 0; +#if USE_MAGIC_HEADERS + _ret=(size_t *) ret; + ret=(void *)(_ret+3); + if(alignment) ret=(void *)(((size_t) ret+alignment-1)&~(alignment-1)); + for(; _ret<(size_t *)ret-2; _ret++) *_ret=*(size_t *) "NEDMALOC"; + _ret[0]=(size_t) mspace; + _ret[1]=size-3*sizeof(size_t); +#endif + return ret; +} + +static FORCEINLINE void *CallRealloc(void *RESTRICT mspace, void *RESTRICT mem, int isforeign, size_t oldsize, size_t newsize) THROWSPEC +{ + void *RESTRICT ret=0; +#if USE_MAGIC_HEADERS + mstate oldmspace=0; + size_t *_ret=0, *_mem=(size_t *) mem-3; +#endif + if(isforeign) + { /* Transfer */ +#if USE_MAGIC_HEADERS + assert(_mem[0]!=*(size_t *) "NEDMALOC"); +#endif + if((ret=CallMalloc(mspace, newsize, 0))) + { +#if defined(DEBUG) + printf("*** nedmalloc frees system allocated block %p\n", mem); +#endif + memcpy(ret, mem, oldsize<newsize ? 
oldsize : newsize); + free(mem); + } + return ret; + } +#if USE_MAGIC_HEADERS + assert(_mem[0]==*(size_t *) "NEDMALOC"); + newsize+=3*sizeof(size_t); + oldmspace=(mstate) _mem[1]; + assert(oldsize>=_mem[2]); + for(; *_mem==*(size_t *) "NEDMALOC"; *_mem--=*(size_t *) "nedmaloc"); + mem=(void *)(++_mem); +#endif +#if USE_ALLOCATOR==0 + ret=realloc(mem, newsize); +#elif USE_ALLOCATOR==1 + ret=mspace_realloc((mstate) mspace, mem, newsize); +#ifndef ENABLE_FAST_HEAP_DETECTION + if(ret) + { + size_t truesize=chunksize(mem2chunk(ret)); + if(!largestusedblock || truesize>largestusedblock) largestusedblock=(truesize+mparams.page_size) & ~(mparams.page_size-1); + } +#endif +#endif + if(!ret) + { /* Put it back the way it was */ +#if USE_MAGIC_HEADERS + for(; *_mem==0; *_mem++=*(size_t *) "NEDMALOC"); +#endif + return 0; + } +#if USE_MAGIC_HEADERS + _ret=(size_t *) ret; + ret=(void *)(_ret+3); + for(; _ret<(size_t *)ret-2; _ret++) *_ret=*(size_t *) "NEDMALOC"; + _ret[0]=(size_t) mspace; + _ret[1]=newsize-3*sizeof(size_t); +#endif + return ret; +} + +static FORCEINLINE void CallFree(void *RESTRICT mspace, void *RESTRICT mem, int isforeign) THROWSPEC +{ +#if USE_MAGIC_HEADERS + mstate oldmspace=0; + size_t *_mem=(size_t *) mem-3, oldsize=0; +#endif + if(isforeign) + { +#if USE_MAGIC_HEADERS + assert(_mem[0]!=*(size_t *) "NEDMALOC"); +#endif +#if defined(DEBUG) + printf("*** nedmalloc frees system allocated block %p\n", mem); +#endif + free(mem); + return; + } +#if USE_MAGIC_HEADERS + assert(_mem[0]==*(size_t *) "NEDMALOC"); + oldmspace=(mstate) _mem[1]; + oldsize=_mem[2]; + for(; *_mem==*(size_t *) "NEDMALOC"; *_mem--=*(size_t *) "nedmaloc"); + mem=(void *)(++_mem); +#endif +#if USE_ALLOCATOR==0 + free(mem); +#elif USE_ALLOCATOR==1 + mspace_free((mstate) mspace, mem); +#endif +} + +static NEDMALLOCNOALIASATTR mstate nedblkmstate(void *RESTRICT mem) THROWSPEC +{ + if(mem) + { +#if USE_MAGIC_HEADERS + size_t *_mem=(size_t *) mem-3; + if(_mem[0]==*(size_t *) "NEDMALOC") + { + 
return (mstate) _mem[1]; + } + else return 0; +#else +#if USE_ALLOCATOR==0 + /* Fail everything */ + return 0; +#elif USE_ALLOCATOR==1 +#ifdef ENABLE_FAST_HEAP_DETECTION +#ifdef WIN32 + /* On Windows for RELEASE both x86 and x64 the NT heap precedes each block with an eight byte header + which looks like: + normal: 4 bytes of size, 4 bytes of [char < 64, char < 64, char < 64 bit 0 always set, char random ] + mmaped: 4 bytes of size 4 bytes of [zero, zero, 0xb, zero ] + + On Windows for DEBUG both x86 and x64 the preceding four bytes is always 0xfdfdfdfd (no man's land). + */ +#pragma pack(push, 1) + struct _HEAP_ENTRY + { + USHORT Size; + USHORT PreviousSize; + UCHAR Cookie; /* SegmentIndex */ + UCHAR Flags; /* always bit 0 (HEAP_ENTRY_BUSY). bit 1=(HEAP_ENTRY_EXTRA_PRESENT), bit 2=normal block (HEAP_ENTRY_FILL_PATTERN), bit 3=mmap block (HEAP_ENTRY_VIRTUAL_ALLOC). Bit 4 (HEAP_ENTRY_LAST_ENTRY) could be set */ + UCHAR UnusedBytes; + UCHAR SmallTagIndex; /* fastbin index. Always one of 0x02, 0x03, 0x04 < 0x80 */ + } *RESTRICT he=((struct _HEAP_ENTRY *) mem)-1; +#pragma pack(pop) + unsigned int header=((unsigned int *)mem)[-1], mask1=0x8080E100, result1, mask2=0xFFFFFF06, result2; + result1=header & mask1; /* Positive testing for NT heap */ + result2=header & mask2; /* Positive testing for dlmalloc */ + if(result1==0x00000100 && result2!=0x00000102) + { /* This is likely a NT heap block */ + return 0; + } +#endif +#ifdef __linux__ + /* On Linux glibc uses ptmalloc2 (really dlmalloc) just as we do, but prev_foot contains rubbish + when the preceding block is allocated because ptmalloc2 finds the local mstate by rounding the ptr + down to the nearest megabyte. It's like dlmalloc with FOOTERS disabled. 
*/ + mchunkptr p=mem2chunk(mem); + mstate fm=get_mstate_for(p); + /* If it's a ptmalloc2 block, fm is likely to be some crazy value */ + if(!is_aligned(fm)) return 0; + if((size_t)mem-(size_t)fm>=(size_t)1<<(SIZE_T_BITSIZE-1)) return 0; + if(ok_magic(fm)) + return fm; + else + return 0; + if(1) { } +#endif + else + { + mchunkptr p=mem2chunk(mem); + mstate fm=get_mstate_for(p); + assert(ok_magic(fm)); /* If this fails, someone tried to free a block twice */ + if(ok_magic(fm)) + return fm; + } +#else +//#ifdef WIN32 +// __try +//#endif + { + /* We try to return zero here if it isn't one of our own blocks, however + the current block annotation scheme used by dlmalloc makes it impossible + to be absolutely sure of avoiding a segfault. + + mchunkptr->prev_foot = mem-(2*size_t) = mstate ^ mparams.magic for PRECEDING block; + mchunkptr->head = mem-(1*size_t) = 8 multiple size of this block with bottom three bits = FLAG_BITS + FLAG_BITS = bit 0 is CINUSE (currently in use unless is mmap), bit 1 is PINUSE (previous block currently + in use unless mmap), bit 2 is UNUSED and currently is always zero. 
+ */ + register void *RESTRICT leastusedaddress_=leastusedaddress; /* Cache these to avoid register reloading */ + register size_t largestusedblock_=largestusedblock; + if(!is_aligned(mem)) return 0; /* Would fail very rarely as all allocators return aligned blocks */ + if(mem<leastusedaddress_) return 0; /* Simple but effective */ + { + mchunkptr p=mem2chunk(mem); + mstate fm=0; + int ismmapped=is_mmapped(p); + if((!ismmapped && !is_inuse(p)) || (p->head & FLAG4_BIT)) return 0; + /* Reduced uncertainty by 0.5^2 = 25.0% */ + /* size should never exceed largestusedblock */ + if(chunksize(p)>largestusedblock_) return 0; + /* Reduced uncertainty by a minimum of 0.5^3 = 12.5%, maximum 0.5^16 = 0.0015% */ + /* Having sanity checked prev_foot and head, check next block */ + if(!ismmapped && (!next_pinuse(p) || (next_chunk(p)->head & FLAG4_BIT))) return 0; + /* Reduced uncertainty by 0.5^5 = 3.13% or 0.5^18 = 0.00038% */ + #if 0 + /* If previous block is free, check that its next block pointer equals us */ + if(!ismmapped && !pinuse(p)) + if(next_chunk(prev_chunk(p))!=p) return 0; + /* We could start comparing prev_foot's for similarity but it starts getting slow. 
*/ + #endif + fm = get_mstate_for(p); + if(!is_aligned(fm) || (void *)fm<leastusedaddress_) return 0; + if((size_t)mem-(size_t)fm>=(size_t)1<<(SIZE_T_BITSIZE-1)) return 0; + assert(ok_magic(fm)); /* If this fails, someone tried to free a block twice */ + if(ok_magic(fm)) + return fm; + } + } +//#ifdef WIN32 +// __except(1) { } +//#endif +#endif +#endif +#endif + } + return 0; +} +NEDMALLOCNOALIASATTR size_t nedblksize(int *RESTRICT isforeign, void *RESTRICT mem) THROWSPEC +{ + if(mem) + { + if(isforeign) *isforeign=1; +#if USE_MAGIC_HEADERS + { + size_t *_mem=(size_t *) mem-3; + if(_mem[0]==*(size_t *) "NEDMALOC") + { + mstate mspace=(mstate) _mem[1]; + size_t size=_mem[2]; + if(isforeign) *isforeign=0; + return size; + } + } +#elif USE_ALLOCATOR==1 + if(nedblkmstate(mem)) + { + mchunkptr p=mem2chunk(mem); + if(isforeign) *isforeign=0; + return chunksize(p)-overhead_for(p); + } +#ifdef DEBUG + else + { + int a=1; /* Set breakpoints here if needed */ + } +#endif +#endif +#if defined(ENABLE_TOLERANT_NEDMALLOC) || USE_ALLOCATOR==0 +#ifdef _MSC_VER + /* This is the MSVCRT equivalent */ + return _msize(mem); +#elif defined(__linux__) + /* This is the glibc/ptmalloc2/dlmalloc equivalent. */ + return malloc_usable_size(mem); +#elif defined(__FreeBSD__) || defined(__APPLE__) + /* This is the BSD libc equivalent. */ + return malloc_size(mem); +#else +#error Cannot tolerate the memory allocator of an unknown system! 
+#endif +#endif + } + return 0; +} + +NEDMALLOCNOALIASATTR void nedsetvalue(void *v) THROWSPEC { nedpsetvalue((nedpool *) 0, v); } +NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedmalloc(size_t size) THROWSPEC { return nedpmalloc((nedpool *) 0, size); } +NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedcalloc(size_t no, size_t size) THROWSPEC { return nedpcalloc((nedpool *) 0, no, size); } +NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedrealloc(void *mem, size_t size) THROWSPEC { return nedprealloc((nedpool *) 0, mem, size); } +NEDMALLOCNOALIASATTR void nedfree(void *mem) THROWSPEC { nedpfree((nedpool *) 0, mem); } +NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedmemalign(size_t alignment, size_t bytes) THROWSPEC { return nedpmemalign((nedpool *) 0, alignment, bytes); } +NEDMALLOCNOALIASATTR struct nedmallinfo nedmallinfo(void) THROWSPEC { return nedpmallinfo((nedpool *) 0); } +NEDMALLOCNOALIASATTR int nedmallopt(int parno, int value) THROWSPEC { return nedpmallopt((nedpool *) 0, parno, value); } +NEDMALLOCNOALIASATTR int nedmalloc_trim(size_t pad) THROWSPEC { return nedpmalloc_trim((nedpool *) 0, pad); } +void nedmalloc_stats() THROWSPEC { nedpmalloc_stats((nedpool *) 0); } +NEDMALLOCNOALIASATTR size_t nedmalloc_footprint() THROWSPEC { return nedpmalloc_footprint((nedpool *) 0); } +NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void **nedindependent_calloc(size_t elemsno, size_t elemsize, void **chunks) THROWSPEC { return nedpindependent_calloc((nedpool *) 0, elemsno, elemsize, chunks); } +NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void **nedindependent_comalloc(size_t elems, size_t *sizes, void **chunks) THROWSPEC { return nedpindependent_comalloc((nedpool *) 0, elems, sizes, chunks); } + +struct threadcacheblk_t; +typedef struct threadcacheblk_t threadcacheblk; +struct threadcacheblk_t +{ /* Keep less than 16 bytes on 32 bit systems and 32 bytes on 64 bit systems */ +#ifdef FULLSANITYCHECKS + unsigned int magic; +#endif + unsigned int lastUsed, size; + threadcacheblk *next, 
*prev; +}; +typedef struct threadcache_t +{ +#ifdef FULLSANITYCHECKS + unsigned int magic1; +#endif + int mymspace; /* Last mspace entry this thread used */ + long threadid; + unsigned int mallocs, frees, successes; + size_t freeInCache; /* How much free space is stored in this cache */ + threadcacheblk *bins[(THREADCACHEMAXBINS+1)*2]; +#ifdef FULLSANITYCHECKS + unsigned int magic2; +#endif +} threadcache; +struct nedpool_t +{ + MLOCK_T mutex; + void *uservalue; + int threads; /* Max entries in m to use */ + threadcache *caches[THREADCACHEMAXCACHES]; + TLSVAR mycache; /* Thread cache for this thread. 0 for unset, negative for use mspace-1 directly, otherwise is cache-1 */ + mstate m[MAXTHREADSINPOOL+1]; /* mspace entries for this pool */ +}; +static nedpool syspool; + +static FORCEINLINE NEDMALLOCNOALIASATTR unsigned int size2binidx(size_t _size) THROWSPEC +{ /* 8=1000 16=10000 20=10100 24=11000 32=100000 48=110000 4096=1000000000000 */ + unsigned int topbit, size=(unsigned int)(_size>>4); + /* 16=1 20=1 24=1 32=10 48=11 64=100 96=110 128=1000 4096=100000000 */ + +#if defined(__GNUC__) + topbit = sizeof(size)*__CHAR_BIT__ - 1 - __builtin_clz(size); +#elif defined(_MSC_VER) && _MSC_VER>=1300 + { + unsigned long bsrTopBit; + + _BitScanReverse(&bsrTopBit, size); + + topbit = bsrTopBit; + } +#else +#if 0 + union { + unsigned asInt[2]; + double asDouble; + }; + int n; + + asDouble = (double)size + 0.5; + topbit = (asInt[!FOX_BIGENDIAN] >> 20) - 1023; +#else + { + unsigned int x=size; + x = x | (x >> 1); + x = x | (x >> 2); + x = x | (x >> 4); + x = x | (x >> 8); + x = x | (x >>16); + x = ~x; + x = x - ((x >> 1) & 0x55555555); + x = (x & 0x33333333) + ((x >> 2) & 0x33333333); + x = (x + (x >> 4)) & 0x0F0F0F0F; + x = x + (x << 8); + x = x + (x << 16); + topbit=31 - (x >> 24); + } +#endif +#endif + return topbit; +} + + +#ifdef FULLSANITYCHECKS +static void tcsanitycheck(threadcacheblk **ptr) THROWSPEC +{ + assert((ptr[0] && ptr[1]) || (!ptr[0] && !ptr[1])); + if(ptr[0] && 
ptr[1]) + { + assert(nedblksize(ptr[0])>=sizeof(threadcacheblk)); + assert(nedblksize(ptr[1])>=sizeof(threadcacheblk)); + assert(*(unsigned int *) "NEDN"==ptr[0]->magic); + assert(*(unsigned int *) "NEDN"==ptr[1]->magic); + assert(!ptr[0]->prev); + assert(!ptr[1]->next); + if(ptr[0]==ptr[1]) + { + assert(!ptr[0]->next); + assert(!ptr[1]->prev); + } + } +} +static void tcfullsanitycheck(threadcache *tc) THROWSPEC +{ + threadcacheblk **tcbptr=tc->bins; + int n; + for(n=0; n<=THREADCACHEMAXBINS; n++, tcbptr+=2) + { + threadcacheblk *b, *ob=0; + tcsanitycheck(tcbptr); + for(b=tcbptr[0]; b; ob=b, b=b->next) + { + assert(*(unsigned int *) "NEDN"==b->magic); + assert(!ob || ob->next==b); + assert(!ob || b->prev==ob); + } + } +} +#endif + +static NOINLINE void RemoveCacheEntries(nedpool *RESTRICT p, threadcache *RESTRICT tc, unsigned int age) THROWSPEC +{ +#ifdef FULLSANITYCHECKS + tcfullsanitycheck(tc); +#endif + if(tc->freeInCache) + { + threadcacheblk **tcbptr=tc->bins; + int n; + for(n=0; n<=THREADCACHEMAXBINS; n++, tcbptr+=2) + { + threadcacheblk **tcb=tcbptr+1; /* come from oldest end of list */ + /*tcsanitycheck(tcbptr);*/ + for(; *tcb && tc->frees-(*tcb)->lastUsed>=age; ) + { + threadcacheblk *f=*tcb; + size_t blksize=f->size; /*nedblksize(f);*/ + assert(blksize<=nedblksize(0, f)); + assert(blksize); +#ifdef FULLSANITYCHECKS + assert(*(unsigned int *) "NEDN"==(*tcb)->magic); +#endif + *tcb=(*tcb)->prev; + if(*tcb) + (*tcb)->next=0; + else + *tcbptr=0; + tc->freeInCache-=blksize; + assert((long) tc->freeInCache>=0); + CallFree(0, f, 0); + /*tcsanitycheck(tcbptr);*/ + } + } + } +#ifdef FULLSANITYCHECKS + tcfullsanitycheck(tc); +#endif +} +static void DestroyCaches(nedpool *RESTRICT p) THROWSPEC +{ + if(p->caches) + { + threadcache *tc; + int n; + for(n=0; n<THREADCACHEMAXCACHES; n++) + { + if((tc=p->caches[n])) + { + tc->frees++; + RemoveCacheEntries(p, tc, 0); + assert(!tc->freeInCache); + tc->mymspace=-1; + tc->threadid=0; + CallFree(0, tc, 0); + p->caches[n]=0; + 
} + } + } +} + +static NOINLINE threadcache *AllocCache(nedpool *RESTRICT p) THROWSPEC +{ + threadcache *tc=0; + int n, end; + ACQUIRE_LOCK(&p->mutex); + for(n=0; n<THREADCACHEMAXCACHES && p->caches[n]; n++); + if(THREADCACHEMAXCACHES==n) + { /* List exhausted, so disable for this thread */ + RELEASE_LOCK(&p->mutex); + return 0; + } + tc=p->caches[n]=(threadcache *) CallCalloc(p->m[0], sizeof(threadcache), 0); + if(!tc) + { + RELEASE_LOCK(&p->mutex); + return 0; + } +#ifdef FULLSANITYCHECKS + tc->magic1=*(unsigned int *)"NEDMALC1"; + tc->magic2=*(unsigned int *)"NEDMALC2"; +#endif + tc->threadid=(long)(size_t)CURRENT_THREAD; + for(end=0; p->m[end]; end++); + tc->mymspace=abs(tc->threadid) % end; + RELEASE_LOCK(&p->mutex); + if(TLSSET(p->mycache, (void *)(size_t)(n+1))) abort(); + return tc; +} + +static void *threadcache_malloc(nedpool *RESTRICT p, threadcache *RESTRICT tc, size_t *RESTRICT _size) THROWSPEC +{ + void *RESTRICT ret=0; + size_t size=*_size, blksize=0; + unsigned int bestsize; + unsigned int idx=size2binidx(size); + threadcacheblk *RESTRICT blk, **RESTRICT binsptr; +#ifdef FULLSANITYCHECKS + tcfullsanitycheck(tc); +#endif + /* Calculate best fit bin size */ + bestsize=1<<(idx+4); +#if 0 + /* Finer grained bin fit */ + idx<<=1; + if(size>bestsize) + { + idx++; + bestsize+=bestsize>>1; + } + if(size>bestsize) + { + idx++; + bestsize=1<<(4+(idx>>1)); + } +#else + if(size>bestsize) + { + idx++; + bestsize<<=1; + } +#endif + assert(bestsize>=size); + if(size<bestsize) size=bestsize; + assert(size<=THREADCACHEMAX); + assert(idx<=THREADCACHEMAXBINS); + binsptr=&tc->bins[idx*2]; + /* Try to match close, but move up a bin if necessary */ + blk=*binsptr; + if(!blk || blk->size<size) + { /* Bump it up a bin */ + if(idx<THREADCACHEMAXBINS) + { + idx++; + binsptr+=2; + blk=*binsptr; + } + } + if(blk) + { + blksize=blk->size; /*nedblksize(blk);*/ + assert(nedblksize(0, blk)>=blksize); + assert(blksize>=size); + if(blk->next) + blk->next->prev=0; + 
*binsptr=blk->next; + if(!*binsptr) + binsptr[1]=0; +#ifdef FULLSANITYCHECKS + blk->magic=0; +#endif + assert(binsptr[0]!=blk && binsptr[1]!=blk); + assert(nedblksize(0, blk)>=sizeof(threadcacheblk) && nedblksize(0, blk)<=THREADCACHEMAX+CHUNK_OVERHEAD); + /*printf("malloc: %p, %p, %p, %lu\n", p, tc, blk, (long) _size);*/ + ret=(void *) blk; + } + ++tc->mallocs; + if(ret) + { + assert(blksize>=size); + ++tc->successes; + tc->freeInCache-=blksize; + assert((long) tc->freeInCache>=0); + } +#if defined(DEBUG) && 0 + if(!(tc->mallocs & 0xfff)) + { + printf("*** threadcache=%u, mallocs=%u (%f), free=%u (%f), freeInCache=%u\n", (unsigned int) tc->threadid, tc->mallocs, + (float) tc->successes/tc->mallocs, tc->frees, (float) tc->successes/tc->frees, (unsigned int) tc->freeInCache); + } +#endif +#ifdef FULLSANITYCHECKS + tcfullsanitycheck(tc); +#endif + *_size=size; + return ret; +} +static NOINLINE void ReleaseFreeInCache(nedpool *RESTRICT p, threadcache *RESTRICT tc, int mymspace) THROWSPEC +{ + unsigned int age=THREADCACHEMAXFREESPACE/8192; + /*ACQUIRE_LOCK(&p->m[mymspace]->mutex);*/ + while(age && tc->freeInCache>=THREADCACHEMAXFREESPACE) + { + RemoveCacheEntries(p, tc, age); + /*printf("*** Removing cache entries older than %u (%u)\n", age, (unsigned int) tc->freeInCache);*/ + age>>=1; + } + /*RELEASE_LOCK(&p->m[mymspace]->mutex);*/ +} +static void threadcache_free(nedpool *RESTRICT p, threadcache *RESTRICT tc, int mymspace, void *RESTRICT mem, size_t size) THROWSPEC +{ + unsigned int bestsize; + unsigned int idx=size2binidx(size); + threadcacheblk **RESTRICT binsptr, *RESTRICT tck=(threadcacheblk *) mem; + assert(size>=sizeof(threadcacheblk) && size<=THREADCACHEMAX+CHUNK_OVERHEAD); +#ifdef DEBUG + /* Make sure this is a valid memory block */ + assert(nedblksize(0, mem)); +#endif +#ifdef FULLSANITYCHECKS + tcfullsanitycheck(tc); +#endif + /* Calculate best fit bin size */ + bestsize=1<<(idx+4); +#if 0 + /* Finer grained bin fit */ + idx<<=1; + if(size>bestsize) + { + 
unsigned int biggerbestsize=bestsize+bestsize<<1; + if(size>=biggerbestsize) + { + idx++; + bestsize=biggerbestsize; + } + } +#endif + if(bestsize!=size) /* dlmalloc can round up, so we round down to preserve indexing */ + size=bestsize; + binsptr=&tc->bins[idx*2]; + assert(idx<=THREADCACHEMAXBINS); + if(tck==*binsptr) + { + fprintf(stderr, "nedmalloc: Attempt to free already freed memory block %p - aborting!\n", tck); + abort(); + } +#ifdef FULLSANITYCHECKS + tck->magic=*(unsigned int *) "NEDN"; +#endif + tck->lastUsed=++tc->frees; + tck->size=(unsigned int) size; + tck->next=*binsptr; + tck->prev=0; + if(tck->next) + tck->next->prev=tck; + else + binsptr[1]=tck; + assert(!*binsptr || (*binsptr)->size==tck->size); + *binsptr=tck; + assert(tck==tc->bins[idx*2]); + assert(tc->bins[idx*2+1]==tck || binsptr[0]->next->prev==tck); + /*printf("free: %p, %p, %p, %lu\n", p, tc, mem, (long) size);*/ + tc->freeInCache+=size; +#ifdef FULLSANITYCHECKS + tcfullsanitycheck(tc); +#endif +#if 1 + if(tc->freeInCache>=THREADCACHEMAXFREESPACE) + ReleaseFreeInCache(p, tc, mymspace); +#endif +} + + + + +static NOINLINE int InitPool(nedpool *RESTRICT p, size_t capacity, int threads) THROWSPEC +{ /* threads is -1 for system pool */ + ensure_initialization(); + ACQUIRE_MALLOC_GLOBAL_LOCK(); + if(p->threads) goto done; + if(INITIAL_LOCK(&p->mutex)) goto err; + if(TLSALLOC(&p->mycache)) goto err; +#if USE_ALLOCATOR==0 + p->m[0]=(mstate) mspacecounter++; +#elif USE_ALLOCATOR==1 + if(!(p->m[0]=(mstate) create_mspace(capacity, 1))) goto err; + p->m[0]->extp=p; +#endif + p->threads=(threads<1 || threads>MAXTHREADSINPOOL) ? 
MAXTHREADSINPOOL : threads; +done: + RELEASE_MALLOC_GLOBAL_LOCK(); + return 1; +err: + if(threads<0) + abort(); /* If you can't allocate for system pool, we're screwed */ + DestroyCaches(p); + if(p->m[0]) + { +#if USE_ALLOCATOR==1 + destroy_mspace(p->m[0]); +#endif + p->m[0]=0; + } + if(p->mycache) + { + if(TLSFREE(p->mycache)) abort(); + p->mycache=0; + } + RELEASE_MALLOC_GLOBAL_LOCK(); + return 0; +} +static NOINLINE mstate FindMSpace(nedpool *RESTRICT p, threadcache *RESTRICT tc, int *RESTRICT lastUsed, size_t size) THROWSPEC +{ /* Gets called when thread's last used mspace is in use. The strategy + is to run through the list of all available mspaces looking for an + unlocked one and if we fail, we create a new one so long as we don't + exceed p->threads */ + int n, end; + for(n=end=*lastUsed+1; p->m[n]; end=++n) + { + if(TRY_LOCK(&p->m[n]->mutex)) goto found; + } + for(n=0; n<*lastUsed && p->m[n]; n++) + { + if(TRY_LOCK(&p->m[n]->mutex)) goto found; + } + if(end<p->threads) + { + mstate temp; +#if USE_ALLOCATOR==0 + temp=(mstate) mspacecounter++; +#elif USE_ALLOCATOR==1 + if(!(temp=(mstate) create_mspace(size, 1))) + goto badexit; +#endif + /* Now we're ready to modify the lists, we lock */ + ACQUIRE_LOCK(&p->mutex); + while(p->m[end] && end<p->threads) + end++; + if(end>=p->threads) + { /* Drat, must destroy it now */ + RELEASE_LOCK(&p->mutex); +#if USE_ALLOCATOR==1 + destroy_mspace((mstate) temp); +#endif + goto badexit; + } + /* We really want to make sure this goes into memory now but we + have to be careful of breaking aliasing rules, so write it twice */ + *((volatile struct malloc_state **) &p->m[end])=p->m[end]=temp; + ACQUIRE_LOCK(&p->m[end]->mutex); + /*printf("Created mspace idx %d\n", end);*/ + RELEASE_LOCK(&p->mutex); + n=end; + goto found; + } + /* Let it lock on the last one it used */ +badexit: + ACQUIRE_LOCK(&p->m[*lastUsed]->mutex); + return p->m[*lastUsed]; +found: + *lastUsed=n; + if(tc) + tc->mymspace=n; + else + { + if(TLSSET(p->mycache, 
(void *)(size_t)(-(n+1)))) abort(); + } + return p->m[n]; +} + +typedef struct PoolList_t +{ + size_t size; /* Size of list */ + size_t length; /* Actual entries in list */ +#ifdef DEBUG + nedpool *list[1]; /* Force testing of list expansion */ +#else + nedpool *list[16]; +#endif +} PoolList; +static MLOCK_T poollistlock; +static PoolList *poollist; +NEDMALLOCPTRATTR nedpool *nedcreatepool(size_t capacity, int threads) THROWSPEC +{ + nedpool *ret=0; + if(!poollist) + { + PoolList *newpoollist=0; + if(!(newpoollist=(PoolList *) nedpcalloc(0, 1, sizeof(PoolList)+sizeof(nedpool *)))) return 0; + INITIAL_LOCK(&poollistlock); + ACQUIRE_LOCK(&poollistlock); + poollist=newpoollist; + poollist->size=sizeof(poollist->list)/sizeof(nedpool *); + } + else + ACQUIRE_LOCK(&poollistlock); + if(poollist->length==poollist->size) + { + PoolList *newpoollist=0; + size_t newsize=0; + newsize=sizeof(PoolList)+(poollist->size+1)*sizeof(nedpool *); + if(!(newpoollist=(PoolList *) nedprealloc(0, poollist, newsize))) goto badexit; + poollist=newpoollist; + memset(&poollist->list[poollist->size], 0, newsize-((size_t)&poollist->list[poollist->size]-(size_t)&poollist->list[0])); + poollist->size=((newsize-((char *)&poollist->list[0]-(char *)poollist))/sizeof(nedpool *))-1; + assert(poollist->size>poollist->length); + } + if(!(ret=(nedpool *) nedpcalloc(0, 1, sizeof(nedpool)))) goto badexit; + if(!InitPool(ret, capacity, threads)) + { + nedpfree(0, ret); + goto badexit; + } + poollist->list[poollist->length++]=ret; +badexit: + RELEASE_LOCK(&poollistlock); + return ret; +} +void neddestroypool(nedpool *p) THROWSPEC +{ + unsigned int n; + ACQUIRE_LOCK(&p->mutex); + DestroyCaches(p); + for(n=0; p->m[n]; n++) + { +#if USE_ALLOCATOR==1 + destroy_mspace(p->m[n]); +#endif + p->m[n]=0; + } + RELEASE_LOCK(&p->mutex); + if(TLSFREE(p->mycache)) abort(); + nedpfree(0, p); + ACQUIRE_LOCK(&poollistlock); + assert(poollist); + for(n=0; n<poollist->length && poollist->list[n]!=p; n++); + 
assert(n!=poollist->length); + memmove(&poollist->list[n], &poollist->list[n+1], (size_t)&poollist->list[poollist->length]-(size_t)&poollist->list[n]); + if(!--poollist->length) + { + assert(!poollist->list[0]); + nedpfree(0, poollist); + poollist=0; + } + RELEASE_LOCK(&poollistlock); +} +void neddestroysyspool() THROWSPEC +{ + nedpool *p=&syspool; + int n; + ACQUIRE_LOCK(&p->mutex); + DestroyCaches(p); + for(n=0; p->m[n]; n++) + { +#if USE_ALLOCATOR==1 + destroy_mspace(p->m[n]); +#endif + p->m[n]=0; + } + /* Render syspool unusable */ + for(n=0; n<THREADCACHEMAXCACHES; n++) + p->caches[n]=(threadcache *)(size_t)(sizeof(size_t)>4 ? 0xdeadbeefdeadbeefULL : 0xdeadbeefUL); + for(n=0; n<MAXTHREADSINPOOL+1; n++) + p->m[n]=(mstate)(size_t)(sizeof(size_t)>4 ? 0xdeadbeefdeadbeefULL : 0xdeadbeefUL); + if(TLSFREE(p->mycache)) abort(); + RELEASE_LOCK(&p->mutex); +} +nedpool **nedpoollist() THROWSPEC +{ + nedpool **ret=0; + if(poollist) + { + ACQUIRE_LOCK(&poollistlock); + if(!(ret=(nedpool **) nedmalloc((poollist->length+1)*sizeof(nedpool *)))) goto badexit; + memcpy(ret, poollist->list, (poollist->length+1)*sizeof(nedpool *)); +badexit: + RELEASE_LOCK(&poollistlock); + } + return ret; +} + +void nedpsetvalue(nedpool *p, void *v) THROWSPEC +{ + if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); } + p->uservalue=v; +} +void *nedgetvalue(nedpool **p, void *mem) THROWSPEC +{ + nedpool *np=0; + mstate fm=nedblkmstate(mem); + if(!fm || !fm->extp) return 0; + np=(nedpool *) fm->extp; + if(p) *p=np; + return np->uservalue; +} + +void nedtrimthreadcache(nedpool *p, int disable) THROWSPEC +{ + int mycache; + if(!p) + { + p=&syspool; + if(!syspool.threads) InitPool(&syspool, 0, -1); + } + mycache=(int)(size_t) TLSGET(p->mycache); + if(!mycache) + { /* Set to mspace 0 */ + if(disable && TLSSET(p->mycache, (void *)(size_t)-1)) abort(); + } + else if(mycache>0) + { /* Set to last used mspace */ + threadcache *tc=p->caches[mycache-1]; +#if defined(DEBUG) + 
printf("Threadcache utilisation: %lf%% in cache with %lf%% lost to other threads\n", + 100.0*tc->successes/tc->mallocs, 100.0*((double) tc->mallocs-tc->frees)/tc->mallocs); +#endif + if(disable && TLSSET(p->mycache, (void *)(size_t)(-tc->mymspace))) abort(); + tc->frees++; + RemoveCacheEntries(p, tc, 0); + assert(!tc->freeInCache); + if(disable) + { + tc->mymspace=-1; + tc->threadid=0; + CallFree(0, p->caches[mycache-1], 0); + p->caches[mycache-1]=0; + } + } +} +void neddisablethreadcache(nedpool *p) THROWSPEC +{ + nedtrimthreadcache(p, 1); +} + +#define GETMSPACE(m,p,tc,ms,s,action) \ + do \ + { \ + mstate m = GetMSpace((p),(tc),(ms),(s)); \ + action; \ + if(USE_ALLOCATOR==1) { RELEASE_LOCK(&m->mutex); } \ + } while (0) + +static FORCEINLINE mstate GetMSpace(nedpool *RESTRICT p, threadcache *RESTRICT tc, int mymspace, size_t size) THROWSPEC +{ /* Returns a locked and ready for use mspace */ + mstate m=p->m[mymspace]; + assert(m); +#if USE_ALLOCATOR==1 + if(!TRY_LOCK(&p->m[mymspace]->mutex)) m=FindMSpace(p, tc, &mymspace, size); + /*assert(IS_LOCKED(&p->m[mymspace]->mutex));*/ +#endif + return m; +} +static NOINLINE void GetThreadCache_cold1(nedpool *RESTRICT *RESTRICT p) THROWSPEC +{ + *p=&syspool; + if(!syspool.threads) InitPool(&syspool, 0, -1); +} +static NOINLINE void GetThreadCache_cold2(nedpool *RESTRICT *RESTRICT p, threadcache *RESTRICT *RESTRICT tc, int *RESTRICT mymspace, int mycache) THROWSPEC +{ + if(!mycache) + { /* Need to allocate a new cache */ + *tc=AllocCache(*p); + if(!*tc) + { /* Disable */ + if(TLSSET((*p)->mycache, (void *)(size_t)-1)) abort(); + *mymspace=0; + } + else + *mymspace=(*tc)->mymspace; + } + else + { /* Cache disabled, but we do have an assigned thread pool */ + *tc=0; + *mymspace=-mycache-1; + } +} +static FORCEINLINE void GetThreadCache(nedpool *RESTRICT *RESTRICT p, threadcache *RESTRICT *RESTRICT tc, int *RESTRICT mymspace, size_t *RESTRICT size) THROWSPEC +{ + int mycache; + if(size && *size<sizeof(threadcacheblk)) 
*size=sizeof(threadcacheblk); + if(!*p) + GetThreadCache_cold1(p); + mycache=(int)(size_t) TLSGET((*p)->mycache); + if(mycache>0) + { /* Already have a cache */ + *tc=(*p)->caches[mycache-1]; + *mymspace=(*tc)->mymspace; + } + else GetThreadCache_cold2(p, tc, mymspace, mycache); + assert(*mymspace>=0); + assert(!(*tc) || (long)(size_t)CURRENT_THREAD==(*tc)->threadid); +#ifdef FULLSANITYCHECKS + if(*tc) + { + if(*(unsigned int *)"NEDMALC1"!=(*tc)->magic1 || *(unsigned int *)"NEDMALC2"!=(*tc)->magic2) + { + abort(); + } + } +#endif +} + +NEDMALLOCPTRATTR void * nedpmalloc(nedpool *p, size_t size) THROWSPEC +{ + void *ret=0; + threadcache *tc; + int mymspace; + GetThreadCache(&p, &tc, &mymspace, &size); +#if THREADCACHEMAX + if(tc && size<=THREADCACHEMAX) + { /* Use the thread cache */ + ret=threadcache_malloc(p, tc, &size); + } +#endif + if(!ret) + { /* Use this thread's mspace */ + GETMSPACE(m, p, tc, mymspace, size, + ret=CallMalloc(m, size, 0)); + } + return ret; +} +NEDMALLOCPTRATTR void * nedpcalloc(nedpool *p, size_t no, size_t size) THROWSPEC +{ + size_t rsize=size*no; + void *ret=0; + threadcache *tc; + int mymspace; + GetThreadCache(&p, &tc, &mymspace, &rsize); +#if THREADCACHEMAX + if(tc && rsize<=THREADCACHEMAX) + { /* Use the thread cache */ + if((ret=threadcache_malloc(p, tc, &rsize))) + memset(ret, 0, rsize); + } +#endif + if(!ret) + { /* Use this thread's mspace */ + GETMSPACE(m, p, tc, mymspace, rsize, + ret=CallCalloc(m, rsize, 0)); + } + return ret; +} +NEDMALLOCPTRATTR void * nedprealloc(nedpool *p, void *mem, size_t size) THROWSPEC +{ + void *ret=0; + threadcache *tc; + int mymspace, isforeign=1; + size_t memsize; + if(!mem) return nedpmalloc(p, size); + memsize=nedblksize(&isforeign, mem); + assert(memsize); + if(!memsize) + { + fprintf(stderr, "nedmalloc: nedprealloc() called with a block not created by nedmalloc!\n"); + abort(); + } + else if(size<=memsize && memsize-size< +#ifdef DEBUG + 32 +#else + 1024 +#endif + ) /* If realloc size is 
within 1Kb smaller than existing, noop it */ + return mem; + GetThreadCache(&p, &tc, &mymspace, &size); +#if THREADCACHEMAX + if(tc && size && size<=THREADCACHEMAX) + { /* Use the thread cache */ + if((ret=threadcache_malloc(p, tc, &size))) + { + memcpy(ret, mem, memsize<size ? memsize : size); + if(memsize>=sizeof(threadcacheblk) && memsize<=(THREADCACHEMAX+CHUNK_OVERHEAD)) + threadcache_free(p, tc, mymspace, mem, memsize); + else + CallFree(0, mem, isforeign); + } + } +#endif + if(!ret) + { /* Reallocs always happen in the mspace they happened in, so skip + locking the preferred mspace for this thread */ + ret=CallRealloc(p->m[mymspace], mem, isforeign, memsize, size); + } + return ret; +} +void nedpfree(nedpool *p, void *mem) THROWSPEC +{ /* Frees always happen in the mspace they happened in, so skip + locking the preferred mspace for this thread */ + threadcache *tc; + int mymspace, isforeign=1; + size_t memsize; + if(!mem) + { /* If you tried this on FreeBSD you'd be sorry! */ +#ifdef DEBUG + fprintf(stderr, "nedmalloc: WARNING nedpfree() called with zero. 
This is not portable behaviour!\n"); +#endif + return; + } + memsize=nedblksize(&isforeign, mem); + assert(memsize); + if(!memsize) + { + fprintf(stderr, "nedmalloc: nedpfree() called with a block not created by nedmalloc!\n"); + abort(); + } + GetThreadCache(&p, &tc, &mymspace, 0); +#if THREADCACHEMAX + if(mem && tc && memsize>=sizeof(threadcacheblk) && memsize<=(THREADCACHEMAX+CHUNK_OVERHEAD)) + threadcache_free(p, tc, mymspace, mem, memsize); + else +#endif + CallFree(0, mem, isforeign); +} +NEDMALLOCPTRATTR void * nedpmemalign(nedpool *p, size_t alignment, size_t bytes) THROWSPEC +{ + void *ret; + threadcache *tc; + int mymspace; + GetThreadCache(&p, &tc, &mymspace, &bytes); + { /* Use this thread's mspace */ + GETMSPACE(m, p, tc, mymspace, bytes, + ret=CallMalloc(m, bytes, alignment)); + } + return ret; +} +struct nedmallinfo nedpmallinfo(nedpool *p) THROWSPEC +{ + int n; + struct nedmallinfo ret={0}; + if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); } + for(n=0; p->m[n]; n++) + { +#if USE_ALLOCATOR==1 && !NO_MALLINFO + struct mallinfo t=mspace_mallinfo(p->m[n]); + ret.arena+=t.arena; + ret.ordblks+=t.ordblks; + ret.hblkhd+=t.hblkhd; + ret.usmblks+=t.usmblks; + ret.uordblks+=t.uordblks; + ret.fordblks+=t.fordblks; + ret.keepcost+=t.keepcost; +#endif + } + return ret; +} +int nedpmallopt(nedpool *p, int parno, int value) THROWSPEC +{ +#if USE_ALLOCATOR==1 + return mspace_mallopt(parno, value); +#else + return 0; +#endif +} +NEDMALLOCNOALIASATTR void* nedmalloc_internals(size_t *granularity, size_t *magic) THROWSPEC +{ +#if USE_ALLOCATOR==1 + if(granularity) *granularity=mparams.granularity; + if(magic) *magic=mparams.magic; + return (void *) &syspool; +#else + if(granularity) *granularity=0; + if(magic) *magic=0; + return 0; +#endif +} +int nedpmalloc_trim(nedpool *p, size_t pad) THROWSPEC +{ + int n, ret=0; + if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); } + for(n=0; p->m[n]; n++) + { +#if USE_ALLOCATOR==1 + 
ret+=mspace_trim(p->m[n], pad); +#endif + } + return ret; +} +void nedpmalloc_stats(nedpool *p) THROWSPEC +{ + int n; + if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); } + for(n=0; p->m[n]; n++) + { +#if USE_ALLOCATOR==1 + mspace_malloc_stats(p->m[n]); +#endif + } +} +size_t nedpmalloc_footprint(nedpool *p) THROWSPEC +{ + size_t ret=0; + int n; + if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); } + for(n=0; p->m[n]; n++) + { +#if USE_ALLOCATOR==1 + ret+=mspace_footprint(p->m[n]); +#endif + } + return ret; +} +NEDMALLOCPTRATTR void **nedpindependent_calloc(nedpool *p, size_t elemsno, size_t elemsize, void **chunks) THROWSPEC +{ + void **ret; + threadcache *tc; + int mymspace; + GetThreadCache(&p, &tc, &mymspace, &elemsize); +#if USE_ALLOCATOR==0 + GETMSPACE(m, p, tc, mymspace, elemsno*elemsize, + ret=unsupported_operation("independent_calloc")); +#elif USE_ALLOCATOR==1 + GETMSPACE(m, p, tc, mymspace, elemsno*elemsize, + ret=mspace_independent_calloc(m, elemsno, elemsize, chunks)); +#endif + return ret; +} +NEDMALLOCPTRATTR void **nedpindependent_comalloc(nedpool *p, size_t elems, size_t *sizes, void **chunks) THROWSPEC +{ + void **ret; + threadcache *tc; + int mymspace; + size_t i, *adjustedsizes=(size_t *) alloca(elems*sizeof(size_t)); + if(!adjustedsizes) return 0; + for(i=0; i<elems; i++) + adjustedsizes[i]=sizes[i]<sizeof(threadcacheblk) ? 
sizeof(threadcacheblk) : sizes[i]; + GetThreadCache(&p, &tc, &mymspace, 0); +#if USE_ALLOCATOR==0 + GETMSPACE(m, p, tc, mymspace, 0, + ret=unsupported_operation("independent_comalloc")); +#elif USE_ALLOCATOR==1 + GETMSPACE(m, p, tc, mymspace, 0, + ret=mspace_independent_comalloc(m, elems, adjustedsizes, chunks)); +#endif + return ret; +} + +#if defined(__cplusplus) +} +#endif + +#ifdef _MSC_VER +#pragma warning(pop) +#endif + +#endif diff --git a/drivers/nedmalloc/nedmalloc.h b/drivers/nedmalloc/nedmalloc.h index b9add1683a..7ec65849fc 100644 --- a/drivers/nedmalloc/nedmalloc.h +++ b/drivers/nedmalloc/nedmalloc.h @@ -1,302 +1,302 @@ -#ifdef NEDMALLOC_ENABLED
-
-/* nedalloc, an alternative malloc implementation for multiple threads without
-lock contention based on dlmalloc v2.8.3. (C) 2005-2009 Niall Douglas
-
-Boost Software License - Version 1.0 - August 17th, 2003
-
-Permission is hereby granted, free of charge, to any person or organization
-obtaining a copy of the software and accompanying documentation covered by
-this license (the "Software") to use, reproduce, display, distribute,
-execute, and transmit the Software, and to prepare derivative works of the
-Software, and to permit third-parties to whom the Software is furnished to
-do so, all subject to the following:
-
-The copyright notices in the Software and this entire statement, including
-the above license grant, this restriction and the following disclaimer,
-must be included in all copies of the Software, in whole or in part, and
-all derivative works of the Software, unless such copies or derivative
-works are solely in the form of machine-executable object code generated by
-a source language processor.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
-SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
-FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
-ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-DEALINGS IN THE SOFTWARE.
-*/
-
-#ifndef NEDMALLOC_H
-#define NEDMALLOC_H
-
-#include "typedefs.h"
-#define MALLOC_ALIGNMENT DEFAULT_ALIGNMENT
-
-#ifdef PSP_ENABLED
-#define USE_LOCKS 0
-#define HAVE_MMAP 0
-#endif
-
-/* See malloc.c.h for what each function does.
-
-REPLACE_SYSTEM_ALLOCATOR on POSIX causes nedalloc's functions to be called
-malloc, free etc. instead of nedmalloc, nedfree etc. You may or may not want
-this. On Windows it causes nedmalloc to patch all loaded DLLs and binaries
-to replace usage of the system allocator.
-
-NO_NED_NAMESPACE prevents the functions from being defined in the nedalloc
-namespace when in C++ (uses the global namespace instead).
-
-NEDMALLOCEXTSPEC can be defined to be __declspec(dllexport) or
-__attribute__ ((visibility("default"))) or whatever you like. It defaults
-to extern unless NEDMALLOC_DLL_EXPORTS is set as it would be when building
-nedmalloc.dll.
-
-USE_LOCKS can be 2 if you want to define your own MLOCK_T, INITIAL_LOCK,
-ACQUIRE_LOCK, RELEASE_LOCK, TRY_LOCK, IS_LOCKED and NULL_LOCK_INITIALIZER.
-
-NEDMALLOC_DEBUG can be defined to cause DEBUG to be set differently for nedmalloc
-than for the rest of the build. Remember to set NDEBUG to disable all assertion
-checking too.
-
-USE_MAGIC_HEADERS causes nedalloc to allocate an extra three sizeof(size_t)
-to each block. nedpfree() and nedprealloc() can then automagically know when
-to free a system allocated block. Enabling this typically adds 20-50% to
-application memory usage.
-
-ENABLE_TOLERANT_NEDMALLOC is automatically turned on if REPLACE_SYSTEM_ALLOCATOR
-is set or the Windows DLL is being built. This causes nedmalloc to detect when a
-system allocator block is passed to it and to handle it appropriately. Note that
-without USE_MAGIC_HEADERS there is a very tiny chance that nedmalloc will segfault
-on non-Windows builds (it uses Win32 SEH to trap segfaults on Windows and there
-is no comparable system on POSIX).
-
-USE_ALLOCATOR can be one of these settings (it defaults to 1):
- 0: System allocator (nedmalloc now simply acts as a threadcache).
- WARNING: Intended for DEBUG USE ONLY - not all functions work correctly.
- 1: dlmalloc
-
-ENABLE_LARGE_PAGES enables support for requesting memory from the system in large
-(typically >=2Mb) pages if the host OS supports this. These occupy just a single
-TLB entry and can significantly improve performance in large working set applications.
-
-ENABLE_FAST_HEAP_DETECTION enables special logic to detect blocks allocated
-by the system heap. This avoids 1.5%-2% overhead when checking for non-nedmalloc
-blocks, but it assumes that the NT and glibc heaps function in a very specific
-fashion which may not hold true across OS upgrades.
-*/
-
-#include <stddef.h> /* for size_t */
-
-#ifndef NEDMALLOCEXTSPEC
- #ifdef NEDMALLOC_DLL_EXPORTS
- #ifdef WIN32
- #define NEDMALLOCEXTSPEC extern __declspec(dllexport)
- #elif defined(__GNUC__)
- #define NEDMALLOCEXTSPEC extern __attribute__ ((visibility("default")))
- #endif
- #ifndef ENABLE_TOLERANT_NEDMALLOC
- #define ENABLE_TOLERANT_NEDMALLOC 1
- #endif
- #else
- #define NEDMALLOCEXTSPEC extern
- #endif
-#endif
-
-#if __STDC_VERSION__ >= 199901L /* C99 or better */
- #define RESTRICT restrict
-#else
- #if defined(_MSC_VER) && _MSC_VER>=1400
- #define RESTRICT __restrict
- #endif
- #ifdef __GNUC__
- #define RESTRICT __restrict
- #endif
-#endif
-#ifndef RESTRICT
- #define RESTRICT
-#endif
-
-#if defined(_MSC_VER) && _MSC_VER>=1400
- #define NEDMALLOCPTRATTR __declspec(restrict)
- #define NEDMALLOCNOALIASATTR __declspec(noalias)
-#endif
-#ifdef __GNUC__
- #define NEDMALLOCPTRATTR __attribute__ ((malloc))
-#endif
-#ifndef NEDMALLOCPTRATTR
- #define NEDMALLOCPTRATTR
-#endif
-#ifndef NEDMALLOCNOALIASATTR
- #define NEDMALLOCNOALIASATTR
-#endif
-
-#ifndef USE_MAGIC_HEADERS
- #define USE_MAGIC_HEADERS 0
-#endif
-
-#ifndef USE_ALLOCATOR
- #define USE_ALLOCATOR 1 /* dlmalloc */
-#endif
-
-#if !USE_ALLOCATOR && !USE_MAGIC_HEADERS
-#error If you are using the system allocator then you MUST use magic headers
-#endif
-
-#ifdef REPLACE_SYSTEM_ALLOCATOR
- #if USE_ALLOCATOR==0
- #error Cannot combine using the system allocator with replacing the system allocator
- #endif
- #ifndef ENABLE_TOLERANT_NEDMALLOC
- #define ENABLE_TOLERANT_NEDMALLOC 1
- #endif
- #ifndef WIN32 /* We have a dedicated patcher for Windows */
- #define nedmalloc malloc
- #define nedcalloc calloc
- #define nedrealloc realloc
- #define nedfree free
- #define nedmemalign memalign
- #define nedmallinfo mallinfo
- #define nedmallopt mallopt
- #define nedmalloc_trim malloc_trim
- #define nedmalloc_stats malloc_stats
- #define nedmalloc_footprint malloc_footprint
- #define nedindependent_calloc independent_calloc
- #define nedindependent_comalloc independent_comalloc
- #ifdef _MSC_VER
- #define nedblksize _msize
- #endif
- #endif
-#endif
-
-#if defined(__cplusplus)
-extern "C" {
-#endif
-struct nedmallinfo {
- size_t arena; /* non-mmapped space allocated from system */
- size_t ordblks; /* number of free chunks */
- size_t smblks; /* always 0 */
- size_t hblks; /* always 0 */
- size_t hblkhd; /* space in mmapped regions */
- size_t usmblks; /* maximum total allocated space */
- size_t fsmblks; /* always 0 */
- size_t uordblks; /* total allocated space */
- size_t fordblks; /* total free space */
- size_t keepcost; /* releasable (via malloc_trim) space */
-};
-#if defined(__cplusplus)
-}
-#endif
-
-#if defined(__cplusplus)
- #if !defined(NO_NED_NAMESPACE)
-namespace nedalloc {
- #else
-extern "C" {
- #endif
- #define THROWSPEC throw()
-#else
- #define THROWSPEC
-#endif
-
-/* These are the global functions */
-
-/* Gets the usable size of an allocated block. Note this will always be bigger than what was
-asked for due to rounding etc. Optionally returns 1 in isforeign if the block came from the
-system allocator - note that there is a small (>0.01%) but real chance of segfault on non-Windows
-systems when passing non-nedmalloc blocks if you don't use USE_MAGIC_HEADERS.
-*/
-NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR size_t nedblksize(int *RESTRICT isforeign, void *RESTRICT mem) THROWSPEC;
-
-NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR void nedsetvalue(void *v) THROWSPEC;
-
-NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedmalloc(size_t size) THROWSPEC;
-NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedcalloc(size_t no, size_t size) THROWSPEC;
-NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedrealloc(void *mem, size_t size) THROWSPEC;
-NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR void nedfree(void *mem) THROWSPEC;
-NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedmemalign(size_t alignment, size_t bytes) THROWSPEC;
-NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR struct nedmallinfo nedmallinfo(void) THROWSPEC;
-NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR int nedmallopt(int parno, int value) THROWSPEC;
-NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR void* nedmalloc_internals(size_t *granularity, size_t *magic) THROWSPEC;
-NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR int nedmalloc_trim(size_t pad) THROWSPEC;
-NEDMALLOCEXTSPEC void nedmalloc_stats(void) THROWSPEC;
-NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR size_t nedmalloc_footprint(void) THROWSPEC;
-NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void **nedindependent_calloc(size_t elemsno, size_t elemsize, void **chunks) THROWSPEC;
-NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void **nedindependent_comalloc(size_t elems, size_t *sizes, void **chunks) THROWSPEC;
-
-/* Destroys the system memory pool used by the functions above.
-Useful for when you have nedmalloc in a DLL you're about to unload.
-If you call ANY nedmalloc functions after calling this you will
-get a fatal exception!
-*/
-NEDMALLOCEXTSPEC void neddestroysyspool() THROWSPEC;
-
-/* These are the pool functions */
-struct nedpool_t;
-typedef struct nedpool_t nedpool;
-
-/* Creates a memory pool for use with the nedp* functions below.
-Capacity is how much to allocate immediately (if you know you'll be allocating a lot
-of memory very soon) which you can leave at zero. Threads specifies how many threads
-will *normally* be accessing the pool concurrently. Setting this to zero means it
-extends on demand, but be careful of this as it can rapidly consume system resources
-where bursts of concurrent threads use a pool at once.
-*/
-NEDMALLOCEXTSPEC NEDMALLOCPTRATTR nedpool *nedcreatepool(size_t capacity, int threads) THROWSPEC;
-
-/* Destroys a memory pool previously created by nedcreatepool().
-*/
-NEDMALLOCEXTSPEC void neddestroypool(nedpool *p) THROWSPEC;
-
-/* Returns a zero terminated snapshot of threadpools existing at the time of call. Call
-nedfree() on the returned list when you are done. Returns zero if there is only the
-system pool in existence.
-*/
-NEDMALLOCEXTSPEC nedpool **nedpoollist() THROWSPEC;
-
-/* Sets a value to be associated with a pool. You can retrieve this value by passing
-any memory block allocated from that pool.
-*/
-NEDMALLOCEXTSPEC void nedpsetvalue(nedpool *p, void *v) THROWSPEC;
-
-/* Gets a previously set value using nedpsetvalue() or zero if memory is unknown.
-Optionally can also retrieve pool. You can detect an unknown block by the return
-being zero and *p being unmodifed.
-*/
-NEDMALLOCEXTSPEC void *nedgetvalue(nedpool **p, void *mem) THROWSPEC;
-
-/* Trims the thread cache for the calling thread, returning any existing cache
-data to the central pool. Remember to ALWAYS call with zero if you used the
-system pool. Setting disable to non-zero replicates neddisablethreadcache().
-*/
-NEDMALLOCEXTSPEC void nedtrimthreadcache(nedpool *p, int disable) THROWSPEC;
-
-/* Disables the thread cache for the calling thread, returning any existing cache
-data to the central pool. Remember to ALWAYS call with zero if you used the
-system pool.
-*/
-NEDMALLOCEXTSPEC void neddisablethreadcache(nedpool *p) THROWSPEC;
-
-
-NEDMALLOCEXTSPEC NEDMALLOCPTRATTR void * nedpmalloc(nedpool *p, size_t size) THROWSPEC;
-NEDMALLOCEXTSPEC NEDMALLOCPTRATTR void * nedpcalloc(nedpool *p, size_t no, size_t size) THROWSPEC;
-NEDMALLOCEXTSPEC NEDMALLOCPTRATTR void * nedprealloc(nedpool *p, void *mem, size_t size) THROWSPEC;
-NEDMALLOCEXTSPEC void nedpfree(nedpool *p, void *mem) THROWSPEC;
-NEDMALLOCEXTSPEC NEDMALLOCPTRATTR void * nedpmemalign(nedpool *p, size_t alignment, size_t bytes) THROWSPEC;
-NEDMALLOCEXTSPEC struct nedmallinfo nedpmallinfo(nedpool *p) THROWSPEC;
-NEDMALLOCEXTSPEC int nedpmallopt(nedpool *p, int parno, int value) THROWSPEC;
-NEDMALLOCEXTSPEC int nedpmalloc_trim(nedpool *p, size_t pad) THROWSPEC;
-NEDMALLOCEXTSPEC void nedpmalloc_stats(nedpool *p) THROWSPEC;
-NEDMALLOCEXTSPEC size_t nedpmalloc_footprint(nedpool *p) THROWSPEC;
-NEDMALLOCEXTSPEC NEDMALLOCPTRATTR void **nedpindependent_calloc(nedpool *p, size_t elemsno, size_t elemsize, void **chunks) THROWSPEC;
-NEDMALLOCEXTSPEC NEDMALLOCPTRATTR void **nedpindependent_comalloc(nedpool *p, size_t elems, size_t *sizes, void **chunks) THROWSPEC;
-
-#if defined(__cplusplus)
-}
-#endif
-
-#endif
-
-#endif
+#ifdef NEDMALLOC_ENABLED + +/* nedalloc, an alternative malloc implementation for multiple threads without +lock contention based on dlmalloc v2.8.3. (C) 2005-2009 Niall Douglas + +Boost Software License - Version 1.0 - August 17th, 2003 + +Permission is hereby granted, free of charge, to any person or organization +obtaining a copy of the software and accompanying documentation covered by +this license (the "Software") to use, reproduce, display, distribute, +execute, and transmit the Software, and to prepare derivative works of the +Software, and to permit third-parties to whom the Software is furnished to +do so, all subject to the following: + +The copyright notices in the Software and this entire statement, including +the above license grant, this restriction and the following disclaimer, +must be included in all copies of the Software, in whole or in part, and +all derivative works of the Software, unless such copies or derivative +works are solely in the form of machine-executable object code generated by +a source language processor. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. +*/ + +#ifndef NEDMALLOC_H +#define NEDMALLOC_H + +#include "typedefs.h" +#define MALLOC_ALIGNMENT DEFAULT_ALIGNMENT + +#ifdef PSP_ENABLED +#define USE_LOCKS 0 +#define HAVE_MMAP 0 +#endif + +/* See malloc.c.h for what each function does. + +REPLACE_SYSTEM_ALLOCATOR on POSIX causes nedalloc's functions to be called +malloc, free etc. instead of nedmalloc, nedfree etc. You may or may not want +this. 
On Windows it causes nedmalloc to patch all loaded DLLs and binaries +to replace usage of the system allocator. + +NO_NED_NAMESPACE prevents the functions from being defined in the nedalloc +namespace when in C++ (uses the global namespace instead). + +NEDMALLOCEXTSPEC can be defined to be __declspec(dllexport) or +__attribute__ ((visibility("default"))) or whatever you like. It defaults +to extern unless NEDMALLOC_DLL_EXPORTS is set as it would be when building +nedmalloc.dll. + +USE_LOCKS can be 2 if you want to define your own MLOCK_T, INITIAL_LOCK, +ACQUIRE_LOCK, RELEASE_LOCK, TRY_LOCK, IS_LOCKED and NULL_LOCK_INITIALIZER. + +NEDMALLOC_DEBUG can be defined to cause DEBUG to be set differently for nedmalloc +than for the rest of the build. Remember to set NDEBUG to disable all assertion +checking too. + +USE_MAGIC_HEADERS causes nedalloc to allocate an extra three sizeof(size_t) +to each block. nedpfree() and nedprealloc() can then automagically know when +to free a system allocated block. Enabling this typically adds 20-50% to +application memory usage. + +ENABLE_TOLERANT_NEDMALLOC is automatically turned on if REPLACE_SYSTEM_ALLOCATOR +is set or the Windows DLL is being built. This causes nedmalloc to detect when a +system allocator block is passed to it and to handle it appropriately. Note that +without USE_MAGIC_HEADERS there is a very tiny chance that nedmalloc will segfault +on non-Windows builds (it uses Win32 SEH to trap segfaults on Windows and there +is no comparable system on POSIX). + +USE_ALLOCATOR can be one of these settings (it defaults to 1): + 0: System allocator (nedmalloc now simply acts as a threadcache). + WARNING: Intended for DEBUG USE ONLY - not all functions work correctly. + 1: dlmalloc + +ENABLE_LARGE_PAGES enables support for requesting memory from the system in large +(typically >=2Mb) pages if the host OS supports this. 
These occupy just a single +TLB entry and can significantly improve performance in large working set applications. + +ENABLE_FAST_HEAP_DETECTION enables special logic to detect blocks allocated +by the system heap. This avoids 1.5%-2% overhead when checking for non-nedmalloc +blocks, but it assumes that the NT and glibc heaps function in a very specific +fashion which may not hold true across OS upgrades. +*/ + +#include <stddef.h> /* for size_t */ + +#ifndef NEDMALLOCEXTSPEC + #ifdef NEDMALLOC_DLL_EXPORTS + #ifdef WIN32 + #define NEDMALLOCEXTSPEC extern __declspec(dllexport) + #elif defined(__GNUC__) + #define NEDMALLOCEXTSPEC extern __attribute__ ((visibility("default"))) + #endif + #ifndef ENABLE_TOLERANT_NEDMALLOC + #define ENABLE_TOLERANT_NEDMALLOC 1 + #endif + #else + #define NEDMALLOCEXTSPEC extern + #endif +#endif + +#if __STDC_VERSION__ >= 199901L /* C99 or better */ + #define RESTRICT restrict +#else + #if defined(_MSC_VER) && _MSC_VER>=1400 + #define RESTRICT __restrict + #endif + #ifdef __GNUC__ + #define RESTRICT __restrict + #endif +#endif +#ifndef RESTRICT + #define RESTRICT +#endif + +#if defined(_MSC_VER) && _MSC_VER>=1400 + #define NEDMALLOCPTRATTR __declspec(restrict) + #define NEDMALLOCNOALIASATTR __declspec(noalias) +#endif +#ifdef __GNUC__ + #define NEDMALLOCPTRATTR __attribute__ ((malloc)) +#endif +#ifndef NEDMALLOCPTRATTR + #define NEDMALLOCPTRATTR +#endif +#ifndef NEDMALLOCNOALIASATTR + #define NEDMALLOCNOALIASATTR +#endif + +#ifndef USE_MAGIC_HEADERS + #define USE_MAGIC_HEADERS 0 +#endif + +#ifndef USE_ALLOCATOR + #define USE_ALLOCATOR 1 /* dlmalloc */ +#endif + +#if !USE_ALLOCATOR && !USE_MAGIC_HEADERS +#error If you are using the system allocator then you MUST use magic headers +#endif + +#ifdef REPLACE_SYSTEM_ALLOCATOR + #if USE_ALLOCATOR==0 + #error Cannot combine using the system allocator with replacing the system allocator + #endif + #ifndef ENABLE_TOLERANT_NEDMALLOC + #define ENABLE_TOLERANT_NEDMALLOC 1 + #endif + #ifndef WIN32 /* 
We have a dedicated patcher for Windows */ + #define nedmalloc malloc + #define nedcalloc calloc + #define nedrealloc realloc + #define nedfree free + #define nedmemalign memalign + #define nedmallinfo mallinfo + #define nedmallopt mallopt + #define nedmalloc_trim malloc_trim + #define nedmalloc_stats malloc_stats + #define nedmalloc_footprint malloc_footprint + #define nedindependent_calloc independent_calloc + #define nedindependent_comalloc independent_comalloc + #ifdef _MSC_VER + #define nedblksize _msize + #endif + #endif +#endif + +#if defined(__cplusplus) +extern "C" { +#endif +struct nedmallinfo { + size_t arena; /* non-mmapped space allocated from system */ + size_t ordblks; /* number of free chunks */ + size_t smblks; /* always 0 */ + size_t hblks; /* always 0 */ + size_t hblkhd; /* space in mmapped regions */ + size_t usmblks; /* maximum total allocated space */ + size_t fsmblks; /* always 0 */ + size_t uordblks; /* total allocated space */ + size_t fordblks; /* total free space */ + size_t keepcost; /* releasable (via malloc_trim) space */ +}; +#if defined(__cplusplus) +} +#endif + +#if defined(__cplusplus) + #if !defined(NO_NED_NAMESPACE) +namespace nedalloc { + #else +extern "C" { + #endif + #define THROWSPEC throw() +#else + #define THROWSPEC +#endif + +/* These are the global functions */ + +/* Gets the usable size of an allocated block. Note this will always be bigger than what was +asked for due to rounding etc. Optionally returns 1 in isforeign if the block came from the +system allocator - note that there is a small (>0.01%) but real chance of segfault on non-Windows +systems when passing non-nedmalloc blocks if you don't use USE_MAGIC_HEADERS. 
+*/ +NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR size_t nedblksize(int *RESTRICT isforeign, void *RESTRICT mem) THROWSPEC; + +NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR void nedsetvalue(void *v) THROWSPEC; + +NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedmalloc(size_t size) THROWSPEC; +NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedcalloc(size_t no, size_t size) THROWSPEC; +NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedrealloc(void *mem, size_t size) THROWSPEC; +NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR void nedfree(void *mem) THROWSPEC; +NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void * nedmemalign(size_t alignment, size_t bytes) THROWSPEC; +NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR struct nedmallinfo nedmallinfo(void) THROWSPEC; +NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR int nedmallopt(int parno, int value) THROWSPEC; +NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR void* nedmalloc_internals(size_t *granularity, size_t *magic) THROWSPEC; +NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR int nedmalloc_trim(size_t pad) THROWSPEC; +NEDMALLOCEXTSPEC void nedmalloc_stats(void) THROWSPEC; +NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR size_t nedmalloc_footprint(void) THROWSPEC; +NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void **nedindependent_calloc(size_t elemsno, size_t elemsize, void **chunks) THROWSPEC; +NEDMALLOCEXTSPEC NEDMALLOCNOALIASATTR NEDMALLOCPTRATTR void **nedindependent_comalloc(size_t elems, size_t *sizes, void **chunks) THROWSPEC; + +/* Destroys the system memory pool used by the functions above. +Useful for when you have nedmalloc in a DLL you're about to unload. +If you call ANY nedmalloc functions after calling this you will +get a fatal exception! +*/ +NEDMALLOCEXTSPEC void neddestroysyspool() THROWSPEC; + +/* These are the pool functions */ +struct nedpool_t; +typedef struct nedpool_t nedpool; + +/* Creates a memory pool for use with the nedp* functions below. 
+Capacity is how much to allocate immediately (if you know you'll be allocating a lot +of memory very soon) which you can leave at zero. Threads specifies how many threads +will *normally* be accessing the pool concurrently. Setting this to zero means it +extends on demand, but be careful of this as it can rapidly consume system resources +where bursts of concurrent threads use a pool at once. +*/ +NEDMALLOCEXTSPEC NEDMALLOCPTRATTR nedpool *nedcreatepool(size_t capacity, int threads) THROWSPEC; + +/* Destroys a memory pool previously created by nedcreatepool(). +*/ +NEDMALLOCEXTSPEC void neddestroypool(nedpool *p) THROWSPEC; + +/* Returns a zero terminated snapshot of threadpools existing at the time of call. Call +nedfree() on the returned list when you are done. Returns zero if there is only the +system pool in existence. +*/ +NEDMALLOCEXTSPEC nedpool **nedpoollist() THROWSPEC; + +/* Sets a value to be associated with a pool. You can retrieve this value by passing +any memory block allocated from that pool. +*/ +NEDMALLOCEXTSPEC void nedpsetvalue(nedpool *p, void *v) THROWSPEC; + +/* Gets a previously set value using nedpsetvalue() or zero if memory is unknown. +Optionally can also retrieve pool. You can detect an unknown block by the return +being zero and *p being unmodifed. +*/ +NEDMALLOCEXTSPEC void *nedgetvalue(nedpool **p, void *mem) THROWSPEC; + +/* Trims the thread cache for the calling thread, returning any existing cache +data to the central pool. Remember to ALWAYS call with zero if you used the +system pool. Setting disable to non-zero replicates neddisablethreadcache(). +*/ +NEDMALLOCEXTSPEC void nedtrimthreadcache(nedpool *p, int disable) THROWSPEC; + +/* Disables the thread cache for the calling thread, returning any existing cache +data to the central pool. Remember to ALWAYS call with zero if you used the +system pool. 
+*/ +NEDMALLOCEXTSPEC void neddisablethreadcache(nedpool *p) THROWSPEC; + + +NEDMALLOCEXTSPEC NEDMALLOCPTRATTR void * nedpmalloc(nedpool *p, size_t size) THROWSPEC; +NEDMALLOCEXTSPEC NEDMALLOCPTRATTR void * nedpcalloc(nedpool *p, size_t no, size_t size) THROWSPEC; +NEDMALLOCEXTSPEC NEDMALLOCPTRATTR void * nedprealloc(nedpool *p, void *mem, size_t size) THROWSPEC; +NEDMALLOCEXTSPEC void nedpfree(nedpool *p, void *mem) THROWSPEC; +NEDMALLOCEXTSPEC NEDMALLOCPTRATTR void * nedpmemalign(nedpool *p, size_t alignment, size_t bytes) THROWSPEC; +NEDMALLOCEXTSPEC struct nedmallinfo nedpmallinfo(nedpool *p) THROWSPEC; +NEDMALLOCEXTSPEC int nedpmallopt(nedpool *p, int parno, int value) THROWSPEC; +NEDMALLOCEXTSPEC int nedpmalloc_trim(nedpool *p, size_t pad) THROWSPEC; +NEDMALLOCEXTSPEC void nedpmalloc_stats(nedpool *p) THROWSPEC; +NEDMALLOCEXTSPEC size_t nedpmalloc_footprint(nedpool *p) THROWSPEC; +NEDMALLOCEXTSPEC NEDMALLOCPTRATTR void **nedpindependent_calloc(nedpool *p, size_t elemsno, size_t elemsize, void **chunks) THROWSPEC; +NEDMALLOCEXTSPEC NEDMALLOCPTRATTR void **nedpindependent_comalloc(nedpool *p, size_t elems, size_t *sizes, void **chunks) THROWSPEC; + +#if defined(__cplusplus) +} +#endif + +#endif + +#endif diff --git a/drivers/openssl/register_openssl.cpp b/drivers/openssl/register_openssl.cpp index a4a60813b6..ed2150bef5 100644 --- a/drivers/openssl/register_openssl.cpp +++ b/drivers/openssl/register_openssl.cpp @@ -1,19 +1,19 @@ -#include "register_openssl.h"
-
-#include "stream_peer_openssl.h"
-#ifdef OPENSSL_ENABLED
-
-void register_openssl() {
-
- ObjectTypeDB::register_type<StreamPeerOpenSSL>();
- StreamPeerOpenSSL::initialize_ssl();
-
-}
-
-void unregister_openssl() {
-
- StreamPeerOpenSSL::finalize_ssl();
-
-}
-#endif
-
+#include "register_openssl.h" + +#include "stream_peer_openssl.h" +#ifdef OPENSSL_ENABLED + +void register_openssl() { + + ObjectTypeDB::register_type<StreamPeerOpenSSL>(); + StreamPeerOpenSSL::initialize_ssl(); + +} + +void unregister_openssl() { + + StreamPeerOpenSSL::finalize_ssl(); + +} +#endif + diff --git a/drivers/openssl/register_openssl.h b/drivers/openssl/register_openssl.h index e1c554ca4a..e547a2b750 100644 --- a/drivers/openssl/register_openssl.h +++ b/drivers/openssl/register_openssl.h @@ -1,11 +1,11 @@ -#ifndef REGISTER_OPENSSL_H
-#define REGISTER_OPENSSL_H
-
-#ifdef OPENSSL_ENABLED
-
-void register_openssl();
-void unregister_openssl();
-
-#endif
-
-#endif // REGISTER_OPENSSL_H
+#ifndef REGISTER_OPENSSL_H +#define REGISTER_OPENSSL_H + +#ifdef OPENSSL_ENABLED + +void register_openssl(); +void unregister_openssl(); + +#endif + +#endif // REGISTER_OPENSSL_H diff --git a/drivers/opus/SCsub b/drivers/opus/SCsub new file mode 100644 index 0000000000..a2bebf62b3 --- /dev/null +++ b/drivers/opus/SCsub @@ -0,0 +1,200 @@ +Import('env') + +opus_sources = [ + "opus/audio_stream_opus.cpp", +] + +opus_sources_silk=[] + +opus_sources_lib = [ + "opus/celt/bands.c", + "opus/celt/celt_lpc.c", + "opus/celt/entenc.c", + "opus/celt/mdct.c", + "opus/celt/quant_bands.c", + "opus/celt/celt.c", + "opus/celt/cwrs.c", + "opus/celt/kiss_fft.c", + "opus/celt/modes.c", + "opus/celt/rate.c", + "opus/celt/celt_decoder.c", + "opus/celt/entcode.c", + "opus/celt/laplace.c", + #opus/celt/opus_custom_demo.c", + "opus/celt/vq.c", + "opus/celt/celt_encoder.c", + "opus/celt/entdec.c", + "opus/celt/mathops.c", + "opus/celt/pitch.c", + "opus/silk/A2NLSF.c", + "opus/silk/decoder_set_fs.c", + "opus/silk/NLSF_stabilize.c", + "opus/silk/sigm_Q15.c", + "opus/silk/ana_filt_bank_1.c", + "opus/silk/enc_API.c", + "opus/silk/NLSF_unpack.c", + "opus/silk/sort.c", + "opus/silk/biquad_alt.c", + "opus/silk/encode_indices.c", + "opus/silk/NLSF_VQ.c", + "opus/silk/stereo_decode_pred.c", + "opus/silk/bwexpander_32.c", + "opus/silk/encode_pulses.c", + "opus/silk/NLSF_VQ_weights_laroia.c", + "opus/silk/stereo_encode_pred.c", + "opus/silk/bwexpander.c", + "opus/silk/gain_quant.c", + "opus/silk/NSQ.c", + "opus/silk/stereo_find_predictor.c", + "opus/silk/check_control_input.c", + "opus/silk/HP_variable_cutoff.c", + "opus/silk/NSQ_del_dec.c", + "opus/silk/stereo_LR_to_MS.c", + "opus/silk/CNG.c", + "opus/silk/init_decoder.c", + "opus/silk/pitch_est_tables.c", + "opus/silk/stereo_MS_to_LR.c", + "opus/silk/code_signs.c", + "opus/silk/init_encoder.c", + "opus/silk/PLC.c", + "opus/silk/stereo_quant_pred.c", + "opus/silk/control_audio_bandwidth.c", + "opus/silk/inner_prod_aligned.c", + 
"opus/silk/process_NLSFs.c", + "opus/silk/sum_sqr_shift.c", + "opus/silk/control_codec.c", + "opus/silk/interpolate.c", + "opus/silk/quant_LTP_gains.c", + "opus/silk/table_LSF_cos.c", + "opus/silk/control_SNR.c", + "opus/silk/lin2log.c", + "opus/silk/resampler.c", + "opus/silk/tables_gain.c", + "opus/silk/debug.c", + "opus/silk/log2lin.c", + "opus/silk/resampler_down2_3.c", + "opus/silk/tables_LTP.c", + "opus/silk/dec_API.c", + "opus/silk/LPC_analysis_filter.c", + "opus/silk/resampler_down2.c", + "opus/silk/tables_NLSF_CB_NB_MB.c", + "opus/silk/decode_core.c", + "opus/silk/LPC_inv_pred_gain.c", + "opus/silk/resampler_private_AR2.c", + "opus/silk/tables_NLSF_CB_WB.c", + "opus/silk/decode_frame.c", + "opus/silk/LP_variable_cutoff.c", + "opus/silk/resampler_private_down_FIR.c", + "opus/silk/tables_other.c", + "opus/silk/decode_indices.c", + "opus/silk/NLSF2A.c", + "opus/silk/resampler_private_IIR_FIR.c", + "opus/silk/tables_pitch_lag.c", + "opus/silk/decode_parameters.c", + "opus/silk/NLSF_decode.c", + "opus/silk/resampler_private_up2_HQ.c", + "opus/silk/tables_pulses_per_block.c", + "opus/silk/decode_pitch.c", + "opus/silk/NLSF_del_dec_quant.c", + "opus/silk/resampler_rom.c", + "opus/silk/VAD.c", + "opus/silk/decode_pulses.c", + "opus/silk/NLSF_encode.c", + "opus/silk/shell_coder.c", + "opus/silk/VQ_WMat_EC.c", + "opus/analysis.c", + "opus/internal.c", + "opus/opus.c", + #"opus/opus_demo.c", + "opus/opus_multistream.c", + "opus/repacketizer.c", + "opus/wincerts.c", + "opus/http.c", + "opus/mlp.c", + #"opus/opus_compare.c", + "opus/opus_encoder.c", + "opus/opus_multistream_decoder.c", + #"opus/repacketizer_demo.c", + "opus/info.c", + "opus/mlp_data.c", + "opus/opus_decoder.c", + "opus/opusfile.c", + "opus/opus_multistream_encoder.c", + "opus/stream.c" +] + +if("opus_fixed_point" in env and env.opus_fixed_point=="yes"): + env.Append(CPPPATH=["#drivers/opus/silk/fixed"], CFLAGS=["-DOPUS_FIXED_POINT"]) + opus_sources_silk = [ + "opus/silk/fixed/apply_sine_window_FIX.c", 
+ "opus/silk/fixed/k2a_FIX.c", + "opus/silk/fixed/residual_energy16_FIX.c", + "opus/silk/fixed/autocorr_FIX.c", + "opus/silk/fixed/k2a_Q16_FIX.c", + "opus/silk/fixed/residual_energy_FIX.c", + "opus/silk/fixed/burg_modified_FIX.c", + "opus/silk/fixed/LTP_analysis_filter_FIX.c", + "opus/silk/fixed/schur64_FIX.c", + "opus/silk/fixed/corrMatrix_FIX.c", + "opus/silk/fixed/LTP_scale_ctrl_FIX.c", + "opus/silk/fixed/schur_FIX.c", + "opus/silk/fixed/encode_frame_FIX.c", + "opus/silk/fixed/noise_shape_analysis_FIX.c", + "opus/silk/fixed/solve_LS_FIX.c", + "opus/silk/fixed/find_LPC_FIX.c", + "opus/silk/fixed/pitch_analysis_core_FIX.c", + "opus/silk/fixed/vector_ops_FIX.c", + "opus/silk/fixed/find_LTP_FIX.c", + "opus/silk/fixed/prefilter_FIX.c", + "opus/silk/fixed/warped_autocorrelation_FIX.c", + "opus/silk/fixed/find_pitch_lags_FIX.c", + "opus/silk/fixed/process_gains_FIX.c", + "opus/silk/fixed/find_pred_coefs_FIX.c", + "opus/silk/fixed/regularize_correlations_FIX.c" + ] +else: + env.Append(CPPPATH=["#drivers/opus/silk/float"]) + opus_sources_silk = [ + "opus/silk/float/apply_sine_window_FLP.c", + "opus/silk/float/inner_product_FLP.c", + "opus/silk/float/regularize_correlations_FLP.c", + "opus/silk/float/autocorrelation_FLP.c", + "opus/silk/float/k2a_FLP.c", + "opus/silk/float/residual_energy_FLP.c", + "opus/silk/float/burg_modified_FLP.c", + "opus/silk/float/levinsondurbin_FLP.c", + "opus/silk/float/scale_copy_vector_FLP.c", + "opus/silk/float/bwexpander_FLP.c", + "opus/silk/float/LPC_analysis_filter_FLP.c", + "opus/silk/float/scale_vector_FLP.c", + "opus/silk/float/corrMatrix_FLP.c", + "opus/silk/float/LPC_inv_pred_gain_FLP.c", + "opus/silk/float/schur_FLP.c", + "opus/silk/float/encode_frame_FLP.c", + "opus/silk/float/LTP_analysis_filter_FLP.c", + "opus/silk/float/solve_LS_FLP.c", + "opus/silk/float/energy_FLP.c", + "opus/silk/float/LTP_scale_ctrl_FLP.c", + "opus/silk/float/sort_FLP.c", + "opus/silk/float/find_LPC_FLP.c", + "opus/silk/float/noise_shape_analysis_FLP.c", + 
"opus/silk/float/warped_autocorrelation_FLP.c", + "opus/silk/float/find_LTP_FLP.c", + "opus/silk/float/pitch_analysis_core_FLP.c", + "opus/silk/float/wrappers_FLP.c", + "opus/silk/float/find_pitch_lags_FLP.c", + "opus/silk/float/prefilter_FLP.c", + "opus/silk/float/find_pred_coefs_FLP.c", + "opus/silk/float/process_gains_FLP.c" + ] + + +opus_sources_lib+=opus_sources_silk +env.drivers_sources+=opus_sources_lib +env.drivers_sources+=opus_sources + +env.Append(CPPPATH=["#drivers/opus"]) +env.Append(CPPPATH=["#drivers/opus/celt","#drivers/opus/silk","#drivers/opus/silk/float"]) +env.Append(CFLAGS=["-DOPUS_HAVE_CONFIG_H"]) + +Export('env') diff --git a/drivers/opus/analysis.c b/drivers/opus/analysis.c new file mode 100644 index 0000000000..47e8668b8e --- /dev/null +++ b/drivers/opus/analysis.c @@ -0,0 +1,645 @@ +/* Copyright (c) 2011 Xiph.Org Foundation + Written by Jean-Marc Valin */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "kiss_fft.h" +#include "celt.h" +#include "opus_modes.h" +#include "arch.h" +#include "quant_bands.h" +#include <stdio.h> +#include "analysis.h" +#include "mlp.h" +#include "stack_alloc.h" + +extern const MLP net; + +#ifndef M_PI +#define M_PI 3.141592653 +#endif + +static const float dct_table[128] = { + 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, + 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, + 0.351851f, 0.338330f, 0.311806f, 0.273300f, 0.224292f, 0.166664f, 0.102631f, 0.034654f, + -0.034654f,-0.102631f,-0.166664f,-0.224292f,-0.273300f,-0.311806f,-0.338330f,-0.351851f, + 0.346760f, 0.293969f, 0.196424f, 0.068975f,-0.068975f,-0.196424f,-0.293969f,-0.346760f, + -0.346760f,-0.293969f,-0.196424f,-0.068975f, 0.068975f, 0.196424f, 0.293969f, 0.346760f, + 0.338330f, 0.224292f, 0.034654f,-0.166664f,-0.311806f,-0.351851f,-0.273300f,-0.102631f, + 0.102631f, 0.273300f, 0.351851f, 0.311806f, 0.166664f,-0.034654f,-0.224292f,-0.338330f, + 0.326641f, 0.135299f,-0.135299f,-0.326641f,-0.326641f,-0.135299f, 0.135299f, 0.326641f, + 0.326641f, 0.135299f,-0.135299f,-0.326641f,-0.326641f,-0.135299f, 0.135299f, 0.326641f, + 0.311806f, 0.034654f,-0.273300f,-0.338330f,-0.102631f, 0.224292f, 0.351851f, 0.166664f, + -0.166664f,-0.351851f,-0.224292f, 0.102631f, 0.338330f, 0.273300f,-0.034654f,-0.311806f, + 
0.293969f,-0.068975f,-0.346760f,-0.196424f, 0.196424f, 0.346760f, 0.068975f,-0.293969f, + -0.293969f, 0.068975f, 0.346760f, 0.196424f,-0.196424f,-0.346760f,-0.068975f, 0.293969f, + 0.273300f,-0.166664f,-0.338330f, 0.034654f, 0.351851f, 0.102631f,-0.311806f,-0.224292f, + 0.224292f, 0.311806f,-0.102631f,-0.351851f,-0.034654f, 0.338330f, 0.166664f,-0.273300f, +}; + +static const float analysis_window[240] = { + 0.000043f, 0.000171f, 0.000385f, 0.000685f, 0.001071f, 0.001541f, 0.002098f, 0.002739f, + 0.003466f, 0.004278f, 0.005174f, 0.006156f, 0.007222f, 0.008373f, 0.009607f, 0.010926f, + 0.012329f, 0.013815f, 0.015385f, 0.017037f, 0.018772f, 0.020590f, 0.022490f, 0.024472f, + 0.026535f, 0.028679f, 0.030904f, 0.033210f, 0.035595f, 0.038060f, 0.040604f, 0.043227f, + 0.045928f, 0.048707f, 0.051564f, 0.054497f, 0.057506f, 0.060591f, 0.063752f, 0.066987f, + 0.070297f, 0.073680f, 0.077136f, 0.080665f, 0.084265f, 0.087937f, 0.091679f, 0.095492f, + 0.099373f, 0.103323f, 0.107342f, 0.111427f, 0.115579f, 0.119797f, 0.124080f, 0.128428f, + 0.132839f, 0.137313f, 0.141849f, 0.146447f, 0.151105f, 0.155823f, 0.160600f, 0.165435f, + 0.170327f, 0.175276f, 0.180280f, 0.185340f, 0.190453f, 0.195619f, 0.200838f, 0.206107f, + 0.211427f, 0.216797f, 0.222215f, 0.227680f, 0.233193f, 0.238751f, 0.244353f, 0.250000f, + 0.255689f, 0.261421f, 0.267193f, 0.273005f, 0.278856f, 0.284744f, 0.290670f, 0.296632f, + 0.302628f, 0.308658f, 0.314721f, 0.320816f, 0.326941f, 0.333097f, 0.339280f, 0.345492f, + 0.351729f, 0.357992f, 0.364280f, 0.370590f, 0.376923f, 0.383277f, 0.389651f, 0.396044f, + 0.402455f, 0.408882f, 0.415325f, 0.421783f, 0.428254f, 0.434737f, 0.441231f, 0.447736f, + 0.454249f, 0.460770f, 0.467298f, 0.473832f, 0.480370f, 0.486912f, 0.493455f, 0.500000f, + 0.506545f, 0.513088f, 0.519630f, 0.526168f, 0.532702f, 0.539230f, 0.545751f, 0.552264f, + 0.558769f, 0.565263f, 0.571746f, 0.578217f, 0.584675f, 0.591118f, 0.597545f, 0.603956f, + 0.610349f, 0.616723f, 0.623077f, 0.629410f, 0.635720f, 
0.642008f, 0.648271f, 0.654508f, + 0.660720f, 0.666903f, 0.673059f, 0.679184f, 0.685279f, 0.691342f, 0.697372f, 0.703368f, + 0.709330f, 0.715256f, 0.721144f, 0.726995f, 0.732807f, 0.738579f, 0.744311f, 0.750000f, + 0.755647f, 0.761249f, 0.766807f, 0.772320f, 0.777785f, 0.783203f, 0.788573f, 0.793893f, + 0.799162f, 0.804381f, 0.809547f, 0.814660f, 0.819720f, 0.824724f, 0.829673f, 0.834565f, + 0.839400f, 0.844177f, 0.848895f, 0.853553f, 0.858151f, 0.862687f, 0.867161f, 0.871572f, + 0.875920f, 0.880203f, 0.884421f, 0.888573f, 0.892658f, 0.896677f, 0.900627f, 0.904508f, + 0.908321f, 0.912063f, 0.915735f, 0.919335f, 0.922864f, 0.926320f, 0.929703f, 0.933013f, + 0.936248f, 0.939409f, 0.942494f, 0.945503f, 0.948436f, 0.951293f, 0.954072f, 0.956773f, + 0.959396f, 0.961940f, 0.964405f, 0.966790f, 0.969096f, 0.971321f, 0.973465f, 0.975528f, + 0.977510f, 0.979410f, 0.981228f, 0.982963f, 0.984615f, 0.986185f, 0.987671f, 0.989074f, + 0.990393f, 0.991627f, 0.992778f, 0.993844f, 0.994826f, 0.995722f, 0.996534f, 0.997261f, + 0.997902f, 0.998459f, 0.998929f, 0.999315f, 0.999615f, 0.999829f, 0.999957f, 1.000000f, +}; + +static const int tbands[NB_TBANDS+1] = { + 2, 4, 6, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, 68, 80, 96, 120 +}; + +static const int extra_bands[NB_TOT_BANDS+1] = { + 1, 2, 4, 6, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, 68, 80, 96, 120, 160, 200 +}; + +/*static const float tweight[NB_TBANDS+1] = { + .3, .4, .5, .6, .7, .8, .9, 1., 1., 1., 1., 1., 1., 1., .8, .7, .6, .5 +};*/ + +#define NB_TONAL_SKIP_BANDS 9 + +#define cA 0.43157974f +#define cB 0.67848403f +#define cC 0.08595542f +#define cE ((float)M_PI/2) +static OPUS_INLINE float fast_atan2f(float y, float x) { + float x2, y2; + /* Should avoid underflow on the values we'll get */ + if (ABS16(x)+ABS16(y)<1e-9f) + { + x*=1e12f; + y*=1e12f; + } + x2 = x*x; + y2 = y*y; + if(x2<y2){ + float den = (y2 + cB*x2) * (y2 + cC*x2); + if (den!=0) + return -x*y*(y2 + cA*x2) / den + (y<0 ? 
-cE : cE); + else + return (y<0 ? -cE : cE); + }else{ + float den = (x2 + cB*y2) * (x2 + cC*y2); + if (den!=0) + return x*y*(x2 + cA*y2) / den + (y<0 ? -cE : cE) - (x*y<0 ? -cE : cE); + else + return (y<0 ? -cE : cE) - (x*y<0 ? -cE : cE); + } +} + +void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int len) +{ + int pos; + int curr_lookahead; + float psum; + int i; + + pos = tonal->read_pos; + curr_lookahead = tonal->write_pos-tonal->read_pos; + if (curr_lookahead<0) + curr_lookahead += DETECT_SIZE; + + if (len > 480 && pos != tonal->write_pos) + { + pos++; + if (pos==DETECT_SIZE) + pos=0; + } + if (pos == tonal->write_pos) + pos--; + if (pos<0) + pos = DETECT_SIZE-1; + OPUS_COPY(info_out, &tonal->info[pos], 1); + tonal->read_subframe += len/120; + while (tonal->read_subframe>=4) + { + tonal->read_subframe -= 4; + tonal->read_pos++; + } + if (tonal->read_pos>=DETECT_SIZE) + tonal->read_pos-=DETECT_SIZE; + + /* Compensate for the delay in the features themselves. 
+ FIXME: Need a better estimate the 10 I just made up */ + curr_lookahead = IMAX(curr_lookahead-10, 0); + + psum=0; + /* Summing the probability of transition patterns that involve music at + time (DETECT_SIZE-curr_lookahead-1) */ + for (i=0;i<DETECT_SIZE-curr_lookahead;i++) + psum += tonal->pmusic[i]; + for (;i<DETECT_SIZE;i++) + psum += tonal->pspeech[i]; + psum = psum*tonal->music_confidence + (1-psum)*tonal->speech_confidence; + /*printf("%f %f %f\n", psum, info_out->music_prob, info_out->tonality);*/ + + info_out->music_prob = psum; +} + +void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, const CELTMode *celt_mode, const void *x, int len, int offset, int c1, int c2, int C, int lsb_depth, downmix_func downmix) +{ + int i, b; + const kiss_fft_state *kfft; + VARDECL(kiss_fft_cpx, in); + VARDECL(kiss_fft_cpx, out); + int N = 480, N2=240; + float * OPUS_RESTRICT A = tonal->angle; + float * OPUS_RESTRICT dA = tonal->d_angle; + float * OPUS_RESTRICT d2A = tonal->d2_angle; + VARDECL(float, tonality); + VARDECL(float, noisiness); + float band_tonality[NB_TBANDS]; + float logE[NB_TBANDS]; + float BFCC[8]; + float features[25]; + float frame_tonality; + float max_frame_tonality; + /*float tw_sum=0;*/ + float frame_noisiness; + const float pi4 = (float)(M_PI*M_PI*M_PI*M_PI); + float slope=0; + float frame_stationarity; + float relativeE; + float frame_probs[2]; + float alpha, alphaE, alphaE2; + float frame_loudness; + float bandwidth_mask; + int bandwidth=0; + float maxE = 0; + float noise_floor; + int remaining; + AnalysisInfo *info; + SAVE_STACK; + + tonal->last_transition++; + alpha = 1.f/IMIN(20, 1+tonal->count); + alphaE = 1.f/IMIN(50, 1+tonal->count); + alphaE2 = 1.f/IMIN(1000, 1+tonal->count); + + if (tonal->count<4) + tonal->music_prob = .5; + kfft = celt_mode->mdct.kfft[0]; + if (tonal->count==0) + tonal->mem_fill = 240; + downmix(x, &tonal->inmem[tonal->mem_fill], IMIN(len, ANALYSIS_BUF_SIZE-tonal->mem_fill), offset, c1, c2, C); + if 
(tonal->mem_fill+len < ANALYSIS_BUF_SIZE) + { + tonal->mem_fill += len; + /* Don't have enough to update the analysis */ + RESTORE_STACK; + return; + } + info = &tonal->info[tonal->write_pos++]; + if (tonal->write_pos>=DETECT_SIZE) + tonal->write_pos-=DETECT_SIZE; + + ALLOC(in, 480, kiss_fft_cpx); + ALLOC(out, 480, kiss_fft_cpx); + ALLOC(tonality, 240, float); + ALLOC(noisiness, 240, float); + for (i=0;i<N2;i++) + { + float w = analysis_window[i]; + in[i].r = (kiss_fft_scalar)(w*tonal->inmem[i]); + in[i].i = (kiss_fft_scalar)(w*tonal->inmem[N2+i]); + in[N-i-1].r = (kiss_fft_scalar)(w*tonal->inmem[N-i-1]); + in[N-i-1].i = (kiss_fft_scalar)(w*tonal->inmem[N+N2-i-1]); + } + OPUS_MOVE(tonal->inmem, tonal->inmem+ANALYSIS_BUF_SIZE-240, 240); + remaining = len - (ANALYSIS_BUF_SIZE-tonal->mem_fill); + downmix(x, &tonal->inmem[240], remaining, offset+ANALYSIS_BUF_SIZE-tonal->mem_fill, c1, c2, C); + tonal->mem_fill = 240 + remaining; + opus_fft(kfft, in, out); + + for (i=1;i<N2;i++) + { + float X1r, X2r, X1i, X2i; + float angle, d_angle, d2_angle; + float angle2, d_angle2, d2_angle2; + float mod1, mod2, avg_mod; + X1r = (float)out[i].r+out[N-i].r; + X1i = (float)out[i].i-out[N-i].i; + X2r = (float)out[i].i+out[N-i].i; + X2i = (float)out[N-i].r-out[i].r; + + angle = (float)(.5f/M_PI)*fast_atan2f(X1i, X1r); + d_angle = angle - A[i]; + d2_angle = d_angle - dA[i]; + + angle2 = (float)(.5f/M_PI)*fast_atan2f(X2i, X2r); + d_angle2 = angle2 - angle; + d2_angle2 = d_angle2 - d_angle; + + mod1 = d2_angle - (float)floor(.5+d2_angle); + noisiness[i] = ABS16(mod1); + mod1 *= mod1; + mod1 *= mod1; + + mod2 = d2_angle2 - (float)floor(.5+d2_angle2); + noisiness[i] += ABS16(mod2); + mod2 *= mod2; + mod2 *= mod2; + + avg_mod = .25f*(d2A[i]+2.f*mod1+mod2); + tonality[i] = 1.f/(1.f+40.f*16.f*pi4*avg_mod)-.015f; + + A[i] = angle2; + dA[i] = d_angle2; + d2A[i] = mod2; + } + + frame_tonality = 0; + max_frame_tonality = 0; + /*tw_sum = 0;*/ + info->activity = 0; + frame_noisiness = 0; + 
frame_stationarity = 0; + if (!tonal->count) + { + for (b=0;b<NB_TBANDS;b++) + { + tonal->lowE[b] = 1e10; + tonal->highE[b] = -1e10; + } + } + relativeE = 0; + frame_loudness = 0; + for (b=0;b<NB_TBANDS;b++) + { + float E=0, tE=0, nE=0; + float L1, L2; + float stationarity; + for (i=tbands[b];i<tbands[b+1];i++) + { + float binE = out[i].r*(float)out[i].r + out[N-i].r*(float)out[N-i].r + + out[i].i*(float)out[i].i + out[N-i].i*(float)out[N-i].i; +#ifdef OPUS_FIXED_POINT + /* FIXME: It's probably best to change the BFCC filter initial state instead */ + binE *= 5.55e-17f; +#endif + E += binE; + tE += binE*tonality[i]; + nE += binE*2.f*(.5f-noisiness[i]); + } + tonal->E[tonal->E_count][b] = E; + frame_noisiness += nE/(1e-15f+E); + + frame_loudness += (float)sqrt(E+1e-10f); + logE[b] = (float)log(E+1e-10f); + tonal->lowE[b] = MIN32(logE[b], tonal->lowE[b]+.01f); + tonal->highE[b] = MAX32(logE[b], tonal->highE[b]-.1f); + if (tonal->highE[b] < tonal->lowE[b]+1.f) + { + tonal->highE[b]+=.5f; + tonal->lowE[b]-=.5f; + } + relativeE += (logE[b]-tonal->lowE[b])/(1e-15f+tonal->highE[b]-tonal->lowE[b]); + + L1=L2=0; + for (i=0;i<NB_FRAMES;i++) + { + L1 += (float)sqrt(tonal->E[i][b]); + L2 += tonal->E[i][b]; + } + + stationarity = MIN16(0.99f,L1/(float)sqrt(1e-15+NB_FRAMES*L2)); + stationarity *= stationarity; + stationarity *= stationarity; + frame_stationarity += stationarity; + /*band_tonality[b] = tE/(1e-15+E)*/; + band_tonality[b] = MAX16(tE/(1e-15f+E), stationarity*tonal->prev_band_tonality[b]); +#if 0 + if (b>=NB_TONAL_SKIP_BANDS) + { + frame_tonality += tweight[b]*band_tonality[b]; + tw_sum += tweight[b]; + } +#else + frame_tonality += band_tonality[b]; + if (b>=NB_TBANDS-NB_TONAL_SKIP_BANDS) + frame_tonality -= band_tonality[b-NB_TBANDS+NB_TONAL_SKIP_BANDS]; +#endif + max_frame_tonality = MAX16(max_frame_tonality, (1.f+.03f*(b-NB_TBANDS))*frame_tonality); + slope += band_tonality[b]*(b-8); + /*printf("%f %f ", band_tonality[b], stationarity);*/ + 
tonal->prev_band_tonality[b] = band_tonality[b]; + } + + bandwidth_mask = 0; + bandwidth = 0; + maxE = 0; + noise_floor = 5.7e-4f/(1<<(IMAX(0,lsb_depth-8))); +#ifdef OPUS_FIXED_POINT + noise_floor *= 1<<(15+SIG_SHIFT); +#endif + noise_floor *= noise_floor; + for (b=0;b<NB_TOT_BANDS;b++) + { + float E=0; + int band_start, band_end; + /* Keep a margin of 300 Hz for aliasing */ + band_start = extra_bands[b]; + band_end = extra_bands[b+1]; + for (i=band_start;i<band_end;i++) + { + float binE = out[i].r*(float)out[i].r + out[N-i].r*(float)out[N-i].r + + out[i].i*(float)out[i].i + out[N-i].i*(float)out[N-i].i; + E += binE; + } + maxE = MAX32(maxE, E); + tonal->meanE[b] = MAX32((1-alphaE2)*tonal->meanE[b], E); + E = MAX32(E, tonal->meanE[b]); + /* Use a simple follower with 13 dB/Bark slope for spreading function */ + bandwidth_mask = MAX32(.05f*bandwidth_mask, E); + /* Consider the band "active" only if all these conditions are met: + 1) less than 10 dB below the simple follower + 2) less than 90 dB below the peak band (maximal masking possible considering + both the ATH and the loudness-dependent slope of the spreading function) + 3) above the PCM quantization noise floor + */ + if (E>.1*bandwidth_mask && E*1e9f > maxE && E > noise_floor*(band_end-band_start)) + bandwidth = b; + } + if (tonal->count<=2) + bandwidth = 20; + frame_loudness = 20*(float)log10(frame_loudness); + tonal->Etracker = MAX32(tonal->Etracker-.03f, frame_loudness); + tonal->lowECount *= (1-alphaE); + if (frame_loudness < tonal->Etracker-30) + tonal->lowECount += alphaE; + + for (i=0;i<8;i++) + { + float sum=0; + for (b=0;b<16;b++) + sum += dct_table[i*16+b]*logE[b]; + BFCC[i] = sum; + } + + frame_stationarity /= NB_TBANDS; + relativeE /= NB_TBANDS; + if (tonal->count<10) + relativeE = .5; + frame_noisiness /= NB_TBANDS; +#if 1 + info->activity = frame_noisiness + (1-frame_noisiness)*relativeE; +#else + info->activity = .5*(1+frame_noisiness-frame_stationarity); +#endif + frame_tonality = 
(max_frame_tonality/(NB_TBANDS-NB_TONAL_SKIP_BANDS)); + frame_tonality = MAX16(frame_tonality, tonal->prev_tonality*.8f); + tonal->prev_tonality = frame_tonality; + + slope /= 8*8; + info->tonality_slope = slope; + + tonal->E_count = (tonal->E_count+1)%NB_FRAMES; + tonal->count++; + info->tonality = frame_tonality; + + for (i=0;i<4;i++) + features[i] = -0.12299f*(BFCC[i]+tonal->mem[i+24]) + 0.49195f*(tonal->mem[i]+tonal->mem[i+16]) + 0.69693f*tonal->mem[i+8] - 1.4349f*tonal->cmean[i]; + + for (i=0;i<4;i++) + tonal->cmean[i] = (1-alpha)*tonal->cmean[i] + alpha*BFCC[i]; + + for (i=0;i<4;i++) + features[4+i] = 0.63246f*(BFCC[i]-tonal->mem[i+24]) + 0.31623f*(tonal->mem[i]-tonal->mem[i+16]); + for (i=0;i<3;i++) + features[8+i] = 0.53452f*(BFCC[i]+tonal->mem[i+24]) - 0.26726f*(tonal->mem[i]+tonal->mem[i+16]) -0.53452f*tonal->mem[i+8]; + + if (tonal->count > 5) + { + for (i=0;i<9;i++) + tonal->std[i] = (1-alpha)*tonal->std[i] + alpha*features[i]*features[i]; + } + + for (i=0;i<8;i++) + { + tonal->mem[i+24] = tonal->mem[i+16]; + tonal->mem[i+16] = tonal->mem[i+8]; + tonal->mem[i+8] = tonal->mem[i]; + tonal->mem[i] = BFCC[i]; + } + for (i=0;i<9;i++) + features[11+i] = (float)sqrt(tonal->std[i]); + features[20] = info->tonality; + features[21] = info->activity; + features[22] = frame_stationarity; + features[23] = info->tonality_slope; + features[24] = tonal->lowECount; + +#ifndef DISABLE_FLOAT_API + mlp_process(&net, features, frame_probs); + frame_probs[0] = .5f*(frame_probs[0]+1); + /* Curve fitting between the MLP probability and the actual probability */ + frame_probs[0] = .01f + 1.21f*frame_probs[0]*frame_probs[0] - .23f*(float)pow(frame_probs[0], 10); + /* Probability of active audio (as opposed to silence) */ + frame_probs[1] = .5f*frame_probs[1]+.5f; + /* Consider that silence has a 50-50 probability. 
*/ + frame_probs[0] = frame_probs[1]*frame_probs[0] + (1-frame_probs[1])*.5f; + + /*printf("%f %f ", frame_probs[0], frame_probs[1]);*/ + { + /* Probability of state transition */ + float tau; + /* Represents independence of the MLP probabilities, where + beta=1 means fully independent. */ + float beta; + /* Denormalized probability of speech (p0) and music (p1) after update */ + float p0, p1; + /* Probabilities for "all speech" and "all music" */ + float s0, m0; + /* Probability sum for renormalisation */ + float psum; + /* Instantaneous probability of speech and music, with beta pre-applied. */ + float speech0; + float music0; + + /* One transition every 3 minutes of active audio */ + tau = .00005f*frame_probs[1]; + beta = .05f; + if (1) { + /* Adapt beta based on how "unexpected" the new prob is */ + float p, q; + p = MAX16(.05f,MIN16(.95f,frame_probs[0])); + q = MAX16(.05f,MIN16(.95f,tonal->music_prob)); + beta = .01f+.05f*ABS16(p-q)/(p*(1-q)+q*(1-p)); + } + /* p0 and p1 are the probabilities of speech and music at this frame + using only information from previous frame and applying the + state transition model */ + p0 = (1-tonal->music_prob)*(1-tau) + tonal->music_prob *tau; + p1 = tonal->music_prob *(1-tau) + (1-tonal->music_prob)*tau; + /* We apply the current probability with exponent beta to work around + the fact that the probability estimates aren't independent. */ + p0 *= (float)pow(1-frame_probs[0], beta); + p1 *= (float)pow(frame_probs[0], beta); + /* Normalise the probabilities to get the Markov probability of music. */ + tonal->music_prob = p1/(p0+p1); + info->music_prob = tonal->music_prob; + + /* This chunk of code deals with delayed decision. */ + psum=1e-20f; + /* Instantaneous probability of speech and music, with beta pre-applied. 
*/ + speech0 = (float)pow(1-frame_probs[0], beta); + music0 = (float)pow(frame_probs[0], beta); + if (tonal->count==1) + { + tonal->pspeech[0]=.5; + tonal->pmusic [0]=.5; + } + /* Updated probability of having only speech (s0) or only music (m0), + before considering the new observation. */ + s0 = tonal->pspeech[0] + tonal->pspeech[1]; + m0 = tonal->pmusic [0] + tonal->pmusic [1]; + /* Updates s0 and m0 with instantaneous probability. */ + tonal->pspeech[0] = s0*(1-tau)*speech0; + tonal->pmusic [0] = m0*(1-tau)*music0; + /* Propagate the transition probabilities */ + for (i=1;i<DETECT_SIZE-1;i++) + { + tonal->pspeech[i] = tonal->pspeech[i+1]*speech0; + tonal->pmusic [i] = tonal->pmusic [i+1]*music0; + } + /* Probability that the latest frame is speech, when all the previous ones were music. */ + tonal->pspeech[DETECT_SIZE-1] = m0*tau*speech0; + /* Probability that the latest frame is music, when all the previous ones were speech. */ + tonal->pmusic [DETECT_SIZE-1] = s0*tau*music0; + + /* Renormalise probabilities to 1 */ + for (i=0;i<DETECT_SIZE;i++) + psum += tonal->pspeech[i] + tonal->pmusic[i]; + psum = 1.f/psum; + for (i=0;i<DETECT_SIZE;i++) + { + tonal->pspeech[i] *= psum; + tonal->pmusic [i] *= psum; + } + psum = tonal->pmusic[0]; + for (i=1;i<DETECT_SIZE;i++) + psum += tonal->pspeech[i]; + + /* Estimate our confidence in the speech/music decisions */ + if (frame_probs[1]>.75) + { + if (tonal->music_prob>.9) + { + float adapt; + adapt = 1.f/(++tonal->music_confidence_count); + tonal->music_confidence_count = IMIN(tonal->music_confidence_count, 500); + tonal->music_confidence += adapt*MAX16(-.2f,frame_probs[0]-tonal->music_confidence); + } + if (tonal->music_prob<.1) + { + float adapt; + adapt = 1.f/(++tonal->speech_confidence_count); + tonal->speech_confidence_count = IMIN(tonal->speech_confidence_count, 500); + tonal->speech_confidence += adapt*MIN16(.2f,frame_probs[0]-tonal->speech_confidence); + } + } else { + if (tonal->music_confidence_count==0) + 
tonal->music_confidence = .9f; + if (tonal->speech_confidence_count==0) + tonal->speech_confidence = .1f; + } + } + if (tonal->last_music != (tonal->music_prob>.5f)) + tonal->last_transition=0; + tonal->last_music = tonal->music_prob>.5f; +#else + info->music_prob = 0; +#endif + /*for (i=0;i<25;i++) + printf("%f ", features[i]); + printf("\n");*/ + + info->bandwidth = bandwidth; + /*printf("%d %d\n", info->bandwidth, info->opus_bandwidth);*/ + info->noisiness = frame_noisiness; + info->valid = 1; + if (info_out!=NULL) + OPUS_COPY(info_out, info, 1); + RESTORE_STACK; +} + +void run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const void *analysis_pcm, + int analysis_frame_size, int frame_size, int c1, int c2, int C, opus_int32 Fs, + int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info) +{ + int offset; + int pcm_len; + + if (analysis_pcm != NULL) + { + /* Avoid overflow/wrap-around of the analysis buffer */ + analysis_frame_size = IMIN((DETECT_SIZE-5)*Fs/100, analysis_frame_size); + + pcm_len = analysis_frame_size - analysis->analysis_offset; + offset = analysis->analysis_offset; + do { + tonality_analysis(analysis, NULL, celt_mode, analysis_pcm, IMIN(480, pcm_len), offset, c1, c2, C, lsb_depth, downmix); + offset += 480; + pcm_len -= 480; + } while (pcm_len>0); + analysis->analysis_offset = analysis_frame_size; + + analysis->analysis_offset -= frame_size; + } + + analysis_info->valid = 0; + tonality_get_info(analysis, analysis_info, frame_size); +} diff --git a/drivers/opus/analysis.h b/drivers/opus/analysis.h new file mode 100644 index 0000000000..be0388faa3 --- /dev/null +++ b/drivers/opus/analysis.h @@ -0,0 +1,90 @@ +/* Copyright (c) 2011 Xiph.Org Foundation + Written by Jean-Marc Valin */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list 
of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef ANALYSIS_H +#define ANALYSIS_H + +#include "celt.h" +#include "opus_private.h" + +#define NB_FRAMES 8 +#define NB_TBANDS 18 +#define NB_TOT_BANDS 21 +#define ANALYSIS_BUF_SIZE 720 /* 15 ms at 48 kHz */ + +#define DETECT_SIZE 200 + +typedef struct { + float angle[240]; + float d_angle[240]; + float d2_angle[240]; + opus_val32 inmem[ANALYSIS_BUF_SIZE]; + int mem_fill; /* number of usable samples in the buffer */ + float prev_band_tonality[NB_TBANDS]; + float prev_tonality; + float E[NB_FRAMES][NB_TBANDS]; + float lowE[NB_TBANDS]; + float highE[NB_TBANDS]; + float meanE[NB_TOT_BANDS]; + float mem[32]; + float cmean[8]; + float std[9]; + float music_prob; + float Etracker; + float lowECount; + int E_count; + int last_music; + int last_transition; + int count; + float subframe_mem[3]; + int analysis_offset; + /** Probability of having speech for time i to DETECT_SIZE-1 (and music before). 
+ pspeech[0] is the probability that all frames in the window are speech. */ + float pspeech[DETECT_SIZE]; + /** Probability of having music for time i to DETECT_SIZE-1 (and speech before). + pmusic[0] is the probability that all frames in the window are music. */ + float pmusic[DETECT_SIZE]; + float speech_confidence; + float music_confidence; + int speech_confidence_count; + int music_confidence_count; + int write_pos; + int read_pos; + int read_subframe; + AnalysisInfo info[DETECT_SIZE]; +} TonalityAnalysisState; + +void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, + const CELTMode *celt_mode, const void *x, int len, int offset, int c1, int c2, int C, int lsb_depth, downmix_func downmix); + +void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int len); + +void run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const void *analysis_pcm, + int analysis_frame_size, int frame_size, int c1, int c2, int C, opus_int32 Fs, + int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info); + +#endif diff --git a/drivers/opus/audio_stream_opus.cpp b/drivers/opus/audio_stream_opus.cpp new file mode 100644 index 0000000000..eb9c81e152 --- /dev/null +++ b/drivers/opus/audio_stream_opus.cpp @@ -0,0 +1,376 @@ +/*************************************************************************/ +/* audio_stream_opus.cpp */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* http://www.godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2015 Juan Linietsky, Ariel Manzur. 
*/ +/* */ +/* Author: George Marques <george@gmarqu.es> */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ +/*************************************************************************/ +#include "audio_stream_opus.h" + +const float AudioStreamPlaybackOpus::osrate=48000.0f; + +int AudioStreamPlaybackOpus::_op_read_func(void *_stream, unsigned char *_ptr, int _nbytes) { + FileAccess *fa=(FileAccess*)_stream; + + if(fa->eof_reached()) + return 0; + + uint8_t *dst = (uint8_t*)_ptr; + + int read = fa->get_buffer(dst, _nbytes); + + return read; +} + +int AudioStreamPlaybackOpus::_op_seek_func(void *_stream, opus_int64 _offset, int _whence){ + +#ifdef SEEK_SET + FileAccess *fa=(FileAccess*)_stream; + + switch (_whence) { + case SEEK_SET: { + fa->seek(_offset); + } break; + case SEEK_CUR: { + fa->seek(fa->get_pos()+_offset); + } break; + case SEEK_END: { + fa->seek_end(_offset); + } break; + default: { + ERR_PRINT("BUG, wtf was whence set to?\n"); + } + } + int ret=fa->eof_reached()?-1:0; + return ret; +#else + return -1; // no seeking +#endif +} + +int AudioStreamPlaybackOpus::_op_close_func(void *_stream) { + if (!_stream) + return 0; + FileAccess *fa=(FileAccess*)_stream; + if (fa->is_open()) + fa->close(); + return 0; +} + +opus_int64 AudioStreamPlaybackOpus::_op_tell_func(void *_stream) { + FileAccess *_fa = (FileAccess*)_stream; + return (opus_int64)_fa->get_pos(); +} + +void AudioStreamPlaybackOpus::_clear_stream() { + if(!stream_loaded) + return; + + op_free(opus_file); + _close_file(); + + stream_loaded=false; + stream_channels=1; + playing=false; +} + +void AudioStreamPlaybackOpus::_close_file() { + if (f) { + memdelete(f); + f=NULL; + } +} + +Error AudioStreamPlaybackOpus::_load_stream() { + + ERR_FAIL_COND_V(!stream_valid,ERR_UNCONFIGURED); + + _clear_stream(); + if (file=="") + return ERR_INVALID_DATA; + + Error err; + f=FileAccess::open(file,FileAccess::READ,&err); + + if (err) { + ERR_FAIL_COND_V( err, err ); + } + + int _err = 0; + + opus_file = op_open_callbacks(f,&_op_callbacks,NULL,0,&_err); + + switch (_err) { + case OP_EREAD: { // - Can't read the file. 
+ memdelete(f); f=NULL; + ERR_FAIL_V( ERR_FILE_CANT_READ ); + } break; + case OP_EVERSION: // - Unrecognized version number. + case OP_ENOTFORMAT: // - Stream is not Opus data. + case OP_EIMPL : { // - Stream used non-implemented feature. + memdelete(f); f=NULL; + ERR_FAIL_V( ERR_FILE_UNRECOGNIZED ); + } break; + case OP_EBADLINK: // - Failed to find old data after seeking. + case OP_EBADTIMESTAMP: // - Timestamp failed the validity checks. + case OP_EBADHEADER: { // - Invalid or missing Opus bitstream header. + memdelete(f); f=NULL; + ERR_FAIL_V( ERR_FILE_CORRUPT ); + } break; + case OP_EFAULT: { // - Internal logic fault; indicates a bug or heap/stack corruption. + memdelete(f); f=NULL; + ERR_FAIL_V( ERR_BUG ); + } break; + } + repeats=0; + stream_loaded=true; + + + return OK; +} + +AudioStreamPlaybackOpus::AudioStreamPlaybackOpus() { + loops=false; + playing=false; + f = NULL; + stream_loaded=false; + stream_valid=false; + repeats=0; + paused=true; + stream_channels=0; + current_section=0; + length=0; + loop_restart_time=0; + pre_skip=0; + + _op_callbacks.read = _op_read_func; + _op_callbacks.seek = _op_seek_func; + _op_callbacks.tell = _op_tell_func; + _op_callbacks.close = _op_close_func; +} + +Error AudioStreamPlaybackOpus::set_file(const String &p_file) { + file=p_file; + stream_valid=false; + Error err; + f=FileAccess::open(file,FileAccess::READ,&err); + + if (err) { + ERR_FAIL_COND_V( err, err ); + } + + int _err; + + opus_file = op_open_callbacks(f,&_op_callbacks,NULL,0,&_err); + + switch (_err) { + case OP_EREAD: { // - Can't read the file. + memdelete(f); f=NULL; + ERR_FAIL_V( ERR_FILE_CANT_READ ); + } break; + case OP_EVERSION: // - Unrecognized version number. + case OP_ENOTFORMAT: // - Stream is not Opus data. + case OP_EIMPL : { // - Stream used non-implemented feature. + memdelete(f); f=NULL; + ERR_FAIL_V( ERR_FILE_UNRECOGNIZED ); + } break; + case OP_EBADLINK: // - Failed to find old data after seeking. 
+ case OP_EBADTIMESTAMP: // - Timestamp failed the validity checks. + case OP_EBADHEADER: { // - Invalid or missing Opus bitstream header. + memdelete(f); f=NULL; + ERR_FAIL_V( ERR_FILE_CORRUPT ); + } break; + case OP_EFAULT: { // - Internal logic fault; indicates a bug or heap/stack corruption. + memdelete(f); f=NULL; + ERR_FAIL_V( ERR_BUG ); + } break; + } + + const OpusHead *oinfo = op_head(opus_file,-1); + + stream_channels=oinfo->channel_count; + pre_skip=oinfo->pre_skip; + frames_mixed=pre_skip; + ogg_int64_t len = op_pcm_total(opus_file,-1); + if(len < 0) { + length = 0; + } else { + length=(len/osrate); + } + + op_free(opus_file); + memdelete(f); + f=NULL; + stream_valid=true; + + + return OK; +} + +void AudioStreamPlaybackOpus::play(float p_from) { + if (playing) + stop(); + + if (_load_stream()!=OK) + return; + + frames_mixed=pre_skip; + playing=true; + if (p_from>0) { + seek_pos(p_from); + } +} + +void AudioStreamPlaybackOpus::stop() { + _clear_stream(); + playing=false; +} + +void AudioStreamPlaybackOpus::seek_pos(float p_time) { + if(!playing) return; + ogg_int64_t pcm_offset = (ogg_int64_t)(p_time * osrate); + bool ok = op_pcm_seek(opus_file,pcm_offset)==0; + if(!ok) { + ERR_PRINT("Seek time over stream size."); + return; + } + frames_mixed=osrate*p_time; +} + +int AudioStreamPlaybackOpus::mix(int16_t* p_bufer,int p_frames) { + if (!playing) + return 0; + + int total=p_frames; + + while (true) { + + int todo = p_frames; + + if (todo==0 || todo<MIN_MIX) { + break; + } + + int ret=op_read(opus_file,(opus_int16*)p_bufer,todo*stream_channels,&current_section); + if (ret<0) { + playing = false; + ERR_EXPLAIN("Error reading Opus File: "+file); + ERR_BREAK(ret<0); + } else if (ret==0) { // end of song, reload? 
+ op_free(opus_file); + + _close_file(); + + f=FileAccess::open(file,FileAccess::READ); + + int errv = 0; + opus_file = op_open_callbacks(f,&_op_callbacks,NULL,0,&errv); + if (errv!=0) { + playing=false; + break; // :( + } + + if (!has_loop()) { + playing=false; + repeats=1; + break; + } + + if (loop_restart_time) { + bool ok = op_pcm_seek(opus_file, (loop_restart_time*osrate)+pre_skip)==0; + if (!ok) { + playing=false; + ERR_PRINT("loop restart time rejected") + } + + frames_mixed=(loop_restart_time*osrate)+pre_skip; + } else { + frames_mixed=pre_skip; + } + repeats++; + continue; + + } + + stream_channels=op_head(opus_file,current_section)->channel_count; + + frames_mixed+=ret; + + p_bufer+=ret*stream_channels; + p_frames-=ret; + + } + + return total-p_frames; +} + +float AudioStreamPlaybackOpus::get_length() const { + if(!stream_loaded) { + if(const_cast<AudioStreamPlaybackOpus*>(this)->_load_stream() != OK) + return 0; + } + return length; +} + +float AudioStreamPlaybackOpus::get_pos() const { + + int32_t frames = int32_t(frames_mixed); + if (frames < 0) + frames=0; + return double(frames) / osrate; +} + +int AudioStreamPlaybackOpus::get_minimum_buffer_size() const { + return MIN_MIX; +} + +AudioStreamPlaybackOpus::~AudioStreamPlaybackOpus() { + _clear_stream(); +} + +RES ResourceFormatLoaderAudioStreamOpus::load(const String &p_path, const String& p_original_path, Error *r_error) { + if (r_error) + *r_error=OK; + + AudioStreamOpus *opus_stream = memnew(AudioStreamOpus); + opus_stream->set_file(p_path); + return Ref<AudioStreamOpus>(opus_stream); +} + +void ResourceFormatLoaderAudioStreamOpus::get_recognized_extensions(List<String> *p_extensions) const { + + p_extensions->push_back("opus"); +} +String ResourceFormatLoaderAudioStreamOpus::get_resource_type(const String &p_path) const { + + if (p_path.extension().to_lower()=="opus") + return "AudioStreamOpus"; + return ""; +} + +bool ResourceFormatLoaderAudioStreamOpus::handles_type(const String& p_type) const { 
+ return (p_type=="AudioStream" || p_type=="AudioStreamOpus"); +} diff --git a/drivers/opus/audio_stream_opus.h b/drivers/opus/audio_stream_opus.h new file mode 100644 index 0000000000..2f173cc270 --- /dev/null +++ b/drivers/opus/audio_stream_opus.h @@ -0,0 +1,141 @@ +/*************************************************************************/ +/* audio_stream_opus.h */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* http://www.godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2015 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Author: George Marques <george@gmarqu.es> */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ +/*************************************************************************/ + +#ifndef AUDIO_STREAM_OPUS_H +#define AUDIO_STREAM_OPUS_H + +#include "scene/resources/audio_stream.h" +#include "opus/opusfile.h" +#include "opus/internal.h" +#include "os/file_access.h" +#include "io/resource_loader.h" + +class AudioStreamPlaybackOpus : public AudioStreamPlayback { + + OBJ_TYPE(AudioStreamPlaybackOpus,AudioStreamPlayback) + + enum { + MIN_MIX=1024 + }; + + FileAccess *f; + + OpusFileCallbacks _op_callbacks; + float length; + static int _op_read_func(void *_stream, unsigned char *_ptr, int _nbytes); + static int _op_seek_func(void *_stream, opus_int64 _offset, int _whence); + static int _op_close_func(void *_stream); + static opus_int64 _op_tell_func(void *_stream); + static const float osrate; + + String file; + int64_t frames_mixed; + + bool stream_loaded; + volatile bool playing; + OggOpusFile *opus_file; + int stream_channels; + int current_section; + int pre_skip; + + bool paused; + bool loops; + int repeats; + + Error _load_stream(); + void _clear_stream(); + void _close_file(); + + bool stream_valid; + float loop_restart_time; + +public: + Error set_file(const String& p_file); + + virtual void play(float p_from=0); + virtual void stop(); + virtual bool is_playing() const { return playing; } + + virtual void set_loop_restart_time(float p_time) { loop_restart_time=p_time; } + + virtual void set_paused(bool p_paused) { paused=p_paused; } + virtual bool is_paused() const { return paused; } + + virtual void set_loop(bool p_enable) { loops=p_enable; } + virtual bool has_loop() const {return loops; } + + virtual float get_length() const; + + virtual String get_stream_name() const { return ""; } + + virtual int get_loop_count() const { return repeats; } + + virtual float get_pos() const; + virtual void seek_pos(float p_time); + + virtual int get_channels() const { return stream_channels; } + virtual int get_mix_rate() const { return osrate; } + + virtual int 
get_minimum_buffer_size() const; + + virtual int mix(int16_t* p_bufer,int p_frames); + + AudioStreamPlaybackOpus(); + ~AudioStreamPlaybackOpus(); +}; + + +class AudioStreamOpus: public AudioStream { + + OBJ_TYPE(AudioStreamOpus,AudioStream) + + String file; +public: + + Ref<AudioStreamPlayback> instance_playback() { + Ref<AudioStreamPlaybackOpus> pb = memnew( AudioStreamPlaybackOpus ); + pb->set_file(file); + return pb; + } + + void set_file(const String& p_file) { file=p_file; } + +}; + +class ResourceFormatLoaderAudioStreamOpus: public ResourceFormatLoader { +public: + virtual RES load(const String &p_path,const String& p_original_path="",Error *r_error=NULL); + virtual void get_recognized_extensions(List<String> *p_extensions) const; + virtual bool handles_type(const String& p_type) const; + virtual String get_resource_type(const String &p_path) const; +}; + +#endif // AUDIO_STREAM_OPUS_H diff --git a/drivers/opus/celt/_kiss_fft_guts.h b/drivers/opus/celt/_kiss_fft_guts.h new file mode 100644 index 0000000000..21bea8a9b0 --- /dev/null +++ b/drivers/opus/celt/_kiss_fft_guts.h @@ -0,0 +1,183 @@ +/*Copyright (c) 2003-2004, Mark Borgerding + + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE.*/ + +#ifndef KISS_FFT_GUTS_H +#define KISS_FFT_GUTS_H + +#define MIN(a,b) ((a)<(b) ? (a):(b)) +#define MAX(a,b) ((a)>(b) ? (a):(b)) + +/* kiss_fft.h + defines kiss_fft_scalar as either short or a float type + and defines + typedef struct { kiss_fft_scalar r; kiss_fft_scalar i; }kiss_fft_cpx; */ +#include "kiss_fft.h" + +/* + Explanation of macros dealing with complex math: + + C_MUL(m,a,b) : m = a*b + C_FIXDIV( c , div ) : if a fixed point impl., c /= div. noop otherwise + C_SUB( res, a,b) : res = a - b + C_SUBFROM( res , a) : res -= a + C_ADDTO( res , a) : res += a + * */ +#ifdef OPUS_FIXED_POINT +#include "arch.h" + + +#define SAMP_MAX 2147483647 +#define TWID_MAX 32767 +#define TRIG_UPSCALE 1 + +#define SAMP_MIN -SAMP_MAX + + +# define S_MUL(a,b) MULT16_32_Q15(b, a) + +# define C_MUL(m,a,b) \ + do{ (m).r = SUB32(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \ + (m).i = ADD32(S_MUL((a).r,(b).i) , S_MUL((a).i,(b).r)); }while(0) + +# define C_MULC(m,a,b) \ + do{ (m).r = ADD32(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \ + (m).i = SUB32(S_MUL((a).i,(b).r) , S_MUL((a).r,(b).i)); }while(0) + +# define C_MUL4(m,a,b) \ + do{ (m).r = SHR32(SUB32(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)),2); \ + (m).i = SHR32(ADD32(S_MUL((a).r,(b).i) , S_MUL((a).i,(b).r)),2); }while(0) + +# define C_MULBYSCALAR( c, s ) \ + do{ (c).r = S_MUL( (c).r , s ) ;\ + (c).i = S_MUL( (c).i , s ) ; }while(0) + +# define DIVSCALAR(x,k) \ + (x) = S_MUL( x, (TWID_MAX-((k)>>1))/(k)+1 ) + +# 
define C_FIXDIV(c,div) \ + do { DIVSCALAR( (c).r , div); \ + DIVSCALAR( (c).i , div); }while (0) + +#define C_ADD( res, a,b)\ + do {(res).r=ADD32((a).r,(b).r); (res).i=ADD32((a).i,(b).i); \ + }while(0) +#define C_SUB( res, a,b)\ + do {(res).r=SUB32((a).r,(b).r); (res).i=SUB32((a).i,(b).i); \ + }while(0) +#define C_ADDTO( res , a)\ + do {(res).r = ADD32((res).r, (a).r); (res).i = ADD32((res).i,(a).i);\ + }while(0) + +#define C_SUBFROM( res , a)\ + do {(res).r = ADD32((res).r,(a).r); (res).i = SUB32((res).i,(a).i); \ + }while(0) + +#if defined(OPUS_ARM_INLINE_ASM) +#include "arm/kiss_fft_armv4.h" +#endif + +#if defined(OPUS_ARM_INLINE_EDSP) +#include "arm/kiss_fft_armv5e.h" +#endif + +#else /* not OPUS_FIXED_POINT*/ + +# define S_MUL(a,b) ( (a)*(b) ) +#define C_MUL(m,a,b) \ + do{ (m).r = (a).r*(b).r - (a).i*(b).i;\ + (m).i = (a).r*(b).i + (a).i*(b).r; }while(0) +#define C_MULC(m,a,b) \ + do{ (m).r = (a).r*(b).r + (a).i*(b).i;\ + (m).i = (a).i*(b).r - (a).r*(b).i; }while(0) + +#define C_MUL4(m,a,b) C_MUL(m,a,b) + +# define C_FIXDIV(c,div) /* NOOP */ +# define C_MULBYSCALAR( c, s ) \ + do{ (c).r *= (s);\ + (c).i *= (s); }while(0) +#endif + +#ifndef CHECK_OVERFLOW_OP +# define CHECK_OVERFLOW_OP(a,op,b) /* noop */ +#endif + +#ifndef C_ADD +#define C_ADD( res, a,b)\ + do { \ + CHECK_OVERFLOW_OP((a).r,+,(b).r)\ + CHECK_OVERFLOW_OP((a).i,+,(b).i)\ + (res).r=(a).r+(b).r; (res).i=(a).i+(b).i; \ + }while(0) +#define C_SUB( res, a,b)\ + do { \ + CHECK_OVERFLOW_OP((a).r,-,(b).r)\ + CHECK_OVERFLOW_OP((a).i,-,(b).i)\ + (res).r=(a).r-(b).r; (res).i=(a).i-(b).i; \ + }while(0) +#define C_ADDTO( res , a)\ + do { \ + CHECK_OVERFLOW_OP((res).r,+,(a).r)\ + CHECK_OVERFLOW_OP((res).i,+,(a).i)\ + (res).r += (a).r; (res).i += (a).i;\ + }while(0) + +#define C_SUBFROM( res , a)\ + do {\ + CHECK_OVERFLOW_OP((res).r,-,(a).r)\ + CHECK_OVERFLOW_OP((res).i,-,(a).i)\ + (res).r -= (a).r; (res).i -= (a).i; \ + }while(0) +#endif /* C_ADD defined */ + +#ifdef OPUS_FIXED_POINT +# define 
KISS_FFT_COS(phase) TRIG_UPSCALE*floor(MIN(32767,MAX(-32767,.5+32768 * cos (phase)))) +# define KISS_FFT_SIN(phase) TRIG_UPSCALE*floor(MIN(32767,MAX(-32767,.5+32768 * sin (phase)))) +# define KISS_FFT_COS(phase) floor(.5+TWID_MAX*cos (phase)) +# define KISS_FFT_SIN(phase) floor(.5+TWID_MAX*sin (phase)) +# define HALF_OF(x) ((x)>>1) +#elif defined(USE_SIMD) +# define KISS_FFT_COS(phase) _mm_set1_ps( cos(phase) ) +# define KISS_FFT_SIN(phase) _mm_set1_ps( sin(phase) ) +# define HALF_OF(x) ((x)*_mm_set1_ps(.5f)) +#else +# define KISS_FFT_COS(phase) (kiss_fft_scalar) cos(phase) +# define KISS_FFT_SIN(phase) (kiss_fft_scalar) sin(phase) +# define HALF_OF(x) ((x)*.5f) +#endif + +#define kf_cexp(x,phase) \ + do{ \ + (x)->r = KISS_FFT_COS(phase);\ + (x)->i = KISS_FFT_SIN(phase);\ + }while(0) + +#define kf_cexp2(x,phase) \ + do{ \ + (x)->r = TRIG_UPSCALE*celt_cos_norm((phase));\ + (x)->i = TRIG_UPSCALE*celt_cos_norm((phase)-32768);\ +}while(0) + +#endif /* KISS_FFT_GUTS_H */ diff --git a/drivers/opus/celt/arch.h b/drivers/opus/celt/arch.h new file mode 100644 index 0000000000..83e3705000 --- /dev/null +++ b/drivers/opus/celt/arch.h @@ -0,0 +1,214 @@ +/* Copyright (c) 2003-2008 Jean-Marc Valin + Copyright (c) 2007-2008 CSIRO + Copyright (c) 2007-2009 Xiph.Org Foundation + Written by Jean-Marc Valin */ +/** + @file arch.h + @brief Various architecture definitions for CELT +*/ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef ARCH_H +#define ARCH_H + +#include "opus_types.h" +#include "opus_defines.h" + +# if !defined(__GNUC_PREREQ) +# if defined(__GNUC__)&&defined(__GNUC_MINOR__) +# define __GNUC_PREREQ(_maj,_min) \ + ((__GNUC__<<16)+__GNUC_MINOR__>=((_maj)<<16)+(_min)) +# else +# define __GNUC_PREREQ(_maj,_min) 0 +# endif +# endif + +#define CELT_SIG_SCALE 32768.f + +#define celt_fatal(str) _celt_fatal(str, __FILE__, __LINE__); +#ifdef ENABLE_ASSERTIONS +#include <stdio.h> +#include <stdlib.h> +#ifdef __GNUC__ +__attribute__((noreturn)) +#endif +static OPUS_INLINE void _celt_fatal(const char *str, const char *file, int line) +{ + fprintf (stderr, "Fatal (internal) error in %s, line %d: %s\n", file, line, str); + abort(); +} +#define celt_assert(cond) {if (!(cond)) {celt_fatal("assertion failed: " #cond);}} +#define celt_assert2(cond, message) {if (!(cond)) {celt_fatal("assertion failed: " #cond "\n" message);}} +#else +#define celt_assert(cond) +#define celt_assert2(cond, message) +#endif + +#define IMUL32(a,b) ((a)*(b)) + +#define ABS(x) ((x) < 0 ? (-(x)) : (x)) /**< Absolute integer value. */ +#define ABS16(x) ((x) < 0 ? (-(x)) : (x)) /**< Absolute 16-bit value. */ +#define MIN16(a,b) ((a) < (b) ? 
(a) : (b)) /**< Minimum 16-bit value. */ +#define MAX16(a,b) ((a) > (b) ? (a) : (b)) /**< Maximum 16-bit value. */ +#define ABS32(x) ((x) < 0 ? (-(x)) : (x)) /**< Absolute 32-bit value. */ +#define MIN32(a,b) ((a) < (b) ? (a) : (b)) /**< Minimum 32-bit value. */ +#define MAX32(a,b) ((a) > (b) ? (a) : (b)) /**< Maximum 32-bit value. */ +#define IMIN(a,b) ((a) < (b) ? (a) : (b)) /**< Minimum int value. */ +#define IMAX(a,b) ((a) > (b) ? (a) : (b)) /**< Maximum int value. */ +#define UADD32(a,b) ((a)+(b)) +#define USUB32(a,b) ((a)-(b)) + +#define PRINT_MIPS(file) + +#ifdef OPUS_FIXED_POINT + +typedef opus_int16 opus_val16; +typedef opus_int32 opus_val32; + +typedef opus_val32 celt_sig; +typedef opus_val16 celt_norm; +typedef opus_val32 celt_ener; + +#define Q15ONE 32767 + +#define SIG_SHIFT 12 + +#define NORM_SCALING 16384 + +#define DB_SHIFT 10 + +#define EPSILON 1 +#define VERY_SMALL 0 +#define VERY_LARGE16 ((opus_val16)32767) +#define Q15_ONE ((opus_val16)32767) + +#define SCALEIN(a) (a) +#define SCALEOUT(a) (a) + +#ifdef FIXED_DEBUG +#include "fixed_debug.h" +#else + +#include "fixed_generic.h" + +#ifdef OPUS_ARM_INLINE_EDSP +#include "arm/fixed_armv5e.h" +#elif defined (OPUS_ARM_INLINE_ASM) +#include "arm/fixed_armv4.h" +#elif defined (BFIN_ASM) +#include "fixed_bfin.h" +#elif defined (TI_C5X_ASM) +#include "fixed_c5x.h" +#elif defined (TI_C6X_ASM) +#include "fixed_c6x.h" +#endif + +#endif + +#else /* OPUS_FIXED_POINT */ + +typedef float opus_val16; +typedef float opus_val32; + +typedef float celt_sig; +typedef float celt_norm; +typedef float celt_ener; + +#define Q15ONE 1.0f + +#define NORM_SCALING 1.f + +#define EPSILON 1e-15f +#define VERY_SMALL 1e-30f +#define VERY_LARGE16 1e15f +#define Q15_ONE ((opus_val16)1.f) + +#define QCONST16(x,bits) (x) +#define QCONST32(x,bits) (x) + +#define NEG16(x) (-(x)) +#define NEG32(x) (-(x)) +#define EXTRACT16(x) (x) +#define EXTEND32(x) (x) +#define SHR16(a,shift) (a) +#define SHL16(a,shift) (a) +#define SHR32(a,shift) (a) 
+#define SHL32(a,shift) (a) +#define PSHR32(a,shift) (a) +#define VSHR32(a,shift) (a) + +#define PSHR(a,shift) (a) +#define SHR(a,shift) (a) +#define SHL(a,shift) (a) +#define SATURATE(x,a) (x) +#define SATURATE16(x) (x) + +#define ROUND16(a,shift) (a) +#define HALF16(x) (.5f*(x)) +#define HALF32(x) (.5f*(x)) + +#define ADD16(a,b) ((a)+(b)) +#define SUB16(a,b) ((a)-(b)) +#define ADD32(a,b) ((a)+(b)) +#define SUB32(a,b) ((a)-(b)) +#define MULT16_16_16(a,b) ((a)*(b)) +#define MULT16_16(a,b) ((opus_val32)(a)*(opus_val32)(b)) +#define MAC16_16(c,a,b) ((c)+(opus_val32)(a)*(opus_val32)(b)) + +#define MULT16_32_Q15(a,b) ((a)*(b)) +#define MULT16_32_Q16(a,b) ((a)*(b)) + +#define MULT32_32_Q31(a,b) ((a)*(b)) + +#define MAC16_32_Q15(c,a,b) ((c)+(a)*(b)) + +#define MULT16_16_Q11_32(a,b) ((a)*(b)) +#define MULT16_16_Q11(a,b) ((a)*(b)) +#define MULT16_16_Q13(a,b) ((a)*(b)) +#define MULT16_16_Q14(a,b) ((a)*(b)) +#define MULT16_16_Q15(a,b) ((a)*(b)) +#define MULT16_16_P15(a,b) ((a)*(b)) +#define MULT16_16_P13(a,b) ((a)*(b)) +#define MULT16_16_P14(a,b) ((a)*(b)) +#define MULT16_32_P16(a,b) ((a)*(b)) + +#define DIV32_16(a,b) (((opus_val32)(a))/(opus_val16)(b)) +#define DIV32(a,b) (((opus_val32)(a))/(opus_val32)(b)) + +#define SCALEIN(a) ((a)*CELT_SIG_SCALE) +#define SCALEOUT(a) ((a)*(1/CELT_SIG_SCALE)) + +#endif /* !OPUS_FIXED_POINT */ + +#ifndef GLOBAL_STACK_SIZE +#ifdef OPUS_FIXED_POINT +#define GLOBAL_STACK_SIZE 100000 +#else +#define GLOBAL_STACK_SIZE 100000 +#endif +#endif + +#endif /* ARCH_H */ diff --git a/drivers/opus/celt/arm/arm2gnu.pl b/drivers/opus/celt/arm/arm2gnu.pl new file mode 100755 index 0000000000..eab42efa2b --- /dev/null +++ b/drivers/opus/celt/arm/arm2gnu.pl @@ -0,0 +1,316 @@ +#!/usr/bin/perl + +my $bigend; # little/big endian +my $nxstack; + +$nxstack = 0; + +eval 'exec /usr/local/bin/perl -S $0 ${1+"$@"}' + if $running_under_some_shell; + +while ($ARGV[0] =~ /^-/) { + $_ = shift; + last if /^--/; + if (/^-n/) { + $nflag++; + next; + } + die "I don't 
recognize this switch: $_\\n"; +} +$printit++ unless $nflag; + +$\ = "\n"; # automatically add newline on print +$n=0; + +$thumb = 0; # ARM mode by default, not Thumb. +@proc_stack = (); + +LINE: +while (<>) { + + # For ADRLs we need to add a new line after the substituted one. + $addPadding = 0; + + # First, we do not dare to touch *anything* inside double quotes, do we? + # Second, if you want a dollar character in the string, + # insert two of them -- that's how ARM C and assembler treat strings. + s/^([A-Za-z_]\w*)[ \t]+DCB[ \t]*\"/$1: .ascii \"/ && do { s/\$\$/\$/g; next }; + s/\bDCB\b[ \t]*\"/.ascii \"/ && do { s/\$\$/\$/g; next }; + s/^(\S+)\s+RN\s+(\S+)/$1 .req r$2/ && do { s/\$\$/\$/g; next }; + # If there's nothing on a line but a comment, don't try to apply any further + # substitutions (this is a cheap hack to avoid mucking up the license header) + s/^([ \t]*);/$1@/ && do { s/\$\$/\$/g; next }; + # If substituted -- leave immediately ! + + s/@/,:/; + s/;/@/; + while ( /@.*'/ ) { + s/(@.*)'/$1/g; + } + s/\{FALSE\}/0/g; + s/\{TRUE\}/1/g; + s/\{(\w\w\w\w+)\}/$1/g; + s/\bINCLUDE[ \t]*([^ \t\n]+)/.include \"$1\"/; + s/\bGET[ \t]*([^ \t\n]+)/.include \"${ my $x=$1; $x =~ s|\.s|-gnu.S|; \$x }\"/; + s/\bIMPORT\b/.extern/; + s/\bEXPORT\b/.global/; + s/^(\s+)\[/$1IF/; + s/^(\s+)\|/$1ELSE/; + s/^(\s+)\]/$1ENDIF/; + s/IF *:DEF:/ .ifdef/; + s/IF *:LNOT: *:DEF:/ .ifndef/; + s/ELSE/ .else/; + s/ENDIF/ .endif/; + + if( /\bIF\b/ ) { + s/\bIF\b/ .if/; + s/=/==/; + } + if ( $n == 2) { + s/\$/\\/g; + } + if ($n == 1) { + s/\$//g; + s/label//g; + $n = 2; + } + if ( /MACRO/ ) { + s/MACRO *\n/.macro/; + $n=1; + } + if ( /\bMEND\b/ ) { + s/\bMEND\b/.endm/; + $n=0; + } + + # ".rdata" doesn't work in 'as' version 2.13.2, as it is ".rodata" there. 
+ # + if ( /\bAREA\b/ ) { + my $align; + $align = "2"; + if ( /ALIGN=(\d+)/ ) { + $align = $1; + } + if ( /CODE/ ) { + $nxstack = 1; + } + s/^(.+)CODE(.+)READONLY(.*)/ .text/; + s/^(.+)DATA(.+)READONLY(.*)/ .section .rdata/; + s/^(.+)\|\|\.data\|\|(.+)/ .data/; + s/^(.+)\|\|\.bss\|\|(.+)/ .bss/; + s/$/; .p2align $align/; + # Enable NEON instructions but don't produce a binary that requires + # ARMv7. RVCT does not have equivalent directives, so we just do this + # for all CODE areas. + if ( /.text/ ) { + # Separating .arch, .fpu, etc., by semicolons does not work (gas + # thinks the semicolon is part of the arch name, even when there's + # whitespace separating them). Sadly this means our line numbers + # won't match the original source file (we could use the .line + # directive, which is documented to be obsolete, but then gdb will + # show the wrong line in the translated source file). + s/$/; .arch armv7-a\n .fpu neon\n .object_arch armv4t/; + } + } + + s/\|\|\.constdata\$(\d+)\|\|/.L_CONST$1/; # ||.constdata$3|| + s/\|\|\.bss\$(\d+)\|\|/.L_BSS$1/; # ||.bss$2|| + s/\|\|\.data\$(\d+)\|\|/.L_DATA$1/; # ||.data$2|| + s/\|\|([a-zA-Z0-9_]+)\@([a-zA-Z0-9_]+)\|\|/@ $&/; + s/^(\s+)\%(\s)/ .space $1/; + + s/\|(.+)\.(\d+)\|/\.$1_$2/; # |L80.123| -> .L80_123 + s/\bCODE32\b/.code 32/ && do {$thumb = 0}; + s/\bCODE16\b/.code 16/ && do {$thumb = 1}; + if (/\bPROC\b/) + { + my $prefix; + my $proc; + /^([A-Za-z_\.]\w+)\b/; + $proc = $1; + $prefix = ""; + if ($proc) + { + $prefix = $prefix.sprintf("\t.type\t%s, %%function; ",$proc); + push(@proc_stack, $proc); + s/^[A-Za-z_\.]\w+/$&:/; + } + $prefix = $prefix."\t.thumb_func; " if ($thumb); + s/\bPROC\b/@ $&/; + $_ = $prefix.$_; + } + s/^(\s*)(S|Q|SH|U|UQ|UH)ASX\b/$1$2ADDSUBX/; + s/^(\s*)(S|Q|SH|U|UQ|UH)SAX\b/$1$2SUBADDX/; + if (/\bENDP\b/) + { + my $proc; + s/\bENDP\b/@ $&/; + $proc = pop(@proc_stack); + $_ = "\t.size $proc, .-$proc".$_ if ($proc); + } + s/\bSUBT\b/@ $&/; + s/\bDATA\b/@ $&/; # DATA directive is deprecated -- Asm 
guide, p.7-25 + s/\bKEEP\b/@ $&/; + s/\bEXPORTAS\b/@ $&/; + s/\|\|(.)+\bEQU\b/@ $&/; + s/\|\|([\w\$]+)\|\|/$1/; + s/\bENTRY\b/@ $&/; + s/\bASSERT\b/@ $&/; + s/\bGBLL\b/@ $&/; + s/\bGBLA\b/@ $&/; + s/^\W+OPT\b/@ $&/; + s/:OR:/|/g; + s/:SHL:/<</g; + s/:SHR:/>>/g; + s/:AND:/&/g; + s/:LAND:/&&/g; + s/CPSR/cpsr/; + s/SPSR/spsr/; + s/ALIGN$/.balign 4/; + s/ALIGN\s+([0-9x]+)$/.balign $1/; + s/psr_cxsf/psr_all/; + s/LTORG/.ltorg/; + s/^([A-Za-z_]\w*)[ \t]+EQU/ .set $1,/; + s/^([A-Za-z_]\w*)[ \t]+SETL/ .set $1,/; + s/^([A-Za-z_]\w*)[ \t]+SETA/ .set $1,/; + s/^([A-Za-z_]\w*)[ \t]+\*/ .set $1,/; + + # {PC} + 0xdeadfeed --> . + 0xdeadfeed + s/\{PC\} \+/ \. +/; + + # Single hex constant on the line ! + # + # >>> NOTE <<< + # Double-precision floats in gcc are always mixed-endian, which means + # bytes in two words are little-endian, but words are big-endian. + # So, 0x0000deadfeed0000 would be stored as 0x0000dead at low address + # and 0xfeed0000 at high address. + # + s/\bDCFD\b[ \t]+0x([a-fA-F0-9]{8})([a-fA-F0-9]{8})/.long 0x$1, 0x$2/; + # Only decimal constants on the line, no hex ! + s/\bDCFD\b[ \t]+([0-9\.\-]+)/.double $1/; + + # Single hex constant on the line ! +# s/\bDCFS\b[ \t]+0x([a-f0-9]{8})([a-f0-9]{8})/.long 0x$1, 0x$2/; + # Only decimal constants on the line, no hex ! 
+# s/\bDCFS\b[ \t]+([0-9\.\-]+)/.double $1/; + s/\bDCFS[ \t]+0x/.word 0x/; + s/\bDCFS\b/.float/; + + s/^([A-Za-z_]\w*)[ \t]+DCD/$1 .word/; + s/\bDCD\b/.word/; + s/^([A-Za-z_]\w*)[ \t]+DCW/$1 .short/; + s/\bDCW\b/.short/; + s/^([A-Za-z_]\w*)[ \t]+DCB/$1 .byte/; + s/\bDCB\b/.byte/; + s/^([A-Za-z_]\w*)[ \t]+\%/.comm $1,/; + s/^[A-Za-z_\.]\w+/$&:/; + s/^(\d+)/$1:/; + s/\%(\d+)/$1b_or_f/; + s/\%[Bb](\d+)/$1b/; + s/\%[Ff](\d+)/$1f/; + s/\%[Ff][Tt](\d+)/$1f/; + s/&([\dA-Fa-f]+)/0x$1/; + if ( /\b2_[01]+\b/ ) { + s/\b2_([01]+)\b/conv$1&&&&/g; + while ( /[01][01][01][01]&&&&/ ) { + s/0000&&&&/&&&&0/g; + s/0001&&&&/&&&&1/g; + s/0010&&&&/&&&&2/g; + s/0011&&&&/&&&&3/g; + s/0100&&&&/&&&&4/g; + s/0101&&&&/&&&&5/g; + s/0110&&&&/&&&&6/g; + s/0111&&&&/&&&&7/g; + s/1000&&&&/&&&&8/g; + s/1001&&&&/&&&&9/g; + s/1010&&&&/&&&&A/g; + s/1011&&&&/&&&&B/g; + s/1100&&&&/&&&&C/g; + s/1101&&&&/&&&&D/g; + s/1110&&&&/&&&&E/g; + s/1111&&&&/&&&&F/g; + } + s/000&&&&/&&&&0/g; + s/001&&&&/&&&&1/g; + s/010&&&&/&&&&2/g; + s/011&&&&/&&&&3/g; + s/100&&&&/&&&&4/g; + s/101&&&&/&&&&5/g; + s/110&&&&/&&&&6/g; + s/111&&&&/&&&&7/g; + s/00&&&&/&&&&0/g; + s/01&&&&/&&&&1/g; + s/10&&&&/&&&&2/g; + s/11&&&&/&&&&3/g; + s/0&&&&/&&&&0/g; + s/1&&&&/&&&&1/g; + s/conv&&&&/0x/g; + } + + if ( /commandline/) + { + if( /-bigend/) + { + $bigend=1; + } + } + + if ( /\bDCDU\b/ ) + { + my $cmd=$_; + my $value; + my $prefix; + my $w1; + my $w2; + my $w3; + my $w4; + + s/\s+DCDU\b/@ $&/; + + $cmd =~ /\bDCDU\b\s+0x(\d+)/; + $value = $1; + $value =~ /(\w\w)(\w\w)(\w\w)(\w\w)/; + $w1 = $1; + $w2 = $2; + $w3 = $3; + $w4 = $4; + + if( $bigend ne "") + { + # big endian + $prefix = "\t.byte\t0x".$w1.";". + "\t.byte\t0x".$w2.";". + "\t.byte\t0x".$w3.";". + "\t.byte\t0x".$w4."; "; + } + else + { + # little endian + $prefix = "\t.byte\t0x".$w4.";". + "\t.byte\t0x".$w3.";". + "\t.byte\t0x".$w2.";". 
+ "\t.byte\t0x".$w1."; "; + } + $_=$prefix.$_; + } + + if ( /\badrl\b/i ) + { + s/\badrl\s+(\w+)\s*,\s*(\w+)/ldr $1,=$2/i; + $addPadding = 1; + } + s/\bEND\b/@ END/; +} continue { + printf ("%s", $_) if $printit; + if ($addPadding != 0) + { + printf (" mov r0,r0\n"); + $addPadding = 0; + } +} +#If we had a code section, mark that this object doesn't need an executable +# stack. +if ($nxstack) { + printf (" .section\t.note.GNU-stack,\"\",\%\%progbits\n"); +} diff --git a/drivers/opus/celt/arm/arm_celt_map.c b/drivers/opus/celt/arm/arm_celt_map.c new file mode 100644 index 0000000000..b187345154 --- /dev/null +++ b/drivers/opus/celt/arm/arm_celt_map.c @@ -0,0 +1,49 @@ +/* Copyright (c) 2010 Xiph.Org Foundation + * Copyright (c) 2013 Parrot */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "pitch.h" + +#if defined(OPUS_HAVE_RTCD) + +# if defined(OPUS_FIXED_POINT) +opus_val32 (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *, + const opus_val16 *, opus_val32 *, int , int) = { + celt_pitch_xcorr_c, /* ARMv4 */ + MAY_HAVE_EDSP(celt_pitch_xcorr), /* EDSP */ + MAY_HAVE_MEDIA(celt_pitch_xcorr), /* Media */ + MAY_HAVE_NEON(celt_pitch_xcorr) /* NEON */ +}; +# else +# error "Floating-point implementation is not supported by ARM asm yet." \ + "Reconfigure with --disable-rtcd or send patches." +# endif + +#endif diff --git a/drivers/opus/celt/arm/armcpu.c b/drivers/opus/celt/arm/armcpu.c new file mode 100644 index 0000000000..7f0af631b9 --- /dev/null +++ b/drivers/opus/celt/arm/armcpu.c @@ -0,0 +1,174 @@ +/* Copyright (c) 2010 Xiph.Org Foundation + * Copyright (c) 2013 Parrot */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/* Original code from libtheora modified to suit Opus */ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#ifdef OPUS_HAVE_RTCD + +#include "armcpu.h" +#include "cpu_support.h" +#include "os_support.h" +#include "opus_types.h" + +#define OPUS_CPU_ARM_V4 (1) +#define OPUS_CPU_ARM_EDSP (1<<1) +#define OPUS_CPU_ARM_MEDIA (1<<2) +#define OPUS_CPU_ARM_NEON (1<<3) + +#if defined(_MSC_VER) +/*For GetExceptionCode() and EXCEPTION_ILLEGAL_INSTRUCTION.*/ +# define WIN32_LEAN_AND_MEAN +# define WIN32_EXTRA_LEAN +# include <windows.h> + +static OPUS_INLINE opus_uint32 opus_cpu_capabilities(void){ + opus_uint32 flags; + flags=0; + /* MSVC has no inline __asm support for ARM, but it does let you __emit + * instructions via their assembled hex code. + * All of these instructions should be essentially nops.
*/ +# if defined(OPUS_ARM_MAY_HAVE_EDSP) + __try{ + /*PLD [r13]*/ + __emit(0xF5DDF000); + flags|=OPUS_CPU_ARM_EDSP; + } + __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){ + /*Ignore exception.*/ + } +# if defined(OPUS_ARM_MAY_HAVE_MEDIA) + __try{ + /*SHADD8 r3,r3,r3*/ + __emit(0xE6333F93); + flags|=OPUS_CPU_ARM_MEDIA; + } + __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){ + /*Ignore exception.*/ + } +# if defined(OPUS_ARM_MAY_HAVE_NEON) + __try{ + /*VORR q0,q0,q0*/ + __emit(0xF2200150); + flags|=OPUS_CPU_ARM_NEON; + } + __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){ + /*Ignore exception.*/ + } +# endif +# endif +# endif + return flags; +} + +#elif defined(__linux__) +/* Linux based */ +opus_uint32 opus_cpu_capabilities(void) +{ + opus_uint32 flags = 0; + FILE *cpuinfo; + + /* Reading /proc/self/auxv would be easier, but that doesn't work reliably on + * Android */ + cpuinfo = fopen("/proc/cpuinfo", "r"); + + if(cpuinfo != NULL) + { + /* 512 should be enough for anybody (it's even enough for all the flags that + * x86 has accumulated... so far). 
*/ + char buf[512]; + + while(fgets(buf, 512, cpuinfo) != NULL) + { +# if defined(OPUS_ARM_MAY_HAVE_EDSP) || defined(OPUS_ARM_MAY_HAVE_NEON) + /* Search for edsp and neon flag */ + if(memcmp(buf, "Features", 8) == 0) + { + char *p; +# if defined(OPUS_ARM_MAY_HAVE_EDSP) + p = strstr(buf, " edsp"); + if(p != NULL && (p[5] == ' ' || p[5] == '\n')) + flags |= OPUS_CPU_ARM_EDSP; +# endif + +# if defined(OPUS_ARM_MAY_HAVE_NEON) + p = strstr(buf, " neon"); + if(p != NULL && (p[5] == ' ' || p[5] == '\n')) + flags |= OPUS_CPU_ARM_NEON; +# endif + } +# endif + +# if defined(OPUS_ARM_MAY_HAVE_MEDIA) + /* Search for media capabilities (>= ARMv6) */ + if(memcmp(buf, "CPU architecture:", 17) == 0) + { + int version; + version = atoi(buf+17); + + if(version >= 6) + flags |= OPUS_CPU_ARM_MEDIA; + } +# endif + } + + fclose(cpuinfo); + } + return flags; +} +#else +/* The feature registers which can tell us what the processor supports are + * accessible in privileged modes only, so we can't have a general user-space + * detection method like on x86.*/ +# error "Configured to use ARM asm but no CPU detection method available for " \ + "your platform. Reconfigure with --disable-rtcd (or send patches)."
+#endif + +int opus_select_arch(void) +{ + opus_uint32 flags = opus_cpu_capabilities(); + int arch = 0; + + if(!(flags & OPUS_CPU_ARM_EDSP)) + return arch; + arch++; + + if(!(flags & OPUS_CPU_ARM_MEDIA)) + return arch; + arch++; + + if(!(flags & OPUS_CPU_ARM_NEON)) + return arch; + arch++; + + return arch; +} + +#endif diff --git a/drivers/opus/celt/arm/armcpu.h b/drivers/opus/celt/arm/armcpu.h new file mode 100644 index 0000000000..ac5744606e --- /dev/null +++ b/drivers/opus/celt/arm/armcpu.h @@ -0,0 +1,71 @@ +/* Copyright (c) 2010 Xiph.Org Foundation + * Copyright (c) 2013 Parrot */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#if !defined(ARMCPU_H) +# define ARMCPU_H + +# if defined(OPUS_ARM_MAY_HAVE_EDSP) +# define MAY_HAVE_EDSP(name) name ## _edsp +# else +# define MAY_HAVE_EDSP(name) name ## _c +# endif + +# if defined(OPUS_ARM_MAY_HAVE_MEDIA) +# define MAY_HAVE_MEDIA(name) name ## _media +# else +# define MAY_HAVE_MEDIA(name) MAY_HAVE_EDSP(name) +# endif + +# if defined(OPUS_ARM_MAY_HAVE_NEON) +# define MAY_HAVE_NEON(name) name ## _neon +# else +# define MAY_HAVE_NEON(name) MAY_HAVE_MEDIA(name) +# endif + +# if defined(OPUS_ARM_PRESUME_EDSP) +# define PRESUME_EDSP(name) name ## _edsp +# else +# define PRESUME_EDSP(name) name ## _c +# endif + +# if defined(OPUS_ARM_PRESUME_MEDIA) +# define PRESUME_MEDIA(name) name ## _media +# else +# define PRESUME_MEDIA(name) PRESUME_EDSP(name) +# endif + +# if defined(OPUS_ARM_PRESUME_NEON) +# define PRESUME_NEON(name) name ## _neon +# else +# define PRESUME_NEON(name) PRESUME_MEDIA(name) +# endif + +# if defined(OPUS_HAVE_RTCD) +int opus_select_arch(void); +# endif + +#endif diff --git a/drivers/opus/celt/arm/armopts.s b/drivers/opus/celt/arm/armopts.s new file mode 100644 index 0000000000..fb9196072a --- /dev/null +++ b/drivers/opus/celt/arm/armopts.s @@ -0,0 +1,37 @@ +/* Copyright (C) 2013 Mozilla Corporation */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +; Set the following to 1 if we have EDSP instructions +; (LDRD/STRD, etc., ARMv5E and later). +OPUS_ARM_MAY_HAVE_EDSP * + +; Set the following to 1 if we have ARMv6 media instructions. +OPUS_ARM_MAY_HAVE_MEDIA * + +; Set the following to 1 if we have NEON (some ARMv7) +OPUS_ARM_MAY_HAVE_NEON * + +END diff --git a/drivers/opus/celt/arm/armopts.s.in b/drivers/opus/celt/arm/armopts.s.in new file mode 100644 index 0000000000..3d8aaf2754 --- /dev/null +++ b/drivers/opus/celt/arm/armopts.s.in @@ -0,0 +1,37 @@ +/* Copyright (C) 2013 Mozilla Corporation */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +; Set the following to 1 if we have EDSP instructions +; (LDRD/STRD, etc., ARMv5E and later). +OPUS_ARM_MAY_HAVE_EDSP * @OPUS_ARM_MAY_HAVE_EDSP@ + +; Set the following to 1 if we have ARMv6 media instructions. +OPUS_ARM_MAY_HAVE_MEDIA * @OPUS_ARM_MAY_HAVE_MEDIA@ + +; Set the following to 1 if we have NEON (some ARMv7) +OPUS_ARM_MAY_HAVE_NEON * @OPUS_ARM_MAY_HAVE_NEON@ + +END diff --git a/drivers/opus/celt/arm/celt_pitch_xcorr_arm.s b/drivers/opus/celt/arm/celt_pitch_xcorr_arm.s new file mode 100644 index 0000000000..09917b16bf --- /dev/null +++ b/drivers/opus/celt/arm/celt_pitch_xcorr_arm.s @@ -0,0 +1,545 @@ +; Copyright (c) 2007-2008 CSIRO +; Copyright (c) 2007-2009 Xiph.Org Foundation +; Copyright (c) 2013 Parrot +; Written by Aurélien Zanelli +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; +; - Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. 
+; +; - Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in the +; documentation and/or other materials provided with the distribution. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER +; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + AREA |.text|, CODE, READONLY + + GET celt/arm/armopts.s + +IF OPUS_ARM_MAY_HAVE_EDSP + EXPORT celt_pitch_xcorr_edsp +ENDIF + +IF OPUS_ARM_MAY_HAVE_NEON + EXPORT celt_pitch_xcorr_neon +ENDIF + +IF OPUS_ARM_MAY_HAVE_NEON + +; Compute sum[k]=sum(x[j]*y[j+k],j=0...len-1), k=0...3 +xcorr_kernel_neon PROC + ; input: + ; r3 = int len + ; r4 = opus_val16 *x + ; r5 = opus_val16 *y + ; q0 = opus_val32 sum[4] + ; output: + ; q0 = opus_val32 sum[4] + ; preserved: r0-r3, r6-r11, d2, q4-q7, q9-q15 + ; internal usage: + ; r12 = int j + ; d3 = y_3|y_2|y_1|y_0 + ; q2 = y_B|y_A|y_9|y_8|y_7|y_6|y_5|y_4 + ; q3 = x_7|x_6|x_5|x_4|x_3|x_2|x_1|x_0 + ; q8 = scratch + ; + ; Load y[0...3] + ; This requires len>0 to always be valid (which we assert in the C code). + VLD1.16 {d5}, [r5]! + SUBS r12, r3, #8 + BLE xcorr_kernel_neon_process4 +; Process 8 samples at a time. +; This loop loads one y value more than we actually need. 
Therefore we have to +; stop as soon as there are 8 or fewer samples left (instead of 7), to avoid +; reading past the end of the array. +xcorr_kernel_neon_process8 + ; This loop has 19 total instructions (10 cycles to issue, minimum), with + ; - 2 cycles of ARM instructions, + ; - 10 cycles of load/store/byte permute instructions, and + ; - 9 cycles of data processing instructions. + ; On a Cortex A8, we dual-issue the maximum amount (9 cycles) between the + ; latter two categories, meaning the whole loop should run in 10 cycles per + ; iteration, barring cache misses. + ; + ; Load x[0...7] + VLD1.16 {d6, d7}, [r4]! + ; Unlike VMOV, VAND is a data processing instruction (and doesn't get + ; assembled to VMOV, like VORR would), so it dual-issues with the prior VLD1. + VAND d3, d5, d5 + SUBS r12, r12, #8 + ; Load y[4...11] + VLD1.16 {d4, d5}, [r5]! + VMLAL.S16 q0, d3, d6[0] + VEXT.16 d16, d3, d4, #1 + VMLAL.S16 q0, d4, d7[0] + VEXT.16 d17, d4, d5, #1 + VMLAL.S16 q0, d16, d6[1] + VEXT.16 d16, d3, d4, #2 + VMLAL.S16 q0, d17, d7[1] + VEXT.16 d17, d4, d5, #2 + VMLAL.S16 q0, d16, d6[2] + VEXT.16 d16, d3, d4, #3 + VMLAL.S16 q0, d17, d7[2] + VEXT.16 d17, d4, d5, #3 + VMLAL.S16 q0, d16, d6[3] + VMLAL.S16 q0, d17, d7[3] + BGT xcorr_kernel_neon_process8 +; Process 4 samples here if we have > 4 left (still reading one extra y value). +xcorr_kernel_neon_process4 + ADDS r12, r12, #4 + BLE xcorr_kernel_neon_process2 + ; Load x[0...3] + VLD1.16 d6, [r4]! + ; Use VAND since it's a data processing instruction again. + VAND d4, d5, d5 + SUB r12, r12, #4 + ; Load y[4...7] + VLD1.16 d5, [r5]! + VMLAL.S16 q0, d4, d6[0] + VEXT.16 d16, d4, d5, #1 + VMLAL.S16 q0, d16, d6[1] + VEXT.16 d16, d4, d5, #2 + VMLAL.S16 q0, d16, d6[2] + VEXT.16 d16, d4, d5, #3 + VMLAL.S16 q0, d16, d6[3] +; Process 2 samples here if we have > 2 left (still reading one extra y value). +xcorr_kernel_neon_process2 + ADDS r12, r12, #2 + BLE xcorr_kernel_neon_process1 + ; Load x[0...1] + VLD2.16 {d6[],d7[]}, [r4]!
+ ; Use VAND since it's a data processing instruction again. + VAND d4, d5, d5 + SUB r12, r12, #2 + ; Load y[4...5] + VLD1.32 {d5[]}, [r5]! + VMLAL.S16 q0, d4, d6 + VEXT.16 d16, d4, d5, #1 + ; Replace bottom copy of {y5,y4} in d5 with {y3,y2} from d4, using VSRI + ; instead of VEXT, since it's a data-processing instruction. + VSRI.64 d5, d4, #32 + VMLAL.S16 q0, d16, d7 +; Process 1 sample using the extra y value we loaded above. +xcorr_kernel_neon_process1 + ; Load next *x + VLD1.16 {d6[]}, [r4]! + ADDS r12, r12, #1 + ; y[0...3] are left in d5 from prior iteration(s) (if any) + VMLAL.S16 q0, d5, d6 + MOVLE pc, lr +; Now process 1 last sample, not reading ahead. + ; Load last *y + VLD1.16 {d4[]}, [r5]! + VSRI.64 d4, d5, #16 + ; Load last *x + VLD1.16 {d6[]}, [r4]! + VMLAL.S16 q0, d4, d6 + MOV pc, lr + ENDP + +; opus_val32 celt_pitch_xcorr_neon(opus_val16 *_x, opus_val16 *_y, +; opus_val32 *xcorr, int len, int max_pitch) +celt_pitch_xcorr_neon PROC + ; input: + ; r0 = opus_val16 *_x + ; r1 = opus_val16 *_y + ; r2 = opus_val32 *xcorr + ; r3 = int len + ; output: + ; r0 = int maxcorr + ; internal usage: + ; r4 = opus_val16 *x (for xcorr_kernel_neon()) + ; r5 = opus_val16 *y (for xcorr_kernel_neon()) + ; r6 = int max_pitch + ; r12 = int j + ; q15 = int maxcorr[4] (q15 is not used by xcorr_kernel_neon()) + STMFD sp!, {r4-r6, lr} + LDR r6, [sp, #16] + VMOV.S32 q15, #1 + ; if (max_pitch < 4) goto celt_pitch_xcorr_neon_process4_done + SUBS r6, r6, #4 + BLT celt_pitch_xcorr_neon_process4_done +celt_pitch_xcorr_neon_process4 + ; xcorr_kernel_neon parameters: + ; r3 = len, r4 = _x, r5 = _y, q0 = {0, 0, 0, 0} + MOV r4, r0 + MOV r5, r1 + VEOR q0, q0, q0 + ; xcorr_kernel_neon only modifies r4, r5, r12, and q0...q3. + ; So we don't save/restore any other registers. + BL xcorr_kernel_neon + SUBS r6, r6, #4 + VST1.32 {q0}, [r2]! 
+ ; _y += 4 + ADD r1, r1, #8 + VMAX.S32 q15, q15, q0 + ; if (max_pitch < 4) goto celt_pitch_xcorr_neon_process4_done + BGE celt_pitch_xcorr_neon_process4 +; We have less than 4 sums left to compute. +celt_pitch_xcorr_neon_process4_done + ADDS r6, r6, #4 + ; Reduce maxcorr to a single value + VMAX.S32 d30, d30, d31 + VPMAX.S32 d30, d30, d30 + ; if (max_pitch <= 0) goto celt_pitch_xcorr_neon_done + BLE celt_pitch_xcorr_neon_done +; Now compute each remaining sum one at a time. +celt_pitch_xcorr_neon_process_remaining + MOV r4, r0 + MOV r5, r1 + VMOV.I32 q0, #0 + SUBS r12, r3, #8 + BLT celt_pitch_xcorr_neon_process_remaining4 +; Sum terms 8 at a time. +celt_pitch_xcorr_neon_process_remaining_loop8 + ; Load x[0...7] + VLD1.16 {q1}, [r4]! + ; Load y[0...7] + VLD1.16 {q2}, [r5]! + SUBS r12, r12, #8 + VMLAL.S16 q0, d4, d2 + VMLAL.S16 q0, d5, d3 + BGE celt_pitch_xcorr_neon_process_remaining_loop8 +; Sum terms 4 at a time. +celt_pitch_xcorr_neon_process_remaining4 + ADDS r12, r12, #4 + BLT celt_pitch_xcorr_neon_process_remaining4_done + ; Load x[0...3] + VLD1.16 {d2}, [r4]! + ; Load y[0...3] + VLD1.16 {d3}, [r5]! + SUB r12, r12, #4 + VMLAL.S16 q0, d3, d2 +celt_pitch_xcorr_neon_process_remaining4_done + ; Reduce the sum to a single value. + VADD.S32 d0, d0, d1 + VPADDL.S32 d0, d0 + ADDS r12, r12, #4 + BLE celt_pitch_xcorr_neon_process_remaining_loop_done +; Sum terms 1 at a time. +celt_pitch_xcorr_neon_process_remaining_loop1 + VLD1.16 {d2[]}, [r4]! + VLD1.16 {d3[]}, [r5]! + SUBS r12, r12, #1 + VMLAL.S16 q0, d2, d3 + BGT celt_pitch_xcorr_neon_process_remaining_loop1 +celt_pitch_xcorr_neon_process_remaining_loop_done + VST1.32 {d0[0]}, [r2]! 
+ VMAX.S32 d30, d30, d0 + SUBS r6, r6, #1 + ; _y++ + ADD r1, r1, #2 + ; if (--max_pitch > 0) goto celt_pitch_xcorr_neon_process_remaining + BGT celt_pitch_xcorr_neon_process_remaining +celt_pitch_xcorr_neon_done + VMOV.32 r0, d30[0] + LDMFD sp!, {r4-r6, pc} + ENDP + +ENDIF + +IF OPUS_ARM_MAY_HAVE_EDSP + +; This will get used on ARMv7 devices without NEON, so it has been optimized +; to take advantage of dual-issuing where possible. +xcorr_kernel_edsp PROC + ; input: + ; r3 = int len + ; r4 = opus_val16 *_x (must be 32-bit aligned) + ; r5 = opus_val16 *_y (must be 32-bit aligned) + ; r6...r9 = opus_val32 sum[4] + ; output: + ; r6...r9 = opus_val32 sum[4] + ; preserved: r0-r5 + ; internal usage + ; r2 = int j + ; r12,r14 = opus_val16 x[4] + ; r10,r11 = opus_val16 y[4] + STMFD sp!, {r2,r4,r5,lr} + LDR r10, [r5], #4 ; Load y[0...1] + SUBS r2, r3, #4 ; j = len-4 + LDR r11, [r5], #4 ; Load y[2...3] + BLE xcorr_kernel_edsp_process4_done + LDR r12, [r4], #4 ; Load x[0...1] + ; Stall +xcorr_kernel_edsp_process4 + ; The multiplies must issue from pipeline 0, and can't dual-issue with each + ; other. Every other instruction here dual-issues with a multiply, and is + ; thus "free". There should be no stalls in the body of the loop. 
+ SMLABB r6, r12, r10, r6 ; sum[0] = MAC16_16(sum[0],x_0,y_0) + LDR r14, [r4], #4 ; Load x[2...3] + SMLABT r7, r12, r10, r7 ; sum[1] = MAC16_16(sum[1],x_0,y_1) + SUBS r2, r2, #4 ; j-=4 + SMLABB r8, r12, r11, r8 ; sum[2] = MAC16_16(sum[2],x_0,y_2) + SMLABT r9, r12, r11, r9 ; sum[3] = MAC16_16(sum[3],x_0,y_3) + SMLATT r6, r12, r10, r6 ; sum[0] = MAC16_16(sum[0],x_1,y_1) + LDR r10, [r5], #4 ; Load y[4...5] + SMLATB r7, r12, r11, r7 ; sum[1] = MAC16_16(sum[1],x_1,y_2) + SMLATT r8, r12, r11, r8 ; sum[2] = MAC16_16(sum[2],x_1,y_3) + SMLATB r9, r12, r10, r9 ; sum[3] = MAC16_16(sum[3],x_1,y_4) + LDRGT r12, [r4], #4 ; Load x[0...1] + SMLABB r6, r14, r11, r6 ; sum[0] = MAC16_16(sum[0],x_2,y_2) + SMLABT r7, r14, r11, r7 ; sum[1] = MAC16_16(sum[1],x_2,y_3) + SMLABB r8, r14, r10, r8 ; sum[2] = MAC16_16(sum[2],x_2,y_4) + SMLABT r9, r14, r10, r9 ; sum[3] = MAC16_16(sum[3],x_2,y_5) + SMLATT r6, r14, r11, r6 ; sum[0] = MAC16_16(sum[0],x_3,y_3) + LDR r11, [r5], #4 ; Load y[6...7] + SMLATB r7, r14, r10, r7 ; sum[1] = MAC16_16(sum[1],x_3,y_4) + SMLATT r8, r14, r10, r8 ; sum[2] = MAC16_16(sum[2],x_3,y_5) + SMLATB r9, r14, r11, r9 ; sum[3] = MAC16_16(sum[3],x_3,y_6) + BGT xcorr_kernel_edsp_process4 +xcorr_kernel_edsp_process4_done + ADDS r2, r2, #4 + BLE xcorr_kernel_edsp_done + LDRH r12, [r4], #2 ; r12 = *x++ + SUBS r2, r2, #1 ; j-- + ; Stall + SMLABB r6, r12, r10, r6 ; sum[0] = MAC16_16(sum[0],x,y_0) + LDRGTH r14, [r4], #2 ; r14 = *x++ + SMLABT r7, r12, r10, r7 ; sum[1] = MAC16_16(sum[1],x,y_1) + SMLABB r8, r12, r11, r8 ; sum[2] = MAC16_16(sum[2],x,y_2) + SMLABT r9, r12, r11, r9 ; sum[3] = MAC16_16(sum[3],x,y_3) + BLE xcorr_kernel_edsp_done + SMLABT r6, r14, r10, r6 ; sum[0] = MAC16_16(sum[0],x,y_1) + SUBS r2, r2, #1 ; j-- + SMLABB r7, r14, r11, r7 ; sum[1] = MAC16_16(sum[1],x,y_2) + LDRH r10, [r5], #2 ; r10 = y_4 = *y++ + SMLABT r8, r14, r11, r8 ; sum[2] = MAC16_16(sum[2],x,y_3) + LDRGTH r12, [r4], #2 ; r12 = *x++ + SMLABB r9, r14, r10, r9 ; sum[3] = MAC16_16(sum[3],x,y_4) + BLE 
xcorr_kernel_edsp_done + SMLABB r6, r12, r11, r6 ; sum[0] = MAC16_16(sum[0],tmp,y_2) + CMP r2, #1 ; j-- + SMLABT r7, r12, r11, r7 ; sum[1] = MAC16_16(sum[1],tmp,y_3) + LDRH r2, [r5], #2 ; r2 = y_5 = *y++ + SMLABB r8, r12, r10, r8 ; sum[2] = MAC16_16(sum[2],tmp,y_4) + LDRGTH r14, [r4] ; r14 = *x + SMLABB r9, r12, r2, r9 ; sum[3] = MAC16_16(sum[3],tmp,y_5) + BLE xcorr_kernel_edsp_done + SMLABT r6, r14, r11, r6 ; sum[0] = MAC16_16(sum[0],tmp,y_3) + LDRH r11, [r5] ; r11 = y_6 = *y + SMLABB r7, r14, r10, r7 ; sum[1] = MAC16_16(sum[1],tmp,y_4) + SMLABB r8, r14, r2, r8 ; sum[2] = MAC16_16(sum[2],tmp,y_5) + SMLABB r9, r14, r11, r9 ; sum[3] = MAC16_16(sum[3],tmp,y_6) +xcorr_kernel_edsp_done + LDMFD sp!, {r2,r4,r5,pc} + ENDP + +celt_pitch_xcorr_edsp PROC + ; input: + ; r0 = opus_val16 *_x (must be 32-bit aligned) + ; r1 = opus_val16 *_y (only needs to be 16-bit aligned) + ; r2 = opus_val32 *xcorr + ; r3 = int len + ; output: + ; r0 = maxcorr + ; internal usage + ; r4 = opus_val16 *x + ; r5 = opus_val16 *y + ; r6 = opus_val32 sum0 + ; r7 = opus_val32 sum1 + ; r8 = opus_val32 sum2 + ; r9 = opus_val32 sum3 + ; r1 = int max_pitch + ; r12 = int j + STMFD sp!, {r4-r11, lr} + MOV r5, r1 + LDR r1, [sp, #36] + MOV r4, r0 + TST r5, #3 + ; maxcorr = 1 + MOV r0, #1 + BEQ celt_pitch_xcorr_edsp_process1u_done +; Compute one sum at the start to make y 32-bit aligned. 
+ SUBS r12, r3, #4 + ; r14 = sum = 0 + MOV r14, #0 + LDRH r8, [r5], #2 + BLE celt_pitch_xcorr_edsp_process1u_loop4_done + LDR r6, [r4], #4 + MOV r8, r8, LSL #16 +celt_pitch_xcorr_edsp_process1u_loop4 + LDR r9, [r5], #4 + SMLABT r14, r6, r8, r14 ; sum = MAC16_16(sum, x_0, y_0) + LDR r7, [r4], #4 + SMLATB r14, r6, r9, r14 ; sum = MAC16_16(sum, x_1, y_1) + LDR r8, [r5], #4 + SMLABT r14, r7, r9, r14 ; sum = MAC16_16(sum, x_2, y_2) + SUBS r12, r12, #4 ; j-=4 + SMLATB r14, r7, r8, r14 ; sum = MAC16_16(sum, x_3, y_3) + LDRGT r6, [r4], #4 + BGT celt_pitch_xcorr_edsp_process1u_loop4 + MOV r8, r8, LSR #16 +celt_pitch_xcorr_edsp_process1u_loop4_done + ADDS r12, r12, #4 +celt_pitch_xcorr_edsp_process1u_loop1 + LDRGEH r6, [r4], #2 + ; Stall + SMLABBGE r14, r6, r8, r14 ; sum = MAC16_16(sum, *x, *y) + SUBGES r12, r12, #1 + LDRGTH r8, [r5], #2 + BGT celt_pitch_xcorr_edsp_process1u_loop1 + ; Restore _x + SUB r4, r4, r3, LSL #1 + ; Restore and advance _y + SUB r5, r5, r3, LSL #1 + ; maxcorr = max(maxcorr, sum) + CMP r0, r14 + ADD r5, r5, #2 + MOVLT r0, r14 + SUBS r1, r1, #1 + ; xcorr[i] = sum + STR r14, [r2], #4 + BLE celt_pitch_xcorr_edsp_done +celt_pitch_xcorr_edsp_process1u_done + ; if (max_pitch < 4) goto celt_pitch_xcorr_edsp_process2 + SUBS r1, r1, #4 + BLT celt_pitch_xcorr_edsp_process2 +celt_pitch_xcorr_edsp_process4 + ; xcorr_kernel_edsp parameters: + ; r3 = len, r4 = _x, r5 = _y, r6...r9 = sum[4] = {0, 0, 0, 0} + MOV r6, #0 + MOV r7, #0 + MOV r8, #0 + MOV r9, #0 + BL xcorr_kernel_edsp ; xcorr_kernel_edsp(_x, _y+i, xcorr+i, len) + ; maxcorr = max(maxcorr, sum0, sum1, sum2, sum3) + CMP r0, r6 + ; _y+=4 + ADD r5, r5, #8 + MOVLT r0, r6 + CMP r0, r7 + MOVLT r0, r7 + CMP r0, r8 + MOVLT r0, r8 + CMP r0, r9 + MOVLT r0, r9 + STMIA r2!, {r6-r9} + SUBS r1, r1, #4 + BGE celt_pitch_xcorr_edsp_process4 +celt_pitch_xcorr_edsp_process2 + ADDS r1, r1, #2 + BLT celt_pitch_xcorr_edsp_process1a + SUBS r12, r3, #4 + ; {r10, r11} = {sum0, sum1} = {0, 0} + MOV r10, #0 + MOV r11, #0 + LDR r8, 
[r5], #4 + BLE celt_pitch_xcorr_edsp_process2_loop_done + LDR r6, [r4], #4 + LDR r9, [r5], #4 +celt_pitch_xcorr_edsp_process2_loop4 + SMLABB r10, r6, r8, r10 ; sum0 = MAC16_16(sum0, x_0, y_0) + LDR r7, [r4], #4 + SMLABT r11, r6, r8, r11 ; sum1 = MAC16_16(sum1, x_0, y_1) + SUBS r12, r12, #4 ; j-=4 + SMLATT r10, r6, r8, r10 ; sum0 = MAC16_16(sum0, x_1, y_1) + LDR r8, [r5], #4 + SMLATB r11, r6, r9, r11 ; sum1 = MAC16_16(sum1, x_1, y_2) + LDRGT r6, [r4], #4 + SMLABB r10, r7, r9, r10 ; sum0 = MAC16_16(sum0, x_2, y_2) + SMLABT r11, r7, r9, r11 ; sum1 = MAC16_16(sum1, x_2, y_3) + SMLATT r10, r7, r9, r10 ; sum0 = MAC16_16(sum0, x_3, y_3) + LDRGT r9, [r5], #4 + SMLATB r11, r7, r8, r11 ; sum1 = MAC16_16(sum1, x_3, y_4) + BGT celt_pitch_xcorr_edsp_process2_loop4 +celt_pitch_xcorr_edsp_process2_loop_done + ADDS r12, r12, #2 + BLE celt_pitch_xcorr_edsp_process2_1 + LDR r6, [r4], #4 + ; Stall + SMLABB r10, r6, r8, r10 ; sum0 = MAC16_16(sum0, x_0, y_0) + LDR r9, [r5], #4 + SMLABT r11, r6, r8, r11 ; sum1 = MAC16_16(sum1, x_0, y_1) + SUB r12, r12, #2 + SMLATT r10, r6, r8, r10 ; sum0 = MAC16_16(sum0, x_1, y_1) + MOV r8, r9 + SMLATB r11, r6, r9, r11 ; sum1 = MAC16_16(sum1, x_1, y_2) +celt_pitch_xcorr_edsp_process2_1 + LDRH r6, [r4], #2 + ADDS r12, r12, #1 + ; Stall + SMLABB r10, r6, r8, r10 ; sum0 = MAC16_16(sum0, x_0, y_0) + LDRGTH r7, [r4], #2 + SMLABT r11, r6, r8, r11 ; sum1 = MAC16_16(sum1, x_0, y_1) + BLE celt_pitch_xcorr_edsp_process2_done + LDRH r9, [r5], #2 + SMLABT r10, r7, r8, r10 ; sum0 = MAC16_16(sum0, x_0, y_1) + SMLABB r11, r7, r9, r11 ; sum1 = MAC16_16(sum1, x_0, y_2) +celt_pitch_xcorr_edsp_process2_done + ; Restore _x + SUB r4, r4, r3, LSL #1 + ; Restore and advance _y + SUB r5, r5, r3, LSL #1 + ; maxcorr = max(maxcorr, sum0) + CMP r0, r10 + ADD r5, r5, #2 + MOVLT r0, r10 + SUB r1, r1, #2 + ; maxcorr = max(maxcorr, sum1) + CMP r0, r11 + ; xcorr[i] = sum + STR r10, [r2], #4 + MOVLT r0, r11 + STR r11, [r2], #4 +celt_pitch_xcorr_edsp_process1a + ADDS r1, r1, #1 + BLT 
celt_pitch_xcorr_edsp_done + SUBS r12, r3, #4 + ; r14 = sum = 0 + MOV r14, #0 + BLT celt_pitch_xcorr_edsp_process1a_loop_done + LDR r6, [r4], #4 + LDR r8, [r5], #4 + LDR r7, [r4], #4 + LDR r9, [r5], #4 +celt_pitch_xcorr_edsp_process1a_loop4 + SMLABB r14, r6, r8, r14 ; sum = MAC16_16(sum, x_0, y_0) + SUBS r12, r12, #4 ; j-=4 + SMLATT r14, r6, r8, r14 ; sum = MAC16_16(sum, x_1, y_1) + LDRGE r6, [r4], #4 + SMLABB r14, r7, r9, r14 ; sum = MAC16_16(sum, x_2, y_2) + LDRGE r8, [r5], #4 + SMLATT r14, r7, r9, r14 ; sum = MAC16_16(sum, x_3, y_3) + LDRGE r7, [r4], #4 + LDRGE r9, [r5], #4 + BGE celt_pitch_xcorr_edsp_process1a_loop4 +celt_pitch_xcorr_edsp_process1a_loop_done + ADDS r12, r12, #2 + LDRGE r6, [r4], #4 + LDRGE r8, [r5], #4 + ; Stall + SMLABBGE r14, r6, r8, r14 ; sum = MAC16_16(sum, x_0, y_0) + SUBGE r12, r12, #2 + SMLATTGE r14, r6, r8, r14 ; sum = MAC16_16(sum, x_1, y_1) + ADDS r12, r12, #1 + LDRGEH r6, [r4], #2 + LDRGEH r8, [r5], #2 + ; Stall + SMLABBGE r14, r6, r8, r14 ; sum = MAC16_16(sum, *x, *y) + ; maxcorr = max(maxcorr, sum) + CMP r0, r14 + ; xcorr[i] = sum + STR r14, [r2], #4 + MOVLT r0, r14 +celt_pitch_xcorr_edsp_done + LDMFD sp!, {r4-r11, pc} + ENDP + +ENDIF + +END diff --git a/drivers/opus/celt/arm/fixed_armv4.h b/drivers/opus/celt/arm/fixed_armv4.h new file mode 100644 index 0000000000..b690bc8cea --- /dev/null +++ b/drivers/opus/celt/arm/fixed_armv4.h @@ -0,0 +1,76 @@ +/* Copyright (C) 2013 Xiph.Org Foundation and contributors */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef FIXED_ARMv4_H +#define FIXED_ARMv4_H + +/** 16x32 multiplication, followed by a 16-bit shift right. Results fits in 32 bits */ +#undef MULT16_32_Q16 +static OPUS_INLINE opus_val32 MULT16_32_Q16_armv4(opus_val16 a, opus_val32 b) +{ + unsigned rd_lo; + int rd_hi; + __asm__( + "#MULT16_32_Q16\n\t" + "smull %0, %1, %2, %3\n\t" + : "=&r"(rd_lo), "=&r"(rd_hi) + : "%r"(b),"r"(a<<16) + ); + return rd_hi; +} +#define MULT16_32_Q16(a, b) (MULT16_32_Q16_armv4(a, b)) + + +/** 16x32 multiplication, followed by a 15-bit shift right. Results fits in 32 bits */ +#undef MULT16_32_Q15 +static OPUS_INLINE opus_val32 MULT16_32_Q15_armv4(opus_val16 a, opus_val32 b) +{ + unsigned rd_lo; + int rd_hi; + __asm__( + "#MULT16_32_Q15\n\t" + "smull %0, %1, %2, %3\n\t" + : "=&r"(rd_lo), "=&r"(rd_hi) + : "%r"(b), "r"(a<<16) + ); + /*We intentionally don't OR in the high bit of rd_lo for speed.*/ + return rd_hi<<1; +} +#define MULT16_32_Q15(a, b) (MULT16_32_Q15_armv4(a, b)) + + +/** 16x32 multiply, followed by a 15-bit shift right and 32-bit add. + b must fit in 31 bits. + Result fits in 32 bits. 
*/ +#undef MAC16_32_Q15 +#define MAC16_32_Q15(c, a, b) ADD32(c, MULT16_32_Q15(a, b)) + + +/** 32x32 multiplication, followed by a 31-bit shift right. Results fits in 32 bits */ +#undef MULT32_32_Q31 +#define MULT32_32_Q31(a,b) (opus_val32)((((opus_int64)(a)) * ((opus_int64)(b)))>>31) + +#endif diff --git a/drivers/opus/celt/arm/fixed_armv5e.h b/drivers/opus/celt/arm/fixed_armv5e.h new file mode 100644 index 0000000000..1194a7d3ec --- /dev/null +++ b/drivers/opus/celt/arm/fixed_armv5e.h @@ -0,0 +1,116 @@ +/* Copyright (C) 2007-2009 Xiph.Org Foundation + Copyright (C) 2003-2008 Jean-Marc Valin + Copyright (C) 2007-2008 CSIRO + Copyright (C) 2013 Parrot */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/
+
+#ifndef FIXED_ARMv5E_H
+#define FIXED_ARMv5E_H
+
+#include "fixed_armv4.h"
+
+/** 16x32 multiplication, followed by a 16-bit shift right. Result fits in 32 bits. */
+#undef MULT16_32_Q16
+static OPUS_INLINE opus_val32 MULT16_32_Q16_armv5e(opus_val16 a, opus_val32 b)
+{
+  int res;
+  __asm__(
+      "#MULT16_32_Q16\n\t"
+      "smulwb %0, %1, %2\n\t"
+      : "=r"(res)
+      : "r"(b),"r"(a) /* SMULWB: 32-bit %1 times the bottom halfword of %2, keeping the top 32 bits of the 48-bit product */
+  );
+  return res;
+}
+#define MULT16_32_Q16(a, b) (MULT16_32_Q16_armv5e(a, b))
+
+
+/** 16x32 multiplication, followed by a 15-bit shift right. Result fits in 32 bits. */
+#undef MULT16_32_Q15
+static OPUS_INLINE opus_val32 MULT16_32_Q15_armv5e(opus_val16 a, opus_val32 b)
+{
+  int res;
+  __asm__(
+      "#MULT16_32_Q15\n\t"
+      "smulwb %0, %1, %2\n\t"
+      : "=r"(res)
+      : "r"(b), "r"(a) /* res = (b*a) >> 16; doubled below to approximate (b*a) >> 15 */
+  );
+  return (opus_val32)((unsigned)res<<1); /* unsigned shift: res<<1 is undefined in C when res is negative */
+}
+#define MULT16_32_Q15(a, b) (MULT16_32_Q15_armv5e(a, b))
+
+
+/** 16x32 multiply, followed by a 15-bit shift right and 32-bit add.
+    b must fit in 31 bits (it is doubled before the multiply so that SMLAWB's 16-bit shift acts as a 15-bit one).
+    Result fits in 32 bits. */
+#undef MAC16_32_Q15
+static OPUS_INLINE opus_val32 MAC16_32_Q15_armv5e(opus_val32 c, opus_val16 a,
+ opus_val32 b)
+{
+  int res;
+  __asm__(
+      "#MAC16_32_Q15\n\t"
+      "smlawb %0, %1, %2, %3;\n"
+      : "=r"(res)
+      : "r"((opus_val32)((unsigned)b<<1)), "r"(a), "r"(c) /* unsigned shift: b<<1 is undefined in C when b is negative */
+  );
+  return res;
+}
+#define MAC16_32_Q15(c, a, b) (MAC16_32_Q15_armv5e(c, a, b))
+
+/** 16x16 multiply-add where the result fits in 32 bits: returns c + a*b. */
+#undef MAC16_16
+static OPUS_INLINE opus_val32 MAC16_16_armv5e(opus_val32 c, opus_val16 a,
+ opus_val16 b)
+{
+  int res;
+  __asm__(
+      "#MAC16_16\n\t"
+      "smlabb %0, %1, %2, %3;\n"
+      : "=r"(res)
+      : "r"(a), "r"(b), "r"(c) /* SMLABB: bottom halfword of %1 times bottom halfword of %2, plus %3 */
+  );
+  return res;
+}
+#define MAC16_16(c, a, b) (MAC16_16_armv5e(c, a, b))
+
+/** 16x16 multiplication where the result fits in 32 bits: returns a*b. */
+#undef MULT16_16
+static OPUS_INLINE opus_val32 MULT16_16_armv5e(opus_val16 a, opus_val16 b)
+{
+  int res;
+  __asm__(
+      "#MULT16_16\n\t"
+      "smulbb %0, %1, %2;\n"
+      : "=r"(res)
+      : "r"(a), "r"(b) /* SMULBB: bottom halfword of %1 times bottom halfword of %2 */
+  );
+  return res;
+}
+#define MULT16_16(a, b) (MULT16_16_armv5e(a, b))
+
+#endif
diff 
--git a/drivers/opus/celt/arm/kiss_fft_armv4.h b/drivers/opus/celt/arm/kiss_fft_armv4.h new file mode 100644 index 0000000000..773464628b --- /dev/null +++ b/drivers/opus/celt/arm/kiss_fft_armv4.h @@ -0,0 +1,121 @@ +/*Copyright (c) 2013, Xiph.Org Foundation and contributors. + + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE.*/ + +#ifndef KISS_FFT_ARMv4_H +#define KISS_FFT_ARMv4_H + +#if !defined(KISS_FFT_GUTS_H) +#error "This file should only be included from _kiss_fft_guts.h" +#endif + +#ifdef OPUS_FIXED_POINT + +#undef C_MUL +#define C_MUL(m,a,b) \ + do{ \ + int br__; \ + int bi__; \ + int tt__; \ + __asm__ __volatile__( \ + "#C_MUL\n\t" \ + "ldrsh %[br], [%[bp], #0]\n\t" \ + "ldm %[ap], {r0,r1}\n\t" \ + "ldrsh %[bi], [%[bp], #2]\n\t" \ + "smull %[tt], %[mi], r1, %[br]\n\t" \ + "smlal %[tt], %[mi], r0, %[bi]\n\t" \ + "rsb %[bi], %[bi], #0\n\t" \ + "smull %[br], %[mr], r0, %[br]\n\t" \ + "mov %[tt], %[tt], lsr #15\n\t" \ + "smlal %[br], %[mr], r1, %[bi]\n\t" \ + "orr %[mi], %[tt], %[mi], lsl #17\n\t" \ + "mov %[br], %[br], lsr #15\n\t" \ + "orr %[mr], %[br], %[mr], lsl #17\n\t" \ + : [mr]"=r"((m).r), [mi]"=r"((m).i), \ + [br]"=&r"(br__), [bi]"=r"(bi__), [tt]"=r"(tt__) \ + : [ap]"r"(&(a)), [bp]"r"(&(b)) \ + : "r0", "r1" \ + ); \ + } \ + while(0) + +#undef C_MUL4 +#define C_MUL4(m,a,b) \ + do{ \ + int br__; \ + int bi__; \ + int tt__; \ + __asm__ __volatile__( \ + "#C_MUL4\n\t" \ + "ldrsh %[br], [%[bp], #0]\n\t" \ + "ldm %[ap], {r0,r1}\n\t" \ + "ldrsh %[bi], [%[bp], #2]\n\t" \ + "smull %[tt], %[mi], r1, %[br]\n\t" \ + "smlal %[tt], %[mi], r0, %[bi]\n\t" \ + "rsb %[bi], %[bi], #0\n\t" \ + "smull %[br], %[mr], r0, %[br]\n\t" \ + "mov %[tt], %[tt], lsr #17\n\t" \ + "smlal %[br], %[mr], r1, %[bi]\n\t" \ + "orr %[mi], %[tt], %[mi], lsl #15\n\t" \ + "mov %[br], %[br], lsr 
#17\n\t" \ + "orr %[mr], %[br], %[mr], lsl #15\n\t" \ + : [mr]"=r"((m).r), [mi]"=r"((m).i), \ + [br]"=&r"(br__), [bi]"=r"(bi__), [tt]"=r"(tt__) \ + : [ap]"r"(&(a)), [bp]"r"(&(b)) \ + : "r0", "r1" \ + ); \ + } \ + while(0) + +#undef C_MULC +#define C_MULC(m,a,b) \ + do{ \ + int br__; \ + int bi__; \ + int tt__; \ + __asm__ __volatile__( \ + "#C_MULC\n\t" \ + "ldrsh %[br], [%[bp], #0]\n\t" \ + "ldm %[ap], {r0,r1}\n\t" \ + "ldrsh %[bi], [%[bp], #2]\n\t" \ + "smull %[tt], %[mr], r0, %[br]\n\t" \ + "smlal %[tt], %[mr], r1, %[bi]\n\t" \ + "rsb %[bi], %[bi], #0\n\t" \ + "smull %[br], %[mi], r1, %[br]\n\t" \ + "mov %[tt], %[tt], lsr #15\n\t" \ + "smlal %[br], %[mi], r0, %[bi]\n\t" \ + "orr %[mr], %[tt], %[mr], lsl #17\n\t" \ + "mov %[br], %[br], lsr #15\n\t" \ + "orr %[mi], %[br], %[mi], lsl #17\n\t" \ + : [mr]"=r"((m).r), [mi]"=r"((m).i), \ + [br]"=&r"(br__), [bi]"=r"(bi__), [tt]"=r"(tt__) \ + : [ap]"r"(&(a)), [bp]"r"(&(b)) \ + : "r0", "r1" \ + ); \ + } \ + while(0) + +#endif /* OPUS_FIXED_POINT */ + +#endif /* KISS_FFT_ARMv4_H */ diff --git a/drivers/opus/celt/arm/kiss_fft_armv5e.h b/drivers/opus/celt/arm/kiss_fft_armv5e.h new file mode 100644 index 0000000000..1eff56a66a --- /dev/null +++ b/drivers/opus/celt/arm/kiss_fft_armv5e.h @@ -0,0 +1,118 @@ +/*Copyright (c) 2013, Xiph.Org Foundation and contributors. + + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE.*/ + +#ifndef KISS_FFT_ARMv5E_H +#define KISS_FFT_ARMv5E_H + +#if !defined(KISS_FFT_GUTS_H) +#error "This file should only be included from _kiss_fft_guts.h" +#endif + +#ifdef OPUS_FIXED_POINT + +#if defined(__thumb__)||defined(__thumb2__) +#define LDRD_CONS "Q" +#else +#define LDRD_CONS "Uq" +#endif + +#undef C_MUL +#define C_MUL(m,a,b) \ + do{ \ + int mr1__; \ + int mr2__; \ + int mi__; \ + long long aval__; \ + int bval__; \ + __asm__( \ + "#C_MUL\n\t" \ + "ldrd %[aval], %H[aval], %[ap]\n\t" \ + "ldr %[bval], %[bp]\n\t" \ + "smulwb %[mi], %H[aval], %[bval]\n\t" \ + "smulwb %[mr1], %[aval], %[bval]\n\t" \ + "smulwt %[mr2], %H[aval], %[bval]\n\t" \ + "smlawt %[mi], %[aval], %[bval], %[mi]\n\t" \ + : [mr1]"=r"(mr1__), [mr2]"=r"(mr2__), [mi]"=r"(mi__), \ + [aval]"=&r"(aval__), [bval]"=r"(bval__) \ + : [ap]LDRD_CONS(a), [bp]"m"(b) \ + ); \ + (m).r = SHL32(SUB32(mr1__, mr2__), 1); \ + (m).i = SHL32(mi__, 1); \ + } \ + while(0) + +#undef C_MUL4 +#define C_MUL4(m,a,b) \ + do{ \ + int mr1__; \ + int mr2__; \ + int mi__; \ + long long aval__; \ + int bval__; \ + __asm__( \ + "#C_MUL4\n\t" \ + "ldrd %[aval], %H[aval], %[ap]\n\t" \ + "ldr %[bval], %[bp]\n\t" \ + "smulwb %[mi], %H[aval], %[bval]\n\t" \ + "smulwb 
%[mr1], %[aval], %[bval]\n\t" \ + "smulwt %[mr2], %H[aval], %[bval]\n\t" \ + "smlawt %[mi], %[aval], %[bval], %[mi]\n\t" \ + : [mr1]"=r"(mr1__), [mr2]"=r"(mr2__), [mi]"=r"(mi__), \ + [aval]"=&r"(aval__), [bval]"=r"(bval__) \ + : [ap]LDRD_CONS(a), [bp]"m"(b) \ + ); \ + (m).r = SHR32(SUB32(mr1__, mr2__), 1); \ + (m).i = SHR32(mi__, 1); \ + } \ + while(0) + +#undef C_MULC +#define C_MULC(m,a,b) \ + do{ \ + int mr__; \ + int mi1__; \ + int mi2__; \ + long long aval__; \ + int bval__; \ + __asm__( \ + "#C_MULC\n\t" \ + "ldrd %[aval], %H[aval], %[ap]\n\t" \ + "ldr %[bval], %[bp]\n\t" \ + "smulwb %[mr], %[aval], %[bval]\n\t" \ + "smulwb %[mi1], %H[aval], %[bval]\n\t" \ + "smulwt %[mi2], %[aval], %[bval]\n\t" \ + "smlawt %[mr], %H[aval], %[bval], %[mr]\n\t" \ + : [mr]"=r"(mr__), [mi1]"=r"(mi1__), [mi2]"=r"(mi2__), \ + [aval]"=&r"(aval__), [bval]"=r"(bval__) \ + : [ap]LDRD_CONS(a), [bp]"m"(b) \ + ); \ + (m).r = SHL32(mr__, 1); \ + (m).i = SHL32(SUB32(mi1__, mi2__), 1); \ + } \ + while(0) + +#endif /* OPUS_FIXED_POINT */ + +#endif /* KISS_FFT_ARMv5E_H */ diff --git a/drivers/opus/celt/arm/pitch_arm.h b/drivers/opus/celt/arm/pitch_arm.h new file mode 100644 index 0000000000..df5e82ef0b --- /dev/null +++ b/drivers/opus/celt/arm/pitch_arm.h @@ -0,0 +1,57 @@ +/* Copyright (c) 2010 Xiph.Org Foundation + * Copyright (c) 2013 Parrot */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#if !defined(PITCH_ARM_H) +# define PITCH_ARM_H + +# include "armcpu.h" + +# if defined(OPUS_FIXED_POINT) + +# if defined(OPUS_ARM_MAY_HAVE_NEON) +opus_val32 celt_pitch_xcorr_neon(const opus_val16 *_x, const opus_val16 *_y, + opus_val32 *xcorr, int len, int max_pitch); +# endif + +# if defined(OPUS_ARM_MAY_HAVE_MEDIA) +# define celt_pitch_xcorr_media MAY_HAVE_EDSP(celt_pitch_xcorr) +# endif + +# if defined(OPUS_ARM_MAY_HAVE_EDSP) +opus_val32 celt_pitch_xcorr_edsp(const opus_val16 *_x, const opus_val16 *_y, + opus_val32 *xcorr, int len, int max_pitch); +# endif + +# if !defined(OPUS_HAVE_RTCD) +# define OVERRIDE_PITCH_XCORR (1) +# define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \ + ((void)(arch),PRESUME_NEON(celt_pitch_xcorr)(_x, _y, xcorr, len, max_pitch)) +# endif + +# endif + +#endif diff --git a/drivers/opus/celt/bands.c b/drivers/opus/celt/bands.c new file mode 100644 index 0000000000..87280c8333 --- /dev/null +++ b/drivers/opus/celt/bands.c @@ -0,0 +1,1518 @@ +/* Copyright (c) 2007-2008 CSIRO + Copyright (c) 2007-2009 Xiph.Org Foundation + Copyright (c) 2008-2009 Gregory Maxwell + Written by Jean-Marc Valin and Gregory Maxwell */ +/* + Redistribution and use in source and 
binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include <math.h> +#include "bands.h" +#include "opus_modes.h" +#include "vq.h" +#include "cwrs.h" +#include "stack_alloc.h" +#include "os_support.h" +#include "mathops.h" +#include "rate.h" +#include "quant_bands.h" +#include "pitch.h" + +int hysteresis_decision(opus_val16 val, const opus_val16 *thresholds, const opus_val16 *hysteresis, int N, int prev) +{ + int i; + for (i=0;i<N;i++) + { + if (val < thresholds[i]) + break; + } + if (i>prev && val < thresholds[prev]+hysteresis[prev]) + i=prev; + if (i<prev && val > thresholds[prev-1]-hysteresis[prev-1]) + i=prev; + return i; +} + +opus_uint32 celt_lcg_rand(opus_uint32 seed) +{ + return 1664525 * seed + 1013904223; +} + +/* This is a cos() approximation designed to be bit-exact on any platform. Bit exactness + with this approximation is important because it has an impact on the bit allocation */ +static opus_int16 bitexact_cos(opus_int16 x) +{ + opus_int32 tmp; + opus_int16 x2; + tmp = (4096+((opus_int32)(x)*(x)))>>13; + celt_assert(tmp<=32767); + x2 = tmp; + x2 = (32767-x2) + FRAC_MUL16(x2, (-7651 + FRAC_MUL16(x2, (8277 + FRAC_MUL16(-626, x2))))); + celt_assert(x2<=32766); + return 1+x2; +} + +static int bitexact_log2tan(int isin,int icos) +{ + int lc; + int ls; + lc=EC_ILOG(icos); + ls=EC_ILOG(isin); + icos<<=15-lc; + isin<<=15-ls; + return (ls-lc)*(1<<11) + +FRAC_MUL16(isin, FRAC_MUL16(isin, -2597) + 7932) + -FRAC_MUL16(icos, FRAC_MUL16(icos, -2597) + 7932); +} + +#ifdef OPUS_FIXED_POINT +/* Compute the amplitude (sqrt energy) in each of the bands */ +void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int M) +{ + int i, c, N; + const opus_int16 *eBands = m->eBands; + N = M*m->shortMdctSize; + c=0; do { + for (i=0;i<end;i++) + { + int j; + opus_val32 maxval=0; + opus_val32 sum = 0; + + j=M*eBands[i]; do { + maxval = MAX32(maxval, X[j+c*N]); + maxval = MAX32(maxval, -X[j+c*N]); + } while 
(++j<M*eBands[i+1]); + + if (maxval > 0) + { + int shift = celt_ilog2(maxval)-10; + j=M*eBands[i]; do { + sum = MAC16_16(sum, EXTRACT16(VSHR32(X[j+c*N],shift)), + EXTRACT16(VSHR32(X[j+c*N],shift))); + } while (++j<M*eBands[i+1]); + /* We're adding one here to ensure the normalized band isn't larger than unity norm */ + bandE[i+c*m->nbEBands] = EPSILON+VSHR32(EXTEND32(celt_sqrt(sum)),-shift); + } else { + bandE[i+c*m->nbEBands] = EPSILON; + } + /*printf ("%f ", bandE[i+c*m->nbEBands]);*/ + } + } while (++c<C); + /*printf ("\n");*/ +} + +/* Normalise each band such that the energy is one. */ +void normalise_bands(const CELTMode *m, const celt_sig * OPUS_RESTRICT freq, celt_norm * OPUS_RESTRICT X, const celt_ener *bandE, int end, int C, int M) +{ + int i, c, N; + const opus_int16 *eBands = m->eBands; + N = M*m->shortMdctSize; + c=0; do { + i=0; do { + opus_val16 g; + int j,shift; + opus_val16 E; + shift = celt_zlog2(bandE[i+c*m->nbEBands])-13; + E = VSHR32(bandE[i+c*m->nbEBands], shift); + g = EXTRACT16(celt_rcp(SHL32(E,3))); + j=M*eBands[i]; do { + X[j+c*N] = MULT16_16_Q15(VSHR32(freq[j+c*N],shift-1),g); + } while (++j<M*eBands[i+1]); + } while (++i<end); + } while (++c<C); +} + +#else /* OPUS_FIXED_POINT */ +/* Compute the amplitude (sqrt energy) in each of the bands */ +void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int M) +{ + int i, c, N; + const opus_int16 *eBands = m->eBands; + N = M*m->shortMdctSize; + c=0; do { + for (i=0;i<end;i++) + { + int j; + opus_val32 sum = 1e-27f; + for (j=M*eBands[i];j<M*eBands[i+1];j++) + sum += X[j+c*N]*X[j+c*N]; + bandE[i+c*m->nbEBands] = celt_sqrt(sum); + /*printf ("%f ", bandE[i+c*m->nbEBands]);*/ + } + } while (++c<C); + /*printf ("\n");*/ +} + +/* Normalise each band such that the energy is one. 
*/ +void normalise_bands(const CELTMode *m, const celt_sig * OPUS_RESTRICT freq, celt_norm * OPUS_RESTRICT X, const celt_ener *bandE, int end, int C, int M) +{ + int i, c, N; + const opus_int16 *eBands = m->eBands; + N = M*m->shortMdctSize; + c=0; do { + for (i=0;i<end;i++) + { + int j; + opus_val16 g = 1.f/(1e-27f+bandE[i+c*m->nbEBands]); + for (j=M*eBands[i];j<M*eBands[i+1];j++) + X[j+c*N] = freq[j+c*N]*g; + } + } while (++c<C); +} + +#endif /* OPUS_FIXED_POINT */ + +/* De-normalise the energy to produce the synthesis from the unit-energy bands */ +void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X, + celt_sig * OPUS_RESTRICT freq, const opus_val16 *bandLogE, int start, int end, int C, int M) +{ + int i, c, N; + const opus_int16 *eBands = m->eBands; + N = M*m->shortMdctSize; + celt_assert2(C<=2, "denormalise_bands() not implemented for >2 channels"); + c=0; do { + celt_sig * OPUS_RESTRICT f; + const celt_norm * OPUS_RESTRICT x; + f = freq+c*N; + x = X+c*N+M*eBands[start]; + for (i=0;i<M*eBands[start];i++) + *f++ = 0; + for (i=start;i<end;i++) + { + int j, band_end; + opus_val16 g; + opus_val16 lg; +#ifdef OPUS_FIXED_POINT + int shift; +#endif + j=M*eBands[i]; + band_end = M*eBands[i+1]; + lg = ADD16(bandLogE[i+c*m->nbEBands], SHL16((opus_val16)eMeans[i],6)); +#ifndef OPUS_FIXED_POINT + g = celt_exp2(lg); +#else + /* Handle the integer part of the log energy */ + shift = 16-(lg>>DB_SHIFT); + if (shift>31) + { + shift=0; + g=0; + } else { + /* Handle the fractional part. */ + g = celt_exp2_frac(lg&((1<<DB_SHIFT)-1)); + } + /* Handle extreme gains with negative shift. */ + if (shift<0) + { + /* For shift < -2 we'd be likely to overflow, so we're capping + the gain here. This shouldn't happen unless the bitstream is + already corrupted. 
*/ + if (shift < -2) + { + g = 32767; + shift = -2; + } + do { + *f++ = SHL32(MULT16_16(*x++, g), -shift); + } while (++j<band_end); + } else +#endif + /* Be careful of the fixed-point "else" just above when changing this code */ + do { + *f++ = SHR32(MULT16_16(*x++, g), shift); + } while (++j<band_end); + } + celt_assert(start <= end); + for (i=M*eBands[end];i<N;i++) + *f++ = 0; + } while (++c<C); +} + +/* This prevents energy collapse for transients with multiple short MDCTs */ +void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_masks, int LM, int C, int size, + int start, int end, opus_val16 *logE, opus_val16 *prev1logE, + opus_val16 *prev2logE, int *pulses, opus_uint32 seed) +{ + int c, i, j, k; + for (i=start;i<end;i++) + { + int N0; + opus_val16 thresh, sqrt_1; + int depth; +#ifdef OPUS_FIXED_POINT + int shift; + opus_val32 thresh32; +#endif + + N0 = m->eBands[i+1]-m->eBands[i]; + /* depth in 1/8 bits */ + depth = (1+pulses[i])/((m->eBands[i+1]-m->eBands[i])<<LM); + +#ifdef OPUS_FIXED_POINT + thresh32 = SHR32(celt_exp2(-SHL16(depth, 10-BITRES)),1); + thresh = MULT16_32_Q15(QCONST16(0.5f, 15), MIN32(32767,thresh32)); + { + opus_val32 t; + t = N0<<LM; + shift = celt_ilog2(t)>>1; + t = SHL32(t, (7-shift)<<1); + sqrt_1 = celt_rsqrt_norm(t); + } +#else + thresh = .5f*celt_exp2(-.125f*depth); + sqrt_1 = celt_rsqrt(N0<<LM); +#endif + + c=0; do + { + celt_norm *X; + opus_val16 prev1; + opus_val16 prev2; + opus_val32 Ediff; + opus_val16 r; + int renormalize=0; + prev1 = prev1logE[c*m->nbEBands+i]; + prev2 = prev2logE[c*m->nbEBands+i]; + if (C==1) + { + prev1 = MAX16(prev1,prev1logE[m->nbEBands+i]); + prev2 = MAX16(prev2,prev2logE[m->nbEBands+i]); + } + Ediff = EXTEND32(logE[c*m->nbEBands+i])-EXTEND32(MIN16(prev1,prev2)); + Ediff = MAX32(0, Ediff); + +#ifdef OPUS_FIXED_POINT + if (Ediff < 16384) + { + opus_val32 r32 = SHR32(celt_exp2(-EXTRACT16(Ediff)),1); + r = 2*MIN16(16383,r32); + } else { + r = 0; + } + if (LM==3) + r = 
MULT16_16_Q14(23170, MIN32(23169, r)); + r = SHR16(MIN16(thresh, r),1); + r = SHR32(MULT16_16_Q15(sqrt_1, r),shift); +#else + /* r needs to be multiplied by 2 or 2*sqrt(2) depending on LM because + short blocks don't have the same energy as long */ + r = 2.f*celt_exp2(-Ediff); + if (LM==3) + r *= 1.41421356f; + r = MIN16(thresh, r); + r = r*sqrt_1; +#endif + X = X_+c*size+(m->eBands[i]<<LM); + for (k=0;k<1<<LM;k++) + { + /* Detect collapse */ + if (!(collapse_masks[i*C+c]&1<<k)) + { + /* Fill with noise */ + for (j=0;j<N0;j++) + { + seed = celt_lcg_rand(seed); + X[(j<<LM)+k] = (seed&0x8000 ? r : -r); + } + renormalize = 1; + } + } + /* We just added some energy, so we need to renormalise */ + if (renormalize) + renormalise_vector(X, N0<<LM, Q15ONE); + } while (++c<C); + } +} + +static void intensity_stereo(const CELTMode *m, celt_norm *X, celt_norm *Y, const celt_ener *bandE, int bandID, int N) +{ + int i = bandID; + int j; + opus_val16 a1, a2; + opus_val16 left, right; + opus_val16 norm; +#ifdef OPUS_FIXED_POINT + int shift = celt_zlog2(MAX32(bandE[i], bandE[i+m->nbEBands]))-13; +#endif + left = VSHR32(bandE[i],shift); + right = VSHR32(bandE[i+m->nbEBands],shift); + norm = EPSILON + celt_sqrt(EPSILON+MULT16_16(left,left)+MULT16_16(right,right)); + a1 = DIV32_16(SHL32(EXTEND32(left),14),norm); + a2 = DIV32_16(SHL32(EXTEND32(right),14),norm); + for (j=0;j<N;j++) + { + celt_norm r, l; + l = X[j]; + r = Y[j]; + X[j] = MULT16_16_Q14(a1,l) + MULT16_16_Q14(a2,r); + /* Side is not encoded, no need to calculate */ + } +} + +static void stereo_split(celt_norm *X, celt_norm *Y, int N) +{ + int j; + for (j=0;j<N;j++) + { + celt_norm r, l; + l = MULT16_16_Q15(QCONST16(.70710678f,15), X[j]); + r = MULT16_16_Q15(QCONST16(.70710678f,15), Y[j]); + X[j] = l+r; + Y[j] = r-l; + } +} + +static void stereo_merge(celt_norm *X, celt_norm *Y, opus_val16 mid, int N) +{ + int j; + opus_val32 xp=0, side=0; + opus_val32 El, Er; + opus_val16 mid2; +#ifdef OPUS_FIXED_POINT + int kl, kr; +#endif 
+ opus_val32 t, lgain, rgain; + + /* Compute the norm of X+Y and X-Y as |X|^2 + |Y|^2 +/- sum(xy) */ + dual_inner_prod(Y, X, Y, N, &xp, &side); + /* Compensating for the mid normalization */ + xp = MULT16_32_Q15(mid, xp); + /* mid and side are in Q15, not Q14 like X and Y */ + mid2 = SHR32(mid, 1); + El = MULT16_16(mid2, mid2) + side - 2*xp; + Er = MULT16_16(mid2, mid2) + side + 2*xp; + if (Er < QCONST32(6e-4f, 28) || El < QCONST32(6e-4f, 28)) + { + for (j=0;j<N;j++) + Y[j] = X[j]; + return; + } + +#ifdef OPUS_FIXED_POINT + kl = celt_ilog2(El)>>1; + kr = celt_ilog2(Er)>>1; +#endif + t = VSHR32(El, (kl-7)<<1); + lgain = celt_rsqrt_norm(t); + t = VSHR32(Er, (kr-7)<<1); + rgain = celt_rsqrt_norm(t); + +#ifdef OPUS_FIXED_POINT + if (kl < 7) + kl = 7; + if (kr < 7) + kr = 7; +#endif + + for (j=0;j<N;j++) + { + celt_norm r, l; + /* Apply mid scaling (side is already scaled) */ + l = MULT16_16_Q15(mid, X[j]); + r = Y[j]; + X[j] = EXTRACT16(PSHR32(MULT16_16(lgain, SUB16(l,r)), kl+1)); + Y[j] = EXTRACT16(PSHR32(MULT16_16(rgain, ADD16(l,r)), kr+1)); + } +} + +/* Decide whether we should spread the pulses in the current frame */ +int spreading_decision(const CELTMode *m, celt_norm *X, int *average, + int last_decision, int *hf_average, int *tapset_decision, int update_hf, + int end, int C, int M) +{ + int i, c, N0; + int sum = 0, nbBands=0; + const opus_int16 * OPUS_RESTRICT eBands = m->eBands; + int decision; + int hf_sum=0; + + celt_assert(end>0); + + N0 = M*m->shortMdctSize; + + if (M*(eBands[end]-eBands[end-1]) <= 8) + return SPREAD_NONE; + c=0; do { + for (i=0;i<end;i++) + { + int j, N, tmp=0; + int tcount[3] = {0,0,0}; + celt_norm * OPUS_RESTRICT x = X+M*eBands[i]+c*N0; + N = M*(eBands[i+1]-eBands[i]); + if (N<=8) + continue; + /* Compute rough CDF of |x[j]| */ + for (j=0;j<N;j++) + { + opus_val32 x2N; /* Q13 */ + + x2N = MULT16_16(MULT16_16_Q15(x[j], x[j]), N); + if (x2N < QCONST16(0.25f,13)) + tcount[0]++; + if (x2N < QCONST16(0.0625f,13)) + tcount[1]++; + if (x2N < 
QCONST16(0.015625f,13)) + tcount[2]++; + } + + /* Only include four last bands (8 kHz and up) */ + if (i>m->nbEBands-4) + hf_sum += 32*(tcount[1]+tcount[0])/N; + tmp = (2*tcount[2] >= N) + (2*tcount[1] >= N) + (2*tcount[0] >= N); + sum += tmp*256; + nbBands++; + } + } while (++c<C); + + if (update_hf) + { + if (hf_sum) + hf_sum /= C*(4-m->nbEBands+end); + *hf_average = (*hf_average+hf_sum)>>1; + hf_sum = *hf_average; + if (*tapset_decision==2) + hf_sum += 4; + else if (*tapset_decision==0) + hf_sum -= 4; + if (hf_sum > 22) + *tapset_decision=2; + else if (hf_sum > 18) + *tapset_decision=1; + else + *tapset_decision=0; + } + /*printf("%d %d %d\n", hf_sum, *hf_average, *tapset_decision);*/ + celt_assert(nbBands>0); /* end has to be non-zero */ + sum /= nbBands; + /* Recursive averaging */ + sum = (sum+*average)>>1; + *average = sum; + /* Hysteresis */ + sum = (3*sum + (((3-last_decision)<<7) + 64) + 2)>>2; + if (sum < 80) + { + decision = SPREAD_AGGRESSIVE; + } else if (sum < 256) + { + decision = SPREAD_NORMAL; + } else if (sum < 384) + { + decision = SPREAD_LIGHT; + } else { + decision = SPREAD_NONE; + } +#ifdef FUZZING + decision = rand()&0x3; + *tapset_decision=rand()%3; +#endif + return decision; +} + +/* Indexing table for converting from natural Hadamard to ordery Hadamard + This is essentially a bit-reversed Gray, on top of which we've added + an inversion of the order because we want the DC at the end rather than + the beginning. 
The lines are for N=2, 4, 8, 16 */ +static const int ordery_table[] = { + 1, 0, + 3, 0, 2, 1, + 7, 0, 4, 3, 6, 1, 5, 2, + 15, 0, 8, 7, 12, 3, 11, 4, 14, 1, 9, 6, 13, 2, 10, 5, +}; + +static void deinterleave_hadamard(celt_norm *X, int N0, int stride, int hadamard) +{ + int i,j; + VARDECL(celt_norm, tmp); + int N; + SAVE_STACK; + N = N0*stride; + ALLOC(tmp, N, celt_norm); + celt_assert(stride>0); + if (hadamard) + { + const int *ordery = ordery_table+stride-2; + for (i=0;i<stride;i++) + { + for (j=0;j<N0;j++) + tmp[ordery[i]*N0+j] = X[j*stride+i]; + } + } else { + for (i=0;i<stride;i++) + for (j=0;j<N0;j++) + tmp[i*N0+j] = X[j*stride+i]; + } + for (j=0;j<N;j++) + X[j] = tmp[j]; + RESTORE_STACK; +} + +static void interleave_hadamard(celt_norm *X, int N0, int stride, int hadamard) +{ + int i,j; + VARDECL(celt_norm, tmp); + int N; + SAVE_STACK; + N = N0*stride; + ALLOC(tmp, N, celt_norm); + if (hadamard) + { + const int *ordery = ordery_table+stride-2; + for (i=0;i<stride;i++) + for (j=0;j<N0;j++) + tmp[j*stride+i] = X[ordery[i]*N0+j]; + } else { + for (i=0;i<stride;i++) + for (j=0;j<N0;j++) + tmp[j*stride+i] = X[i*N0+j]; + } + for (j=0;j<N;j++) + X[j] = tmp[j]; + RESTORE_STACK; +} + +void haar1(celt_norm *X, int N0, int stride) +{ + int i, j; + N0 >>= 1; + for (i=0;i<stride;i++) + for (j=0;j<N0;j++) + { + celt_norm tmp1, tmp2; + tmp1 = MULT16_16_Q15(QCONST16(.70710678f,15), X[stride*2*j+i]); + tmp2 = MULT16_16_Q15(QCONST16(.70710678f,15), X[stride*(2*j+1)+i]); + X[stride*2*j+i] = tmp1 + tmp2; + X[stride*(2*j+1)+i] = tmp1 - tmp2; + } +} + +static int compute_qn(int N, int b, int offset, int pulse_cap, int stereo) +{ + static const opus_int16 exp2_table8[8] = + {16384, 17866, 19483, 21247, 23170, 25267, 27554, 30048}; + int qn, qb; + int N2 = 2*N-1; + if (stereo && N==2) + N2--; + /* The upper limit ensures that in a stereo split with itheta==16384, we'll + always have enough bits left over to code at least one pulse in the + side; otherwise it would collapse, since it 
doesn't get folded. */ + qb = IMIN(b-pulse_cap-(4<<BITRES), (b+N2*offset)/N2); + + qb = IMIN(8<<BITRES, qb); + + if (qb<(1<<BITRES>>1)) { + qn = 1; + } else { + qn = exp2_table8[qb&0x7]>>(14-(qb>>BITRES)); + qn = (qn+1)>>1<<1; + } + celt_assert(qn <= 256); + return qn; +} + +struct band_ctx { + int encode; + const CELTMode *m; + int i; + int intensity; + int spread; + int tf_change; + ec_ctx *ec; + opus_int32 remaining_bits; + const celt_ener *bandE; + opus_uint32 seed; +}; + +struct split_ctx { + int inv; + int imid; + int iside; + int delta; + int itheta; + int qalloc; +}; + +static void compute_theta(struct band_ctx *ctx, struct split_ctx *sctx, + celt_norm *X, celt_norm *Y, int N, int *b, int B, int B0, + int LM, + int stereo, int *fill) +{ + int qn; + int itheta=0; + int delta; + int imid, iside; + int qalloc; + int pulse_cap; + int offset; + opus_int32 tell; + int inv=0; + int encode; + const CELTMode *m; + int i; + int intensity; + ec_ctx *ec; + const celt_ener *bandE; + + encode = ctx->encode; + m = ctx->m; + i = ctx->i; + intensity = ctx->intensity; + ec = ctx->ec; + bandE = ctx->bandE; + + /* Decide on the resolution to give to the split parameter theta */ + pulse_cap = m->logN[i]+LM*(1<<BITRES); + offset = (pulse_cap>>1) - (stereo&&N==2 ? QTHETA_OFFSET_TWOPHASE : QTHETA_OFFSET); + qn = compute_qn(N, *b, offset, pulse_cap, stereo); + if (stereo && i>=intensity) + qn = 1; + if (encode) + { + /* theta is the atan() of the ratio between the (normalized) + side and mid. With just that parameter, we can re-scale both + mid and side because we know that 1) they have unit norm and + 2) they are orthogonal. */ + itheta = stereo_itheta(X, Y, stereo, N); + } + tell = ec_tell_frac(ec); + if (qn!=1) + { + if (encode) + itheta = (itheta*qn+8192)>>14; + + /* Entropy coding of the angle. We use a uniform pdf for the + time split, a step for stereo, and a triangular one for the rest. 
*/ + if (stereo && N>2) + { + int p0 = 3; + int x = itheta; + int x0 = qn/2; + int ft = p0*(x0+1) + x0; + /* Use a probability of p0 up to itheta=8192 and then use 1 after */ + if (encode) + { + ec_encode(ec,x<=x0?p0*x:(x-1-x0)+(x0+1)*p0,x<=x0?p0*(x+1):(x-x0)+(x0+1)*p0,ft); + } else { + int fs; + fs=ec_decode(ec,ft); + if (fs<(x0+1)*p0) + x=fs/p0; + else + x=x0+1+(fs-(x0+1)*p0); + ec_dec_update(ec,x<=x0?p0*x:(x-1-x0)+(x0+1)*p0,x<=x0?p0*(x+1):(x-x0)+(x0+1)*p0,ft); + itheta = x; + } + } else if (B0>1 || stereo) { + /* Uniform pdf */ + if (encode) + ec_enc_uint(ec, itheta, qn+1); + else + itheta = ec_dec_uint(ec, qn+1); + } else { + int fs=1, ft; + ft = ((qn>>1)+1)*((qn>>1)+1); + if (encode) + { + int fl; + + fs = itheta <= (qn>>1) ? itheta + 1 : qn + 1 - itheta; + fl = itheta <= (qn>>1) ? itheta*(itheta + 1)>>1 : + ft - ((qn + 1 - itheta)*(qn + 2 - itheta)>>1); + + ec_encode(ec, fl, fl+fs, ft); + } else { + /* Triangular pdf */ + int fl=0; + int fm; + fm = ec_decode(ec, ft); + + if (fm < ((qn>>1)*((qn>>1) + 1)>>1)) + { + itheta = (isqrt32(8*(opus_uint32)fm + 1) - 1)>>1; + fs = itheta + 1; + fl = itheta*(itheta + 1)>>1; + } + else + { + itheta = (2*(qn + 1) + - isqrt32(8*(opus_uint32)(ft - fm - 1) + 1))>>1; + fs = qn + 1 - itheta; + fl = ft - ((qn + 1 - itheta)*(qn + 2 - itheta)>>1); + } + + ec_dec_update(ec, fl, fl+fs, ft); + } + } + itheta = (opus_int32)itheta*16384/qn; + if (encode && stereo) + { + if (itheta==0) + intensity_stereo(m, X, Y, bandE, i, N); + else + stereo_split(X, Y, N); + } + /* NOTE: Renormalising X and Y *may* help fixed-point a bit at very high rate. 
+ Let's do that at higher complexity */ + } else if (stereo) { + if (encode) + { + inv = itheta > 8192; + if (inv) + { + int j; + for (j=0;j<N;j++) + Y[j] = -Y[j]; + } + intensity_stereo(m, X, Y, bandE, i, N); + } + if (*b>2<<BITRES && ctx->remaining_bits > 2<<BITRES) + { + if (encode) + ec_enc_bit_logp(ec, inv, 2); + else + inv = ec_dec_bit_logp(ec, 2); + } else + inv = 0; + itheta = 0; + } + qalloc = ec_tell_frac(ec) - tell; + *b -= qalloc; + + if (itheta == 0) + { + imid = 32767; + iside = 0; + *fill &= (1<<B)-1; + delta = -16384; + } else if (itheta == 16384) + { + imid = 0; + iside = 32767; + *fill &= ((1<<B)-1)<<B; + delta = 16384; + } else { + imid = bitexact_cos((opus_int16)itheta); + iside = bitexact_cos((opus_int16)(16384-itheta)); + /* This is the mid vs side allocation that minimizes squared error + in that band. */ + delta = FRAC_MUL16((N-1)<<7,bitexact_log2tan(iside,imid)); + } + + sctx->inv = inv; + sctx->imid = imid; + sctx->iside = iside; + sctx->delta = delta; + sctx->itheta = itheta; + sctx->qalloc = qalloc; +} +static unsigned quant_band_n1(struct band_ctx *ctx, celt_norm *X, celt_norm *Y, int b, + celt_norm *lowband_out) +{ +#ifdef RESYNTH + int resynth = 1; +#else + int resynth = !ctx->encode; +#endif + int c; + int stereo; + celt_norm *x = X; + int encode; + ec_ctx *ec; + + encode = ctx->encode; + ec = ctx->ec; + + stereo = Y != NULL; + c=0; do { + int sign=0; + if (ctx->remaining_bits>=1<<BITRES) + { + if (encode) + { + sign = x[0]<0; + ec_enc_bits(ec, sign, 1); + } else { + sign = ec_dec_bits(ec, 1); + } + ctx->remaining_bits -= 1<<BITRES; + b-=1<<BITRES; + } + if (resynth) + x[0] = sign ? -NORM_SCALING : NORM_SCALING; + x = Y; + } while (++c<1+stereo); + if (lowband_out) + lowband_out[0] = SHR16(X[0],4); + return 1; +} + +/* This function is responsible for encoding and decoding a mono partition. + It can split the band in two and transmit the energy difference with + the two half-bands. 
It can be called recursively so bands can end up being + split in 8 parts. */ +static unsigned quant_partition(struct band_ctx *ctx, celt_norm *X, + int N, int b, int B, celt_norm *lowband, + int LM, + opus_val16 gain, int fill) +{ + const unsigned char *cache; + int q; + int curr_bits; + int imid=0, iside=0; + int B0=B; + opus_val16 mid=0, side=0; + unsigned cm=0; +#ifdef RESYNTH + int resynth = 1; +#else + int resynth = !ctx->encode; +#endif + celt_norm *Y=NULL; + int encode; + const CELTMode *m; + int i; + int spread; + ec_ctx *ec; + + encode = ctx->encode; + m = ctx->m; + i = ctx->i; + spread = ctx->spread; + ec = ctx->ec; + + /* If we need 1.5 more bit than we can produce, split the band in two. */ + cache = m->cache.bits + m->cache.index[(LM+1)*m->nbEBands+i]; + if (LM != -1 && b > cache[cache[0]]+12 && N>2) + { + int mbits, sbits, delta; + int itheta; + int qalloc; + struct split_ctx sctx; + celt_norm *next_lowband2=NULL; + opus_int32 rebalance; + + N >>= 1; + Y = X+N; + LM -= 1; + if (B==1) + fill = (fill&1)|(fill<<1); + B = (B+1)>>1; + + compute_theta(ctx, &sctx, X, Y, N, &b, B, B0, + LM, 0, &fill); + imid = sctx.imid; + iside = sctx.iside; + delta = sctx.delta; + itheta = sctx.itheta; + qalloc = sctx.qalloc; +#ifdef OPUS_FIXED_POINT + mid = imid; + side = iside; +#else + mid = (1.f/32768)*imid; + side = (1.f/32768)*iside; +#endif + + /* Give more bits to low-energy MDCTs than they would otherwise deserve */ + if (B0>1 && (itheta&0x3fff)) + { + if (itheta > 8192) + /* Rough approximation for pre-echo masking */ + delta -= delta>>(4-LM); + else + /* Corresponds to a forward-masking slope of 1.5 dB per 10 ms */ + delta = IMIN(0, delta + (N<<BITRES>>(5-LM))); + } + mbits = IMAX(0, IMIN(b, (b-delta)/2)); + sbits = b-mbits; + ctx->remaining_bits -= qalloc; + + if (lowband) + next_lowband2 = lowband+N; /* >32-bit split case */ + + rebalance = ctx->remaining_bits; + if (mbits >= sbits) + { + cm = quant_partition(ctx, X, N, mbits, B, + lowband, LM, + 
MULT16_16_P15(gain,mid), fill); + rebalance = mbits - (rebalance-ctx->remaining_bits); + if (rebalance > 3<<BITRES && itheta!=0) + sbits += rebalance - (3<<BITRES); + cm |= quant_partition(ctx, Y, N, sbits, B, + next_lowband2, LM, + MULT16_16_P15(gain,side), fill>>B)<<(B0>>1); + } else { + cm = quant_partition(ctx, Y, N, sbits, B, + next_lowband2, LM, + MULT16_16_P15(gain,side), fill>>B)<<(B0>>1); + rebalance = sbits - (rebalance-ctx->remaining_bits); + if (rebalance > 3<<BITRES && itheta!=16384) + mbits += rebalance - (3<<BITRES); + cm |= quant_partition(ctx, X, N, mbits, B, + lowband, LM, + MULT16_16_P15(gain,mid), fill); + } + } else { + /* This is the basic no-split case */ + q = bits2pulses(m, i, LM, b); + curr_bits = pulses2bits(m, i, LM, q); + ctx->remaining_bits -= curr_bits; + + /* Ensures we can never bust the budget */ + while (ctx->remaining_bits < 0 && q > 0) + { + ctx->remaining_bits += curr_bits; + q--; + curr_bits = pulses2bits(m, i, LM, q); + ctx->remaining_bits -= curr_bits; + } + + if (q!=0) + { + int K = get_pulses(q); + + /* Finally do the actual quantization */ + if (encode) + { + cm = alg_quant(X, N, K, spread, B, ec +#ifdef RESYNTH + , gain +#endif + ); + } else { + cm = alg_unquant(X, N, K, spread, B, ec, gain); + } + } else { + /* If there's no pulse, fill the band anyway */ + int j; + if (resynth) + { + unsigned cm_mask; + /* B can be as large as 16, so this shift might overflow an int on a + 16-bit platform; use a long to get defined behavior.*/ + cm_mask = (unsigned)(1UL<<B)-1; + fill &= cm_mask; + if (!fill) + { + for (j=0;j<N;j++) + X[j] = 0; + } else { + if (lowband == NULL) + { + /* Noise */ + for (j=0;j<N;j++) + { + ctx->seed = celt_lcg_rand(ctx->seed); + X[j] = (celt_norm)((opus_int32)ctx->seed>>20); + } + cm = cm_mask; + } else { + /* Folded spectrum */ + for (j=0;j<N;j++) + { + opus_val16 tmp; + ctx->seed = celt_lcg_rand(ctx->seed); + /* About 48 dB below the "normal" folding level */ + tmp = QCONST16(1.0f/256, 10); + tmp = 
(ctx->seed)&0x8000 ? tmp : -tmp; + X[j] = lowband[j]+tmp; + } + cm = fill; + } + renormalise_vector(X, N, gain); + } + } + } + } + + return cm; +} + + +/* This function is responsible for encoding and decoding a band for the mono case. + It applies the time-frequency changes requested by ctx->tf_change (haar1 recombining or splitting, then Hadamard reordering when B0>1), codes the band with quant_partition() and, when resynth is set, undoes those changes and writes the scaled result to lowband_out. + Returns the collapse mask. */ +static unsigned quant_band(struct band_ctx *ctx, celt_norm *X, + int N, int b, int B, celt_norm *lowband, + int LM, celt_norm *lowband_out, + opus_val16 gain, celt_norm *lowband_scratch, int fill) +{ + int N0=N; + int N_B=N; + int N_B0; + int B0=B; + int time_divide=0; + int recombine=0; + int longBlocks; + unsigned cm=0; +#ifdef RESYNTH + int resynth = 1; +#else + int resynth = !ctx->encode; +#endif + int k; + int encode; + int tf_change; + + encode = ctx->encode; + tf_change = ctx->tf_change; + + longBlocks = B0==1; + + N_B /= B; + + /* Special case for one sample */ + if (N==1) + { + return quant_band_n1(ctx, X, NULL, b, lowband_out); + } + + if (tf_change>0) + recombine = tf_change; + /* Band recombining to increase frequency resolution */ + + /* The transforms below are applied to lowband directly, so switch to a private copy in lowband_scratch whenever one of them will run. */ if (lowband_scratch && lowband && (recombine || ((N_B&1) == 0 && tf_change<0) || B0>1)) + { + int j; + for (j=0;j<N;j++) + lowband_scratch[j] = lowband[j]; + lowband = lowband_scratch; + } + + for (k=0;k<recombine;k++) + { + static const unsigned char bit_interleave_table[16]={ + 0,1,1,1,2,3,3,3,2,3,3,3,2,3,3,3 + }; + if (encode) + haar1(X, N>>k, 1<<k); + if (lowband) + haar1(lowband, N>>k, 1<<k); + fill = bit_interleave_table[fill&0xF]|bit_interleave_table[fill>>4]<<2; + } + B>>=recombine; + N_B<<=recombine; + + /* Increasing the time resolution */ + while ((N_B&1) == 0 && tf_change<0) + { + if (encode) + haar1(X, N_B, B); + if (lowband) + haar1(lowband, N_B, B); + fill |= fill<<B; + B <<= 1; + N_B >>= 1; + time_divide++; + tf_change++; + } + B0=B; + N_B0 = N_B; + + /* Reorganize the samples in time order instead of frequency order */ + if (B0>1) + { + if (encode) + deinterleave_hadamard(X, N_B>>recombine, B0<<recombine, longBlocks); + if (lowband) + deinterleave_hadamard(lowband, 
N_B>>recombine, B0<<recombine, longBlocks); + } + + cm = quant_partition(ctx, X, N, b, B, lowband, + LM, gain, fill); + + /* This code is used by the decoder and by the resynthesis-enabled encoder */ + if (resynth) + { + /* Undo the sample reorganization going from time order to frequency order */ + if (B0>1) + interleave_hadamard(X, N_B>>recombine, B0<<recombine, longBlocks); + + /* Undo time-freq changes that we did earlier */ + N_B = N_B0; + B = B0; + for (k=0;k<time_divide;k++) + { + B >>= 1; + N_B <<= 1; + cm |= cm>>B; + haar1(X, N_B, B); + } + + /* Undo the recombining: each pass expands the collapse mask through bit_deinterleave_table and applies haar1 again. */ for (k=0;k<recombine;k++) + { + static const unsigned char bit_deinterleave_table[16]={ + 0x00,0x03,0x0C,0x0F,0x30,0x33,0x3C,0x3F, + 0xC0,0xC3,0xCC,0xCF,0xF0,0xF3,0xFC,0xFF + }; + cm = bit_deinterleave_table[cm]; + haar1(X, N0>>k, 1<<k); + } + B<<=recombine; + + /* Scale output for later folding */ + if (lowband_out) + { + int j; + opus_val16 n; + n = celt_sqrt(SHL32(EXTEND32(N0),22)); + for (j=0;j<N0;j++) + lowband_out[j] = MULT16_16_Q15(n,X[j]); + } + /* Keep only the B low bits of the mask. */ cm &= (1<<B)-1; + } + return cm; +} + + +/* This function is responsible for encoding and decoding a band for the stereo case. 
*/ /* Mid and side are coded through quant_band(); when resynth is set the two channels are rebuilt in X and Y before the collapse mask is returned. */ +static unsigned quant_band_stereo(struct band_ctx *ctx, celt_norm *X, celt_norm *Y, + int N, int b, int B, celt_norm *lowband, + int LM, celt_norm *lowband_out, + celt_norm *lowband_scratch, int fill) +{ + int imid=0, iside=0; + int inv = 0; + opus_val16 mid=0, side=0; + unsigned cm=0; +#ifdef RESYNTH + int resynth = 1; +#else + int resynth = !ctx->encode; +#endif + int mbits, sbits, delta; + int itheta; + int qalloc; + struct split_ctx sctx; + int orig_fill; + int encode; + ec_ctx *ec; + + encode = ctx->encode; + ec = ctx->ec; + + /* Special case for one sample */ + if (N==1) + { + return quant_band_n1(ctx, X, Y, b, lowband_out); + } + + orig_fill = fill; + + /* compute_theta() fills sctx; b and fill are passed by address, so they may come back modified. */ compute_theta(ctx, &sctx, X, Y, N, &b, B, B, + LM, 1, &fill); + inv = sctx.inv; + imid = sctx.imid; + iside = sctx.iside; + delta = sctx.delta; + itheta = sctx.itheta; + qalloc = sctx.qalloc; +#ifdef OPUS_FIXED_POINT + mid = imid; + side = iside; +#else + mid = (1.f/32768)*imid; + side = (1.f/32768)*iside; +#endif + + /* This is a special case for N=2 that only works for stereo and takes + advantage of the fact that mid and side are orthogonal to encode + the side with just one bit. */ + if (N==2) + { + int c; + int sign=0; + celt_norm *x2, *y2; + mbits = b; + sbits = 0; + /* Only need one bit for the side. */ + if (itheta != 0 && itheta != 16384) + sbits = 1<<BITRES; + mbits -= sbits; + c = itheta > 8192; + ctx->remaining_bits -= qalloc+sbits; + + /* x2 is the channel that gets coded with quant_band(): Y when itheta > 8192, X otherwise; y2 is the other one. */ x2 = c ? Y : X; + y2 = c ? X : Y; + if (sbits) + { + if (encode) + { + /* Here we only need to encode a sign for the side. */ + sign = x2[0]*y2[1] - x2[1]*y2[0] < 0; + ec_enc_bits(ec, sign, 1); + } else { + sign = ec_dec_bits(ec, 1); + } + } + /* Map the coded bit {0,1} to a multiplier {+1,-1}. */ sign = 1-2*sign; + /* We use orig_fill here because we want to fold the side, but if + itheta==16384, we'll have cleared the low bits of fill. 
*/ + cm = quant_band(ctx, x2, N, mbits, B, lowband, + LM, lowband_out, Q15ONE, lowband_scratch, orig_fill); + /* We don't split N=2 bands, so cm is either 1 or 0 (for a fold-collapse), + and there's no need to worry about mixing with the other channel. */ + /* y2 is x2 rotated by 90 degrees, with the coded sign applied. */ y2[0] = -sign*x2[1]; + y2[1] = sign*x2[0]; + if (resynth) + { + celt_norm tmp; + X[0] = MULT16_16_Q15(mid, X[0]); + X[1] = MULT16_16_Q15(mid, X[1]); + Y[0] = MULT16_16_Q15(side, Y[0]); + Y[1] = MULT16_16_Q15(side, Y[1]); + /* After the scaling above: X becomes X-Y and Y becomes X+Y, sample by sample. */ tmp = X[0]; + X[0] = SUB16(tmp,Y[0]); + Y[0] = ADD16(tmp,Y[0]); + tmp = X[1]; + X[1] = SUB16(tmp,Y[1]); + Y[1] = ADD16(tmp,Y[1]); + } + } else { + /* "Normal" split code */ + opus_int32 rebalance; + + mbits = IMAX(0, IMIN(b, (b-delta)/2)); + sbits = b-mbits; + ctx->remaining_bits -= qalloc; + + rebalance = ctx->remaining_bits; + if (mbits >= sbits) + { + /* In stereo mode, we do not apply a scaling to the mid because we need the normalized + mid for folding later. */ + cm = quant_band(ctx, X, N, mbits, B, + lowband, LM, lowband_out, + Q15ONE, lowband_scratch, fill); + rebalance = mbits - (rebalance-ctx->remaining_bits); + if (rebalance > 3<<BITRES && itheta!=0) + sbits += rebalance - (3<<BITRES); + + /* For a stereo split, the high bits of fill are always zero, so no + folding will be done to the side. */ + cm |= quant_band(ctx, Y, N, sbits, B, + NULL, LM, NULL, + side, NULL, fill>>B); + } else { + /* For a stereo split, the high bits of fill are always zero, so no + folding will be done to the side. */ + cm = quant_band(ctx, Y, N, sbits, B, + NULL, LM, NULL, + side, NULL, fill>>B); + rebalance = sbits - (rebalance-ctx->remaining_bits); + if (rebalance > 3<<BITRES && itheta!=16384) + mbits += rebalance - (3<<BITRES); + /* In stereo mode, we do not apply a scaling to the mid because we need the normalized + mid for folding later. 
*/ + cm |= quant_band(ctx, X, N, mbits, B, + lowband, LM, lowband_out, + Q15ONE, lowband_scratch, fill); + } + } + + + /* This code is used by the decoder and by the resynthesis-enabled encoder */ + if (resynth) + { + if (N!=2) + stereo_merge(X, Y, mid, N); + /* inv (copied from sctx.inv) flips the sign of the rebuilt Y channel. */ if (inv) + { + int j; + for (j=0;j<N;j++) + Y[j] = -Y[j]; + } + } + return cm; +} + + /* Quantises (encode != 0) or decodes the residual of bands start..end-1, one band per loop iteration. Each band's collapse mask is stored in collapse_masks[] (C entries per band) and the random seed is read from *seed on entry and written back on exit. */ +void quant_all_bands(int encode, const CELTMode *m, int start, int end, + celt_norm *X_, celt_norm *Y_, unsigned char *collapse_masks, const celt_ener *bandE, int *pulses, + int shortBlocks, int spread, int dual_stereo, int intensity, int *tf_res, + opus_int32 total_bits, opus_int32 balance, ec_ctx *ec, int LM, int codedBands, opus_uint32 *seed) +{ + int i; + opus_int32 remaining_bits; + const opus_int16 * OPUS_RESTRICT eBands = m->eBands; + celt_norm * OPUS_RESTRICT norm, * OPUS_RESTRICT norm2; + VARDECL(celt_norm, _norm); + celt_norm *lowband_scratch; + int B; + int M; + int lowband_offset; + int update_lowband = 1; + int C = Y_ != NULL ? 2 : 1; + int norm_offset; +#ifdef RESYNTH + int resynth = 1; +#else + int resynth = !encode; +#endif + struct band_ctx ctx; + SAVE_STACK; + + M = 1<<LM; + B = shortBlocks ? M : 1; + norm_offset = M*eBands[start]; + /* No need to allocate norm for the last band because we don't need an + output in that band. */ + ALLOC(_norm, C*(M*eBands[m->nbEBands-1]-norm_offset), celt_norm); + norm = _norm; + norm2 = norm + M*eBands[m->nbEBands-1]-norm_offset; + /* We can use the last band as scratch space because we don't need that + scratch space for the last band. 
*/ + lowband_scratch = X_+M*eBands[m->nbEBands-1]; + + lowband_offset = 0; + /* State shared with the per-band coders through ctx. */ ctx.bandE = bandE; + ctx.ec = ec; + ctx.encode = encode; + ctx.intensity = intensity; + ctx.m = m; + ctx.seed = *seed; + ctx.spread = spread; + for (i=start;i<end;i++) + { + opus_int32 tell; + int b; + int N; + opus_int32 curr_balance; + int effective_lowband=-1; + celt_norm * OPUS_RESTRICT X, * OPUS_RESTRICT Y; + int tf_change=0; + unsigned x_cm; + unsigned y_cm; + int last; + + ctx.i = i; + last = (i==end-1); + + X = X_+M*eBands[i]; + if (Y_!=NULL) + Y = Y_+M*eBands[i]; + else + Y = NULL; + /* Number of coefficients in band i. */ N = M*eBands[i+1]-M*eBands[i]; + tell = ec_tell_frac(ec); + + /* Compute how many bits we want to allocate to this band */ + if (i != start) + balance -= tell; + remaining_bits = total_bits-tell-1; + ctx.remaining_bits = remaining_bits; + if (i <= codedBands-1) + { + curr_balance = balance / IMIN(3, codedBands-i); + b = IMAX(0, IMIN(16383, IMIN(remaining_bits+1,pulses[i]+curr_balance))); + } else { + b = 0; + } + + if (resynth && M*eBands[i]-N >= M*eBands[start] && (update_lowband || lowband_offset==0)) + lowband_offset = i; + + tf_change = tf_res[i]; + ctx.tf_change = tf_change; + /* Bands at or beyond effEBands are coded into the norm buffer instead of X_/Y_, and without scratch space. */ if (i>=m->effEBands) + { + X=norm; + if (Y_!=NULL) + Y = norm; + lowband_scratch = NULL; + } + if (i==end-1) + lowband_scratch = NULL; + + /* Get a conservative estimate of the collapse_mask's for the bands we're + going to be folding from. 
*/ + if (lowband_offset != 0 && (spread!=SPREAD_AGGRESSIVE || B>1 || tf_change<0)) + { + int fold_start; + int fold_end; + int fold_i; + /* This ensures we never repeat spectral content within one band */ + effective_lowband = IMAX(0, M*eBands[lowband_offset]-norm_offset-N); + /* Find the band range [fold_start, fold_end) that covers the N folded coefficients, then OR the masks of those bands. */ fold_start = lowband_offset; + while(M*eBands[--fold_start] > effective_lowband+norm_offset); + fold_end = lowband_offset-1; + while(M*eBands[++fold_end] < effective_lowband+norm_offset+N); + x_cm = y_cm = 0; + fold_i = fold_start; do { + x_cm |= collapse_masks[fold_i*C+0]; + y_cm |= collapse_masks[fold_i*C+C-1]; + } while (++fold_i<fold_end); + } + /* Otherwise, we'll be using the LCG to fold, so all blocks will (almost + always) be non-zero. */ + else + x_cm = y_cm = (1<<B)-1; + + if (dual_stereo && i==intensity) + { + int j; + + /* Switch off dual stereo to do intensity. */ + dual_stereo = 0; + if (resynth) + for (j=0;j<M*eBands[i]-norm_offset;j++) + norm[j] = HALF32(norm[j]+norm2[j]); + } + /* Dual stereo codes each channel on its own with b/2 bits and its own folding buffer (norm for X, norm2 for Y). */ if (dual_stereo) + { + x_cm = quant_band(&ctx, X, N, b/2, B, + effective_lowband != -1 ? norm+effective_lowband : NULL, LM, + last?NULL:norm+M*eBands[i]-norm_offset, Q15ONE, lowband_scratch, x_cm); + y_cm = quant_band(&ctx, Y, N, b/2, B, + effective_lowband != -1 ? norm2+effective_lowband : NULL, LM, + last?NULL:norm2+M*eBands[i]-norm_offset, Q15ONE, lowband_scratch, y_cm); + } else { + if (Y!=NULL) + { + x_cm = quant_band_stereo(&ctx, X, Y, N, b, B, + effective_lowband != -1 ? norm+effective_lowband : NULL, LM, + last?NULL:norm+M*eBands[i]-norm_offset, lowband_scratch, x_cm|y_cm); + } else { + x_cm = quant_band(&ctx, X, N, b, B, + effective_lowband != -1 ? norm+effective_lowband : NULL, LM, + last?NULL:norm+M*eBands[i]-norm_offset, Q15ONE, lowband_scratch, x_cm|y_cm); + } + y_cm = x_cm; + } + collapse_masks[i*C+0] = (unsigned char)x_cm; + collapse_masks[i*C+C-1] = (unsigned char)y_cm; + balance += pulses[i] + tell; + + /* Update the folding position only as long as we have 1 bit/sample depth. 
*/ + update_lowband = b>(N<<BITRES); + } + /* Hand the advanced random seed back to the caller. */ *seed = ctx.seed; + + RESTORE_STACK; +} + diff --git a/drivers/opus/celt/bands.h b/drivers/opus/celt/bands.h new file mode 100644 index 0000000000..fe1e47097a --- /dev/null +++ b/drivers/opus/celt/bands.h @@ -0,0 +1,114 @@ +/* Copyright (c) 2007-2008 CSIRO + Copyright (c) 2007-2009 Xiph.Org Foundation + Copyright (c) 2008-2009 Gregory Maxwell + Written by Jean-Marc Valin and Gregory Maxwell */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#ifndef BANDS_H +#define BANDS_H + +#include "arch.h" +#include "opus_modes.h" +#include "entenc.h" +#include "entdec.h" +#include "rate.h" + +/** Compute the amplitude (sqrt energy) in each of the bands + * @param m Mode data + * @param X Spectrum + * @param bandE Square root of the energy for each band (returned) + */ +void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int M); + +/*void compute_noise_energies(const CELTMode *m, const celt_sig *X, const opus_val16 *tonality, celt_ener *bandE);*/ + +/** Normalise each band of the spectrum such that the energy in each band is + equal to 1 + * @param m Mode data + * @param freq Input spectrum (read only) + * @param X Normalised spectrum (returned) + * @param bandE Square root of the energy for each band + */ +void normalise_bands(const CELTMode *m, const celt_sig * OPUS_RESTRICT freq, celt_norm * OPUS_RESTRICT X, const celt_ener *bandE, int end, int C, int M); + +/** Denormalise each band of X to restore full amplitude + * @param m Mode data + * @param X Normalised spectrum (read only) + * @param freq De-normalised spectrum (returned) + * @param bandE Square root of the energy for each band + */ +void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X, + celt_sig * OPUS_RESTRICT freq, const opus_val16 *bandE, int start, int end, int C, int M); + +#define SPREAD_NONE (0) +#define SPREAD_LIGHT (1) +#define SPREAD_NORMAL (2) +#define SPREAD_AGGRESSIVE (3) + +int spreading_decision(const CELTMode *m, celt_norm *X, int *average, + int last_decision, int *hf_average, int *tapset_decision, int update_hf, + int end, int C, int M); + +#ifdef MEASURE_NORM_MSE +void measure_norm_mse(const CELTMode *m, float *X, float *X0, float *bandE, float *bandE0, int M, int N, int C); +#endif + +void haar1(celt_norm *X, int N0, int stride); + +/** Quantisation/encoding of the residual spectrum + * @param encode flag that indicates whether we're encoding (1) or decoding (0) + * @param m Mode data + * @param start First band to process + * @param end Last band to 
process + 1 + * @param X Residual (normalised) + * @param Y Residual (normalised) for second channel (or NULL for mono) + * @param collapse_masks Anti-collapse tracking mask + * @param bandE Square root of the energy for each band + * @param pulses Bit allocation (per band) for PVQ + * @param shortBlocks Zero for long blocks, non-zero for short blocks + * @param spread Amount of spreading to use + * @param dual_stereo Zero for MS stereo, non-zero for dual stereo + * @param intensity First band to use intensity stereo + * @param tf_res Time-frequency resolution change + * @param total_bits Total number of bits that can be used for the frame (including the ones already spent) + * @param balance Number of unallocated bits + * @param ec Entropy coder state + * @param LM log2() of the number of 2.5 subframes in the frame (this prototype names the parameter M; the definition names it LM and derives M = 1<<LM from it) + * @param codedBands Last band to receive bits + 1 + * @param seed Random generator seed (read on entry, updated on return) + */ +void quant_all_bands(int encode, const CELTMode *m, int start, int end, + celt_norm * X, celt_norm * Y, unsigned char *collapse_masks, const celt_ener *bandE, int *pulses, + int shortBlocks, int spread, int dual_stereo, int intensity, int *tf_res, + opus_int32 total_bits, opus_int32 balance, ec_ctx *ec, int M, int codedBands, opus_uint32 *seed); + +void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_masks, int LM, int C, int size, + int start, int end, opus_val16 *logE, opus_val16 *prev1logE, + opus_val16 *prev2logE, int *pulses, opus_uint32 seed); + +opus_uint32 celt_lcg_rand(opus_uint32 seed); + +int hysteresis_decision(opus_val16 val, const opus_val16 *thresholds, const opus_val16 *hysteresis, int N, int prev); + +#endif /* BANDS_H */ diff --git a/drivers/opus/celt/celt.c b/drivers/opus/celt/celt.c new file mode 100644 index 0000000000..b894e1e13f --- /dev/null +++ b/drivers/opus/celt/celt.c @@ -0,0 +1,223 @@ +/* Copyright (c) 2007-2008 CSIRO + Copyright (c) 2007-2010 Xiph.Org Foundation + Copyright (c) 2008 Gregory Maxwell + 
Written by Jean-Marc Valin and Gregory Maxwell */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#define CELT_C + +#include "os_support.h" +#include "mdct.h" +#include <math.h> +#include "celt.h" +#include "pitch.h" +#include "bands.h" +#include "opus_modes.h" +#include "entcode.h" +#include "quant_bands.h" +#include "rate.h" +#include "stack_alloc.h" +#include "mathops.h" +#include "float_cast.h" +#include <stdarg.h> +#include "celt_lpc.h" +#include "vq.h" + +#ifndef PACKAGE_VERSION +#define PACKAGE_VERSION "unknown" +#endif + + /* Returns 48000/rate for the five supported rates (48000, 24000, 16000, 12000, 8000). Any other rate returns 0, after celt_assert(0) unless CUSTOM_MODES is defined. */ +int resampling_factor(opus_int32 rate) +{ + int ret; + switch (rate) + { + case 48000: + ret = 1; + break; + case 24000: + ret = 2; + break; + case 16000: + ret = 3; + break; + case 12000: + ret = 4; + break; + case 8000: + ret = 6; + break; + default: +#ifndef CUSTOM_MODES + celt_assert(0); +#endif + ret = 0; + break; + } + return ret; +} + /* Five-tap comb filter with fixed gains around lag T: g10 weights x[i-T], g11 the pair x[i-T+1], x[i-T-1] and g12 the pair x[i-T+2], x[i-T-2]. It reads x starting at index -T-2, so the caller must provide that much history before x. */ +#ifndef OVERRIDE_COMB_FILTER_CONST +static void comb_filter_const(opus_val32 *y, opus_val32 *x, int T, int N, + opus_val16 g10, opus_val16 g11, opus_val16 g12) +{ + opus_val32 x0, x1, x2, x3, x4; + int i; + x4 = x[-T-2]; + x3 = x[-T-1]; + x2 = x[-T]; + x1 = x[-T+1]; + for (i=0;i<N;i++) + { + x0=x[i-T+2]; + y[i] = x[i] + + MULT16_32_Q15(g10,x2) + + MULT16_32_Q15(g11,ADD32(x1,x3)) + + MULT16_32_Q15(g12,ADD32(x0,x4)); + x4=x3; + x3=x2; + x2=x1; + x1=x0; + } + +} +#endif + /* Comb filter that cross-fades over the first `overlap` samples from the (T0, g0, tapset0) filter to the (T1, g1, tapset1) filter, weighting by the squared window, and then runs the constant (T1, g1, tapset1) filter on the remaining samples. When both gains are zero x is simply copied to y. */ +void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N, + opus_val16 g0, opus_val16 g1, int tapset0, int tapset1, + const opus_val16 *window, int overlap) +{ + int i; + /* printf ("%d %d %f %f\n", T0, T1, g0, g1); */ + opus_val16 g00, g01, g02, g10, g11, g12; + opus_val32 x0, x1, x2, x3, x4; + static const opus_val16 gains[3][3] = { + {QCONST16(0.3066406250f, 15), QCONST16(0.2170410156f, 15), QCONST16(0.1296386719f, 15)}, + {QCONST16(0.4638671875f, 15), QCONST16(0.2680664062f, 15), QCONST16(0.f, 15)}, + {QCONST16(0.7998046875f, 15), QCONST16(0.1000976562f, 15), QCONST16(0.f, 15)}}; + + if (g0==0 && g1==0) + { + /* OPT: Happens to work without the 
OPUS_MOVE(), but only because the current encoder already copies x to y */ + if (x!=y) + OPUS_MOVE(y, x, N); + return; + } + /* Scale the three tap gains of each tapset by the overall gains g0 and g1. */ g00 = MULT16_16_Q15(g0, gains[tapset0][0]); + g01 = MULT16_16_Q15(g0, gains[tapset0][1]); + g02 = MULT16_16_Q15(g0, gains[tapset0][2]); + g10 = MULT16_16_Q15(g1, gains[tapset1][0]); + g11 = MULT16_16_Q15(g1, gains[tapset1][1]); + g12 = MULT16_16_Q15(g1, gains[tapset1][2]); + x1 = x[-T1+1]; + x2 = x[-T1 ]; + x3 = x[-T1-1]; + x4 = x[-T1-2]; + /* Cross-fade region: the old filter is weighted by (Q15ONE-f) and the new one by f = window[i]^2. */ for (i=0;i<overlap;i++) + { + opus_val16 f; + x0=x[i-T1+2]; + f = MULT16_16_Q15(window[i],window[i]); + y[i] = x[i] + + MULT16_32_Q15(MULT16_16_Q15((Q15ONE-f),g00),x[i-T0]) + + MULT16_32_Q15(MULT16_16_Q15((Q15ONE-f),g01),ADD32(x[i-T0+1],x[i-T0-1])) + + MULT16_32_Q15(MULT16_16_Q15((Q15ONE-f),g02),ADD32(x[i-T0+2],x[i-T0-2])) + + MULT16_32_Q15(MULT16_16_Q15(f,g10),x2) + + MULT16_32_Q15(MULT16_16_Q15(f,g11),ADD32(x1,x3)) + + MULT16_32_Q15(MULT16_16_Q15(f,g12),ADD32(x0,x4)); + x4=x3; + x3=x2; + x2=x1; + x1=x0; + + } + if (g1==0) + { + /* OPT: Happens to work without the OPUS_MOVE(), but only because the current encoder already copies x to y */ + if (x!=y) + OPUS_MOVE(y+overlap, x+overlap, N-overlap); + return; + } + + /* Compute the part with the constant filter. 
*/ + comb_filter_const(y+i, x+i, T1, N-i, g10, g11, g12); +} + +const signed char tf_select_table[4][8] = { + {0, -1, 0, -1, 0,-1, 0,-1}, + {0, -1, 0, -2, 1, 0, 1,-1}, + {0, -2, 0, -3, 2, 0, 1,-1}, + {0, -2, 0, -3, 3, 0, 1,-1}, +}; + + /* Fills cap[0..nbEBands-1] from the mode's cache.caps table for the given LM and channel count C, scaled by C and by the band width N. */ +void init_caps(const CELTMode *m,int *cap,int LM,int C) +{ + int i; + for (i=0;i<m->nbEBands;i++) + { + int N; + N=(m->eBands[i+1]-m->eBands[i])<<LM; + cap[i] = (m->cache.caps[m->nbEBands*(2*LM+C-1)+i]+64)*C*N>>2; + } +} + + + /* Maps an error code in the range -7..0 to a static description string (the table index is -error); any other value yields "unknown error". */ +const char *opus_strerror(int error) +{ + static const char * const error_strings[8] = { + "success", + "invalid argument", + "buffer too small", + "internal error", + "corrupted stream", + "request not implemented", + "invalid state", + "memory allocation failed" + }; + if (error > 0 || error < -7) + return "unknown error"; + else + return error_strings[-error]; +} + /* Returns a string literal built at compile time: "libopus " followed by PACKAGE_VERSION and the optional "-fixed" / "-fuzzing" suffixes. */ +const char *opus_get_version_string(void) +{ + return "libopus " PACKAGE_VERSION +#ifdef OPUS_FIXED_POINT + "-fixed" +#endif +#ifdef FUZZING + "-fuzzing" +#endif + ; +} diff --git a/drivers/opus/celt/celt.h b/drivers/opus/celt/celt.h new file mode 100644 index 0000000000..5deea1f0aa --- /dev/null +++ b/drivers/opus/celt/celt.h @@ -0,0 +1,218 @@ +/* Copyright (c) 2007-2008 CSIRO + Copyright (c) 2007-2009 Xiph.Org Foundation + Copyright (c) 2008 Gregory Maxwell + Written by Jean-Marc Valin and Gregory Maxwell */ +/** + @file celt.h + @brief Contains all the functions for encoding and decoding audio + */ + +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef CELT_H +#define CELT_H + +#include "opus_types.h" +#include "opus_defines.h" +#include "opus_custom.h" +#include "entenc.h" +#include "entdec.h" +#include "arch.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define CELTEncoder OpusCustomEncoder +#define CELTDecoder OpusCustomDecoder +#define CELTMode OpusCustomMode + /* Structure whose pointer is accepted by the CELT_SET_ANALYSIS() request below. */ +typedef struct { + int valid; + float tonality; + float tonality_slope; + float noisiness; + float activity; + float music_prob; + int bandwidth; +}AnalysisInfo; + /* Type-checking helpers for the request macros: the pointer subtraction is only well-formed when ptr has the expected type and evaluates to 0, so each macro yields ptr itself. */ +#define __celt_check_mode_ptr_ptr(ptr) ((ptr) + ((ptr) - (const CELTMode**)(ptr))) + +#define __celt_check_analysis_ptr(ptr) ((ptr) + ((ptr) - (const AnalysisInfo*)(ptr))) + +/* Encoder/decoder Requests: each FOO(x) macro expands to its FOO_REQUEST code followed by the type-checked argument. */ + +/* Expose this option again when variable framesize actually works */ +#define OPUS_FRAMESIZE_VARIABLE 5010 /**< Optimize the frame size dynamically */ + + +#define CELT_SET_PREDICTION_REQUEST 10002 +/** Controls the use of interframe prediction. 
+ 0=Independent frames + 1=Short term interframe prediction allowed + 2=Long term prediction allowed + */ +#define CELT_SET_PREDICTION(x) CELT_SET_PREDICTION_REQUEST, __opus_check_int(x) + +#define CELT_SET_INPUT_CLIPPING_REQUEST 10004 +#define CELT_SET_INPUT_CLIPPING(x) CELT_SET_INPUT_CLIPPING_REQUEST, __opus_check_int(x) + +#define CELT_GET_AND_CLEAR_ERROR_REQUEST 10007 +#define CELT_GET_AND_CLEAR_ERROR(x) CELT_GET_AND_CLEAR_ERROR_REQUEST, __opus_check_int_ptr(x) + +#define CELT_SET_CHANNELS_REQUEST 10008 +#define CELT_SET_CHANNELS(x) CELT_SET_CHANNELS_REQUEST, __opus_check_int(x) + + +/* Internal */ +#define CELT_SET_START_BAND_REQUEST 10010 +#define CELT_SET_START_BAND(x) CELT_SET_START_BAND_REQUEST, __opus_check_int(x) + +#define CELT_SET_END_BAND_REQUEST 10012 +#define CELT_SET_END_BAND(x) CELT_SET_END_BAND_REQUEST, __opus_check_int(x) + +#define CELT_GET_MODE_REQUEST 10015 +/** Get the CELTMode used by an encoder or decoder */ +#define CELT_GET_MODE(x) CELT_GET_MODE_REQUEST, __celt_check_mode_ptr_ptr(x) + +#define CELT_SET_SIGNALLING_REQUEST 10016 +#define CELT_SET_SIGNALLING(x) CELT_SET_SIGNALLING_REQUEST, __opus_check_int(x) + +#define CELT_SET_TONALITY_REQUEST 10018 +#define CELT_SET_TONALITY(x) CELT_SET_TONALITY_REQUEST, __opus_check_int(x) +#define CELT_SET_TONALITY_SLOPE_REQUEST 10020 +#define CELT_SET_TONALITY_SLOPE(x) CELT_SET_TONALITY_SLOPE_REQUEST, __opus_check_int(x) + +#define CELT_SET_ANALYSIS_REQUEST 10022 +#define CELT_SET_ANALYSIS(x) CELT_SET_ANALYSIS_REQUEST, __celt_check_analysis_ptr(x) + +#define OPUS_SET_LFE_REQUEST 10024 +#define OPUS_SET_LFE(x) OPUS_SET_LFE_REQUEST, __opus_check_int(x) + +#define OPUS_SET_ENERGY_MASK_REQUEST 10026 +#define OPUS_SET_ENERGY_MASK(x) OPUS_SET_ENERGY_MASK_REQUEST, __opus_check_val16_ptr(x) + +/* Encoder stuff */ + +int celt_encoder_get_size(int channels); + +int celt_encode_with_ec(OpusCustomEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes, 
ec_enc *enc); + +int celt_encoder_init(CELTEncoder *st, opus_int32 sampling_rate, int channels, + int arch); + + + +/* Decoder stuff */ + +int celt_decoder_get_size(int channels); + + +int celt_decoder_init(CELTDecoder *st, opus_int32 sampling_rate, int channels); + +int celt_decode_with_ec(OpusCustomDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec); + +#define celt_encoder_ctl opus_custom_encoder_ctl +#define celt_decoder_ctl opus_custom_decoder_ctl + + /* Without CUSTOM_MODES, definitions marked OPUS_CUSTOM_NOSTATIC become static inline; with it the marker expands to nothing. */ +#ifdef CUSTOM_MODES +#define OPUS_CUSTOM_NOSTATIC +#else +#define OPUS_CUSTOM_NOSTATIC static OPUS_INLINE +#endif + +static const unsigned char trim_icdf[11] = {126, 124, 119, 109, 87, 41, 19, 9, 4, 2, 0}; +/* Probs: NONE: 21.875%, LIGHT: 6.25%, NORMAL: 65.625%, AGGRESSIVE: 6.25% */ +static const unsigned char spread_icdf[4] = {25, 23, 2, 0}; + +static const unsigned char tapset_icdf[3]={2,1,0}; + +#ifdef CUSTOM_MODES +static const unsigned char toOpusTable[20] = { + 0xE0, 0xE8, 0xF0, 0xF8, + 0xC0, 0xC8, 0xD0, 0xD8, + 0xA0, 0xA8, 0xB0, 0xB8, + 0x00, 0x00, 0x00, 0x00, + 0x80, 0x88, 0x90, 0x98, +}; + +static const unsigned char fromOpusTable[16] = { + 0x80, 0x88, 0x90, 0x98, + 0x40, 0x48, 0x50, 0x58, + 0x20, 0x28, 0x30, 0x38, + 0x00, 0x08, 0x10, 0x18 +}; + /* Looks up toOpusTable with c>>3 and keeps the low three bits of c; returns -1 when c >= 0xA0 or when the table entry is 0. */ +static OPUS_INLINE int toOpus(unsigned char c) +{ + int ret=0; + if (c<0xA0) + ret = toOpusTable[c>>3]; + if (ret == 0) + return -1; + else + return ret|(c&0x7); +} + /* Reverse lookup: returns -1 for c < 0x80, otherwise fromOpusTable[(c>>3)-16] combined with the low three bits of c. */ +static OPUS_INLINE int fromOpus(unsigned char c) +{ + if (c<0x80) + return -1; + else + return fromOpusTable[(c>>3)-16] | (c&0x7); +} +#endif /* CUSTOM_MODES */ + +#define COMBFILTER_MAXPERIOD 1024 +#define COMBFILTER_MINPERIOD 15 + +extern const signed char tf_select_table[4][8]; + +int resampling_factor(opus_int32 rate); + +void celt_preemphasis(const opus_val16 * OPUS_RESTRICT pcmp, celt_sig * OPUS_RESTRICT inp, + int N, int CC, int upsample, const opus_val16 *coef, celt_sig *mem, int clip); + +void comb_filter(opus_val32 
*y, opus_val32 *x, int T0, int T1, int N, + opus_val16 g0, opus_val16 g1, int tapset0, int tapset1, + const opus_val16 *window, int overlap); + +void init_caps(const CELTMode *m,int *cap,int LM,int C); + /* Declared here only for RESYNTH builds; celt_decoder.c defines deemphasis() and compute_inv_mdcts() as static otherwise. */ +#ifdef RESYNTH +void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, const opus_val16 *coef, celt_sig *mem, celt_sig * OPUS_RESTRICT scratch); + +void compute_inv_mdcts(const CELTMode *mode, int shortBlocks, celt_sig *X, + celt_sig * OPUS_RESTRICT out_mem[], int C, int LM); +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* CELT_H */ diff --git a/drivers/opus/celt/celt_decoder.c b/drivers/opus/celt/celt_decoder.c new file mode 100644 index 0000000000..93791feab4 --- /dev/null +++ b/drivers/opus/celt/celt_decoder.c @@ -0,0 +1,1195 @@ +/* Copyright (c) 2007-2008 CSIRO + Copyright (c) 2007-2010 Xiph.Org Foundation + Copyright (c) 2008 Gregory Maxwell + Written by Jean-Marc Valin and Gregory Maxwell */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#define CELT_DECODER_C + +#include "cpu_support.h" +#include "os_support.h" +#include "mdct.h" +#include <math.h> +#include "celt.h" +#include "pitch.h" +#include "bands.h" +#include "opus_modes.h" +#include "entcode.h" +#include "quant_bands.h" +#include "rate.h" +#include "stack_alloc.h" +#include "mathops.h" +#include "float_cast.h" +#include <stdarg.h> +#include "celt_lpc.h" +#include "vq.h" + +/**********************************************************************/ +/* */ +/* DECODER */ +/* */ +/**********************************************************************/ +#define DECODE_BUFFER_SIZE 2048 + +/** Decoder state + @brief Decoder state + + The declared struct is only the fixed-size header. _decode_mem and the arrays + named in the trailing comments share one allocation with it, whose total size + is computed by opus_custom_decoder_get_size(). + */ +struct OpusCustomDecoder { + const OpusCustomMode *mode; + int overlap; + int channels; + int stream_channels; + + int downsample; + int start, end; + int signalling; + int arch; + + /* Everything beyond this point gets cleared on a reset */ +#define DECODER_RESET_START rng + + opus_uint32 rng; + int error; + int last_pitch_index; + int loss_count; + int postfilter_period; + int postfilter_period_old; + opus_val16 postfilter_gain; + opus_val16 postfilter_gain_old; + int postfilter_tapset; + int postfilter_tapset_old; + + celt_sig preemph_memD[2]; + + celt_sig _decode_mem[1]; /* Size = channels*(DECODE_BUFFER_SIZE+mode->overlap) */ + /* opus_val16 lpc[], Size = channels*LPC_ORDER */ + /* opus_val16 oldEBands[], Size = 
2*mode->nbEBands */ + /* opus_val16 oldLogE[], Size = 2*mode->nbEBands */ + /* opus_val16 oldLogE2[], Size = 2*mode->nbEBands */ + /* opus_val16 backgroundLogE[], Size = 2*mode->nbEBands */ +}; + +int celt_decoder_get_size(int channels) +{ + const CELTMode *mode = opus_custom_mode_create(48000, 960, NULL); + return opus_custom_decoder_get_size(mode, channels); +} + +OPUS_CUSTOM_NOSTATIC int opus_custom_decoder_get_size(const CELTMode *mode, int channels) +{ + int size = sizeof(struct CELTDecoder) + + (channels*(DECODE_BUFFER_SIZE+mode->overlap)-1)*sizeof(celt_sig) + + channels*LPC_ORDER*sizeof(opus_val16) + + 4*2*mode->nbEBands*sizeof(opus_val16); + return size; +} + +#ifdef CUSTOM_MODES +CELTDecoder *opus_custom_decoder_create(const CELTMode *mode, int channels, int *error) +{ + int ret; + CELTDecoder *st = (CELTDecoder *)opus_alloc(opus_custom_decoder_get_size(mode, channels)); + ret = opus_custom_decoder_init(st, mode, channels); + if (ret != OPUS_OK) + { + opus_custom_decoder_destroy(st); + st = NULL; + } + if (error) + *error = ret; + return st; +} +#endif /* CUSTOM_MODES */ + +int celt_decoder_init(CELTDecoder *st, opus_int32 sampling_rate, int channels) +{ + int ret; + ret = opus_custom_decoder_init(st, opus_custom_mode_create(48000, 960, NULL), channels); + if (ret != OPUS_OK) + return ret; + st->downsample = resampling_factor(sampling_rate); + if (st->downsample==0) + return OPUS_BAD_ARG; + else + return OPUS_OK; +} + +OPUS_CUSTOM_NOSTATIC int opus_custom_decoder_init(CELTDecoder *st, const CELTMode *mode, int channels) +{ + if (channels < 0 || channels > 2) + return OPUS_BAD_ARG; + + if (st==NULL) + return OPUS_ALLOC_FAIL; + + OPUS_CLEAR((char*)st, opus_custom_decoder_get_size(mode, channels)); + + st->mode = mode; + st->overlap = mode->overlap; + st->stream_channels = st->channels = channels; + + st->downsample = 1; + st->start = 0; + st->end = st->mode->effEBands; + st->signalling = 1; + st->arch = opus_select_arch(); + + st->loss_count = 0; + + 
opus_custom_decoder_ctl(st, OPUS_RESET_STATE); + + return OPUS_OK; +} + +#ifdef CUSTOM_MODES +void opus_custom_decoder_destroy(CELTDecoder *st) +{ + opus_free(st); +} +#endif /* CUSTOM_MODES */ + +static OPUS_INLINE opus_val16 SIG2WORD16(celt_sig x) +{ +#ifdef OPUS_FIXED_POINT + x = PSHR32(x, SIG_SHIFT); + x = MAX32(x, -32768); + x = MIN32(x, 32767); + return EXTRACT16(x); +#else + return (opus_val16)x; +#endif +} + +#ifndef RESYNTH +static +#endif +void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, const opus_val16 *coef, celt_sig *mem, celt_sig * OPUS_RESTRICT scratch) +{ + int c; + int Nd; + int apply_downsampling=0; + opus_val16 coef0; + + coef0 = coef[0]; + Nd = N/downsample; + c=0; do { + int j; + celt_sig * OPUS_RESTRICT x; + opus_val16 * OPUS_RESTRICT y; + celt_sig m = mem[c]; + x =in[c]; + y = pcm+c; +#ifdef CUSTOM_MODES + if (coef[1] != 0) + { + opus_val16 coef1 = coef[1]; + opus_val16 coef3 = coef[3]; + for (j=0;j<N;j++) + { + celt_sig tmp = x[j] + m + VERY_SMALL; + m = MULT16_32_Q15(coef0, tmp) + - MULT16_32_Q15(coef1, x[j]); + tmp = SHL32(MULT16_32_Q15(coef3, tmp), 2); + scratch[j] = tmp; + } + apply_downsampling=1; + } else +#endif + if (downsample>1) + { + /* Shortcut for the standard (non-custom modes) case */ + for (j=0;j<N;j++) + { + celt_sig tmp = x[j] + m + VERY_SMALL; + m = MULT16_32_Q15(coef0, tmp); + scratch[j] = tmp; + } + apply_downsampling=1; + } else { + /* Shortcut for the standard (non-custom modes) case */ + for (j=0;j<N;j++) + { + celt_sig tmp = x[j] + m + VERY_SMALL; + m = MULT16_32_Q15(coef0, tmp); + y[j*C] = SCALEOUT(SIG2WORD16(tmp)); + } + } + mem[c] = m; + + if (apply_downsampling) + { + /* Perform down-sampling */ + for (j=0;j<Nd;j++) + y[j*C] = SCALEOUT(SIG2WORD16(scratch[j*downsample])); + } + } while (++c<C); +} + +/** Compute the IMDCT and apply window for all sub-frames and + all channels in a frame */ +#ifndef RESYNTH +static +#endif +void compute_inv_mdcts(const CELTMode *mode, int shortBlocks, 
celt_sig *X, + celt_sig * OPUS_RESTRICT out_mem[], int C, int LM) +{ + int b, c; + int B; + int N; + int shift; + const int overlap = OVERLAP(mode); + + if (shortBlocks) + { + B = shortBlocks; + N = mode->shortMdctSize; + shift = mode->maxLM; + } else { + B = 1; + N = mode->shortMdctSize<<LM; + shift = mode->maxLM-LM; + } + c=0; do { + /* IMDCT on the interleaved the sub-frames, overlap-add is performed by the IMDCT */ + for (b=0;b<B;b++) + clt_mdct_backward(&mode->mdct, &X[b+c*N*B], out_mem[c]+N*b, mode->window, overlap, shift, B); + } while (++c<C); +} + +static void tf_decode(int start, int end, int isTransient, int *tf_res, int LM, ec_dec *dec) +{ + int i, curr, tf_select; + int tf_select_rsv; + int tf_changed; + int logp; + opus_uint32 budget; + opus_uint32 tell; + + budget = dec->storage*8; + tell = ec_tell(dec); + logp = isTransient ? 2 : 4; + tf_select_rsv = LM>0 && tell+logp+1<=budget; + budget -= tf_select_rsv; + tf_changed = curr = 0; + for (i=start;i<end;i++) + { + if (tell+logp<=budget) + { + curr ^= ec_dec_bit_logp(dec, logp); + tell = ec_tell(dec); + tf_changed |= curr; + } + tf_res[i] = curr; + logp = isTransient ? 4 : 5; + } + tf_select = 0; + if (tf_select_rsv && + tf_select_table[LM][4*isTransient+0+tf_changed] != + tf_select_table[LM][4*isTransient+2+tf_changed]) + { + tf_select = ec_dec_bit_logp(dec, 1); + } + for (i=start;i<end;i++) + { + tf_res[i] = tf_select_table[LM][4*isTransient+2*tf_select+tf_res[i]]; + } +} + +/* The maximum pitch lag to allow in the pitch-based PLC. It's possible to save + CPU time in the PLC pitch search by making this smaller than MAX_PERIOD. The + current value corresponds to a pitch of 66.67 Hz. */ +#define PLC_PITCH_LAG_MAX (720) +/* The minimum pitch lag to allow in the pitch-based PLC. This corresponds to a + pitch of 480 Hz. 
*/ +#define PLC_PITCH_LAG_MIN (100) + +static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_RESTRICT pcm, int N, int LM) +{ + int c; + int i; + const int C = st->channels; + celt_sig *decode_mem[2]; + celt_sig *out_syn[2]; + opus_val16 *lpc; + opus_val16 *oldBandE, *oldLogE, *oldLogE2, *backgroundLogE; + const OpusCustomMode *mode; + int nbEBands; + int overlap; + int start; + int downsample; + int loss_count; + int noise_based; + const opus_int16 *eBands; + VARDECL(celt_sig, scratch); + SAVE_STACK; + + mode = st->mode; + nbEBands = mode->nbEBands; + overlap = mode->overlap; + eBands = mode->eBands; + + c=0; do { + decode_mem[c] = st->_decode_mem + c*(DECODE_BUFFER_SIZE+overlap); + out_syn[c] = decode_mem[c]+DECODE_BUFFER_SIZE-N; + } while (++c<C); + lpc = (opus_val16*)(st->_decode_mem+(DECODE_BUFFER_SIZE+overlap)*C); + oldBandE = lpc+C*LPC_ORDER; + oldLogE = oldBandE + 2*nbEBands; + oldLogE2 = oldLogE + 2*nbEBands; + backgroundLogE = oldLogE2 + 2*nbEBands; + + loss_count = st->loss_count; + start = st->start; + downsample = st->downsample; + noise_based = loss_count >= 5 || start != 0; + ALLOC(scratch, noise_based?N*C:N, celt_sig); + if (noise_based) + { + /* Noise-based PLC/CNG */ + celt_sig *freq; + VARDECL(celt_norm, X); + opus_uint32 seed; + opus_val16 *plcLogE; + int end; + int effEnd; + + end = st->end; + effEnd = IMAX(start, IMIN(end, mode->effEBands)); + + /* Share the interleaved signal MDCT coefficient buffer with the + deemphasis scratch buffer. */ + freq = scratch; + ALLOC(X, C*N, celt_norm); /**< Interleaved normalised MDCTs */ + + if (loss_count >= 5) + plcLogE = backgroundLogE; + else { + /* Energy decay */ + opus_val16 decay = loss_count==0 ? 
+ QCONST16(1.5f, DB_SHIFT) : QCONST16(.5f, DB_SHIFT); + c=0; do + { + for (i=start;i<end;i++) + oldBandE[c*nbEBands+i] -= decay; + } while (++c<C); + plcLogE = oldBandE; + } + seed = st->rng; + for (c=0;c<C;c++) + { + for (i=start;i<effEnd;i++) + { + int j; + int boffs; + int blen; + boffs = N*c+(eBands[i]<<LM); + blen = (eBands[i+1]-eBands[i])<<LM; + for (j=0;j<blen;j++) + { + seed = celt_lcg_rand(seed); + X[boffs+j] = (celt_norm)((opus_int32)seed>>20); + } + renormalise_vector(X+boffs, blen, Q15ONE); + } + } + st->rng = seed; + + denormalise_bands(mode, X, freq, plcLogE, start, effEnd, C, 1<<LM); + + c=0; do { + int bound = eBands[effEnd]<<LM; + if (downsample!=1) + bound = IMIN(bound, N/downsample); + for (i=bound;i<N;i++) + freq[c*N+i] = 0; + } while (++c<C); + c=0; do { + OPUS_MOVE(decode_mem[c], decode_mem[c]+N, + DECODE_BUFFER_SIZE-N+(overlap>>1)); + } while (++c<C); + compute_inv_mdcts(mode, 0, freq, out_syn, C, LM); + } else { + /* Pitch-based PLC */ + const opus_val16 *window; + opus_val16 fade = Q15ONE; + int pitch_index; + VARDECL(opus_val32, etmp); + VARDECL(opus_val16, exc); + + if (loss_count == 0) + { + VARDECL( opus_val16, lp_pitch_buf ); + ALLOC( lp_pitch_buf, DECODE_BUFFER_SIZE>>1, opus_val16 ); + pitch_downsample(decode_mem, lp_pitch_buf, + DECODE_BUFFER_SIZE, C, st->arch); + pitch_search(lp_pitch_buf+(PLC_PITCH_LAG_MAX>>1), lp_pitch_buf, + DECODE_BUFFER_SIZE-PLC_PITCH_LAG_MAX, + PLC_PITCH_LAG_MAX-PLC_PITCH_LAG_MIN, &pitch_index, st->arch); + pitch_index = PLC_PITCH_LAG_MAX-pitch_index; + st->last_pitch_index = pitch_index; + } else { + pitch_index = st->last_pitch_index; + fade = QCONST16(.8f,15); + } + + ALLOC(etmp, overlap, opus_val32); + ALLOC(exc, MAX_PERIOD, opus_val16); + window = mode->window; + c=0; do { + opus_val16 decay; + opus_val16 attenuation; + opus_val32 S1=0; + celt_sig *buf; + int extrapolation_offset; + int extrapolation_len; + int exc_length; + int j; + + buf = decode_mem[c]; + for (i=0;i<MAX_PERIOD;i++) { + exc[i] = 
ROUND16(buf[DECODE_BUFFER_SIZE-MAX_PERIOD+i], SIG_SHIFT); + } + + if (loss_count == 0) + { + opus_val32 ac[LPC_ORDER+1]; + /* Compute LPC coefficients for the last MAX_PERIOD samples before + the first loss so we can work in the excitation-filter domain. */ + _celt_autocorr(exc, ac, window, overlap, + LPC_ORDER, MAX_PERIOD, st->arch); + /* Add a noise floor of -40 dB. */ +#ifdef OPUS_FIXED_POINT + ac[0] += SHR32(ac[0],13); +#else + ac[0] *= 1.0001f; +#endif + /* Use lag windowing to stabilize the Levinson-Durbin recursion. */ + for (i=1;i<=LPC_ORDER;i++) + { + /*ac[i] *= exp(-.5*(2*M_PI*.002*i)*(2*M_PI*.002*i));*/ +#ifdef OPUS_FIXED_POINT + ac[i] -= MULT16_32_Q15(2*i*i, ac[i]); +#else + ac[i] -= ac[i]*(0.008f*0.008f)*i*i; +#endif + } + _celt_lpc(lpc+c*LPC_ORDER, ac, LPC_ORDER); + } + /* We want the excitation for 2 pitch periods in order to look for a + decaying signal, but we can't get more than MAX_PERIOD. */ + exc_length = IMIN(2*pitch_index, MAX_PERIOD); + /* Initialize the LPC history with the samples just before the start + of the region for which we're computing the excitation. */ + { + opus_val16 lpc_mem[LPC_ORDER]; + for (i=0;i<LPC_ORDER;i++) + { + lpc_mem[i] = + ROUND16(buf[DECODE_BUFFER_SIZE-exc_length-1-i], SIG_SHIFT); + } + /* Compute the excitation for exc_length samples before the loss. */ + celt_fir(exc+MAX_PERIOD-exc_length, lpc+c*LPC_ORDER, + exc+MAX_PERIOD-exc_length, exc_length, LPC_ORDER, lpc_mem); + } + + /* Check if the waveform is decaying, and if so how fast. + We do this to avoid adding energy when concealing in a segment + with decaying energy. 
*/ + { + opus_val32 E1=1, E2=1; + int decay_length; +#ifdef OPUS_FIXED_POINT + int shift = IMAX(0,2*celt_zlog2(celt_maxabs16(&exc[MAX_PERIOD-exc_length], exc_length))-20); +#endif + decay_length = exc_length>>1; + for (i=0;i<decay_length;i++) + { + opus_val16 e; + e = exc[MAX_PERIOD-decay_length+i]; + E1 += SHR32(MULT16_16(e, e), shift); + e = exc[MAX_PERIOD-2*decay_length+i]; + E2 += SHR32(MULT16_16(e, e), shift); + } + E1 = MIN32(E1, E2); + decay = celt_sqrt(frac_div32(SHR32(E1, 1), E2)); + } + + /* Move the decoder memory one frame to the left to give us room to + add the data for the new frame. We ignore the overlap that extends + past the end of the buffer, because we aren't going to use it. */ + OPUS_MOVE(buf, buf+N, DECODE_BUFFER_SIZE-N); + + /* Extrapolate from the end of the excitation with a period of + "pitch_index", scaling down each period by an additional factor of + "decay". */ + extrapolation_offset = MAX_PERIOD-pitch_index; + /* We need to extrapolate enough samples to cover a complete MDCT + window (including overlap/2 samples on both sides). */ + extrapolation_len = N+overlap; + /* We also apply fading if this is not the first loss. */ + attenuation = MULT16_16_Q15(fade, decay); + for (i=j=0;i<extrapolation_len;i++,j++) + { + opus_val16 tmp; + if (j >= pitch_index) { + j -= pitch_index; + attenuation = MULT16_16_Q15(attenuation, decay); + } + buf[DECODE_BUFFER_SIZE-N+i] = + SHL32(EXTEND32(MULT16_16_Q15(attenuation, + exc[extrapolation_offset+j])), SIG_SHIFT); + /* Compute the energy of the previously decoded signal whose + excitation we're copying. */ + tmp = ROUND16( + buf[DECODE_BUFFER_SIZE-MAX_PERIOD-N+extrapolation_offset+j], + SIG_SHIFT); + S1 += SHR32(MULT16_16(tmp, tmp), 8); + } + + { + opus_val16 lpc_mem[LPC_ORDER]; + /* Copy the last decoded samples (prior to the overlap region) to + synthesis filter memory so we can have a continuous signal. 
*/ + for (i=0;i<LPC_ORDER;i++) + lpc_mem[i] = ROUND16(buf[DECODE_BUFFER_SIZE-N-1-i], SIG_SHIFT); + /* Apply the synthesis filter to convert the excitation back into + the signal domain. */ + celt_iir(buf+DECODE_BUFFER_SIZE-N, lpc+c*LPC_ORDER, + buf+DECODE_BUFFER_SIZE-N, extrapolation_len, LPC_ORDER, + lpc_mem); + } + + /* Check if the synthesis energy is higher than expected, which can + happen with the signal changes during our window. If so, + attenuate. */ + { + opus_val32 S2=0; + for (i=0;i<extrapolation_len;i++) + { + opus_val16 tmp = ROUND16(buf[DECODE_BUFFER_SIZE-N+i], SIG_SHIFT); + S2 += SHR32(MULT16_16(tmp, tmp), 8); + } + /* This checks for an "explosion" in the synthesis. */ +#ifdef OPUS_FIXED_POINT + if (!(S1 > SHR32(S2,2))) +#else + /* The float test is written this way to catch NaNs in the output + of the IIR filter at the same time. */ + if (!(S1 > 0.2f*S2)) +#endif + { + for (i=0;i<extrapolation_len;i++) + buf[DECODE_BUFFER_SIZE-N+i] = 0; + } else if (S1 < S2) + { + opus_val16 ratio = celt_sqrt(frac_div32(SHR32(S1,1)+1,S2+1)); + for (i=0;i<overlap;i++) + { + opus_val16 tmp_g = Q15ONE + - MULT16_16_Q15(window[i], Q15ONE-ratio); + buf[DECODE_BUFFER_SIZE-N+i] = + MULT16_32_Q15(tmp_g, buf[DECODE_BUFFER_SIZE-N+i]); + } + for (i=overlap;i<extrapolation_len;i++) + { + buf[DECODE_BUFFER_SIZE-N+i] = + MULT16_32_Q15(ratio, buf[DECODE_BUFFER_SIZE-N+i]); + } + } + } + + /* Apply the pre-filter to the MDCT overlap for the next frame because + the post-filter will be re-applied in the decoder after the MDCT + overlap. */ + comb_filter(etmp, buf+DECODE_BUFFER_SIZE, + st->postfilter_period, st->postfilter_period, overlap, + -st->postfilter_gain, -st->postfilter_gain, + st->postfilter_tapset, st->postfilter_tapset, NULL, 0); + + /* Simulate TDAC on the concealed audio so that it blends with the + MDCT of the next frame. 
*/ + for (i=0;i<overlap/2;i++) + { + buf[DECODE_BUFFER_SIZE+i] = + MULT16_32_Q15(window[i], etmp[overlap-1-i]) + + MULT16_32_Q15(window[overlap-i-1], etmp[i]); + } + } while (++c<C); + } + + deemphasis(out_syn, pcm, N, C, downsample, + mode->preemph, st->preemph_memD, scratch); + + st->loss_count = loss_count+1; + + RESTORE_STACK; +} + +int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec) +{ + int c, i, N; + int spread_decision; + opus_int32 bits; + ec_dec _dec; + VARDECL(celt_sig, freq); + VARDECL(celt_norm, X); + VARDECL(int, fine_quant); + VARDECL(int, pulses); + VARDECL(int, cap); + VARDECL(int, offsets); + VARDECL(int, fine_priority); + VARDECL(int, tf_res); + VARDECL(unsigned char, collapse_masks); + celt_sig *decode_mem[2]; + celt_sig *out_syn[2]; + opus_val16 *lpc; + opus_val16 *oldBandE, *oldLogE, *oldLogE2, *backgroundLogE; + + int shortBlocks; + int isTransient; + int intra_ener; + const int CC = st->channels; + int LM, M; + int effEnd; + int codedBands; + int alloc_trim; + int postfilter_pitch; + opus_val16 postfilter_gain; + int intensity=0; + int dual_stereo=0; + opus_int32 total_bits; + opus_int32 balance; + opus_int32 tell; + int dynalloc_logp; + int postfilter_tapset; + int anti_collapse_rsv; + int anti_collapse_on=0; + int silence; + int C = st->stream_channels; + const OpusCustomMode *mode; + int nbEBands; + int overlap; + const opus_int16 *eBands; + ALLOC_STACK; + + mode = st->mode; + nbEBands = mode->nbEBands; + overlap = mode->overlap; + eBands = mode->eBands; + frame_size *= st->downsample; + + c=0; do { + decode_mem[c] = st->_decode_mem + c*(DECODE_BUFFER_SIZE+overlap); + } while (++c<CC); + lpc = (opus_val16*)(st->_decode_mem+(DECODE_BUFFER_SIZE+overlap)*CC); + oldBandE = lpc+CC*LPC_ORDER; + oldLogE = oldBandE + 2*nbEBands; + oldLogE2 = oldLogE + 2*nbEBands; + backgroundLogE = oldLogE2 + 2*nbEBands; + +#ifdef CUSTOM_MODES + if (st->signalling 
&& data!=NULL) + { + int data0=data[0]; + /* Convert "standard mode" to Opus header */ + if (mode->Fs==48000 && mode->shortMdctSize==120) + { + data0 = fromOpus(data0); + if (data0<0) + return OPUS_INVALID_PACKET; + } + st->end = IMAX(1, mode->effEBands-2*(data0>>5)); + LM = (data0>>3)&0x3; + C = 1 + ((data0>>2)&0x1); + data++; + len--; + if (LM>mode->maxLM) + return OPUS_INVALID_PACKET; + if (frame_size < mode->shortMdctSize<<LM) + return OPUS_BUFFER_TOO_SMALL; + else + frame_size = mode->shortMdctSize<<LM; + } else { +#else + { +#endif + for (LM=0;LM<=mode->maxLM;LM++) + if (mode->shortMdctSize<<LM==frame_size) + break; + if (LM>mode->maxLM) + return OPUS_BAD_ARG; + } + M=1<<LM; + + if (len<0 || len>1275 || pcm==NULL) + return OPUS_BAD_ARG; + + N = M*mode->shortMdctSize; + + effEnd = st->end; + if (effEnd > mode->effEBands) + effEnd = mode->effEBands; + + if (data == NULL || len<=1) + { + celt_decode_lost(st, pcm, N, LM); + RESTORE_STACK; + return frame_size/st->downsample; + } + + if (dec == NULL) + { + ec_dec_init(&_dec,(unsigned char*)data,len); + dec = &_dec; + } + + if (C==1) + { + for (i=0;i<nbEBands;i++) + oldBandE[i]=MAX16(oldBandE[i],oldBandE[nbEBands+i]); + } + + total_bits = len*8; + tell = ec_tell(dec); + + if (tell >= total_bits) + silence = 1; + else if (tell==1) + silence = ec_dec_bit_logp(dec, 15); + else + silence = 0; + if (silence) + { + /* Pretend we've read all the remaining bits */ + tell = len*8; + dec->nbits_total+=tell-ec_tell(dec); + } + + postfilter_gain = 0; + postfilter_pitch = 0; + postfilter_tapset = 0; + if (st->start==0 && tell+16 <= total_bits) + { + if(ec_dec_bit_logp(dec, 1)) + { + int qg, octave; + octave = ec_dec_uint(dec, 6); + postfilter_pitch = (16<<octave)+ec_dec_bits(dec, 4+octave)-1; + qg = ec_dec_bits(dec, 3); + if (ec_tell(dec)+2<=total_bits) + postfilter_tapset = ec_dec_icdf(dec, tapset_icdf, 2); + postfilter_gain = QCONST16(.09375f,15)*(qg+1); + } + tell = ec_tell(dec); + } + + if (LM > 0 && tell+3 <= total_bits) + 
{ + isTransient = ec_dec_bit_logp(dec, 3); + tell = ec_tell(dec); + } + else + isTransient = 0; + + if (isTransient) + shortBlocks = M; + else + shortBlocks = 0; + + /* Decode the global flags (first symbols in the stream) */ + intra_ener = tell+3<=total_bits ? ec_dec_bit_logp(dec, 3) : 0; + /* Get band energies */ + unquant_coarse_energy(mode, st->start, st->end, oldBandE, + intra_ener, dec, C, LM); + + ALLOC(tf_res, nbEBands, int); + tf_decode(st->start, st->end, isTransient, tf_res, LM, dec); + + tell = ec_tell(dec); + spread_decision = SPREAD_NORMAL; + if (tell+4 <= total_bits) + spread_decision = ec_dec_icdf(dec, spread_icdf, 5); + + ALLOC(cap, nbEBands, int); + + init_caps(mode,cap,LM,C); + + ALLOC(offsets, nbEBands, int); + + dynalloc_logp = 6; + total_bits<<=BITRES; + tell = ec_tell_frac(dec); + for (i=st->start;i<st->end;i++) + { + int width, quanta; + int dynalloc_loop_logp; + int boost; + width = C*(eBands[i+1]-eBands[i])<<LM; + /* quanta is 6 bits, but no more than 1 bit/sample + and no less than 1/8 bit/sample */ + quanta = IMIN(width<<BITRES, IMAX(6<<BITRES, width)); + dynalloc_loop_logp = dynalloc_logp; + boost = 0; + while (tell+(dynalloc_loop_logp<<BITRES) < total_bits && boost < cap[i]) + { + int flag; + flag = ec_dec_bit_logp(dec, dynalloc_loop_logp); + tell = ec_tell_frac(dec); + if (!flag) + break; + boost += quanta; + total_bits -= quanta; + dynalloc_loop_logp = 1; + } + offsets[i] = boost; + /* Making dynalloc more likely */ + if (boost>0) + dynalloc_logp = IMAX(2, dynalloc_logp-1); + } + + ALLOC(fine_quant, nbEBands, int); + alloc_trim = tell+(6<<BITRES) <= total_bits ? + ec_dec_icdf(dec, trim_icdf, 7) : 5; + + bits = (((opus_int32)len*8)<<BITRES) - ec_tell_frac(dec) - 1; + anti_collapse_rsv = isTransient&&LM>=2&&bits>=((LM+2)<<BITRES) ? 
(1<<BITRES) : 0; + bits -= anti_collapse_rsv; + + ALLOC(pulses, nbEBands, int); + ALLOC(fine_priority, nbEBands, int); + + codedBands = compute_allocation(mode, st->start, st->end, offsets, cap, + alloc_trim, &intensity, &dual_stereo, bits, &balance, pulses, + fine_quant, fine_priority, C, LM, dec, 0, 0, 0); + + unquant_fine_energy(mode, st->start, st->end, oldBandE, fine_quant, dec, C); + + /* Decode fixed codebook */ + ALLOC(collapse_masks, C*nbEBands, unsigned char); + ALLOC(X, C*N, celt_norm); /**< Interleaved normalised MDCTs */ + + quant_all_bands(0, mode, st->start, st->end, X, C==2 ? X+N : NULL, collapse_masks, + NULL, pulses, shortBlocks, spread_decision, dual_stereo, intensity, tf_res, + len*(8<<BITRES)-anti_collapse_rsv, balance, dec, LM, codedBands, &st->rng); + + if (anti_collapse_rsv > 0) + { + anti_collapse_on = ec_dec_bits(dec, 1); + } + + unquant_energy_finalise(mode, st->start, st->end, oldBandE, + fine_quant, fine_priority, len*8-ec_tell(dec), dec, C); + + if (anti_collapse_on) + anti_collapse(mode, X, collapse_masks, LM, C, N, + st->start, st->end, oldBandE, oldLogE, oldLogE2, pulses, st->rng); + + ALLOC(freq, IMAX(CC,C)*N, celt_sig); /**< Interleaved signal MDCTs */ + + if (silence) + { + for (i=0;i<C*nbEBands;i++) + oldBandE[i] = -QCONST16(28.f,DB_SHIFT); + for (i=0;i<C*N;i++) + freq[i] = 0; + } else { + /* Synthesis */ + denormalise_bands(mode, X, freq, oldBandE, st->start, effEnd, C, M); + } + c=0; do { + OPUS_MOVE(decode_mem[c], decode_mem[c]+N, DECODE_BUFFER_SIZE-N+overlap/2); + } while (++c<CC); + + c=0; do { + int bound = M*eBands[effEnd]; + if (st->downsample!=1) + bound = IMIN(bound, N/st->downsample); + for (i=bound;i<N;i++) + freq[c*N+i] = 0; + } while (++c<C); + + c=0; do { + out_syn[c] = decode_mem[c]+DECODE_BUFFER_SIZE-N; + } while (++c<CC); + + if (CC==2&&C==1) + { + for (i=0;i<N;i++) + freq[N+i] = freq[i]; + } + if (CC==1&&C==2) + { + for (i=0;i<N;i++) + freq[i] = HALF32(ADD32(freq[i],freq[N+i])); + } + + /* Compute inverse 
MDCTs */ + compute_inv_mdcts(mode, shortBlocks, freq, out_syn, CC, LM); + + c=0; do { + st->postfilter_period=IMAX(st->postfilter_period, COMBFILTER_MINPERIOD); + st->postfilter_period_old=IMAX(st->postfilter_period_old, COMBFILTER_MINPERIOD); + comb_filter(out_syn[c], out_syn[c], st->postfilter_period_old, st->postfilter_period, mode->shortMdctSize, + st->postfilter_gain_old, st->postfilter_gain, st->postfilter_tapset_old, st->postfilter_tapset, + mode->window, overlap); + if (LM!=0) + comb_filter(out_syn[c]+mode->shortMdctSize, out_syn[c]+mode->shortMdctSize, st->postfilter_period, postfilter_pitch, N-mode->shortMdctSize, + st->postfilter_gain, postfilter_gain, st->postfilter_tapset, postfilter_tapset, + mode->window, overlap); + + } while (++c<CC); + st->postfilter_period_old = st->postfilter_period; + st->postfilter_gain_old = st->postfilter_gain; + st->postfilter_tapset_old = st->postfilter_tapset; + st->postfilter_period = postfilter_pitch; + st->postfilter_gain = postfilter_gain; + st->postfilter_tapset = postfilter_tapset; + if (LM!=0) + { + st->postfilter_period_old = st->postfilter_period; + st->postfilter_gain_old = st->postfilter_gain; + st->postfilter_tapset_old = st->postfilter_tapset; + } + + if (C==1) { + for (i=0;i<nbEBands;i++) + oldBandE[nbEBands+i]=oldBandE[i]; + } + + /* In case start or end were to change */ + if (!isTransient) + { + for (i=0;i<2*nbEBands;i++) + oldLogE2[i] = oldLogE[i]; + for (i=0;i<2*nbEBands;i++) + oldLogE[i] = oldBandE[i]; + for (i=0;i<2*nbEBands;i++) + backgroundLogE[i] = MIN16(backgroundLogE[i] + M*QCONST16(0.001f,DB_SHIFT), oldBandE[i]); + } else { + for (i=0;i<2*nbEBands;i++) + oldLogE[i] = MIN16(oldLogE[i], oldBandE[i]); + } + c=0; do + { + for (i=0;i<st->start;i++) + { + oldBandE[c*nbEBands+i]=0; + oldLogE[c*nbEBands+i]=oldLogE2[c*nbEBands+i]=-QCONST16(28.f,DB_SHIFT); + } + for (i=st->end;i<nbEBands;i++) + { + oldBandE[c*nbEBands+i]=0; + oldLogE[c*nbEBands+i]=oldLogE2[c*nbEBands+i]=-QCONST16(28.f,DB_SHIFT); + } + } 
while (++c<2); + st->rng = dec->rng; + + /* We reuse freq[] as scratch space for the de-emphasis */ + deemphasis(out_syn, pcm, N, CC, st->downsample, mode->preemph, st->preemph_memD, freq); + st->loss_count = 0; + RESTORE_STACK; + if (ec_tell(dec) > 8*len) + return OPUS_INTERNAL_ERROR; + if(ec_get_error(dec)) + st->error = 1; + return frame_size/st->downsample; +} + + +#ifdef CUSTOM_MODES + +#ifdef OPUS_FIXED_POINT +int opus_custom_decode(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_int16 * OPUS_RESTRICT pcm, int frame_size) +{ + return celt_decode_with_ec(st, data, len, pcm, frame_size, NULL); +} + +#ifndef DISABLE_FLOAT_API +int opus_custom_decode_float(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, float * OPUS_RESTRICT pcm, int frame_size) +{ + int j, ret, C, N; + VARDECL(opus_int16, out); + ALLOC_STACK; + + if (pcm==NULL) + return OPUS_BAD_ARG; + + C = st->channels; + N = frame_size; + + ALLOC(out, C*N, opus_int16); + ret=celt_decode_with_ec(st, data, len, out, frame_size, NULL); + if (ret>0) + for (j=0;j<C*ret;j++) + pcm[j]=out[j]*(1.f/32768.f); + + RESTORE_STACK; + return ret; +} +#endif /* DISABLE_FLOAT_API */ + +#else + +int opus_custom_decode_float(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, float * OPUS_RESTRICT pcm, int frame_size) +{ + return celt_decode_with_ec(st, data, len, pcm, frame_size, NULL); +} + +int opus_custom_decode(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_int16 * OPUS_RESTRICT pcm, int frame_size) +{ + int j, ret, C, N; + VARDECL(celt_sig, out); + ALLOC_STACK; + + if (pcm==NULL) + return OPUS_BAD_ARG; + + C = st->channels; + N = frame_size; + ALLOC(out, C*N, celt_sig); + + ret=celt_decode_with_ec(st, data, len, out, frame_size, NULL); + + if (ret>0) + for (j=0;j<C*ret;j++) + pcm[j] = FLOAT2INT16 (out[j]); + + RESTORE_STACK; + return ret; +} + +#endif +#endif /* CUSTOM_MODES */ + +int opus_custom_decoder_ctl(CELTDecoder * OPUS_RESTRICT 
st, int request, ...) +{ + va_list ap; + + va_start(ap, request); + switch (request) + { + case CELT_SET_START_BAND_REQUEST: + { + opus_int32 value = va_arg(ap, opus_int32); + if (value<0 || value>=st->mode->nbEBands) + goto bad_arg; + st->start = value; + } + break; + case CELT_SET_END_BAND_REQUEST: + { + opus_int32 value = va_arg(ap, opus_int32); + if (value<1 || value>st->mode->nbEBands) + goto bad_arg; + st->end = value; + } + break; + case CELT_SET_CHANNELS_REQUEST: + { + opus_int32 value = va_arg(ap, opus_int32); + if (value<1 || value>2) + goto bad_arg; + st->stream_channels = value; + } + break; + case CELT_GET_AND_CLEAR_ERROR_REQUEST: + { + opus_int32 *value = va_arg(ap, opus_int32*); + if (value==NULL) + goto bad_arg; + *value=st->error; + st->error = 0; + } + break; + case OPUS_GET_LOOKAHEAD_REQUEST: + { + opus_int32 *value = va_arg(ap, opus_int32*); + if (value==NULL) + goto bad_arg; + *value = st->overlap/st->downsample; + } + break; + case OPUS_RESET_STATE: + { + int i; + opus_val16 *lpc, *oldBandE, *oldLogE, *oldLogE2; + lpc = (opus_val16*)(st->_decode_mem+(DECODE_BUFFER_SIZE+st->overlap)*st->channels); + oldBandE = lpc+st->channels*LPC_ORDER; + oldLogE = oldBandE + 2*st->mode->nbEBands; + oldLogE2 = oldLogE + 2*st->mode->nbEBands; + OPUS_CLEAR((char*)&st->DECODER_RESET_START, + opus_custom_decoder_get_size(st->mode, st->channels)- + ((char*)&st->DECODER_RESET_START - (char*)st)); + for (i=0;i<2*st->mode->nbEBands;i++) + oldLogE[i]=oldLogE2[i]=-QCONST16(28.f,DB_SHIFT); + } + break; + case OPUS_GET_PITCH_REQUEST: + { + opus_int32 *value = va_arg(ap, opus_int32*); + if (value==NULL) + goto bad_arg; + *value = st->postfilter_period; + } + break; + case CELT_GET_MODE_REQUEST: + { + const CELTMode ** value = va_arg(ap, const CELTMode**); + if (value==0) + goto bad_arg; + *value=st->mode; + } + break; + case CELT_SET_SIGNALLING_REQUEST: + { + opus_int32 value = va_arg(ap, opus_int32); + st->signalling = value; + } + break; + case 
OPUS_GET_FINAL_RANGE_REQUEST: + { + opus_uint32 * value = va_arg(ap, opus_uint32 *); + if (value==0) + goto bad_arg; + *value=st->rng; + } + break; + default: + goto bad_request; + } + va_end(ap); + return OPUS_OK; +bad_arg: + va_end(ap); + return OPUS_BAD_ARG; +bad_request: + va_end(ap); + return OPUS_UNIMPLEMENTED; +} diff --git a/drivers/opus/celt/celt_encoder.c b/drivers/opus/celt/celt_encoder.c new file mode 100644 index 0000000000..a61e41f42d --- /dev/null +++ b/drivers/opus/celt/celt_encoder.c @@ -0,0 +1,2353 @@ +/* Copyright (c) 2007-2008 CSIRO + Copyright (c) 2007-2010 Xiph.Org Foundation + Copyright (c) 2008 Gregory Maxwell + Written by Jean-Marc Valin and Gregory Maxwell */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#define CELT_ENCODER_C + +#include "cpu_support.h" +#include "os_support.h" +#include "mdct.h" +#include <math.h> +#include "celt.h" +#include "pitch.h" +#include "bands.h" +#include "opus_modes.h" +#include "entcode.h" +#include "quant_bands.h" +#include "rate.h" +#include "stack_alloc.h" +#include "mathops.h" +#include "float_cast.h" +#include <stdarg.h> +#include "celt_lpc.h" +#include "vq.h" + + +/** Encoder state + @brief Encoder state + */ +struct OpusCustomEncoder { + const OpusCustomMode *mode; /**< Mode used by the encoder */ + int overlap; + int channels; + int stream_channels; + + int force_intra; + int clip; + int disable_pf; + int complexity; + int upsample; + int start, end; + + opus_int32 bitrate; + int vbr; + int signalling; + int constrained_vbr; /* If zero, VBR can do whatever it likes with the rate */ + int loss_rate; + int lsb_depth; + int variable_duration; + int lfe; + int arch; + + /* Everything beyond this point gets cleared on a reset */ +#define ENCODER_RESET_START rng + + opus_uint32 rng; + int spread_decision; + opus_val32 delayedIntra; + int tonal_average; + int lastCodedBands; + int hf_average; + int tapset_decision; + + int prefilter_period; + opus_val16 prefilter_gain; + int prefilter_tapset; +#ifdef RESYNTH + int prefilter_period_old; + opus_val16 prefilter_gain_old; + int prefilter_tapset_old; +#endif + int consec_transient; + AnalysisInfo analysis; + + opus_val32 preemph_memE[2]; + opus_val32 preemph_memD[2]; + + /* VBR-related parameters */ + opus_int32 vbr_reservoir; + opus_int32 vbr_drift; + opus_int32 vbr_offset; + opus_int32 vbr_count; + opus_val32 overlap_max; + opus_val16 stereo_saving; + int intensity; + opus_val16 *energy_mask; + opus_val16 spec_avg; + +#ifdef RESYNTH + /* +MAX_PERIOD/2 to make space for overlap */ + celt_sig syn_mem[2][2*MAX_PERIOD+MAX_PERIOD/2]; +#endif + + celt_sig in_mem[1]; /* Size = channels*mode->overlap */ + /* celt_sig 
prefilter_mem[], Size = channels*COMBFILTER_MAXPERIOD */ + /* opus_val16 oldBandE[], Size = channels*mode->nbEBands */ + /* opus_val16 oldLogE[], Size = channels*mode->nbEBands */ + /* opus_val16 oldLogE2[], Size = channels*mode->nbEBands */ +}; + +int celt_encoder_get_size(int channels) +{ + CELTMode *mode = opus_custom_mode_create(48000, 960, NULL); + return opus_custom_encoder_get_size(mode, channels); +} + +OPUS_CUSTOM_NOSTATIC int opus_custom_encoder_get_size(const CELTMode *mode, int channels) +{ + int size = sizeof(struct CELTEncoder) + + (channels*mode->overlap-1)*sizeof(celt_sig) /* celt_sig in_mem[channels*mode->overlap]; */ + + channels*COMBFILTER_MAXPERIOD*sizeof(celt_sig) /* celt_sig prefilter_mem[channels*COMBFILTER_MAXPERIOD]; */ + + 3*channels*mode->nbEBands*sizeof(opus_val16); /* opus_val16 oldBandE[channels*mode->nbEBands]; */ + /* opus_val16 oldLogE[channels*mode->nbEBands]; */ + /* opus_val16 oldLogE2[channels*mode->nbEBands]; */ + return size; +} + +#ifdef CUSTOM_MODES +CELTEncoder *opus_custom_encoder_create(const CELTMode *mode, int channels, int *error) +{ + int ret; + CELTEncoder *st = (CELTEncoder *)opus_alloc(opus_custom_encoder_get_size(mode, channels)); + /* init will handle the NULL case */ + ret = opus_custom_encoder_init(st, mode, channels); + if (ret != OPUS_OK) + { + opus_custom_encoder_destroy(st); + st = NULL; + } + if (error) + *error = ret; + return st; +} +#endif /* CUSTOM_MODES */ + +static int opus_custom_encoder_init_arch(CELTEncoder *st, const CELTMode *mode, + int channels, int arch) +{ + if (channels < 0 || channels > 2) + return OPUS_BAD_ARG; + + if (st==NULL || mode==NULL) + return OPUS_ALLOC_FAIL; + + OPUS_CLEAR((char*)st, opus_custom_encoder_get_size(mode, channels)); + + st->mode = mode; + st->overlap = mode->overlap; + st->stream_channels = st->channels = channels; + + st->upsample = 1; + st->start = 0; + st->end = st->mode->effEBands; + st->signalling = 1; + + st->arch = arch; + + st->constrained_vbr = 1; + 
st->clip = 1; + + st->bitrate = OPUS_BITRATE_MAX; + st->vbr = 0; + st->force_intra = 0; + st->complexity = 5; + st->lsb_depth=24; + + opus_custom_encoder_ctl(st, OPUS_RESET_STATE); + + return OPUS_OK; +} + +#ifdef CUSTOM_MODES +int opus_custom_encoder_init(CELTEncoder *st, const CELTMode *mode, int channels) +{ + return opus_custom_encoder_init_arch(st, mode, channels, opus_select_arch()); +} +#endif + +int celt_encoder_init(CELTEncoder *st, opus_int32 sampling_rate, int channels, + int arch) +{ + int ret; + ret = opus_custom_encoder_init_arch(st, + opus_custom_mode_create(48000, 960, NULL), channels, arch); + if (ret != OPUS_OK) + return ret; + st->upsample = resampling_factor(sampling_rate); + return OPUS_OK; +} + +#ifdef CUSTOM_MODES +void opus_custom_encoder_destroy(CELTEncoder *st) +{ + opus_free(st); +} +#endif /* CUSTOM_MODES */ + + +static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int C, + opus_val16 *tf_estimate, int *tf_chan) +{ + int i; + VARDECL(opus_val16, tmp); + opus_val32 mem0,mem1; + int is_transient = 0; + opus_int32 mask_metric = 0; + int c; + opus_val16 tf_max; + int len2; + /* Table of 6*64/x, trained on real data to minimize the average error */ + static const unsigned char inv_table[128] = { + 255,255,156,110, 86, 70, 59, 51, 45, 40, 37, 33, 31, 28, 26, 25, + 23, 22, 21, 20, 19, 18, 17, 16, 16, 15, 15, 14, 13, 13, 12, 12, + 12, 12, 11, 11, 11, 10, 10, 10, 9, 9, 9, 9, 9, 9, 8, 8, + 8, 8, 8, 7, 7, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, + }; + SAVE_STACK; + ALLOC(tmp, len, opus_val16); + + len2=len/2; + for (c=0;c<C;c++) + { + opus_val32 mean; + opus_int32 unmask=0; + opus_val32 norm; + opus_val16 maxE; + mem0=0; + mem1=0; + /* High-pass filter: (1 - 2*z^-1 + z^-2) / (1 - z^-1 + .5*z^-2) */ + for (i=0;i<len;i++) + { + opus_val32 
x,y; + x = SHR32(in[i+c*len],SIG_SHIFT); + y = ADD32(mem0, x); +#ifdef OPUS_FIXED_POINT + mem0 = mem1 + y - SHL32(x,1); + mem1 = x - SHR32(y,1); +#else + mem0 = mem1 + y - 2*x; + mem1 = x - .5f*y; +#endif + tmp[i] = EXTRACT16(SHR32(y,2)); + /*printf("%f ", tmp[i]);*/ + } + /*printf("\n");*/ + /* First few samples are bad because we don't propagate the memory */ + for (i=0;i<12;i++) + tmp[i] = 0; + +#ifdef OPUS_FIXED_POINT + /* Normalize tmp to max range */ + { + int shift=0; + shift = 14-celt_ilog2(1+celt_maxabs16(tmp, len)); + if (shift!=0) + { + for (i=0;i<len;i++) + tmp[i] = SHL16(tmp[i], shift); + } + } +#endif + + mean=0; + mem0=0; + /* Grouping by two to reduce complexity */ + /* Forward pass to compute the post-echo threshold*/ + for (i=0;i<len2;i++) + { + opus_val16 x2 = PSHR32(MULT16_16(tmp[2*i],tmp[2*i]) + MULT16_16(tmp[2*i+1],tmp[2*i+1]),16); + mean += x2; +#ifdef OPUS_FIXED_POINT + /* FIXME: Use PSHR16() instead */ + tmp[i] = mem0 + PSHR32(x2-mem0,4); +#else + tmp[i] = mem0 + MULT16_16_P15(QCONST16(.0625f,15),x2-mem0); +#endif + mem0 = tmp[i]; + } + + mem0=0; + maxE=0; + /* Backward pass to compute the pre-echo threshold */ + for (i=len2-1;i>=0;i--) + { +#ifdef OPUS_FIXED_POINT + /* FIXME: Use PSHR16() instead */ + tmp[i] = mem0 + PSHR32(tmp[i]-mem0,3); +#else + tmp[i] = mem0 + MULT16_16_P15(QCONST16(0.125f,15),tmp[i]-mem0); +#endif + mem0 = tmp[i]; + maxE = MAX16(maxE, mem0); + } + /*for (i=0;i<len2;i++)printf("%f ", tmp[i]/mean);printf("\n");*/ + + /* Compute the ratio of the "frame energy" over the harmonic mean of the energy. 
+ This essentially corresponds to a bitrate-normalized temporal noise-to-mask + ratio */ + + /* As a compromise with the old transient detector, frame energy is the + geometric mean of the energy and half the max */ +#ifdef OPUS_FIXED_POINT + /* Costs two sqrt() to avoid overflows */ + mean = MULT16_16(celt_sqrt(mean), celt_sqrt(MULT16_16(maxE,len2>>1))); +#else + mean = celt_sqrt(mean * maxE*.5*len2); +#endif + /* Inverse of the mean energy in Q15+6 */ + norm = SHL32(EXTEND32(len2),6+14)/ADD32(EPSILON,SHR32(mean,1)); + /* Compute harmonic mean discarding the unreliable boundaries + The data is smooth, so we only take 1/4th of the samples */ + unmask=0; + for (i=12;i<len2-5;i+=4) + { + int id; +#ifdef OPUS_FIXED_POINT + id = IMAX(0,IMIN(127,MULT16_32_Q15(tmp[i],norm))); /* Do not round to nearest */ +#else + id = IMAX(0,IMIN(127,(int)floor(64*norm*tmp[i]))); /* Do not round to nearest */ +#endif + unmask += inv_table[id]; + } + /*printf("%d\n", unmask);*/ + /* Normalize, compensate for the 1/4th of the sample and the factor of 6 in the inverse table */ + unmask = 64*unmask*4/(6*(len2-17)); + if (unmask>mask_metric) + { + *tf_chan = c; + mask_metric = unmask; + } + } + is_transient = mask_metric>200; + + /* Arbitrary metric for VBR boost */ + tf_max = MAX16(0,celt_sqrt(27*mask_metric)-42); + /* *tf_estimate = 1 + MIN16(1, sqrt(MAX16(0, tf_max-30))/20); */ + *tf_estimate = celt_sqrt(MAX16(0, SHL32(MULT16_16(QCONST16(0.0069,14),MIN16(163,tf_max)),14)-QCONST32(0.139,28))); + /*printf("%d %f\n", tf_max, mask_metric);*/ + RESTORE_STACK; +#ifdef FUZZING + is_transient = rand()&0x1; +#endif + /*printf("%d %f %d\n", is_transient, (float)*tf_estimate, tf_max);*/ + return is_transient; +} + +/* Looks for sudden increases of energy to decide whether we need to patch + the transient decision */ +int patch_transient_decision(opus_val16 *newE, opus_val16 *oldE, int nbEBands, + int end, int C) +{ + int i, c; + opus_val32 mean_diff=0; + opus_val16 spread_old[26]; + /* Apply an 
aggressive (-6 dB/Bark) spreading function to the old frame to + avoid false detection caused by irrelevant bands */ + if (C==1) + { + spread_old[0] = oldE[0]; + for (i=1;i<end;i++) + spread_old[i] = MAX16(spread_old[i-1]-QCONST16(1.0f, DB_SHIFT), oldE[i]); + } else { + spread_old[0] = MAX16(oldE[0],oldE[nbEBands]); + for (i=1;i<end;i++) + spread_old[i] = MAX16(spread_old[i-1]-QCONST16(1.0f, DB_SHIFT), + MAX16(oldE[i],oldE[i+nbEBands])); + } + for (i=end-2;i>=0;i--) + spread_old[i] = MAX16(spread_old[i], spread_old[i+1]-QCONST16(1.0f, DB_SHIFT)); + /* Compute mean increase */ + c=0; do { + for (i=2;i<end-1;i++) + { + opus_val16 x1, x2; + x1 = MAX16(0, newE[i]); + x2 = MAX16(0, spread_old[i]); + mean_diff = ADD32(mean_diff, EXTEND32(MAX16(0, SUB16(x1, x2)))); + } + } while (++c<C); + mean_diff = DIV32(mean_diff, C*(end-3)); + /*printf("%f %f %d\n", mean_diff, max_diff, count);*/ + return mean_diff > QCONST16(1.f, DB_SHIFT); +} + +/** Apply window and compute the MDCT for all sub-frames and + all channels in a frame */ +static void compute_mdcts(const CELTMode *mode, int shortBlocks, celt_sig * OPUS_RESTRICT in, + celt_sig * OPUS_RESTRICT out, int C, int CC, int LM, int upsample) +{ + const int overlap = OVERLAP(mode); + int N; + int B; + int shift; + int i, b, c; + if (shortBlocks) + { + B = shortBlocks; + N = mode->shortMdctSize; + shift = mode->maxLM; + } else { + B = 1; + N = mode->shortMdctSize<<LM; + shift = mode->maxLM-LM; + } + c=0; do { + for (b=0;b<B;b++) + { + /* Interleaving the sub-frames while doing the MDCTs */ + clt_mdct_forward(&mode->mdct, in+c*(B*N+overlap)+b*N, &out[b+c*N*B], mode->window, overlap, shift, B); + } + } while (++c<CC); + if (CC==2&&C==1) + { + for (i=0;i<B*N;i++) + out[i] = ADD32(HALF32(out[i]), HALF32(out[B*N+i])); + } + if (upsample != 1) + { + c=0; do + { + int bound = B*N/upsample; + for (i=0;i<bound;i++) + out[c*B*N+i] *= upsample; + for (;i<B*N;i++) + out[c*B*N+i] = 0; + } while (++c<C); + } +} + + +void celt_preemphasis(const 
opus_val16 * OPUS_RESTRICT pcmp, celt_sig * OPUS_RESTRICT inp, + int N, int CC, int upsample, const opus_val16 *coef, celt_sig *mem, int clip) +{ + int i; + opus_val16 coef0; + celt_sig m; + int Nu; + + coef0 = coef[0]; + + + Nu = N/upsample; + if (upsample!=1) + { + for (i=0;i<N;i++) + inp[i] = 0; + } + for (i=0;i<Nu;i++) + { + celt_sig x; + + x = SCALEIN(pcmp[CC*i]); +#ifndef OPUS_FIXED_POINT + /* Replace NaNs with zeros */ + if (!(x==x)) + x = 0; +#endif + inp[i*upsample] = x; + } + +#ifndef OPUS_FIXED_POINT + if (clip) + { + /* Clip input to avoid encoding non-portable files */ + for (i=0;i<Nu;i++) + inp[i*upsample] = MAX32(-65536.f, MIN32(65536.f,inp[i*upsample])); + } +#else + (void)clip; /* Avoids a warning about clip being unused. */ +#endif + m = *mem; +#ifdef CUSTOM_MODES + if (coef[1] != 0) + { + opus_val16 coef1 = coef[1]; + opus_val16 coef2 = coef[2]; + for (i=0;i<N;i++) + { + celt_sig x, tmp; + x = inp[i]; + /* Apply pre-emphasis */ + tmp = MULT16_16(coef2, x); + inp[i] = tmp + m; + m = MULT16_32_Q15(coef1, inp[i]) - MULT16_32_Q15(coef0, tmp); + } + } else +#endif + { + for (i=0;i<N;i++) + { + celt_sig x; + x = SHL32(inp[i], SIG_SHIFT); + /* Apply pre-emphasis */ + inp[i] = x + m; + m = - MULT16_32_Q15(coef0, x); + } + } + *mem = m; +} + + + +static opus_val32 l1_metric(const celt_norm *tmp, int N, int LM, opus_val16 bias) +{ + int i; + opus_val32 L1; + L1 = 0; + for (i=0;i<N;i++) + L1 += EXTEND32(ABS16(tmp[i])); + /* When in doubt, prefer good freq resolution */ + L1 = MAC16_32_Q15(L1, LM*bias, L1); + return L1; + +} + +static int tf_analysis(const CELTMode *m, int len, int isTransient, + int *tf_res, int lambda, celt_norm *X, int N0, int LM, + int *tf_sum, opus_val16 tf_estimate, int tf_chan) +{ + int i; + VARDECL(int, metric); + int cost0; + int cost1; + VARDECL(int, path0); + VARDECL(int, path1); + VARDECL(celt_norm, tmp); + VARDECL(celt_norm, tmp_1); + int sel; + int selcost[2]; + int tf_select=0; + opus_val16 bias; + + SAVE_STACK; + bias = 
MULT16_16_Q14(QCONST16(.04f,15), MAX16(-QCONST16(.25f,14), QCONST16(.5f,14)-tf_estimate)); + /*printf("%f ", bias);*/ + + ALLOC(metric, len, int); + ALLOC(tmp, (m->eBands[len]-m->eBands[len-1])<<LM, celt_norm); + ALLOC(tmp_1, (m->eBands[len]-m->eBands[len-1])<<LM, celt_norm); + ALLOC(path0, len, int); + ALLOC(path1, len, int); + + *tf_sum = 0; + for (i=0;i<len;i++) + { + int j, k, N; + int narrow; + opus_val32 L1, best_L1; + int best_level=0; + N = (m->eBands[i+1]-m->eBands[i])<<LM; + /* band is too narrow to be split down to LM=-1 */ + narrow = (m->eBands[i+1]-m->eBands[i])==1; + for (j=0;j<N;j++) + tmp[j] = X[tf_chan*N0 + j+(m->eBands[i]<<LM)]; + /* Just add the right channel if we're in stereo */ + /*if (C==2) + for (j=0;j<N;j++) + tmp[j] = ADD16(SHR16(tmp[j], 1),SHR16(X[N0+j+(m->eBands[i]<<LM)], 1));*/ + L1 = l1_metric(tmp, N, isTransient ? LM : 0, bias); + best_L1 = L1; + /* Check the -1 case for transients */ + if (isTransient && !narrow) + { + for (j=0;j<N;j++) + tmp_1[j] = tmp[j]; + haar1(tmp_1, N>>LM, 1<<LM); + L1 = l1_metric(tmp_1, N, LM+1, bias); + if (L1<best_L1) + { + best_L1 = L1; + best_level = -1; + } + } + /*printf ("%f ", L1);*/ + for (k=0;k<LM+!(isTransient||narrow);k++) + { + int B; + + if (isTransient) + B = (LM-k-1); + else + B = k+1; + + haar1(tmp, N>>k, 1<<k); + + L1 = l1_metric(tmp, N, B, bias); + + if (L1 < best_L1) + { + best_L1 = L1; + best_level = k+1; + } + } + /*printf ("%d ", isTransient ? LM-best_level : best_level);*/ + /* metric is in Q1 to be able to select the mid-point (-0.5) for narrower bands */ + if (isTransient) + metric[i] = 2*best_level; + else + metric[i] = -2*best_level; + *tf_sum += (isTransient ? 
LM : 0) - metric[i]/2; + /* For bands that can't be split to -1, set the metric to the half-way point to avoid + biasing the decision */ + if (narrow && (metric[i]==0 || metric[i]==-2*LM)) + metric[i]-=1; + /*printf("%d ", metric[i]);*/ + } + /*printf("\n");*/ + /* Search for the optimal tf resolution, including tf_select */ + tf_select = 0; + for (sel=0;sel<2;sel++) + { + cost0 = 0; + cost1 = isTransient ? 0 : lambda; + for (i=1;i<len;i++) + { + int curr0, curr1; + curr0 = IMIN(cost0, cost1 + lambda); + curr1 = IMIN(cost0 + lambda, cost1); + cost0 = curr0 + abs(metric[i]-2*tf_select_table[LM][4*isTransient+2*sel+0]); + cost1 = curr1 + abs(metric[i]-2*tf_select_table[LM][4*isTransient+2*sel+1]); + } + cost0 = IMIN(cost0, cost1); + selcost[sel]=cost0; + } + /* For now, we're conservative and only allow tf_select=1 for transients. + * If tests confirm it's useful for non-transients, we could allow it. */ + if (selcost[1]<selcost[0] && isTransient) + tf_select=1; + cost0 = 0; + cost1 = isTransient ? 0 : lambda; + /* Viterbi forward pass */ + for (i=1;i<len;i++) + { + int curr0, curr1; + int from0, from1; + + from0 = cost0; + from1 = cost1 + lambda; + if (from0 < from1) + { + curr0 = from0; + path0[i]= 0; + } else { + curr0 = from1; + path0[i]= 1; + } + + from0 = cost0 + lambda; + from1 = cost1; + if (from0 < from1) + { + curr1 = from0; + path1[i]= 0; + } else { + curr1 = from1; + path1[i]= 1; + } + cost0 = curr0 + abs(metric[i]-2*tf_select_table[LM][4*isTransient+2*tf_select+0]); + cost1 = curr1 + abs(metric[i]-2*tf_select_table[LM][4*isTransient+2*tf_select+1]); + } + tf_res[len-1] = cost0 < cost1 ? 
0 : 1; + /* Viterbi backward pass to check the decisions */ + for (i=len-2;i>=0;i--) + { + if (tf_res[i+1] == 1) + tf_res[i] = path1[i+1]; + else + tf_res[i] = path0[i+1]; + } + /*printf("%d %f\n", *tf_sum, tf_estimate);*/ + RESTORE_STACK; +#ifdef FUZZING + tf_select = rand()&0x1; + tf_res[0] = rand()&0x1; + for (i=1;i<len;i++) + tf_res[i] = tf_res[i-1] ^ ((rand()&0xF) == 0); +#endif + return tf_select; +} + +static void tf_encode(int start, int end, int isTransient, int *tf_res, int LM, int tf_select, ec_enc *enc) +{ + int curr, i; + int tf_select_rsv; + int tf_changed; + int logp; + opus_uint32 budget; + opus_uint32 tell; + budget = enc->storage*8; + tell = ec_tell(enc); + logp = isTransient ? 2 : 4; + /* Reserve space to code the tf_select decision. */ + tf_select_rsv = LM>0 && tell+logp+1 <= budget; + budget -= tf_select_rsv; + curr = tf_changed = 0; + for (i=start;i<end;i++) + { + if (tell+logp<=budget) + { + ec_enc_bit_logp(enc, tf_res[i] ^ curr, logp); + tell = ec_tell(enc); + curr = tf_res[i]; + tf_changed |= curr; + } + else + tf_res[i] = curr; + logp = isTransient ? 4 : 5; + } + /* Only code tf_select if it would actually make a difference. */ + if (tf_select_rsv && + tf_select_table[LM][4*isTransient+0+tf_changed]!= + tf_select_table[LM][4*isTransient+2+tf_changed]) + ec_enc_bit_logp(enc, tf_select, 1); + else + tf_select = 0; + for (i=start;i<end;i++) + tf_res[i] = tf_select_table[LM][4*isTransient+2*tf_select+tf_res[i]]; + /*for(i=0;i<end;i++)printf("%d ", isTransient ? 
tf_res[i] : LM+tf_res[i]);printf("\n");*/ +} + + +static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X, + const opus_val16 *bandLogE, int end, int LM, int C, int N0, + AnalysisInfo *analysis, opus_val16 *stereo_saving, opus_val16 tf_estimate, + int intensity, opus_val16 surround_trim) +{ + int i; + opus_val32 diff=0; + int c; + int trim_index = 5; + opus_val16 trim = QCONST16(5.f, 8); + opus_val16 logXC, logXC2; + if (C==2) + { + opus_val16 sum = 0; /* Q10 */ + opus_val16 minXC; /* Q10 */ + /* Compute inter-channel correlation for low frequencies */ + for (i=0;i<8;i++) + { + int j; + opus_val32 partial = 0; + for (j=m->eBands[i]<<LM;j<m->eBands[i+1]<<LM;j++) + partial = MAC16_16(partial, X[j], X[N0+j]); + sum = ADD16(sum, EXTRACT16(SHR32(partial, 18))); + } + sum = MULT16_16_Q15(QCONST16(1.f/8, 15), sum); + sum = MIN16(QCONST16(1.f, 10), ABS16(sum)); + minXC = sum; + for (i=8;i<intensity;i++) + { + int j; + opus_val32 partial = 0; + for (j=m->eBands[i]<<LM;j<m->eBands[i+1]<<LM;j++) + partial = MAC16_16(partial, X[j], X[N0+j]); + minXC = MIN16(minXC, ABS16(EXTRACT16(SHR32(partial, 18)))); + } + minXC = MIN16(QCONST16(1.f, 10), ABS16(minXC)); + /*printf ("%f\n", sum);*/ + if (sum > QCONST16(.995f,10)) + trim_index-=4; + else if (sum > QCONST16(.92f,10)) + trim_index-=3; + else if (sum > QCONST16(.85f,10)) + trim_index-=2; + else if (sum > QCONST16(.8f,10)) + trim_index-=1; + /* mid-side savings estimations based on the LF average*/ + logXC = celt_log2(QCONST32(1.001f, 20)-MULT16_16(sum, sum)); + /* mid-side savings estimations based on min correlation */ + logXC2 = MAX16(HALF16(logXC), celt_log2(QCONST32(1.001f, 20)-MULT16_16(minXC, minXC))); +#ifdef OPUS_FIXED_POINT + /* Compensate for Q20 vs Q14 input and convert output to Q8 */ + logXC = PSHR32(logXC-QCONST16(6.f, DB_SHIFT),DB_SHIFT-8); + logXC2 = PSHR32(logXC2-QCONST16(6.f, DB_SHIFT),DB_SHIFT-8); +#endif + + trim += MAX16(-QCONST16(4.f, 8), MULT16_16_Q15(QCONST16(.75f,15),logXC)); + *stereo_saving 
= MIN16(*stereo_saving + QCONST16(0.25f, 8), -HALF16(logXC2)); + } + + /* Estimate spectral tilt */ + c=0; do { + for (i=0;i<end-1;i++) + { + diff += bandLogE[i+c*m->nbEBands]*(opus_int32)(2+2*i-end); + } + } while (++c<C); + diff /= C*(end-1); + /*printf("%f\n", diff);*/ + if (diff > QCONST16(2.f, DB_SHIFT)) + trim_index--; + if (diff > QCONST16(8.f, DB_SHIFT)) + trim_index--; + if (diff < -QCONST16(4.f, DB_SHIFT)) + trim_index++; + if (diff < -QCONST16(10.f, DB_SHIFT)) + trim_index++; + trim -= MAX16(-QCONST16(2.f, 8), MIN16(QCONST16(2.f, 8), SHR16(diff+QCONST16(1.f, DB_SHIFT),DB_SHIFT-8)/6 )); + trim -= SHR16(surround_trim, DB_SHIFT-8); + trim -= 2*SHR16(tf_estimate, 14-8); +#ifndef DISABLE_FLOAT_API + if (analysis->valid) + { + trim -= MAX16(-QCONST16(2.f, 8), MIN16(QCONST16(2.f, 8), + (opus_val16)(QCONST16(2.f, 8)*(analysis->tonality_slope+.05f)))); + } +#endif + +#ifdef OPUS_FIXED_POINT + trim_index = PSHR32(trim, 8); +#else + trim_index = (int)floor(.5f+trim); +#endif + if (trim_index<0) + trim_index = 0; + if (trim_index>10) + trim_index = 10; + /*printf("%d\n", trim_index);*/ +#ifdef FUZZING + trim_index = rand()%11; +#endif + return trim_index; +} + +static int stereo_analysis(const CELTMode *m, const celt_norm *X, + int LM, int N0) +{ + int i; + int thetas; + opus_val32 sumLR = EPSILON, sumMS = EPSILON; + + /* Use the L1 norm to model the entropy of the L/R signal vs the M/S signal */ + for (i=0;i<13;i++) + { + int j; + for (j=m->eBands[i]<<LM;j<m->eBands[i+1]<<LM;j++) + { + opus_val32 L, R, M, S; + /* We cast to 32-bit first because of the -32768 case */ + L = EXTEND32(X[j]); + R = EXTEND32(X[N0+j]); + M = ADD32(L, R); + S = SUB32(L, R); + sumLR = ADD32(sumLR, ADD32(ABS32(L), ABS32(R))); + sumMS = ADD32(sumMS, ADD32(ABS32(M), ABS32(S))); + } + } + sumMS = MULT16_32_Q15(QCONST16(0.707107f, 15), sumMS); + thetas = 13; + /* We don't need thetas for lower bands with LM<=1 */ + if (LM<=1) + thetas -= 8; + return MULT16_32_Q15((m->eBands[13]<<(LM+1))+thetas, 
sumMS) + > MULT16_32_Q15(m->eBands[13]<<(LM+1), sumLR); +} + +static opus_val16 dynalloc_analysis(const opus_val16 *bandLogE, const opus_val16 *bandLogE2, + int nbEBands, int start, int end, int C, int *offsets, int lsb_depth, const opus_int16 *logN, + int isTransient, int vbr, int constrained_vbr, const opus_int16 *eBands, int LM, + int effectiveBytes, opus_int32 *tot_boost_, int lfe, opus_val16 *surround_dynalloc) +{ + int i, c; + opus_int32 tot_boost=0; + opus_val16 maxDepth; + VARDECL(opus_val16, follower); + VARDECL(opus_val16, noise_floor); + SAVE_STACK; + ALLOC(follower, C*nbEBands, opus_val16); + ALLOC(noise_floor, C*nbEBands, opus_val16); + for (i=0;i<nbEBands;i++) + offsets[i] = 0; + /* Dynamic allocation code */ + maxDepth=-QCONST16(31.9f, DB_SHIFT); + for (i=0;i<end;i++) + { + /* Noise floor must take into account eMeans, the depth, the width of the bands + and the preemphasis filter (approx. square of bark band ID) */ + noise_floor[i] = MULT16_16(QCONST16(0.0625f, DB_SHIFT),logN[i]) + +QCONST16(.5f,DB_SHIFT)+SHL16(9-lsb_depth,DB_SHIFT)-SHL16(eMeans[i],6) + +MULT16_16(QCONST16(.0062,DB_SHIFT),(i+5)*(i+5)); + } + c=0;do + { + for (i=0;i<end;i++) + maxDepth = MAX16(maxDepth, bandLogE[c*nbEBands+i]-noise_floor[i]); + } while (++c<C); + /* Make sure that dynamic allocation can't make us bust the budget */ + if (effectiveBytes > 50 && LM>=1 && !lfe) + { + int last=0; + c=0;do + { + follower[c*nbEBands] = bandLogE2[c*nbEBands]; + for (i=1;i<end;i++) + { + /* The last band to be at least 3 dB higher than the previous one + is the last we'll consider. Otherwise, we run into problems on + bandlimited signals. 
*/ + if (bandLogE2[c*nbEBands+i] > bandLogE2[c*nbEBands+i-1]+QCONST16(.5f,DB_SHIFT)) + last=i; + follower[c*nbEBands+i] = MIN16(follower[c*nbEBands+i-1]+QCONST16(1.5f,DB_SHIFT), bandLogE2[c*nbEBands+i]); + } + for (i=last-1;i>=0;i--) + follower[c*nbEBands+i] = MIN16(follower[c*nbEBands+i], MIN16(follower[c*nbEBands+i+1]+QCONST16(2.f,DB_SHIFT), bandLogE2[c*nbEBands+i])); + for (i=0;i<end;i++) + follower[c*nbEBands+i] = MAX16(follower[c*nbEBands+i], noise_floor[i]); + } while (++c<C); + if (C==2) + { + for (i=start;i<end;i++) + { + /* Consider 24 dB "cross-talk" */ + follower[nbEBands+i] = MAX16(follower[nbEBands+i], follower[ i]-QCONST16(4.f,DB_SHIFT)); + follower[ i] = MAX16(follower[ i], follower[nbEBands+i]-QCONST16(4.f,DB_SHIFT)); + follower[i] = HALF16(MAX16(0, bandLogE[i]-follower[i]) + MAX16(0, bandLogE[nbEBands+i]-follower[nbEBands+i])); + } + } else { + for (i=start;i<end;i++) + { + follower[i] = MAX16(0, bandLogE[i]-follower[i]); + } + } + for (i=start;i<end;i++) + follower[i] = MAX16(follower[i], surround_dynalloc[i]); + /* For non-transient CBR/CVBR frames, halve the dynalloc contribution */ + if ((!vbr || constrained_vbr)&&!isTransient) + { + for (i=start;i<end;i++) + follower[i] = HALF16(follower[i]); + } + for (i=start;i<end;i++) + { + int width; + int boost; + int boost_bits; + + if (i<8) + follower[i] *= 2; + if (i>=12) + follower[i] = HALF16(follower[i]); + follower[i] = MIN16(follower[i], QCONST16(4, DB_SHIFT)); + + width = C*(eBands[i+1]-eBands[i])<<LM; + if (width<6) + { + boost = (int)SHR32(EXTEND32(follower[i]),DB_SHIFT); + boost_bits = boost*width<<BITRES; + } else if (width > 48) { + boost = (int)SHR32(EXTEND32(follower[i])*8,DB_SHIFT); + boost_bits = (boost*width<<BITRES)/8; + } else { + boost = (int)SHR32(EXTEND32(follower[i])*width/6,DB_SHIFT); + boost_bits = boost*6<<BITRES; + } + /* For CBR and non-transient CVBR frames, limit dynalloc to 1/4 of the bits */ + if ((!vbr || (constrained_vbr&&!isTransient)) + && 
(tot_boost+boost_bits)>>BITRES>>3 > effectiveBytes/4) + { + opus_int32 cap = ((effectiveBytes/4)<<BITRES<<3); + offsets[i] = cap-tot_boost; + tot_boost = cap; + break; + } else { + offsets[i] = boost; + tot_boost += boost_bits; + } + } + } + *tot_boost_ = tot_boost; + RESTORE_STACK; + return maxDepth; +} + + +static int run_prefilter(CELTEncoder *st, celt_sig *in, celt_sig *prefilter_mem, int CC, int N, + int prefilter_tapset, int *pitch, opus_val16 *gain, int *qgain, int enabled, int nbAvailableBytes) +{ + int c; + VARDECL(celt_sig, _pre); + celt_sig *pre[2]; + const CELTMode *mode; + int pitch_index; + opus_val16 gain1; + opus_val16 pf_threshold; + int pf_on; + int qg; + SAVE_STACK; + + mode = st->mode; + ALLOC(_pre, CC*(N+COMBFILTER_MAXPERIOD), celt_sig); + + pre[0] = _pre; + pre[1] = _pre + (N+COMBFILTER_MAXPERIOD); + + + c=0; do { + OPUS_COPY(pre[c], prefilter_mem+c*COMBFILTER_MAXPERIOD, COMBFILTER_MAXPERIOD); + OPUS_COPY(pre[c]+COMBFILTER_MAXPERIOD, in+c*(N+st->overlap)+st->overlap, N); + } while (++c<CC); + + if (enabled) + { + VARDECL(opus_val16, pitch_buf); + ALLOC(pitch_buf, (COMBFILTER_MAXPERIOD+N)>>1, opus_val16); + + pitch_downsample(pre, pitch_buf, COMBFILTER_MAXPERIOD+N, CC, st->arch); + /* Don't search for the fir last 1.5 octave of the range because + there's too many false-positives due to short-term correlation */ + pitch_search(pitch_buf+(COMBFILTER_MAXPERIOD>>1), pitch_buf, N, + COMBFILTER_MAXPERIOD-3*COMBFILTER_MINPERIOD, &pitch_index, + st->arch); + pitch_index = COMBFILTER_MAXPERIOD-pitch_index; + + gain1 = remove_doubling(pitch_buf, COMBFILTER_MAXPERIOD, COMBFILTER_MINPERIOD, + N, &pitch_index, st->prefilter_period, st->prefilter_gain); + if (pitch_index > COMBFILTER_MAXPERIOD-2) + pitch_index = COMBFILTER_MAXPERIOD-2; + gain1 = MULT16_16_Q15(QCONST16(.7f,15),gain1); + /*printf("%d %d %f %f\n", pitch_change, pitch_index, gain1, st->analysis.tonality);*/ + if (st->loss_rate>2) + gain1 = HALF32(gain1); + if (st->loss_rate>4) + gain1 = 
HALF32(gain1); + if (st->loss_rate>8) + gain1 = 0; + } else { + gain1 = 0; + pitch_index = COMBFILTER_MINPERIOD; + } + + /* Gain threshold for enabling the prefilter/postfilter */ + pf_threshold = QCONST16(.2f,15); + + /* Adjusting the threshold based on rate and continuity */ + if (abs(pitch_index-st->prefilter_period)*10>pitch_index) + pf_threshold += QCONST16(.2f,15); + if (nbAvailableBytes<25) + pf_threshold += QCONST16(.1f,15); + if (nbAvailableBytes<35) + pf_threshold += QCONST16(.1f,15); + if (st->prefilter_gain > QCONST16(.4f,15)) + pf_threshold -= QCONST16(.1f,15); + if (st->prefilter_gain > QCONST16(.55f,15)) + pf_threshold -= QCONST16(.1f,15); + + /* Hard threshold at 0.2 */ + pf_threshold = MAX16(pf_threshold, QCONST16(.2f,15)); + if (gain1<pf_threshold) + { + gain1 = 0; + pf_on = 0; + qg = 0; + } else { + /*This block is not gated by a total bits check only because + of the nbAvailableBytes check above.*/ + if (ABS16(gain1-st->prefilter_gain)<QCONST16(.1f,15)) + gain1=st->prefilter_gain; + +#ifdef OPUS_FIXED_POINT + qg = ((gain1+1536)>>10)/3-1; +#else + qg = (int)floor(.5f+gain1*32/3)-1; +#endif + qg = IMAX(0, IMIN(7, qg)); + gain1 = QCONST16(0.09375f,15)*(qg+1); + pf_on = 1; + } + /*printf("%d %f\n", pitch_index, gain1);*/ + + c=0; do { + int offset = mode->shortMdctSize-st->overlap; + st->prefilter_period=IMAX(st->prefilter_period, COMBFILTER_MINPERIOD); + OPUS_COPY(in+c*(N+st->overlap), st->in_mem+c*(st->overlap), st->overlap); + if (offset) + comb_filter(in+c*(N+st->overlap)+st->overlap, pre[c]+COMBFILTER_MAXPERIOD, + st->prefilter_period, st->prefilter_period, offset, -st->prefilter_gain, -st->prefilter_gain, + st->prefilter_tapset, st->prefilter_tapset, NULL, 0); + + comb_filter(in+c*(N+st->overlap)+st->overlap+offset, pre[c]+COMBFILTER_MAXPERIOD+offset, + st->prefilter_period, pitch_index, N-offset, -st->prefilter_gain, -gain1, + st->prefilter_tapset, prefilter_tapset, mode->window, st->overlap); + OPUS_COPY(st->in_mem+c*(st->overlap), 
in+c*(N+st->overlap)+N, st->overlap); + + if (N>COMBFILTER_MAXPERIOD) + { + OPUS_MOVE(prefilter_mem+c*COMBFILTER_MAXPERIOD, pre[c]+N, COMBFILTER_MAXPERIOD); + } else { + OPUS_MOVE(prefilter_mem+c*COMBFILTER_MAXPERIOD, prefilter_mem+c*COMBFILTER_MAXPERIOD+N, COMBFILTER_MAXPERIOD-N); + OPUS_MOVE(prefilter_mem+c*COMBFILTER_MAXPERIOD+COMBFILTER_MAXPERIOD-N, pre[c]+COMBFILTER_MAXPERIOD, N); + } + } while (++c<CC); + + RESTORE_STACK; + *gain = gain1; + *pitch = pitch_index; + *qgain = qg; + return pf_on; +} + +static int compute_vbr(const CELTMode *mode, AnalysisInfo *analysis, opus_int32 base_target, + int LM, opus_int32 bitrate, int lastCodedBands, int C, int intensity, + int constrained_vbr, opus_val16 stereo_saving, int tot_boost, + opus_val16 tf_estimate, int pitch_change, opus_val16 maxDepth, + int variable_duration, int lfe, int has_surround_mask, opus_val16 surround_masking, + opus_val16 temporal_vbr) +{ + /* The target rate in 8th bits per frame */ + opus_int32 target; + int coded_bins; + int coded_bands; + opus_val16 tf_calibration; + int nbEBands; + const opus_int16 *eBands; + + nbEBands = mode->nbEBands; + eBands = mode->eBands; + + coded_bands = lastCodedBands ? lastCodedBands : nbEBands; + coded_bins = eBands[coded_bands]<<LM; + if (C==2) + coded_bins += eBands[IMIN(intensity, coded_bands)]<<LM; + + target = base_target; + + /*printf("%f %f %f %f %d %d ", st->analysis.activity, st->analysis.tonality, tf_estimate, st->stereo_saving, tot_boost, coded_bands);*/ +#ifndef DISABLE_FLOAT_API + if (analysis->valid && analysis->activity<.4) + target -= (opus_int32)((coded_bins<<BITRES)*(.4f-analysis->activity)); +#endif + /* Stereo savings */ + if (C==2) + { + int coded_stereo_bands; + int coded_stereo_dof; + opus_val16 max_frac; + coded_stereo_bands = IMIN(intensity, coded_bands); + coded_stereo_dof = (eBands[coded_stereo_bands]<<LM)-coded_stereo_bands; + /* Maximum fraction of the bits we can save if the signal is mono. 
*/ + max_frac = DIV32_16(MULT16_16(QCONST16(0.8f, 15), coded_stereo_dof), coded_bins); + stereo_saving = MIN16(stereo_saving, QCONST16(1.f, 8)); + /*printf("%d %d %d ", coded_stereo_dof, coded_bins, tot_boost);*/ + target -= (opus_int32)MIN32(MULT16_32_Q15(max_frac,target), + SHR32(MULT16_16(stereo_saving-QCONST16(0.1f,8),(coded_stereo_dof<<BITRES)),8)); + } + /* Boost the rate according to dynalloc (minus the dynalloc average for calibration). */ + target += tot_boost-(16<<LM); + /* Apply transient boost, compensating for average boost. */ + tf_calibration = variable_duration==OPUS_FRAMESIZE_VARIABLE ? + QCONST16(0.02f,14) : QCONST16(0.04f,14); + target += (opus_int32)SHL32(MULT16_32_Q15(tf_estimate-tf_calibration, target),1); + +#ifndef DISABLE_FLOAT_API + /* Apply tonality boost */ + if (analysis->valid && !lfe) + { + opus_int32 tonal_target; + float tonal; + + /* Tonality boost (compensating for the average). */ + tonal = MAX16(0.f,analysis->tonality-.15f)-0.09f; + tonal_target = target + (opus_int32)((coded_bins<<BITRES)*1.2f*tonal); + if (pitch_change) + tonal_target += (opus_int32)((coded_bins<<BITRES)*.8f); + /*printf("%f %f ", analysis->tonality, tonal);*/ + target = tonal_target; + } +#endif + + if (has_surround_mask&&!lfe) + { + opus_int32 surround_target = target + (opus_int32)SHR32(MULT16_16(surround_masking,coded_bins<<BITRES), DB_SHIFT); + /*printf("%f %d %d %d %d %d %d ", surround_masking, coded_bins, st->end, st->intensity, surround_target, target, st->bitrate);*/ + target = IMAX(target/4, surround_target); + } + + { + opus_int32 floor_depth; + int bins; + bins = eBands[nbEBands-2]<<LM; + /*floor_depth = SHR32(MULT16_16((C*bins<<BITRES),celt_log2(SHL32(MAX16(1,sample_max),13))), DB_SHIFT);*/ + floor_depth = (opus_int32)SHR32(MULT16_16((C*bins<<BITRES),maxDepth), DB_SHIFT); + floor_depth = IMAX(floor_depth, target>>2); + target = IMIN(target, floor_depth); + /*printf("%f %d\n", maxDepth, floor_depth);*/ + } + + if ((!has_surround_mask||lfe) && 
(constrained_vbr || bitrate<64000)) + { + opus_val16 rate_factor; +#ifdef OPUS_FIXED_POINT + rate_factor = MAX16(0,(bitrate-32000)); +#else + rate_factor = MAX16(0,(1.f/32768)*(bitrate-32000)); +#endif + if (constrained_vbr) + rate_factor = MIN16(rate_factor, QCONST16(0.67f, 15)); + target = base_target + (opus_int32)MULT16_32_Q15(rate_factor, target-base_target); + + } + + if (!has_surround_mask && tf_estimate < QCONST16(.2f, 14)) + { + opus_val16 amount; + opus_val16 tvbr_factor; + amount = MULT16_16_Q15(QCONST16(.0000031f, 30), IMAX(0, IMIN(32000, 96000-bitrate))); + tvbr_factor = SHR32(MULT16_16(temporal_vbr, amount), DB_SHIFT); + target += (opus_int32)MULT16_32_Q15(tvbr_factor, target); + } + + /* Don't allow more than doubling the rate */ + target = IMIN(2*base_target, target); + + return target; +} +/* Encode one frame of frame_size samples per channel from pcm into compressed. + At most nbCompressedBytes bytes are produced, and never more than 1275. + When enc is NULL a local range coder is initialised over compressed; + otherwise encoding continues in the coder supplied by the caller. + Returns the packet size in bytes, OPUS_BAD_ARG when nbCompressedBytes<2, + pcm is NULL or the frame size (after scaling by st->upsample) matches no + mode->shortMdctSize<<LM, and OPUS_INTERNAL_ERROR when the range coder + reports an error. */ +int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes, ec_enc *enc) +{ + int i, c, N; + opus_int32 bits; + ec_enc _enc; + VARDECL(celt_sig, in); + VARDECL(celt_sig, freq); + VARDECL(celt_norm, X); + VARDECL(celt_ener, bandE); + VARDECL(opus_val16, bandLogE); + VARDECL(opus_val16, bandLogE2); + VARDECL(int, fine_quant); + VARDECL(opus_val16, error); + VARDECL(int, pulses); + VARDECL(int, cap); + VARDECL(int, offsets); + VARDECL(int, fine_priority); + VARDECL(int, tf_res); + VARDECL(unsigned char, collapse_masks); + celt_sig *prefilter_mem; + opus_val16 *oldBandE, *oldLogE, *oldLogE2; + int shortBlocks=0; + int isTransient=0; + const int CC = st->channels; + const int C = st->stream_channels; + int LM, M; + int tf_select; + int nbFilledBytes, nbAvailableBytes; + int effEnd; + int codedBands; + int tf_sum; + int alloc_trim; + int pitch_index=COMBFILTER_MINPERIOD; + opus_val16 gain1 = 0; + int dual_stereo=0; + int effectiveBytes; + int dynalloc_logp; + opus_int32 vbr_rate; + opus_int32 total_bits; + opus_int32 total_boost; + opus_int32 balance; + opus_int32 tell; + int 
prefilter_tapset=0; + int pf_on; + int anti_collapse_rsv; + int anti_collapse_on=0; + int silence=0; + int tf_chan = 0; + opus_val16 tf_estimate; + int pitch_change=0; + opus_int32 tot_boost; + opus_val32 sample_max; + opus_val16 maxDepth; + const OpusCustomMode *mode; + int nbEBands; + int overlap; + const opus_int16 *eBands; + int secondMdct; + int signalBandwidth; + int transient_got_disabled=0; + opus_val16 surround_masking=0; + opus_val16 temporal_vbr=0; + opus_val16 surround_trim = 0; + opus_int32 equiv_rate = 510000; + VARDECL(opus_val16, surround_dynalloc); + ALLOC_STACK; + + mode = st->mode; + nbEBands = mode->nbEBands; + overlap = mode->overlap; + eBands = mode->eBands; + tf_estimate = 0; + if (nbCompressedBytes<2 || pcm==NULL) + { + RESTORE_STACK; + return OPUS_BAD_ARG; + } + /* Find the LM for which shortMdctSize<<LM equals the upsampled frame size; reject the frame if there is none */ + frame_size *= st->upsample; + for (LM=0;LM<=mode->maxLM;LM++) + if (mode->shortMdctSize<<LM==frame_size) + break; + if (LM>mode->maxLM) + { + RESTORE_STACK; + return OPUS_BAD_ARG; + } + M=1<<LM; + N = M*mode->shortMdctSize; + /* prefilter_mem starts CC*overlap samples into in_mem; oldBandE starts CC*COMBFILTER_MAXPERIOD samples after that, followed by oldLogE and oldLogE2 at steps of CC*nbEBands entries */ + prefilter_mem = st->in_mem+CC*(st->overlap); + oldBandE = (opus_val16*)(st->in_mem+CC*(st->overlap+COMBFILTER_MAXPERIOD)); + oldLogE = oldBandE + CC*nbEBands; + oldLogE2 = oldLogE + CC*nbEBands; + + if (enc==NULL) + { + tell=1; + nbFilledBytes=0; + } else { + tell=ec_tell(enc); + nbFilledBytes=(tell+4)>>3; + } + +#ifdef CUSTOM_MODES + if (st->signalling && enc==NULL) + { + int tmp = (mode->effEBands-st->end)>>1; + st->end = IMAX(1, mode->effEBands-tmp); + compressed[0] = tmp<<5; + compressed[0] |= LM<<3; + compressed[0] |= (C==2)<<2; + /* Convert "standard mode" to Opus header */ + if (mode->Fs==48000 && mode->shortMdctSize==120) + { + int c0 = toOpus(compressed[0]); + if (c0<0) + { + RESTORE_STACK; + return OPUS_BAD_ARG; + } + compressed[0] = c0; + } + compressed++; + nbCompressedBytes--; + } +#else + celt_assert(st->signalling==0); +#endif + + /* Can't produce more than 1275 output bytes */ + nbCompressedBytes = IMIN(nbCompressedBytes,1275); + 
nbAvailableBytes = nbCompressedBytes - nbFilledBytes; + + if (st->vbr && st->bitrate!=OPUS_BITRATE_MAX) + { + opus_int32 den=mode->Fs>>BITRES; + vbr_rate=(st->bitrate*frame_size+(den>>1))/den; +#ifdef CUSTOM_MODES + if (st->signalling) + vbr_rate -= 8<<BITRES; +#endif + effectiveBytes = vbr_rate>>(3+BITRES); + } else { + opus_int32 tmp; + vbr_rate = 0; + tmp = st->bitrate*frame_size; + if (tell>1) + tmp += tell; + if (st->bitrate!=OPUS_BITRATE_MAX) + nbCompressedBytes = IMAX(2, IMIN(nbCompressedBytes, + (tmp+4*mode->Fs)/(8*mode->Fs)-!!st->signalling)); + effectiveBytes = nbCompressedBytes; + } + if (st->bitrate != OPUS_BITRATE_MAX) + equiv_rate = st->bitrate - (40*C+20)*((400>>LM) - 50); + /* No coder was supplied: encode with the local _enc, writing into compressed */ + if (enc==NULL) + { + ec_enc_init(&_enc, compressed, nbCompressedBytes); + enc = &_enc; + } + + if (vbr_rate>0) + { + /* Computes the max bit-rate allowed in VBR mode to avoid violating the + target rate and buffering. + We must do this up front so that bust-prevention logic triggers + correctly if we don't have enough bits. */ + if (st->constrained_vbr) + { + opus_int32 vbr_bound; + opus_int32 max_allowed; + /* We could use any multiple of vbr_rate as bound (depending on the + delay). + This is clamped to ensure we use at least two bytes if the encoder + was entirely empty, but to allow 0 in hybrid mode. 
*/ + vbr_bound = vbr_rate; + max_allowed = IMIN(IMAX(tell==1?2:0, + (vbr_rate+vbr_bound-st->vbr_reservoir)>>(BITRES+3)), + nbAvailableBytes); + if(max_allowed < nbAvailableBytes) + { + nbCompressedBytes = nbFilledBytes+max_allowed; + nbAvailableBytes = max_allowed; + ec_enc_shrink(enc, nbCompressedBytes); + } + } + } + total_bits = nbCompressedBytes*8; + + effEnd = st->end; + if (effEnd > mode->effEBands) + effEnd = mode->effEBands; + + ALLOC(in, CC*(N+st->overlap), celt_sig); + + sample_max=MAX32(st->overlap_max, celt_maxabs16(pcm, C*(N-overlap)/st->upsample)); + st->overlap_max=celt_maxabs16(pcm+C*(N-overlap)/st->upsample, C*overlap/st->upsample); + sample_max=MAX32(sample_max, st->overlap_max); +#ifdef OPUS_FIXED_POINT + silence = (sample_max==0); +#else + silence = (sample_max <= (opus_val16)1/(1<<st->lsb_depth)); +#endif +#ifdef FUZZING + if ((rand()&0x3F)==0) + silence = 1; +#endif + if (tell==1) + ec_enc_bit_logp(enc, silence, 15); + else + silence=0; + if (silence) + { + /*In VBR mode there is no need to send more than the minimum. 
*/ + if (vbr_rate>0) + { + effectiveBytes=nbCompressedBytes=IMIN(nbCompressedBytes, nbFilledBytes+2); + total_bits=nbCompressedBytes*8; + nbAvailableBytes=2; + ec_enc_shrink(enc, nbCompressedBytes); + } + /* Pretend we've filled all the remaining bits with zeros + (that's what the initialiser did anyway) */ + tell = nbCompressedBytes*8; + enc->nbits_total+=tell-ec_tell(enc); + } + c=0; do { + celt_preemphasis(pcm+c, in+c*(N+st->overlap)+st->overlap, N, CC, st->upsample, + mode->preemph, st->preemph_memE+c, st->clip); + } while (++c<CC); + + + + /* Find pitch period and gain */ + { + int enabled; + int qg; + enabled = ((st->lfe&&nbAvailableBytes>3) || nbAvailableBytes>12*C) && st->start==0 && !silence && !st->disable_pf + && st->complexity >= 5 && !(st->consec_transient && LM!=3 && st->variable_duration==OPUS_FRAMESIZE_VARIABLE); + + prefilter_tapset = st->tapset_decision; + pf_on = run_prefilter(st, in, prefilter_mem, CC, N, prefilter_tapset, &pitch_index, &gain1, &qg, enabled, nbAvailableBytes); + if ((gain1 > QCONST16(.4f,15) || st->prefilter_gain > QCONST16(.4f,15)) && (!st->analysis.valid || st->analysis.tonality > .3) + && (pitch_index > 1.26*st->prefilter_period || pitch_index < .79*st->prefilter_period)) + pitch_change = 1; + if (pf_on==0) + { + if(st->start==0 && tell+16<=total_bits) + ec_enc_bit_logp(enc, 0, 1); + } else { + /*This block is not gated by a total bits check only because + of the nbAvailableBytes check above.*/ + int octave; + ec_enc_bit_logp(enc, 1, 1); + pitch_index += 1; + octave = EC_ILOG(pitch_index)-5; + ec_enc_uint(enc, octave, 6); + ec_enc_bits(enc, pitch_index-(16<<octave), 4+octave); + pitch_index -= 1; + ec_enc_bits(enc, qg, 3); + ec_enc_icdf(enc, prefilter_tapset, tapset_icdf, 2); + } + } + + isTransient = 0; + shortBlocks = 0; + if (st->complexity >= 1 && !st->lfe) + { + isTransient = transient_analysis(in, N+st->overlap, CC, + &tf_estimate, &tf_chan); + } + if (LM>0 && ec_tell(enc)+3<=total_bits) + { + if (isTransient) + 
shortBlocks = M; + } else { + isTransient = 0; + transient_got_disabled=1; + } + + ALLOC(freq, CC*N, celt_sig); /**< Interleaved signal MDCTs */ + ALLOC(bandE,nbEBands*CC, celt_ener); + ALLOC(bandLogE,nbEBands*CC, opus_val16); + + secondMdct = shortBlocks && st->complexity>=8; + ALLOC(bandLogE2, C*nbEBands, opus_val16); + if (secondMdct) + { + compute_mdcts(mode, 0, in, freq, C, CC, LM, st->upsample); + compute_band_energies(mode, freq, bandE, effEnd, C, M); + amp2Log2(mode, effEnd, st->end, bandE, bandLogE2, C); + for (i=0;i<C*nbEBands;i++) + bandLogE2[i] += HALF16(SHL16(LM, DB_SHIFT)); + } + + compute_mdcts(mode, shortBlocks, in, freq, C, CC, LM, st->upsample); + if (CC==2&&C==1) + tf_chan = 0; + compute_band_energies(mode, freq, bandE, effEnd, C, M); + + if (st->lfe) + { + for (i=2;i<st->end;i++) + { + bandE[i] = IMIN(bandE[i], MULT16_32_Q15(QCONST16(1e-4f,15),bandE[0])); + bandE[i] = MAX32(bandE[i], EPSILON); + } + } + amp2Log2(mode, effEnd, st->end, bandE, bandLogE, C); + + ALLOC(surround_dynalloc, C*nbEBands, opus_val16); + for(i=0;i<st->end;i++) + surround_dynalloc[i] = 0; + /* This computes how much masking takes place between surround channels */ + if (st->start==0&&st->energy_mask&&!st->lfe) + { + int mask_end; + int midband; + int count_dynalloc; + opus_val32 mask_avg=0; + opus_val32 diff=0; + int count=0; + mask_end = IMAX(2,st->lastCodedBands); + for (c=0;c<C;c++) + { + for(i=0;i<mask_end;i++) + { + opus_val16 mask; + mask = MAX16(MIN16(st->energy_mask[nbEBands*c+i], + QCONST16(.25f, DB_SHIFT)), -QCONST16(2.0f, DB_SHIFT)); + if (mask > 0) + mask = HALF16(mask); + mask_avg += MULT16_16(mask, eBands[i+1]-eBands[i]); + count += eBands[i+1]-eBands[i]; + diff += MULT16_16(mask, 1+2*i-mask_end); + } + } + mask_avg = DIV32_16(mask_avg,count); + mask_avg += QCONST16(.2f, DB_SHIFT); + diff = diff*6/(C*(mask_end-1)*(mask_end+1)*mask_end); + /* Again, being conservative */ + diff = HALF32(diff); + diff = MAX32(MIN32(diff, QCONST32(.031f, DB_SHIFT)), 
-QCONST32(.031f, DB_SHIFT)); + /* Find the band that's in the middle of the coded spectrum */ + for (midband=0;eBands[midband+1] < eBands[mask_end]/2;midband++); + count_dynalloc=0; + for(i=0;i<mask_end;i++) + { + opus_val32 lin; + opus_val16 unmask; + lin = mask_avg + diff*(i-midband); + if (C==2) + unmask = MAX16(st->energy_mask[i], st->energy_mask[nbEBands+i]); + else + unmask = st->energy_mask[i]; + unmask = MIN16(unmask, QCONST16(.0f, DB_SHIFT)); + unmask -= lin; + if (unmask > QCONST16(.25f, DB_SHIFT)) + { + surround_dynalloc[i] = unmask - QCONST16(.25f, DB_SHIFT); + count_dynalloc++; + } + } + if (count_dynalloc>=3) + { + /* If we need dynalloc in many bands, it's probably because our + initial masking rate was too low. */ + mask_avg += QCONST16(.25f, DB_SHIFT); + if (mask_avg>0) + { + /* Something went really wrong in the original calculations, + disabling masking. */ + mask_avg = 0; + diff = 0; + for(i=0;i<mask_end;i++) + surround_dynalloc[i] = 0; + } else { + for(i=0;i<mask_end;i++) + surround_dynalloc[i] = MAX16(0, surround_dynalloc[i]-QCONST16(.25f, DB_SHIFT)); + } + } + mask_avg += QCONST16(.2f, DB_SHIFT); + /* Convert to 1/64th units used for the trim */ + surround_trim = 64*diff; + /*printf("%d %d ", mask_avg, surround_trim);*/ + surround_masking = mask_avg; + } + /* Temporal VBR (but not for LFE) */ + if (!st->lfe) + { + opus_val16 follow=-QCONST16(10.0f,DB_SHIFT); + opus_val32 frame_avg=0; + opus_val16 offset = shortBlocks?HALF16(SHL16(LM, DB_SHIFT)):0; + for(i=st->start;i<st->end;i++) + { + follow = MAX16(follow-QCONST16(1.f, DB_SHIFT), bandLogE[i]-offset); + if (C==2) + follow = MAX16(follow, bandLogE[i+nbEBands]-offset); + frame_avg += follow; + } + frame_avg /= (st->end-st->start); + temporal_vbr = SUB16(frame_avg,st->spec_avg); + temporal_vbr = MIN16(QCONST16(3.f, DB_SHIFT), MAX16(-QCONST16(1.5f, DB_SHIFT), temporal_vbr)); + st->spec_avg += MULT16_16_Q15(QCONST16(.02f, 15), temporal_vbr); + } + /*for (i=0;i<21;i++) + printf("%f ", 
bandLogE[i]); + printf("\n");*/ + /* No second MDCT was computed, so bandLogE2 is simply a copy of bandLogE */ + if (!secondMdct) + { + for (i=0;i<C*nbEBands;i++) + bandLogE2[i] = bandLogE[i]; + } + + /* Last chance to catch any transient we might have missed in the + time-domain analysis */ + if (LM>0 && ec_tell(enc)+3<=total_bits && !isTransient && st->complexity>=5 && !st->lfe) + { + if (patch_transient_decision(bandLogE, oldBandE, nbEBands, st->end, C)) + { + isTransient = 1; + shortBlocks = M; + compute_mdcts(mode, shortBlocks, in, freq, C, CC, LM, st->upsample); + compute_band_energies(mode, freq, bandE, effEnd, C, M); + amp2Log2(mode, effEnd, st->end, bandE, bandLogE, C); + /* Compensate for the scaling of short vs long mdcts */ + for (i=0;i<C*nbEBands;i++) + bandLogE2[i] += HALF16(SHL16(LM, DB_SHIFT)); + tf_estimate = QCONST16(.2f,14); + } + } + + if (LM>0 && ec_tell(enc)+3<=total_bits) + ec_enc_bit_logp(enc, isTransient, 3); + + ALLOC(X, C*N, celt_norm); /**< Interleaved normalised MDCTs */ + + /* Band normalisation */ + normalise_bands(mode, freq, X, bandE, effEnd, C, M); + + ALLOC(tf_res, nbEBands, int); + /* Disable variable tf resolution for hybrid and at very low bitrate */ + if (effectiveBytes>=15*C && st->start==0 && st->complexity>=2 && !st->lfe) + { + int lambda; + if (effectiveBytes<40) + lambda = 12; + else if (effectiveBytes<60) + lambda = 6; + else if (effectiveBytes<100) + lambda = 4; + else + lambda = 3; + lambda*=2; + tf_select = tf_analysis(mode, effEnd, isTransient, tf_res, lambda, X, N, LM, &tf_sum, tf_estimate, tf_chan); + for (i=effEnd;i<st->end;i++) + tf_res[i] = tf_res[effEnd-1]; + } else { + tf_sum = 0; + for (i=0;i<st->end;i++) + tf_res[i] = isTransient; + tf_select=0; + } + + ALLOC(error, C*nbEBands, opus_val16); + quant_coarse_energy(mode, st->start, st->end, effEnd, bandLogE, + oldBandE, total_bits, error, enc, + C, LM, nbAvailableBytes, st->force_intra, + &st->delayedIntra, st->complexity >= 4, st->loss_rate, st->lfe); + + tf_encode(st->start, st->end, isTransient, tf_res, LM, tf_select, enc); + + if 
(ec_tell(enc)+4<=total_bits) + { + if (st->lfe) + { + st->tapset_decision = 0; + st->spread_decision = SPREAD_NORMAL; + } else if (shortBlocks || st->complexity < 3 || nbAvailableBytes < 10*C || st->start != 0) + { + if (st->complexity == 0) + st->spread_decision = SPREAD_NONE; + else + st->spread_decision = SPREAD_NORMAL; + } else { + /* Disable new spreading+tapset estimator until we can show it works + better than the old one. So far it seems like spreading_decision() + works best. */ +#if 0 + if (st->analysis.valid) + { + static const opus_val16 spread_thresholds[3] = {-QCONST16(.6f, 15), -QCONST16(.2f, 15), -QCONST16(.07f, 15)}; + static const opus_val16 spread_histeresis[3] = {QCONST16(.15f, 15), QCONST16(.07f, 15), QCONST16(.02f, 15)}; + static const opus_val16 tapset_thresholds[2] = {QCONST16(.0f, 15), QCONST16(.15f, 15)}; + static const opus_val16 tapset_histeresis[2] = {QCONST16(.1f, 15), QCONST16(.05f, 15)}; + st->spread_decision = hysteresis_decision(-st->analysis.tonality, spread_thresholds, spread_histeresis, 3, st->spread_decision); + st->tapset_decision = hysteresis_decision(st->analysis.tonality_slope, tapset_thresholds, tapset_histeresis, 2, st->tapset_decision); + } else +#endif + { + st->spread_decision = spreading_decision(mode, X, + &st->tonal_average, st->spread_decision, &st->hf_average, + &st->tapset_decision, pf_on&&!shortBlocks, effEnd, C, M); + } + /*printf("%d %d\n", st->tapset_decision, st->spread_decision);*/ + /*printf("%f %d %f %d\n\n", st->analysis.tonality, st->spread_decision, st->analysis.tonality_slope, st->tapset_decision);*/ + } + ec_enc_icdf(enc, st->spread_decision, spread_icdf, 5); + } + + ALLOC(offsets, nbEBands, int); + + maxDepth = dynalloc_analysis(bandLogE, bandLogE2, nbEBands, st->start, st->end, C, offsets, + st->lsb_depth, mode->logN, isTransient, st->vbr, st->constrained_vbr, + eBands, LM, effectiveBytes, &tot_boost, st->lfe, surround_dynalloc); + /* For LFE, everything interesting is in the first band */ + if 
(st->lfe) + offsets[0] = IMIN(8, effectiveBytes/3); + ALLOC(cap, nbEBands, int); + init_caps(mode,cap,LM,C); + + dynalloc_logp = 6; + total_bits<<=BITRES; + total_boost = 0; + tell = ec_tell_frac(enc); + for (i=st->start;i<st->end;i++) + { + int width, quanta; + int dynalloc_loop_logp; + int boost; + int j; + width = C*(eBands[i+1]-eBands[i])<<LM; + /* quanta is 6 bits, but no more than 1 bit/sample + and no less than 1/8 bit/sample */ + quanta = IMIN(width<<BITRES, IMAX(6<<BITRES, width)); + dynalloc_loop_logp = dynalloc_logp; + boost = 0; + for (j = 0; tell+(dynalloc_loop_logp<<BITRES) < total_bits-total_boost + && boost < cap[i]; j++) + { + int flag; + flag = j<offsets[i]; + ec_enc_bit_logp(enc, flag, dynalloc_loop_logp); + tell = ec_tell_frac(enc); + if (!flag) + break; + boost += quanta; + total_boost += quanta; + dynalloc_loop_logp = 1; + } + /* Making dynalloc more likely */ + if (j) + dynalloc_logp = IMAX(2, dynalloc_logp-1); + offsets[i] = boost; + } + + if (C==2) + { + static const opus_val16 intensity_thresholds[21]= + /* 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 off*/ + { 1, 2, 3, 4, 5, 6, 7, 8,16,24,36,44,50,56,62,67,72,79,88,106,134}; + static const opus_val16 intensity_histeresis[21]= + { 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 4, 5, 6, 8, 8}; + + /* Always use MS for 2.5 ms frames until we can do a better analysis */ + if (LM!=0) + dual_stereo = stereo_analysis(mode, X, LM, N); + + st->intensity = hysteresis_decision((opus_val16)(equiv_rate/1000), + intensity_thresholds, intensity_histeresis, 21, st->intensity); + st->intensity = IMIN(st->end,IMAX(st->start, st->intensity)); + } + + alloc_trim = 5; + if (tell+(6<<BITRES) <= total_bits - total_boost) + { + if (st->lfe) + alloc_trim = 5; + else + alloc_trim = alloc_trim_analysis(mode, X, bandLogE, + st->end, LM, C, N, &st->analysis, &st->stereo_saving, tf_estimate, st->intensity, surround_trim); + ec_enc_icdf(enc, alloc_trim, trim_icdf, 7); + tell = ec_tell_frac(enc); + } + + /* 
Variable bitrate */ + if (vbr_rate>0) + { + opus_val16 alpha; + opus_int32 delta; + /* The target rate in 8th bits per frame */ + opus_int32 target, base_target; + opus_int32 min_allowed; + int lm_diff = mode->maxLM - LM; + + /* Don't attempt to use more than 510 kb/s, even for frames smaller than 20 ms. + The CELT allocator will just not be able to use more than that anyway. */ + nbCompressedBytes = IMIN(nbCompressedBytes,1275>>(3-LM)); + base_target = vbr_rate - ((40*C+20)<<BITRES); + + if (st->constrained_vbr) + base_target += (st->vbr_offset>>lm_diff); + + target = compute_vbr(mode, &st->analysis, base_target, LM, equiv_rate, + st->lastCodedBands, C, st->intensity, st->constrained_vbr, + st->stereo_saving, tot_boost, tf_estimate, pitch_change, maxDepth, + st->variable_duration, st->lfe, st->energy_mask!=NULL, surround_masking, + temporal_vbr); + + /* The current offset is removed from the target and the space used + so far is added*/ + target=target+tell; + /* In VBR mode the frame size must not be reduced so much that it would + result in the encoder running out of bits. + The margin of 2 bytes ensures that none of the bust-prevention logic + in the decoder will have triggered so far. */ + min_allowed = ((tell+total_boost+(1<<(BITRES+3))-1)>>(BITRES+3)) + 2 - nbFilledBytes; + + nbAvailableBytes = (target+(1<<(BITRES+2)))>>(BITRES+3); + nbAvailableBytes = IMAX(min_allowed,nbAvailableBytes); + nbAvailableBytes = IMIN(nbCompressedBytes,nbAvailableBytes+nbFilledBytes) - nbFilledBytes; + + /* By how much did we "miss" the target on that frame */ + delta = target - vbr_rate; + + target=nbAvailableBytes<<(BITRES+3); + + /*If the frame is silent we don't adjust our drift, otherwise + the encoder will shoot to very high rates after hitting a + span of silence, but we do allow the bitres to refill. + This means that we'll undershoot our target in CVBR/VBR modes + on files with lots of silence. 
*/ + if(silence) + { + nbAvailableBytes = 2; + target = 2*8<<BITRES; + delta = 0; + } + + if (st->vbr_count < 970) + { + st->vbr_count++; + alpha = celt_rcp(SHL32(EXTEND32(st->vbr_count+20),16)); + } else + alpha = QCONST16(.001f,15); + /* How many bits have we used in excess of what we're allowed */ + if (st->constrained_vbr) + st->vbr_reservoir += target - vbr_rate; + /*printf ("%d\n", st->vbr_reservoir);*/ + + /* Compute the offset we need to apply in order to reach the target */ + if (st->constrained_vbr) + { + st->vbr_drift += (opus_int32)MULT16_32_Q15(alpha,(delta*(1<<lm_diff))-st->vbr_offset-st->vbr_drift); + st->vbr_offset = -st->vbr_drift; + } + /*printf ("%d\n", st->vbr_drift);*/ + + if (st->constrained_vbr && st->vbr_reservoir < 0) + { + /* We're under the min value -- increase rate */ + int adjust = (-st->vbr_reservoir)/(8<<BITRES); + /* Unless we're just coding silence */ + nbAvailableBytes += silence?0:adjust; + st->vbr_reservoir = 0; + /*printf ("+%d\n", adjust);*/ + } + nbCompressedBytes = IMIN(nbCompressedBytes,nbAvailableBytes+nbFilledBytes); + /*printf("%d\n", nbCompressedBytes*50*8);*/ + /* This moves the raw bits to take into account the new compressed size */ + ec_enc_shrink(enc, nbCompressedBytes); + } + + /* Bit allocation */ + ALLOC(fine_quant, nbEBands, int); + ALLOC(pulses, nbEBands, int); + ALLOC(fine_priority, nbEBands, int); + + /* bits = packet size - where we are - safety*/ + bits = (((opus_int32)nbCompressedBytes*8)<<BITRES) - ec_tell_frac(enc) - 1; + anti_collapse_rsv = isTransient&&LM>=2&&bits>=((LM+2)<<BITRES) ? 
(1<<BITRES) : 0; + bits -= anti_collapse_rsv; + signalBandwidth = st->end-1; +#ifndef DISABLE_FLOAT_API + if (st->analysis.valid) + { + int min_bandwidth; + if (equiv_rate < (opus_int32)32000*C) + min_bandwidth = 13; + else if (equiv_rate < (opus_int32)48000*C) + min_bandwidth = 16; + else if (equiv_rate < (opus_int32)60000*C) + min_bandwidth = 18; + else if (equiv_rate < (opus_int32)80000*C) + min_bandwidth = 19; + else + min_bandwidth = 20; + signalBandwidth = IMAX(st->analysis.bandwidth, min_bandwidth); + } +#endif + if (st->lfe) + signalBandwidth = 1; + codedBands = compute_allocation(mode, st->start, st->end, offsets, cap, + alloc_trim, &st->intensity, &dual_stereo, bits, &balance, pulses, + fine_quant, fine_priority, C, LM, enc, 1, st->lastCodedBands, signalBandwidth); + if (st->lastCodedBands) + st->lastCodedBands = IMIN(st->lastCodedBands+1,IMAX(st->lastCodedBands-1,codedBands)); + else + st->lastCodedBands = codedBands; + + quant_fine_energy(mode, st->start, st->end, oldBandE, error, fine_quant, enc, C); + + /* Residual quantisation */ + ALLOC(collapse_masks, C*nbEBands, unsigned char); + quant_all_bands(1, mode, st->start, st->end, X, C==2 ? 
X+N : NULL, collapse_masks, + bandE, pulses, shortBlocks, st->spread_decision, dual_stereo, st->intensity, tf_res, + nbCompressedBytes*(8<<BITRES)-anti_collapse_rsv, balance, enc, LM, codedBands, &st->rng); + + if (anti_collapse_rsv > 0) + { + anti_collapse_on = st->consec_transient<2; +#ifdef FUZZING + anti_collapse_on = rand()&0x1; +#endif + ec_enc_bits(enc, anti_collapse_on, 1); + } + quant_energy_finalise(mode, st->start, st->end, oldBandE, error, fine_quant, fine_priority, nbCompressedBytes*8-ec_tell(enc), enc, C); + + if (silence) + { + for (i=0;i<C*nbEBands;i++) + oldBandE[i] = -QCONST16(28.f,DB_SHIFT); + } + +#ifdef RESYNTH + /* Re-synthesis of the coded audio if required */ + { + celt_sig *out_mem[2]; + + if (anti_collapse_on) + { + anti_collapse(mode, X, collapse_masks, LM, C, N, + st->start, st->end, oldBandE, oldLogE, oldLogE2, pulses, st->rng); + } + + if (silence) + { + for (i=0;i<C*N;i++) + freq[i] = 0; + } else { + /* Synthesis */ + denormalise_bands(mode, X, freq, oldBandE, st->start, effEnd, C, M); + } + + c=0; do { + OPUS_MOVE(st->syn_mem[c], st->syn_mem[c]+N, 2*MAX_PERIOD-N+overlap/2); + } while (++c<CC); + + if (CC==2&&C==1) + { + for (i=0;i<N;i++) + freq[N+i] = freq[i]; + } + + c=0; do { + out_mem[c] = st->syn_mem[c]+2*MAX_PERIOD-N; + } while (++c<CC); + + compute_inv_mdcts(mode, shortBlocks, freq, out_mem, CC, LM); + + c=0; do { + st->prefilter_period=IMAX(st->prefilter_period, COMBFILTER_MINPERIOD); + st->prefilter_period_old=IMAX(st->prefilter_period_old, COMBFILTER_MINPERIOD); + comb_filter(out_mem[c], out_mem[c], st->prefilter_period_old, st->prefilter_period, mode->shortMdctSize, + st->prefilter_gain_old, st->prefilter_gain, st->prefilter_tapset_old, st->prefilter_tapset, + mode->window, st->overlap); + if (LM!=0) + comb_filter(out_mem[c]+mode->shortMdctSize, out_mem[c]+mode->shortMdctSize, st->prefilter_period, pitch_index, N-mode->shortMdctSize, + st->prefilter_gain, gain1, st->prefilter_tapset, prefilter_tapset, + mode->window, 
overlap); + } while (++c<CC); + + /* We reuse freq[] as scratch space for the de-emphasis */ + deemphasis(out_mem, (opus_val16*)pcm, N, CC, st->upsample, mode->preemph, st->preemph_memD, freq); + st->prefilter_period_old = st->prefilter_period; + st->prefilter_gain_old = st->prefilter_gain; + st->prefilter_tapset_old = st->prefilter_tapset; + } +#endif + /* Store this frame's prefilter parameters in the encoder state */ + st->prefilter_period = pitch_index; + st->prefilter_gain = gain1; + st->prefilter_tapset = prefilter_tapset; +#ifdef RESYNTH + if (LM!=0) + { + st->prefilter_period_old = st->prefilter_period; + st->prefilter_gain_old = st->prefilter_gain; + st->prefilter_tapset_old = st->prefilter_tapset; + } +#endif + /* One coded channel but two encoder channels: copy the band energies to the second channel */ + if (CC==2&&C==1) { + for (i=0;i<nbEBands;i++) + oldBandE[nbEBands+i]=oldBandE[i]; + } + /* Update the energy history: shift it on non-transient frames, otherwise keep the per-band minimum in oldLogE */ + if (!isTransient) + { + for (i=0;i<CC*nbEBands;i++) + oldLogE2[i] = oldLogE[i]; + for (i=0;i<CC*nbEBands;i++) + oldLogE[i] = oldBandE[i]; + } else { + for (i=0;i<CC*nbEBands;i++) + oldLogE[i] = MIN16(oldLogE[i], oldBandE[i]); + } + /* In case start or end were to change */ + c=0; do + { + for (i=0;i<st->start;i++) + { + oldBandE[c*nbEBands+i]=0; + oldLogE[c*nbEBands+i]=oldLogE2[c*nbEBands+i]=-QCONST16(28.f,DB_SHIFT); + } + for (i=st->end;i<nbEBands;i++) + { + oldBandE[c*nbEBands+i]=0; + oldLogE[c*nbEBands+i]=oldLogE2[c*nbEBands+i]=-QCONST16(28.f,DB_SHIFT); + } + } while (++c<CC); + + if (isTransient || transient_got_disabled) + st->consec_transient++; + else + st->consec_transient=0; + st->rng = enc->rng; + + /* If there's any room left (can only happen for very high rates), + it's already filled with zeros */ + ec_enc_done(enc); + +#ifdef CUSTOM_MODES + if (st->signalling) + nbCompressedBytes++; +#endif + + RESTORE_STACK; + if (ec_get_error(enc)) + return OPUS_INTERNAL_ERROR; + else + return nbCompressedBytes; +} + + +#ifdef CUSTOM_MODES + +#ifdef OPUS_FIXED_POINT +int opus_custom_encode(CELTEncoder * OPUS_RESTRICT st, const opus_int16 * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes) +{ + return 
celt_encode_with_ec(st, pcm, frame_size, compressed, nbCompressedBytes, NULL); +} + +#ifndef DISABLE_FLOAT_API +/* Float entry point of a fixed-point build: converts the C*N input samples + to 16 bits with FLOAT2INT16 and encodes them. Returns OPUS_BAD_ARG when + pcm is NULL, otherwise the result of celt_encode_with_ec(). */ +int opus_custom_encode_float(CELTEncoder * OPUS_RESTRICT st, const float * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes) +{ + int j, ret, C, N; + VARDECL(opus_int16, in); + ALLOC_STACK; + + if (pcm==NULL) + return OPUS_BAD_ARG; + + C = st->channels; + N = frame_size; + ALLOC(in, C*N, opus_int16); + + for (j=0;j<C*N;j++) + in[j] = FLOAT2INT16(pcm[j]); + + ret=celt_encode_with_ec(st,in,frame_size,compressed,nbCompressedBytes, NULL); +#ifdef RESYNTH + for (j=0;j<C*N;j++) + ((float*)pcm)[j]=in[j]*(1.f/32768.f); +#endif + RESTORE_STACK; + return ret; +} +#endif /* DISABLE_FLOAT_API */ +#else + +/* 16-bit entry point of a float build: converts the C*N input samples with + SCALEOUT and encodes them. Returns OPUS_BAD_ARG when pcm is NULL, + otherwise the result of celt_encode_with_ec(). */ +int opus_custom_encode(CELTEncoder * OPUS_RESTRICT st, const opus_int16 * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes) +{ + int j, ret, C, N; + VARDECL(celt_sig, in); + ALLOC_STACK; + + if (pcm==NULL) + return OPUS_BAD_ARG; + + C=st->channels; + N=frame_size; + ALLOC(in, C*N, celt_sig); + for (j=0;j<C*N;j++) { + in[j] = SCALEOUT(pcm[j]); + } + + ret = celt_encode_with_ec(st,in,frame_size,compressed,nbCompressedBytes, NULL); +#ifdef RESYNTH + for (j=0;j<C*N;j++) + ((opus_int16*)pcm)[j] = FLOAT2INT16(in[j]); +#endif + RESTORE_STACK; + return ret; +} + +int opus_custom_encode_float(CELTEncoder * OPUS_RESTRICT st, const float * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes) +{ + return celt_encode_with_ec(st, pcm, frame_size, compressed, nbCompressedBytes, NULL); +} + +#endif + +#endif /* CUSTOM_MODES */ + +/* Apply one control request to the encoder state. + The single variadic argument is an opus_int32 for the SET requests and a + pointer for the GET, analysis and energy-mask requests. + Returns OPUS_OK on success, OPUS_BAD_ARG when a value is out of range or + a GET destination pointer is NULL, and OPUS_UNIMPLEMENTED for a request + that is not handled here. */ +int opus_custom_encoder_ctl(CELTEncoder * OPUS_RESTRICT st, int request, ...) 
+{ + va_list ap; + + va_start(ap, request); + switch (request) + { + case OPUS_SET_COMPLEXITY_REQUEST: + { + int value = va_arg(ap, opus_int32); + if (value<0 || value>10) + goto bad_arg; + st->complexity = value; + } + break; + case CELT_SET_START_BAND_REQUEST: + { + opus_int32 value = va_arg(ap, opus_int32); + if (value<0 || value>=st->mode->nbEBands) + goto bad_arg; + st->start = value; + } + break; + case CELT_SET_END_BAND_REQUEST: + { + opus_int32 value = va_arg(ap, opus_int32); + if (value<1 || value>st->mode->nbEBands) + goto bad_arg; + st->end = value; + } + break; + case CELT_SET_PREDICTION_REQUEST: + { + int value = va_arg(ap, opus_int32); + if (value<0 || value>2) + goto bad_arg; + st->disable_pf = value<=1; + st->force_intra = value==0; + } + break; + case OPUS_SET_PACKET_LOSS_PERC_REQUEST: + { + int value = va_arg(ap, opus_int32); + if (value<0 || value>100) + goto bad_arg; + st->loss_rate = value; + } + break; + case OPUS_SET_VBR_CONSTRAINT_REQUEST: + { + opus_int32 value = va_arg(ap, opus_int32); + st->constrained_vbr = value; + } + break; + case OPUS_SET_VBR_REQUEST: + { + opus_int32 value = va_arg(ap, opus_int32); + st->vbr = value; + } + break; + case OPUS_SET_BITRATE_REQUEST: + { + opus_int32 value = va_arg(ap, opus_int32); + if (value<=500 && value!=OPUS_BITRATE_MAX) + goto bad_arg; + value = IMIN(value, 260000*st->channels); + st->bitrate = value; + } + break; + case CELT_SET_CHANNELS_REQUEST: + { + opus_int32 value = va_arg(ap, opus_int32); + if (value<1 || value>2) + goto bad_arg; + st->stream_channels = value; + } + break; + case OPUS_SET_LSB_DEPTH_REQUEST: + { + opus_int32 value = va_arg(ap, opus_int32); + if (value<8 || value>24) + goto bad_arg; + st->lsb_depth=value; + } + break; + case OPUS_GET_LSB_DEPTH_REQUEST: + { + opus_int32 *value = va_arg(ap, opus_int32*); + /* Reject a NULL destination, as the other GET requests do */ + if (value==0) + goto bad_arg; + *value=st->lsb_depth; + } + break; + case OPUS_SET_EXPERT_FRAME_DURATION_REQUEST: + { + opus_int32 value = va_arg(ap, opus_int32); + st->variable_duration = value; + } + 
break; + case OPUS_RESET_STATE: + { + int i; + opus_val16 *oldBandE, *oldLogE, *oldLogE2; + oldBandE = (opus_val16*)(st->in_mem+st->channels*(st->overlap+COMBFILTER_MAXPERIOD)); + oldLogE = oldBandE + st->channels*st->mode->nbEBands; + oldLogE2 = oldLogE + st->channels*st->mode->nbEBands; + /* Zero everything from ENCODER_RESET_START to the end of the state, then restore the non-zero defaults */ + OPUS_CLEAR((char*)&st->ENCODER_RESET_START, + opus_custom_encoder_get_size(st->mode, st->channels)- + ((char*)&st->ENCODER_RESET_START - (char*)st)); + for (i=0;i<st->channels*st->mode->nbEBands;i++) + oldLogE[i]=oldLogE2[i]=-QCONST16(28.f,DB_SHIFT); + st->vbr_offset = 0; + st->delayedIntra = 1; + st->spread_decision = SPREAD_NORMAL; + st->tonal_average = 256; + st->hf_average = 0; + st->tapset_decision = 0; + } + break; +#ifdef CUSTOM_MODES + case CELT_SET_INPUT_CLIPPING_REQUEST: + { + opus_int32 value = va_arg(ap, opus_int32); + st->clip = value; + } + break; +#endif + case CELT_SET_SIGNALLING_REQUEST: + { + opus_int32 value = va_arg(ap, opus_int32); + st->signalling = value; + } + break; + case CELT_SET_ANALYSIS_REQUEST: + { + AnalysisInfo *info = va_arg(ap, AnalysisInfo *); + if (info) + OPUS_COPY(&st->analysis, info, 1); + } + break; + case CELT_GET_MODE_REQUEST: + { + const CELTMode ** value = va_arg(ap, const CELTMode**); + if (value==0) + goto bad_arg; + *value=st->mode; + } + break; + case OPUS_GET_FINAL_RANGE_REQUEST: + { + opus_uint32 * value = va_arg(ap, opus_uint32 *); + if (value==0) + goto bad_arg; + *value=st->rng; + } + break; + case OPUS_SET_LFE_REQUEST: + { + opus_int32 value = va_arg(ap, opus_int32); + st->lfe = value; + } + break; + case OPUS_SET_ENERGY_MASK_REQUEST: + { + opus_val16 *value = va_arg(ap, opus_val16*); + st->energy_mask = value; + } + break; + default: + goto bad_request; + } + va_end(ap); + return OPUS_OK; +bad_arg: + va_end(ap); + return OPUS_BAD_ARG; +bad_request: + va_end(ap); + return OPUS_UNIMPLEMENTED; +} diff --git a/drivers/opus/celt/celt_lpc.c b/drivers/opus/celt/celt_lpc.c new file mode 100644 index 0000000000..1fa4406bc9 --- 
/dev/null +++ b/drivers/opus/celt/celt_lpc.c @@ -0,0 +1,309 @@ +/* Copyright (c) 2009-2010 Xiph.Org Foundation + Written by Jean-Marc Valin */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "celt_lpc.h" +#include "stack_alloc.h" +#include "mathops.h" +#include "pitch.h" + +void _celt_lpc( + opus_val16 *_lpc, /* out: [0...p-1] LPC coefficients */ +const opus_val32 *ac, /* in: [0...p] autocorrelation values */ +int p +) +{ + int i, j; + opus_val32 r; + opus_val32 error = ac[0]; +#ifdef OPUS_FIXED_POINT + opus_val32 lpc[LPC_ORDER]; +#else + float *lpc = _lpc; +#endif + + for (i = 0; i < p; i++) + lpc[i] = 0; + if (ac[0] != 0) + { + for (i = 0; i < p; i++) { + /* Sum up this iteration's reflection coefficient */ + opus_val32 rr = 0; + for (j = 0; j < i; j++) + rr += MULT32_32_Q31(lpc[j],ac[i - j]); + rr += SHR32(ac[i + 1],3); + r = -frac_div32(SHL32(rr,3), error); + /* Update LPC coefficients and total error */ + lpc[i] = SHR32(r,3); + for (j = 0; j < (i+1)>>1; j++) + { + opus_val32 tmp1, tmp2; + tmp1 = lpc[j]; + tmp2 = lpc[i-1-j]; + lpc[j] = tmp1 + MULT32_32_Q31(r,tmp2); + lpc[i-1-j] = tmp2 + MULT32_32_Q31(r,tmp1); + } + + error = error - MULT32_32_Q31(MULT32_32_Q31(r,r),error); + /* Bail out once we get 30 dB gain */ +#ifdef OPUS_FIXED_POINT + if (error<SHR32(ac[0],10)) + break; +#else + if (error<.001f*ac[0]) + break; +#endif + } + } +#ifdef OPUS_FIXED_POINT + for (i=0;i<p;i++) + _lpc[i] = ROUND16(lpc[i],16); +#endif +} + +void celt_fir(const opus_val16 *_x, + const opus_val16 *num, + opus_val16 *_y, + int N, + int ord, + opus_val16 *mem) +{ + int i,j; + VARDECL(opus_val16, rnum); + VARDECL(opus_val16, x); + SAVE_STACK; + + ALLOC(rnum, ord, opus_val16); + ALLOC(x, N+ord, opus_val16); + for(i=0;i<ord;i++) + rnum[i] = num[ord-i-1]; + for(i=0;i<ord;i++) + x[i] = mem[ord-i-1]; + for (i=0;i<N;i++) + x[i+ord]=_x[i]; + for(i=0;i<ord;i++) + mem[i] = _x[N-i-1]; +#ifdef SMALL_FOOTPRINT + for (i=0;i<N;i++) + { + opus_val32 sum = SHL32(EXTEND32(_x[i]), SIG_SHIFT); + for (j=0;j<ord;j++) + { + sum = MAC16_16(sum,rnum[j],x[i+j]); + } + _y[i] = SATURATE16(PSHR32(sum, SIG_SHIFT)); + } +#else 
+ for (i=0;i<N-3;i+=4) + { + opus_val32 sum[4]={0,0,0,0}; + xcorr_kernel(rnum, x+i, sum, ord); + _y[i ] = SATURATE16(ADD32(EXTEND32(_x[i ]), PSHR32(sum[0], SIG_SHIFT))); + _y[i+1] = SATURATE16(ADD32(EXTEND32(_x[i+1]), PSHR32(sum[1], SIG_SHIFT))); + _y[i+2] = SATURATE16(ADD32(EXTEND32(_x[i+2]), PSHR32(sum[2], SIG_SHIFT))); + _y[i+3] = SATURATE16(ADD32(EXTEND32(_x[i+3]), PSHR32(sum[3], SIG_SHIFT))); + } + for (;i<N;i++) + { + opus_val32 sum = 0; + for (j=0;j<ord;j++) + sum = MAC16_16(sum,rnum[j],x[i+j]); + _y[i] = SATURATE16(ADD32(EXTEND32(_x[i]), PSHR32(sum, SIG_SHIFT))); + } +#endif + RESTORE_STACK; +} + +void celt_iir(const opus_val32 *_x, + const opus_val16 *den, + opus_val32 *_y, + int N, + int ord, + opus_val16 *mem) +{ +#ifdef SMALL_FOOTPRINT + int i,j; + for (i=0;i<N;i++) + { + opus_val32 sum = _x[i]; + for (j=0;j<ord;j++) + { + sum -= MULT16_16(den[j],mem[j]); + } + for (j=ord-1;j>=1;j--) + { + mem[j]=mem[j-1]; + } + mem[0] = ROUND16(sum,SIG_SHIFT); + _y[i] = sum; + } +#else + int i,j; + VARDECL(opus_val16, rden); + VARDECL(opus_val16, y); + SAVE_STACK; + + celt_assert((ord&3)==0); + ALLOC(rden, ord, opus_val16); + ALLOC(y, N+ord, opus_val16); + for(i=0;i<ord;i++) + rden[i] = den[ord-i-1]; + for(i=0;i<ord;i++) + y[i] = -mem[ord-i-1]; + for(;i<N+ord;i++) + y[i]=0; + for (i=0;i<N-3;i+=4) + { + /* Unroll by 4 as if it were an FIR filter */ + opus_val32 sum[4]; + sum[0]=_x[i]; + sum[1]=_x[i+1]; + sum[2]=_x[i+2]; + sum[3]=_x[i+3]; + xcorr_kernel(rden, y+i, sum, ord); + + /* Patch up the result to compensate for the fact that this is an IIR */ + y[i+ord ] = -ROUND16(sum[0],SIG_SHIFT); + _y[i ] = sum[0]; + sum[1] = MAC16_16(sum[1], y[i+ord ], den[0]); + y[i+ord+1] = -ROUND16(sum[1],SIG_SHIFT); + _y[i+1] = sum[1]; + sum[2] = MAC16_16(sum[2], y[i+ord+1], den[0]); + sum[2] = MAC16_16(sum[2], y[i+ord ], den[1]); + y[i+ord+2] = -ROUND16(sum[2],SIG_SHIFT); + _y[i+2] = sum[2]; + + sum[3] = MAC16_16(sum[3], y[i+ord+2], den[0]); + sum[3] = MAC16_16(sum[3], y[i+ord+1], 
den[1]); + sum[3] = MAC16_16(sum[3], y[i+ord ], den[2]); + y[i+ord+3] = -ROUND16(sum[3],SIG_SHIFT); + _y[i+3] = sum[3]; + } + for (;i<N;i++) + { + opus_val32 sum = _x[i]; + for (j=0;j<ord;j++) + sum -= MULT16_16(rden[j],y[i+j]); + y[i+ord] = ROUND16(sum,SIG_SHIFT); + _y[i] = sum; + } + for(i=0;i<ord;i++) + mem[i] = _y[N-i-1]; + RESTORE_STACK; +#endif +} + +int _celt_autocorr( + const opus_val16 *x, /* in: [0...n-1] samples x */ + opus_val32 *ac, /* out: [0...lag-1] ac values */ + const opus_val16 *window, + int overlap, + int lag, + int n, + int arch + ) +{ + opus_val32 d; + int i, k; + int fastN=n-lag; + int shift; + const opus_val16 *xptr; + VARDECL(opus_val16, xx); + SAVE_STACK; + ALLOC(xx, n, opus_val16); + celt_assert(n>0); + celt_assert(overlap>=0); + if (overlap == 0) + { + xptr = x; + } else { + for (i=0;i<n;i++) + xx[i] = x[i]; + for (i=0;i<overlap;i++) + { + xx[i] = MULT16_16_Q15(x[i],window[i]); + xx[n-i-1] = MULT16_16_Q15(x[n-i-1],window[i]); + } + xptr = xx; + } + shift=0; +#ifdef OPUS_FIXED_POINT + { + opus_val32 ac0; + ac0 = 1+(n<<7); + if (n&1) ac0 += SHR32(MULT16_16(xptr[0],xptr[0]),9); + for(i=(n&1);i<n;i+=2) + { + ac0 += SHR32(MULT16_16(xptr[i],xptr[i]),9); + ac0 += SHR32(MULT16_16(xptr[i+1],xptr[i+1]),9); + } + + shift = celt_ilog2(ac0)-30+10; + shift = (shift)/2; + if (shift>0) + { + for(i=0;i<n;i++) + xx[i] = PSHR32(xptr[i], shift); + xptr = xx; + } else + shift = 0; + } +#endif + celt_pitch_xcorr(xptr, xptr, ac, fastN, lag+1, arch); + for (k=0;k<=lag;k++) + { + for (i = k+fastN, d = 0; i < n; i++) + d = MAC16_16(d, xptr[i], xptr[i-k]); + ac[k] += d; + } +#ifdef OPUS_FIXED_POINT + shift = 2*shift; + if (shift<=0) + ac[0] += SHL32((opus_int32)1, -shift); + if (ac[0] < 268435456) + { + int shift2 = 29 - EC_ILOG(ac[0]); + for (i=0;i<=lag;i++) + ac[i] = SHL32(ac[i], shift2); + shift -= shift2; + } else if (ac[0] >= 536870912) + { + int shift2=1; + if (ac[0] >= 1073741824) + shift2++; + for (i=0;i<=lag;i++) + ac[i] = SHR32(ac[i], shift2); + shift 
+= shift2; + } +#endif + + RESTORE_STACK; + return shift; +} diff --git a/drivers/opus/celt/celt_lpc.h b/drivers/opus/celt/celt_lpc.h new file mode 100644 index 0000000000..dc2a0a3d26 --- /dev/null +++ b/drivers/opus/celt/celt_lpc.h @@ -0,0 +1,54 @@ +/* Copyright (c) 2009-2010 Xiph.Org Foundation + Written by Jean-Marc Valin */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#ifndef PLC_H +#define PLC_H + +#include "arch.h" + +#define LPC_ORDER 24 + +void _celt_lpc(opus_val16 *_lpc, const opus_val32 *ac, int p); + +void celt_fir(const opus_val16 *x, + const opus_val16 *num, + opus_val16 *y, + int N, + int ord, + opus_val16 *mem); + +void celt_iir(const opus_val32 *x, + const opus_val16 *den, + opus_val32 *y, + int N, + int ord, + opus_val16 *mem); + +int _celt_autocorr(const opus_val16 *x, opus_val32 *ac, + const opus_val16 *window, int overlap, int lag, int n, int arch); + +#endif /* PLC_H */ diff --git a/drivers/opus/celt/cpu_support.h b/drivers/opus/celt/cpu_support.h new file mode 100644 index 0000000000..d68dbe62c5 --- /dev/null +++ b/drivers/opus/celt/cpu_support.h @@ -0,0 +1,54 @@ +/* Copyright (c) 2010 Xiph.Org Foundation + * Copyright (c) 2013 Parrot */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef CPU_SUPPORT_H +#define CPU_SUPPORT_H + +#include "opus_types.h" +#include "opus_defines.h" + +#if defined(OPUS_HAVE_RTCD) && defined(OPUS_ARM_ASM) +#include "arm/armcpu.h" + +/* We currently support 4 ARM variants: + * arch[0] -> ARMv4 + * arch[1] -> ARMv5E + * arch[2] -> ARMv6 + * arch[3] -> NEON + */ +#define OPUS_ARCHMASK 3 + +#else +#define OPUS_ARCHMASK 0 + +static OPUS_INLINE int opus_select_arch(void) +{ + return 0; +} +#endif + +#endif diff --git a/drivers/opus/celt/cwrs.c b/drivers/opus/celt/cwrs.c new file mode 100644 index 0000000000..b866aa9210 --- /dev/null +++ b/drivers/opus/celt/cwrs.c @@ -0,0 +1,697 @@ +/* Copyright (c) 2007-2008 CSIRO + Copyright (c) 2007-2009 Xiph.Org Foundation + Copyright (c) 2007-2009 Timothy B. Terriberry + Written by Timothy B. Terriberry and Jean-Marc Valin */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "os_support.h" +#include "cwrs.h" +#include "mathops.h" +#include "arch.h" + +#ifdef CUSTOM_MODES + +/*Guaranteed to return a conservatively large estimate of the binary logarithm + with frac bits of fractional precision. + Tested for all possible 32-bit inputs with frac=4, where the maximum + overestimation is 0.06254243 bits.*/ +int log2_frac(opus_uint32 val, int frac) +{ + int l; + l=EC_ILOG(val); + if(val&(val-1)){ + /*This is (val>>l-16), but guaranteed to round up, even if adding a bias + before the shift would cause overflow (e.g., for 0xFFFFxxxx). 
+ Doesn't work for val=0, but that case fails the test above.*/ + if(l>16)val=((val-1)>>(l-16))+1; + else val<<=16-l; + l=(l-1)<<frac; + /*Note that we always need one iteration, since the rounding up above means + that we might need to adjust the integer part of the logarithm.*/ + do{ + int b; + b=(int)(val>>16); + l+=b<<frac; + val=(val+b)>>b; + val=(val*val+0x7FFF)>>15; + } + while(frac-->0); + /*If val is not exactly 0x8000, then we have to round up the remainder.*/ + return l+(val>0x8000); + } + /*Exact powers of two require no rounding.*/ + else return (l-1)<<frac; +} +#endif + +/*Although derived separately, the pulse vector coding scheme is equivalent to + a Pyramid Vector Quantizer \cite{Fis86}. + Some additional notes about an early version appear at + http://people.xiph.org/~tterribe/notes/cwrs.html, but the codebook ordering + and the definitions of some terms have evolved since that was written. + + The conversion from a pulse vector to an integer index (encoding) and back + (decoding) is governed by two related functions, V(N,K) and U(N,K). + + V(N,K) = the number of combinations, with replacement, of N items, taken K + at a time, when a sign bit is added to each item taken at least once (i.e., + the number of N-dimensional unit pulse vectors with K pulses). + One way to compute this is via + V(N,K) = K>0 ? sum(k=1...K,2**k*choose(N,k)*choose(K-1,k-1)) : 1, + where choose() is the binomial function. 
+ A table of values for N<10 and K<10 looks like: + V[10][10] = { + {1, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {1, 2, 2, 2, 2, 2, 2, 2, 2, 2}, + {1, 4, 8, 12, 16, 20, 24, 28, 32, 36}, + {1, 6, 18, 38, 66, 102, 146, 198, 258, 326}, + {1, 8, 32, 88, 192, 360, 608, 952, 1408, 1992}, + {1, 10, 50, 170, 450, 1002, 1970, 3530, 5890, 9290}, + {1, 12, 72, 292, 912, 2364, 5336, 10836, 20256, 35436}, + {1, 14, 98, 462, 1666, 4942, 12642, 28814, 59906, 115598}, + {1, 16, 128, 688, 2816, 9424, 27008, 68464, 157184, 332688}, + {1, 18, 162, 978, 4482, 16722, 53154, 148626, 374274, 864146} + }; + + U(N,K) = the number of such combinations wherein N-1 objects are taken at + most K-1 at a time. + This is given by + U(N,K) = sum(k=0...K-1,V(N-1,k)) + = K>0 ? (V(N-1,K-1) + V(N,K-1))/2 : 0. + The latter expression also makes clear that U(N,K) is half the number of such + combinations wherein the first object is taken at least once. + Although it may not be clear from either of these definitions, U(N,K) is the + natural function to work with when enumerating the pulse vector codebooks, + not V(N,K). + U(N,K) is not well-defined for N=0, but with the extension + U(0,K) = K>0 ? 0 : 1, + the function becomes symmetric: U(N,K) = U(K,N), with a similar table: + U[10][10] = { + {1, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 1, 1, 1, 1, 1, 1, 1, 1, 1}, + {0, 1, 3, 5, 7, 9, 11, 13, 15, 17}, + {0, 1, 5, 13, 25, 41, 61, 85, 113, 145}, + {0, 1, 7, 25, 63, 129, 231, 377, 575, 833}, + {0, 1, 9, 41, 129, 321, 681, 1289, 2241, 3649}, + {0, 1, 11, 61, 231, 681, 1683, 3653, 7183, 13073}, + {0, 1, 13, 85, 377, 1289, 3653, 8989, 19825, 40081}, + {0, 1, 15, 113, 575, 2241, 7183, 19825, 48639, 108545}, + {0, 1, 17, 145, 833, 3649, 13073, 40081, 108545, 265729} + }; + + With this extension, V(N,K) may be written in terms of U(N,K): + V(N,K) = U(N,K) + U(N,K+1) + for all N>=0, K>=0. 
+ Thus U(N,K+1) represents the number of combinations where the first element + is positive or zero, and U(N,K) represents the number of combinations where + it is negative. + With a large enough table of U(N,K) values, we could write O(N) encoding + and O(min(N*log(K),N+K)) decoding routines, but such a table would be + prohibitively large for small embedded devices (K may be as large as 32767 + for small N, and N may be as large as 200). + + Both functions obey the same recurrence relation: + V(N,K) = V(N-1,K) + V(N,K-1) + V(N-1,K-1), + U(N,K) = U(N-1,K) + U(N,K-1) + U(N-1,K-1), + for all N>0, K>0, with different initial conditions at N=0 or K=0. + This allows us to construct a row of one of the tables above given the + previous row or the next row. + Thus we can derive O(NK) encoding and decoding routines with O(K) memory + using only addition and subtraction. + + When encoding, we build up from the U(2,K) row and work our way forwards. + When decoding, we need to start at the U(N,K) row and work our way backwards, + which requires a means of computing U(N,K). + U(N,K) may be computed from two previous values with the same N: + U(N,K) = ((2*N-1)*U(N,K-1) - U(N,K-2))/(K-1) + U(N,K-2) + for all N>1, and since U(N,K) is symmetric, a similar relation holds for two + previous values with the same K: + U(N,K>1) = ((2*K-1)*U(N-1,K) - U(N-2,K))/(N-1) + U(N-2,K) + for all K>1. + This allows us to construct an arbitrary row of the U(N,K) table by starting + with the first two values, which are constants. + This saves roughly 2/3 the work in our O(NK) decoding routine, but costs O(K) + multiplications. + Similar relations can be derived for V(N,K), but are not used here. + + For N>0 and K>0, U(N,K) and V(N,K) take on the form of an (N-1)-degree + polynomial for fixed N. 
+ The first few are + U(1,K) = 1, + U(2,K) = 2*K-1, + U(3,K) = (2*K-2)*K+1, + U(4,K) = (((4*K-6)*K+8)*K-3)/3, + U(5,K) = ((((2*K-4)*K+10)*K-8)*K+3)/3, + and + V(1,K) = 2, + V(2,K) = 4*K, + V(3,K) = 4*K*K+2, + V(4,K) = 8*(K*K+2)*K/3, + V(5,K) = ((4*K*K+20)*K*K+6)/3, + for all K>0. + This allows us to derive O(N) encoding and O(N*log(K)) decoding routines for + small N (and indeed decoding is also O(N) for N<3). + + @ARTICLE{Fis86, + author="Thomas R. Fischer", + title="A Pyramid Vector Quantizer", + journal="IEEE Transactions on Information Theory", + volume="IT-32", + number=4, + pages="568--583", + month=Jul, + year=1986 + }*/ + +#if !defined(SMALL_FOOTPRINT) + +/*U(N,K) = U(K,N) := N>0?K>0?U(N-1,K)+U(N,K-1)+U(N-1,K-1):0:K>0?1:0*/ +# define CELT_PVQ_U(_n,_k) (CELT_PVQ_U_ROW[IMIN(_n,_k)][IMAX(_n,_k)]) +/*V(N,K) := U(N,K)+U(N,K+1) = the number of PVQ codewords for a band of size N + with K pulses allocated to it.*/ +# define CELT_PVQ_V(_n,_k) (CELT_PVQ_U(_n,_k)+CELT_PVQ_U(_n,(_k)+1)) + +/*For each V(N,K) supported, we will access element U(min(N,K+1),max(N,K+1)). + Thus, the number of entries in row I is the larger of the maximum number of + pulses we will ever allocate for a given N=I (K=128, or however many fit in + 32 bits, whichever is smaller), plus one, and the maximum N for which + K=I-1 pulses fit in 32 bits. + The largest band size in an Opus Custom mode is 208. 
+ Otherwise, we can limit things to the set of N which can be achieved by + splitting a band from a standard Opus mode: 176, 144, 96, 88, 72, 64, 48, + 44, 36, 32, 24, 22, 18, 16, 8, 4, 2).*/ +#if defined(CUSTOM_MODES) +static const opus_uint32 CELT_PVQ_U_DATA[1488]={ +#else +static const opus_uint32 CELT_PVQ_U_DATA[1272]={ +#endif + /*N=0, K=0...176:*/ + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +#if defined(CUSTOM_MODES) + /*...208:*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, +#endif + /*N=1, K=1...176:*/ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +#if defined(CUSTOM_MODES) + /*...208:*/ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, +#endif + /*N=2, K=2...176:*/ + 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35, 37, 39, 41, + 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, 65, 67, 69, 71, 73, 75, 77, 79, + 81, 83, 85, 87, 89, 91, 93, 95, 97, 99, 101, 103, 105, 
107, 109, 111, 113, + 115, 117, 119, 121, 123, 125, 127, 129, 131, 133, 135, 137, 139, 141, 143, + 145, 147, 149, 151, 153, 155, 157, 159, 161, 163, 165, 167, 169, 171, 173, + 175, 177, 179, 181, 183, 185, 187, 189, 191, 193, 195, 197, 199, 201, 203, + 205, 207, 209, 211, 213, 215, 217, 219, 221, 223, 225, 227, 229, 231, 233, + 235, 237, 239, 241, 243, 245, 247, 249, 251, 253, 255, 257, 259, 261, 263, + 265, 267, 269, 271, 273, 275, 277, 279, 281, 283, 285, 287, 289, 291, 293, + 295, 297, 299, 301, 303, 305, 307, 309, 311, 313, 315, 317, 319, 321, 323, + 325, 327, 329, 331, 333, 335, 337, 339, 341, 343, 345, 347, 349, 351, +#if defined(CUSTOM_MODES) + /*...208:*/ + 353, 355, 357, 359, 361, 363, 365, 367, 369, 371, 373, 375, 377, 379, 381, + 383, 385, 387, 389, 391, 393, 395, 397, 399, 401, 403, 405, 407, 409, 411, + 413, 415, +#endif + /*N=3, K=3...176:*/ + 13, 25, 41, 61, 85, 113, 145, 181, 221, 265, 313, 365, 421, 481, 545, 613, + 685, 761, 841, 925, 1013, 1105, 1201, 1301, 1405, 1513, 1625, 1741, 1861, + 1985, 2113, 2245, 2381, 2521, 2665, 2813, 2965, 3121, 3281, 3445, 3613, 3785, + 3961, 4141, 4325, 4513, 4705, 4901, 5101, 5305, 5513, 5725, 5941, 6161, 6385, + 6613, 6845, 7081, 7321, 7565, 7813, 8065, 8321, 8581, 8845, 9113, 9385, 9661, + 9941, 10225, 10513, 10805, 11101, 11401, 11705, 12013, 12325, 12641, 12961, + 13285, 13613, 13945, 14281, 14621, 14965, 15313, 15665, 16021, 16381, 16745, + 17113, 17485, 17861, 18241, 18625, 19013, 19405, 19801, 20201, 20605, 21013, + 21425, 21841, 22261, 22685, 23113, 23545, 23981, 24421, 24865, 25313, 25765, + 26221, 26681, 27145, 27613, 28085, 28561, 29041, 29525, 30013, 30505, 31001, + 31501, 32005, 32513, 33025, 33541, 34061, 34585, 35113, 35645, 36181, 36721, + 37265, 37813, 38365, 38921, 39481, 40045, 40613, 41185, 41761, 42341, 42925, + 43513, 44105, 44701, 45301, 45905, 46513, 47125, 47741, 48361, 48985, 49613, + 50245, 50881, 51521, 52165, 52813, 53465, 54121, 54781, 55445, 56113, 56785, + 57461, 58141, 58825, 
59513, 60205, 60901, 61601, +#if defined(CUSTOM_MODES) + /*...208:*/ + 62305, 63013, 63725, 64441, 65161, 65885, 66613, 67345, 68081, 68821, 69565, + 70313, 71065, 71821, 72581, 73345, 74113, 74885, 75661, 76441, 77225, 78013, + 78805, 79601, 80401, 81205, 82013, 82825, 83641, 84461, 85285, 86113, +#endif + /*N=4, K=4...176:*/ + 63, 129, 231, 377, 575, 833, 1159, 1561, 2047, 2625, 3303, 4089, 4991, 6017, + 7175, 8473, 9919, 11521, 13287, 15225, 17343, 19649, 22151, 24857, 27775, + 30913, 34279, 37881, 41727, 45825, 50183, 54809, 59711, 64897, 70375, 76153, + 82239, 88641, 95367, 102425, 109823, 117569, 125671, 134137, 142975, 152193, + 161799, 171801, 182207, 193025, 204263, 215929, 228031, 240577, 253575, + 267033, 280959, 295361, 310247, 325625, 341503, 357889, 374791, 392217, + 410175, 428673, 447719, 467321, 487487, 508225, 529543, 551449, 573951, + 597057, 620775, 645113, 670079, 695681, 721927, 748825, 776383, 804609, + 833511, 863097, 893375, 924353, 956039, 988441, 1021567, 1055425, 1090023, + 1125369, 1161471, 1198337, 1235975, 1274393, 1313599, 1353601, 1394407, + 1436025, 1478463, 1521729, 1565831, 1610777, 1656575, 1703233, 1750759, + 1799161, 1848447, 1898625, 1949703, 2001689, 2054591, 2108417, 2163175, + 2218873, 2275519, 2333121, 2391687, 2451225, 2511743, 2573249, 2635751, + 2699257, 2763775, 2829313, 2895879, 2963481, 3032127, 3101825, 3172583, + 3244409, 3317311, 3391297, 3466375, 3542553, 3619839, 3698241, 3777767, + 3858425, 3940223, 4023169, 4107271, 4192537, 4278975, 4366593, 4455399, + 4545401, 4636607, 4729025, 4822663, 4917529, 5013631, 5110977, 5209575, + 5309433, 5410559, 5512961, 5616647, 5721625, 5827903, 5935489, 6044391, + 6154617, 6266175, 6379073, 6493319, 6608921, 6725887, 6844225, 6963943, + 7085049, 7207551, +#if defined(CUSTOM_MODES) + /*...208:*/ + 7331457, 7456775, 7583513, 7711679, 7841281, 7972327, 8104825, 8238783, + 8374209, 8511111, 8649497, 8789375, 8930753, 9073639, 9218041, 9363967, + 9511425, 9660423, 9810969, 
9963071, 10116737, 10271975, 10428793, 10587199, + 10747201, 10908807, 11072025, 11236863, 11403329, 11571431, 11741177, + 11912575, +#endif + /*N=5, K=5...176:*/ + 321, 681, 1289, 2241, 3649, 5641, 8361, 11969, 16641, 22569, 29961, 39041, + 50049, 63241, 78889, 97281, 118721, 143529, 172041, 204609, 241601, 283401, + 330409, 383041, 441729, 506921, 579081, 658689, 746241, 842249, 947241, + 1061761, 1186369, 1321641, 1468169, 1626561, 1797441, 1981449, 2179241, + 2391489, 2618881, 2862121, 3121929, 3399041, 3694209, 4008201, 4341801, + 4695809, 5071041, 5468329, 5888521, 6332481, 6801089, 7295241, 7815849, + 8363841, 8940161, 9545769, 10181641, 10848769, 11548161, 12280841, 13047849, + 13850241, 14689089, 15565481, 16480521, 17435329, 18431041, 19468809, + 20549801, 21675201, 22846209, 24064041, 25329929, 26645121, 28010881, + 29428489, 30899241, 32424449, 34005441, 35643561, 37340169, 39096641, + 40914369, 42794761, 44739241, 46749249, 48826241, 50971689, 53187081, + 55473921, 57833729, 60268041, 62778409, 65366401, 68033601, 70781609, + 73612041, 76526529, 79526721, 82614281, 85790889, 89058241, 92418049, + 95872041, 99421961, 103069569, 106816641, 110664969, 114616361, 118672641, + 122835649, 127107241, 131489289, 135983681, 140592321, 145317129, 150160041, + 155123009, 160208001, 165417001, 170752009, 176215041, 181808129, 187533321, + 193392681, 199388289, 205522241, 211796649, 218213641, 224775361, 231483969, + 238341641, 245350569, 252512961, 259831041, 267307049, 274943241, 282741889, + 290705281, 298835721, 307135529, 315607041, 324252609, 333074601, 342075401, + 351257409, 360623041, 370174729, 379914921, 389846081, 399970689, 410291241, + 420810249, 431530241, 442453761, 453583369, 464921641, 476471169, 488234561, + 500214441, 512413449, 524834241, 537479489, 550351881, 563454121, 576788929, + 590359041, 604167209, 618216201, 632508801, +#if defined(CUSTOM_MODES) + /*...208:*/ + 647047809, 661836041, 676876329, 692171521, 707724481, 723538089, 739615241, 
+ 755958849, 772571841, 789457161, 806617769, 824056641, 841776769, 859781161, + 878072841, 896654849, 915530241, 934702089, 954173481, 973947521, 994027329, + 1014416041, 1035116809, 1056132801, 1077467201, 1099123209, 1121104041, + 1143412929, 1166053121, 1189027881, 1212340489, 1235994241, +#endif + /*N=6, K=6...96:*/ + 1683, 3653, 7183, 13073, 22363, 36365, 56695, 85305, 124515, 177045, 246047, + 335137, 448427, 590557, 766727, 982729, 1244979, 1560549, 1937199, 2383409, + 2908411, 3522221, 4235671, 5060441, 6009091, 7095093, 8332863, 9737793, + 11326283, 13115773, 15124775, 17372905, 19880915, 22670725, 25765455, + 29189457, 32968347, 37129037, 41699767, 46710137, 52191139, 58175189, + 64696159, 71789409, 79491819, 87841821, 96879431, 106646281, 117185651, + 128542501, 140763503, 153897073, 167993403, 183104493, 199284183, 216588185, + 235074115, 254801525, 275831935, 298228865, 322057867, 347386557, 374284647, + 402823977, 433078547, 465124549, 499040399, 534906769, 572806619, 612825229, + 655050231, 699571641, 746481891, 795875861, 847850911, 902506913, 959946283, + 1020274013, 1083597703, 1150027593, 1219676595, 1292660325, 1369097135, + 1449108145, 1532817275, 1620351277, 1711839767, 1807415257, 1907213187, + 2011371957, 2120032959, +#if defined(CUSTOM_MODES) + /*...109:*/ + 2233340609U, 2351442379U, 2474488829U, 2602633639U, 2736033641U, 2874848851U, + 3019242501U, 3169381071U, 3325434321U, 3487575323U, 3655980493U, 3830829623U, + 4012305913U, +#endif + /*N=7, K=7...54*/ + 8989, 19825, 40081, 75517, 134245, 227305, 369305, 579125, 880685, 1303777, + 1884961, 2668525, 3707509, 5064793, 6814249, 9041957, 11847485, 15345233, + 19665841, 24957661, 31388293, 39146185, 48442297, 59511829, 72616013, + 88043969, 106114625, 127178701, 151620757, 179861305, 212358985, 249612805, + 292164445, 340600625, 395555537, 457713341, 527810725, 606639529, 695049433, + 793950709, 904317037, 1027188385, 1163673953, 1314955181, 1482288821, + 1667010073, 1870535785, 2094367717, 
+#if defined(CUSTOM_MODES) + /*...60:*/ + 2340095869U, 2609401873U, 2904062449U, 3225952925U, 3577050821U, 3959439497U, +#endif + /*N=8, K=8...37*/ + 48639, 108545, 224143, 433905, 795455, 1392065, 2340495, 3800305, 5984767, + 9173505, 13726991, 20103025, 28875327, 40754369, 56610575, 77500017, + 104692735, 139703809, 184327311, 240673265, 311207743, 398796225, 506750351, + 638878193, 799538175, 993696769, 1226990095, 1505789553, 1837271615, + 2229491905U, +#if defined(CUSTOM_MODES) + /*...40:*/ + 2691463695U, 3233240945U, 3866006015U, +#endif + /*N=9, K=9...28:*/ + 265729, 598417, 1256465, 2485825, 4673345, 8405905, 14546705, 24331777, + 39490049, 62390545, 96220561, 145198913, 214828609, 312193553, 446304145, + 628496897, 872893441, 1196924561, 1621925137, 2173806145U, +#if defined(CUSTOM_MODES) + /*...29:*/ + 2883810113U, +#endif + /*N=10, K=10...24:*/ + 1462563, 3317445, 7059735, 14218905, 27298155, 50250765, 89129247, 152951073, + 254831667, 413442773, 654862247, 1014889769, 1541911931, 2300409629U, + 3375210671U, + /*N=11, K=11...19:*/ + 8097453, 18474633, 39753273, 81270333, 158819253, 298199265, 540279585, + 948062325, 1616336765, +#if defined(CUSTOM_MODES) + /*...20:*/ + 2684641785U, +#endif + /*N=12, K=12...18:*/ + 45046719, 103274625, 224298231, 464387817, 921406335, 1759885185, + 3248227095U, + /*N=13, K=13...16:*/ + 251595969, 579168825, 1267854873, 2653649025U, + /*N=14, K=14:*/ + 1409933619 +}; + +#if defined(CUSTOM_MODES) +static const opus_uint32 *const CELT_PVQ_U_ROW[15]={ + CELT_PVQ_U_DATA+ 0,CELT_PVQ_U_DATA+ 208,CELT_PVQ_U_DATA+ 415, + CELT_PVQ_U_DATA+ 621,CELT_PVQ_U_DATA+ 826,CELT_PVQ_U_DATA+1030, + CELT_PVQ_U_DATA+1233,CELT_PVQ_U_DATA+1336,CELT_PVQ_U_DATA+1389, + CELT_PVQ_U_DATA+1421,CELT_PVQ_U_DATA+1441,CELT_PVQ_U_DATA+1455, + CELT_PVQ_U_DATA+1464,CELT_PVQ_U_DATA+1470,CELT_PVQ_U_DATA+1473 +}; +#else +static const opus_uint32 *const CELT_PVQ_U_ROW[15]={ + CELT_PVQ_U_DATA+ 0,CELT_PVQ_U_DATA+ 176,CELT_PVQ_U_DATA+ 351, + CELT_PVQ_U_DATA+ 
525,CELT_PVQ_U_DATA+ 698,CELT_PVQ_U_DATA+ 870, + CELT_PVQ_U_DATA+1041,CELT_PVQ_U_DATA+1131,CELT_PVQ_U_DATA+1178, + CELT_PVQ_U_DATA+1207,CELT_PVQ_U_DATA+1226,CELT_PVQ_U_DATA+1240, + CELT_PVQ_U_DATA+1248,CELT_PVQ_U_DATA+1254,CELT_PVQ_U_DATA+1257 +}; +#endif + +#if defined(CUSTOM_MODES) +void get_required_bits(opus_int16 *_bits,int _n,int _maxk,int _frac){ + int k; + /*_maxk==0 => there's nothing to do.*/ + celt_assert(_maxk>0); + _bits[0]=0; + for(k=1;k<=_maxk;k++)_bits[k]=log2_frac(CELT_PVQ_V(_n,k),_frac); +} +#endif + +static opus_uint32 icwrs(int _n,const int *_y){ + opus_uint32 i; + int j; + int k; + celt_assert(_n>=2); + j=_n-1; + i=_y[j]<0; + k=abs(_y[j]); + do{ + j--; + i+=CELT_PVQ_U(_n-j,k); + k+=abs(_y[j]); + if(_y[j]<0)i+=CELT_PVQ_U(_n-j,k+1); + } + while(j>0); + return i; +} + +void encode_pulses(const int *_y,int _n,int _k,ec_enc *_enc){ + celt_assert(_k>0); + ec_enc_uint(_enc,icwrs(_n,_y),CELT_PVQ_V(_n,_k)); +} + +static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y){ + opus_uint32 p; + int s; + int k0; + celt_assert(_k>0); + celt_assert(_n>1); + while(_n>2){ + opus_uint32 q; + /*Lots of pulses case:*/ + if(_k>=_n){ + const opus_uint32 *row; + row=CELT_PVQ_U_ROW[_n]; + /*Are the pulses in this dimension negative?*/ + p=row[_k+1]; + s=-(_i>=p); + _i-=p&s; + /*Count how many pulses were placed in this dimension.*/ + k0=_k; + q=row[_n]; + if(q>_i){ + celt_assert(p>q); + _k=_n; + do p=CELT_PVQ_U_ROW[--_k][_n]; + while(p>_i); + } + else for(p=row[_k];p>_i;p=row[_k])_k--; + _i-=p; + *_y++=(k0-_k+s)^s; + } + /*Lots of dimensions case:*/ + else{ + /*Are there any pulses in this dimension at all?*/ + p=CELT_PVQ_U_ROW[_k][_n]; + q=CELT_PVQ_U_ROW[_k+1][_n]; + if(p<=_i&&_i<q){ + _i-=p; + *_y++=0; + } + else{ + /*Are the pulses in this dimension negative?*/ + s=-(_i>=q); + _i-=q&s; + /*Count how many pulses were placed in this dimension.*/ + k0=_k; + do p=CELT_PVQ_U_ROW[--_k][_n]; + while(p>_i); + _i-=p; + *_y++=(k0-_k+s)^s; + } + } + _n--; + } + /*_n==2*/ + 
p=2*_k+1; + s=-(_i>=p); + _i-=p&s; + k0=_k; + _k=(_i+1)>>1; + if(_k)_i-=2*_k-1; + *_y++=(k0-_k+s)^s; + /*_n==1*/ + s=-(int)_i; + *_y=(_k+s)^s; +} + +void decode_pulses(int *_y,int _n,int _k,ec_dec *_dec){ + cwrsi(_n,_k,ec_dec_uint(_dec,CELT_PVQ_V(_n,_k)),_y); +} + +#else /* SMALL_FOOTPRINT */ + +/*Computes the next row/column of any recurrence that obeys the relation + u[i][j]=u[i-1][j]+u[i][j-1]+u[i-1][j-1]. + _ui0 is the base case for the new row/column.*/ +static OPUS_INLINE void unext(opus_uint32 *_ui,unsigned _len,opus_uint32 _ui0){ + opus_uint32 ui1; + unsigned j; + /*This do-while will overrun the array if we don't have storage for at least + 2 values.*/ + j=1; do { + ui1=UADD32(UADD32(_ui[j],_ui[j-1]),_ui0); + _ui[j-1]=_ui0; + _ui0=ui1; + } while (++j<_len); + _ui[j-1]=_ui0; +} + +/*Computes the previous row/column of any recurrence that obeys the relation + u[i-1][j]=u[i][j]-u[i][j-1]-u[i-1][j-1]. + _ui0 is the base case for the new row/column.*/ +static OPUS_INLINE void uprev(opus_uint32 *_ui,unsigned _n,opus_uint32 _ui0){ + opus_uint32 ui1; + unsigned j; + /*This do-while will overrun the array if we don't have storage for at least + 2 values.*/ + j=1; do { + ui1=USUB32(USUB32(_ui[j],_ui[j-1]),_ui0); + _ui[j-1]=_ui0; + _ui0=ui1; + } while (++j<_n); + _ui[j-1]=_ui0; +} + +/*Compute V(_n,_k), as well as U(_n,0..._k+1). 
+ _u: On exit, _u[i] contains U(_n,i) for i in [0..._k+1].*/ +static opus_uint32 ncwrs_urow(unsigned _n,unsigned _k,opus_uint32 *_u){ + opus_uint32 um2; + unsigned len; + unsigned k; + len=_k+2; + /*We require storage at least 3 values (e.g., _k>0).*/ + celt_assert(len>=3); + _u[0]=0; + _u[1]=um2=1; + /*If _n==0, _u[0] should be 1 and the rest should be 0.*/ + /*If _n==1, _u[i] should be 1 for i>1.*/ + celt_assert(_n>=2); + /*If _k==0, the following do-while loop will overflow the buffer.*/ + celt_assert(_k>0); + k=2; + do _u[k]=(k<<1)-1; + while(++k<len); + for(k=2;k<_n;k++)unext(_u+1,_k+1,1); + return _u[_k]+_u[_k+1]; +} + +/*Returns the _i'th combination of _k elements chosen from a set of size _n + with associated sign bits. + _y: Returns the vector of pulses. + _u: Must contain entries [0..._k+1] of row _n of U() on input. + Its contents will be destructively modified.*/ +static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y,opus_uint32 *_u){ + int j; + celt_assert(_n>0); + j=0; + do{ + opus_uint32 p; + int s; + int yj; + p=_u[_k+1]; + s=-(_i>=p); + _i-=p&s; + yj=_k; + p=_u[_k]; + while(p>_i)p=_u[--_k]; + _i-=p; + yj-=_k; + _y[j]=(yj+s)^s; + uprev(_u,_k+2,0); + } + while(++j<_n); +} + +/*Returns the index of the given combination of K elements chosen from a set + of size 1 with associated sign bits. + _y: The vector of pulses, whose sum of absolute values is K. + _k: Returns K.*/ +static OPUS_INLINE opus_uint32 icwrs1(const int *_y,int *_k){ + *_k=abs(_y[0]); + return _y[0]<0; +} + +/*Returns the index of the given combination of K elements chosen from a set + of size _n with associated sign bits. + _y: The vector of pulses, whose sum of absolute values must be _k. 
+ _nc: Returns V(_n,_k).*/ +static OPUS_INLINE opus_uint32 icwrs(int _n,int _k,opus_uint32 *_nc,const int *_y, + opus_uint32 *_u){ + opus_uint32 i; + int j; + int k; + /*We can't unroll the first two iterations of the loop unless _n>=2.*/ + celt_assert(_n>=2); + _u[0]=0; + for(k=1;k<=_k+1;k++)_u[k]=(k<<1)-1; + i=icwrs1(_y+_n-1,&k); + j=_n-2; + i+=_u[k]; + k+=abs(_y[j]); + if(_y[j]<0)i+=_u[k+1]; + while(j-->0){ + unext(_u,_k+2,0); + i+=_u[k]; + k+=abs(_y[j]); + if(_y[j]<0)i+=_u[k+1]; + } + *_nc=_u[k]+_u[k+1]; + return i; +} + +#ifdef CUSTOM_MODES +void get_required_bits(opus_int16 *_bits,int _n,int _maxk,int _frac){ + int k; + /*_maxk==0 => there's nothing to do.*/ + celt_assert(_maxk>0); + _bits[0]=0; + if (_n==1) + { + for (k=1;k<=_maxk;k++) + _bits[k] = 1<<_frac; + } + else { + VARDECL(opus_uint32,u); + SAVE_STACK; + ALLOC(u,_maxk+2U,opus_uint32); + ncwrs_urow(_n,_maxk,u); + for(k=1;k<=_maxk;k++) + _bits[k]=log2_frac(u[k]+u[k+1],_frac); + RESTORE_STACK; + } +} +#endif /* CUSTOM_MODES */ + +void encode_pulses(const int *_y,int _n,int _k,ec_enc *_enc){ + opus_uint32 i; + VARDECL(opus_uint32,u); + opus_uint32 nc; + SAVE_STACK; + celt_assert(_k>0); + ALLOC(u,_k+2U,opus_uint32); + i=icwrs(_n,_k,&nc,_y,u); + ec_enc_uint(_enc,i,nc); + RESTORE_STACK; +} + +void decode_pulses(int *_y,int _n,int _k,ec_dec *_dec){ + VARDECL(opus_uint32,u); + SAVE_STACK; + celt_assert(_k>0); + ALLOC(u,_k+2U,opus_uint32); + cwrsi(_n,_k,ec_dec_uint(_dec,ncwrs_urow(_n,_k,u)),_y,u); + RESTORE_STACK; +} + +#endif /* SMALL_FOOTPRINT */ diff --git a/drivers/opus/celt/cwrs.h b/drivers/opus/celt/cwrs.h new file mode 100644 index 0000000000..7dfbd076d1 --- /dev/null +++ b/drivers/opus/celt/cwrs.h @@ -0,0 +1,48 @@ +/* Copyright (c) 2007-2008 CSIRO + Copyright (c) 2007-2009 Xiph.Org Foundation + Copyright (c) 2007-2009 Timothy B. Terriberry + Written by Timothy B. 
Terriberry and Jean-Marc Valin */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef CWRS_H +#define CWRS_H + +#include "arch.h" +#include "stack_alloc.h" +#include "entenc.h" +#include "entdec.h" + +#ifdef CUSTOM_MODES +int log2_frac(opus_uint32 val, int frac); +#endif + +void get_required_bits(opus_int16 *bits, int N, int K, int frac); + +void encode_pulses(const int *_y, int N, int K, ec_enc *enc); + +void decode_pulses(int *_y, int N, int K, ec_dec *dec); + +#endif /* CWRS_H */ diff --git a/drivers/opus/celt/ecintrin.h b/drivers/opus/celt/ecintrin.h new file mode 100644 index 0000000000..2263cff6bd --- /dev/null +++ b/drivers/opus/celt/ecintrin.h @@ -0,0 +1,87 @@ +/* Copyright (c) 2003-2008 Timothy B. 
Terriberry + Copyright (c) 2008 Xiph.Org Foundation */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/*Some common macros for potential platform-specific optimization.*/ +#include "opus_types.h" +#include <math.h> +#include <limits.h> +#include "arch.h" +#if !defined(_ecintrin_H) +# define _ecintrin_H (1) + +/*Some specific platforms may have optimized intrinsic or OPUS_INLINE assembly + versions of these functions which can substantially improve performance. 
+ We define macros for them to allow easy incorporation of these non-ANSI + features.*/ + +/*Modern gcc (4.x) can compile the naive versions of min and max with cmov if + given an appropriate architecture, but the branchless bit-twiddling versions + are just as fast, and do not require any special target architecture. + Earlier gcc versions (3.x) compiled both code to the same assembly + instructions, because of the way they represented ((_b)>(_a)) internally.*/ +# define EC_MINI(_a,_b) ((_a)+(((_b)-(_a))&-((_b)<(_a)))) + +/*Count leading zeros. + This macro should only be used for implementing ec_ilog(), if it is defined. + All other code should use EC_ILOG() instead.*/ +#if defined(_MSC_VER) && (_MSC_VER >= 1400) +# include <intrin.h> +/*In _DEBUG mode this is not an intrinsic by default.*/ +# pragma intrinsic(_BitScanReverse) + +static __inline int ec_bsr(unsigned long _x){ + unsigned long ret; + _BitScanReverse(&ret,_x); + return (int)ret; +} +# define EC_CLZ0 (1) +# define EC_CLZ(_x) (-ec_bsr(_x)) +#elif defined(ENABLE_TI_DSPLIB) +# include "dsplib.h" +# define EC_CLZ0 (31) +# define EC_CLZ(_x) (_lnorm(_x)) +#elif __GNUC_PREREQ(3,4) +# if INT_MAX>=2147483647 +# define EC_CLZ0 ((int)sizeof(unsigned)*CHAR_BIT) +# define EC_CLZ(_x) (__builtin_clz(_x)) +# elif LONG_MAX>=2147483647L +# define EC_CLZ0 ((int)sizeof(unsigned long)*CHAR_BIT) +# define EC_CLZ(_x) (__builtin_clzl(_x)) +# endif +#endif + +#if defined(EC_CLZ) +/*Note that __builtin_clz is not defined when _x==0, according to the gcc + documentation (and that of the BSR instruction that implements it on x86). + The majority of the time we can never pass it zero. 
+ When we need to, it can be special cased.*/ +# define EC_ILOG(_x) (EC_CLZ0-EC_CLZ(_x)) +#else +int ec_ilog(opus_uint32 _v); +# define EC_ILOG(_x) (ec_ilog(_x)) +#endif +#endif diff --git a/drivers/opus/celt/entcode.c b/drivers/opus/celt/entcode.c new file mode 100644 index 0000000000..fd817a9db5 --- /dev/null +++ b/drivers/opus/celt/entcode.c @@ -0,0 +1,93 @@ +/* Copyright (c) 2001-2011 Timothy B. Terriberry +*/ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "entcode.h" +#include "arch.h" + +#if !defined(EC_CLZ) +/*This is a fallback for systems where we don't know how to access + a BSR or CLZ instruction (see ecintrin.h). 
+ If you are optimizing Opus on a new platform and it has a native CLZ or + BZR (e.g. cell, MIPS, x86, etc) then making it available to Opus will be + an easy performance win.*/ +int ec_ilog(opus_uint32 _v){ + /*On a Pentium M, this branchless version tested as the fastest on + 1,000,000,000 random 32-bit integers, edging out a similar version with + branches, and a 256-entry LUT version.*/ + int ret; + int m; + ret=!!_v; + m=!!(_v&0xFFFF0000)<<4; + _v>>=m; + ret|=m; + m=!!(_v&0xFF00)<<3; + _v>>=m; + ret|=m; + m=!!(_v&0xF0)<<2; + _v>>=m; + ret|=m; + m=!!(_v&0xC)<<1; + _v>>=m; + ret|=m; + ret+=!!(_v&0x2); + return ret; +} +#endif + +opus_uint32 ec_tell_frac(ec_ctx *_this){ + opus_uint32 nbits; + opus_uint32 r; + int l; + int i; + /*To handle the non-integral number of bits still left in the encoder/decoder + state, we compute the worst-case number of bits of val that must be + encoded to ensure that the value is inside the range for any possible + subsequent bits. + The computation here is independent of val itself (the decoder does not + even track that value), even though the real number of bits used after + ec_enc_done() may be 1 smaller if rng is a power of two and the + corresponding trailing bits of val are all zeros. + If we did try to track that special case, then coding a value with a + probability of 1/(1<<n) might sometimes appear to use more than n bits. + This may help explain the surprising result that a newly initialized + encoder or decoder claims to have used 1 bit.*/ + nbits=_this->nbits_total<<BITRES; + l=EC_ILOG(_this->rng); + r=_this->rng>>(l-16); + for(i=BITRES;i-->0;){ + int b; + r=r*r>>15; + b=(int)(r>>16); + l=l<<1|b; + r>>=b; + } + return nbits-l; +} diff --git a/drivers/opus/celt/entcode.h b/drivers/opus/celt/entcode.h new file mode 100644 index 0000000000..dd13e49e50 --- /dev/null +++ b/drivers/opus/celt/entcode.h @@ -0,0 +1,117 @@ +/* Copyright (c) 2001-2011 Timothy B. 
Terriberry + Copyright (c) 2008-2009 Xiph.Org Foundation */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#include "opus_types.h" +#include "opus_defines.h" + +#if !defined(_entcode_H) +# define _entcode_H (1) +# include <limits.h> +# include <stddef.h> +# include "ecintrin.h" + +/*OPT: ec_window must be at least 32 bits, but if you have fast arithmetic on a + larger type, you can speed up the decoder by using it here.*/ +typedef opus_uint32 ec_window; +typedef struct ec_ctx ec_ctx; +typedef struct ec_ctx ec_enc; +typedef struct ec_ctx ec_dec; + +# define EC_WINDOW_SIZE ((int)sizeof(ec_window)*CHAR_BIT) + +/*The number of bits to use for the range-coded part of unsigned integers.*/ +# define EC_UINT_BITS (8) + +/*The resolution of fractional-precision bit usage measurements, i.e., + 3 => 1/8th bits.*/ +# define BITRES 3 + +/*The entropy encoder/decoder context. + We use the same structure for both, so that common functions like ec_tell() + can be used on either one.*/ +struct ec_ctx{ + /*Buffered input/output.*/ + unsigned char *buf; + /*The size of the buffer.*/ + opus_uint32 storage; + /*The offset at which the last byte containing raw bits was read/written.*/ + opus_uint32 end_offs; + /*Bits that will be read from/written at the end.*/ + ec_window end_window; + /*Number of valid bits in end_window.*/ + int nend_bits; + /*The total number of whole bits read/written. + This does not include partial bits currently in the range coder.*/ + int nbits_total; + /*The offset at which the next range coder byte will be read/written.*/ + opus_uint32 offs; + /*The number of values in the current range.*/ + opus_uint32 rng; + /*In the decoder: the difference between the top of the current range and + the input value, minus one. + In the encoder: the low end of the current range.*/ + opus_uint32 val; + /*In the decoder: the saved normalization factor from ec_decode(). 
+ In the encoder: the number of oustanding carry propagating symbols.*/ + opus_uint32 ext; + /*A buffered input/output symbol, awaiting carry propagation.*/ + int rem; + /*Nonzero if an error occurred.*/ + int error; +}; + +static OPUS_INLINE opus_uint32 ec_range_bytes(ec_ctx *_this){ + return _this->offs; +} + +static OPUS_INLINE unsigned char *ec_get_buffer(ec_ctx *_this){ + return _this->buf; +} + +static OPUS_INLINE int ec_get_error(ec_ctx *_this){ + return _this->error; +} + +/*Returns the number of bits "used" by the encoded or decoded symbols so far. + This same number can be computed in either the encoder or the decoder, and is + suitable for making coding decisions. + Return: The number of bits. + This will always be slightly larger than the exact value (e.g., all + rounding error is in the positive direction).*/ +static OPUS_INLINE int ec_tell(ec_ctx *_this){ + return _this->nbits_total-EC_ILOG(_this->rng); +} + +/*Returns the number of bits "used" by the encoded or decoded symbols so far. + This same number can be computed in either the encoder or the decoder, and is + suitable for making coding decisions. + Return: The number of bits scaled by 2**BITRES. + This will always be slightly larger than the exact value (e.g., all + rounding error is in the positive direction).*/ +opus_uint32 ec_tell_frac(ec_ctx *_this); + +#endif diff --git a/drivers/opus/celt/entdec.c b/drivers/opus/celt/entdec.c new file mode 100644 index 0000000000..383da571c9 --- /dev/null +++ b/drivers/opus/celt/entdec.c @@ -0,0 +1,245 @@ +/* Copyright (c) 2001-2011 Timothy B. Terriberry + Copyright (c) 2008-2009 Xiph.Org Foundation */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include <stddef.h> +#include "os_support.h" +#include "arch.h" +#include "entdec.h" +#include "mfrngcod.h" + +/*A range decoder. + This is an entropy decoder based upon \cite{Mar79}, which is itself a + rediscovery of the FIFO arithmetic code introduced by \cite{Pas76}. + It is very similar to arithmetic encoding, except that encoding is done with + digits in any base, instead of with bits, and so it is faster when using + larger bases (i.e.: a byte). + The author claims an average waste of $\frac{1}{2}\log_b(2b)$ bits, where $b$ + is the base, longer than the theoretical optimum, but to my knowledge there + is no published justification for this claim. + This only seems true when using near-infinite precision arithmetic so that + the process is carried out with no rounding errors. 
+ + An excellent description of implementation details is available at + http://www.arturocampos.com/ac_range.html + A recent work \cite{MNW98} which proposes several changes to arithmetic + encoding for efficiency actually re-discovers many of the principles + behind range encoding, and presents a good theoretical analysis of them. + + End of stream is handled by writing out the smallest number of bits that + ensures that the stream will be correctly decoded regardless of the value of + any subsequent bits. + ec_tell() can be used to determine how many bits were needed to decode + all the symbols thus far; other data can be packed in the remaining bits of + the input buffer. + @PHDTHESIS{Pas76, + author="Richard Clark Pasco", + title="Source coding algorithms for fast data compression", + school="Dept. of Electrical Engineering, Stanford University", + address="Stanford, CA", + month=May, + year=1976 + } + @INPROCEEDINGS{Mar79, + author="Martin, G.N.N.", + title="Range encoding: an algorithm for removing redundancy from a digitised + message", + booktitle="Video & Data Recording Conference", + year=1979, + address="Southampton", + month=Jul + } + @ARTICLE{MNW98, + author="Alistair Moffat and Radford Neal and Ian H. Witten", + title="Arithmetic Coding Revisited", + journal="{ACM} Transactions on Information Systems", + year=1998, + volume=16, + number=3, + pages="256--294", + month=Jul, + URL="http://www.stanford.edu/class/ee398a/handouts/papers/Moffat98ArithmCoding.pdf" + }*/ + +static int ec_read_byte(ec_dec *_this){ + return _this->offs<_this->storage?_this->buf[_this->offs++]:0; +} + +static int ec_read_byte_from_end(ec_dec *_this){ + return _this->end_offs<_this->storage? 
+ _this->buf[_this->storage-++(_this->end_offs)]:0; +} + +/*Normalizes the contents of val and rng so that rng lies entirely in the + high-order symbol.*/ +static void ec_dec_normalize(ec_dec *_this){ + /*If the range is too small, rescale it and input some bits.*/ + while(_this->rng<=EC_CODE_BOT){ + int sym; + _this->nbits_total+=EC_SYM_BITS; + _this->rng<<=EC_SYM_BITS; + /*Use up the remaining bits from our last symbol.*/ + sym=_this->rem; + /*Read the next value from the input.*/ + _this->rem=ec_read_byte(_this); + /*Take the rest of the bits we need from this new symbol.*/ + sym=(sym<<EC_SYM_BITS|_this->rem)>>(EC_SYM_BITS-EC_CODE_EXTRA); + /*And subtract them from val, capped to be less than EC_CODE_TOP.*/ + _this->val=((_this->val<<EC_SYM_BITS)+(EC_SYM_MAX&~sym))&(EC_CODE_TOP-1); + } +} + +void ec_dec_init(ec_dec *_this,unsigned char *_buf,opus_uint32 _storage){ + _this->buf=_buf; + _this->storage=_storage; + _this->end_offs=0; + _this->end_window=0; + _this->nend_bits=0; + /*This is the offset from which ec_tell() will subtract partial bits. 
+ The final value after the ec_dec_normalize() call will be the same as in + the encoder, but we have to compensate for the bits that are added there.*/ + _this->nbits_total=EC_CODE_BITS+1 + -((EC_CODE_BITS-EC_CODE_EXTRA)/EC_SYM_BITS)*EC_SYM_BITS; + _this->offs=0; + _this->rng=1U<<EC_CODE_EXTRA; + _this->rem=ec_read_byte(_this); + _this->val=_this->rng-1-(_this->rem>>(EC_SYM_BITS-EC_CODE_EXTRA)); + _this->error=0; + /*Normalize the interval.*/ + ec_dec_normalize(_this); +} + +unsigned ec_decode(ec_dec *_this,unsigned _ft){ + unsigned s; + _this->ext=_this->rng/_ft; + s=(unsigned)(_this->val/_this->ext); + return _ft-EC_MINI(s+1,_ft); +} + +unsigned ec_decode_bin(ec_dec *_this,unsigned _bits){ + unsigned s; + _this->ext=_this->rng>>_bits; + s=(unsigned)(_this->val/_this->ext); + return (1U<<_bits)-EC_MINI(s+1U,1U<<_bits); +} + +void ec_dec_update(ec_dec *_this,unsigned _fl,unsigned _fh,unsigned _ft){ + opus_uint32 s; + s=IMUL32(_this->ext,_ft-_fh); + _this->val-=s; + _this->rng=_fl>0?IMUL32(_this->ext,_fh-_fl):_this->rng-s; + ec_dec_normalize(_this); +} + +/*The probability of having a "one" is 1/(1<<_logp).*/ +int ec_dec_bit_logp(ec_dec *_this,unsigned _logp){ + opus_uint32 r; + opus_uint32 d; + opus_uint32 s; + int ret; + r=_this->rng; + d=_this->val; + s=r>>_logp; + ret=d<s; + if(!ret)_this->val=d-s; + _this->rng=ret?s:r-s; + ec_dec_normalize(_this); + return ret; +} + +int ec_dec_icdf(ec_dec *_this,const unsigned char *_icdf,unsigned _ftb){ + opus_uint32 r; + opus_uint32 d; + opus_uint32 s; + opus_uint32 t; + int ret; + s=_this->rng; + d=_this->val; + r=s>>_ftb; + ret=-1; + do{ + t=s; + s=IMUL32(r,_icdf[++ret]); + } + while(d<s); + _this->val=d-s; + _this->rng=t-s; + ec_dec_normalize(_this); + return ret; +} + +opus_uint32 ec_dec_uint(ec_dec *_this,opus_uint32 _ft){ + unsigned ft; + unsigned s; + int ftb; + /*In order to optimize EC_ILOG(), it is undefined for the value 0.*/ + celt_assert(_ft>1); + _ft--; + ftb=EC_ILOG(_ft); + if(ftb>EC_UINT_BITS){ + opus_uint32 
t; + ftb-=EC_UINT_BITS; + ft=(unsigned)(_ft>>ftb)+1; + s=ec_decode(_this,ft); + ec_dec_update(_this,s,s+1,ft); + t=(opus_uint32)s<<ftb|ec_dec_bits(_this,ftb); + if(t<=_ft)return t; + _this->error=1; + return _ft; + } + else{ + _ft++; + s=ec_decode(_this,(unsigned)_ft); + ec_dec_update(_this,s,s+1,(unsigned)_ft); + return s; + } +} + +opus_uint32 ec_dec_bits(ec_dec *_this,unsigned _bits){ + ec_window window; + int available; + opus_uint32 ret; + window=_this->end_window; + available=_this->nend_bits; + if((unsigned)available<_bits){ + do{ + window|=(ec_window)ec_read_byte_from_end(_this)<<available; + available+=EC_SYM_BITS; + } + while(available<=EC_WINDOW_SIZE-EC_SYM_BITS); + } + ret=(opus_uint32)window&(((opus_uint32)1<<_bits)-1U); + window>>=_bits; + available-=_bits; + _this->end_window=window; + _this->nend_bits=available; + _this->nbits_total+=_bits; + return ret; +} diff --git a/drivers/opus/celt/entdec.h b/drivers/opus/celt/entdec.h new file mode 100644 index 0000000000..d8ab318730 --- /dev/null +++ b/drivers/opus/celt/entdec.h @@ -0,0 +1,100 @@ +/* Copyright (c) 2001-2011 Timothy B. Terriberry + Copyright (c) 2008-2009 Xiph.Org Foundation */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#if !defined(_entdec_H) +# define _entdec_H (1) +# include <limits.h> +# include "entcode.h" + +/*Initializes the decoder. + _buf: The input buffer to use. + Return: 0 on success, or a negative value on error.*/ +void ec_dec_init(ec_dec *_this,unsigned char *_buf,opus_uint32 _storage); + +/*Calculates the cumulative frequency for the next symbol. + This can then be fed into the probability model to determine what that + symbol is, and the additional frequency information required to advance to + the next symbol. + This function cannot be called more than once without a corresponding call to + ec_dec_update(), or decoding will not proceed correctly. + _ft: The total frequency of the symbols in the alphabet the next symbol was + encoded with. + Return: A cumulative frequency representing the encoded symbol. + If the cumulative frequency of all the symbols before the one that + was encoded was fl, and the cumulative frequency of all the symbols + up to and including the one encoded is fh, then the returned value + will fall in the range [fl,fh).*/ +unsigned ec_decode(ec_dec *_this,unsigned _ft); + +/*Equivalent to ec_decode() with _ft==1<<_bits.*/ +unsigned ec_decode_bin(ec_dec *_this,unsigned _bits); + +/*Advance the decoder past the next symbol using the frequency information the + symbol was encoded with. + Exactly one call to ec_decode() must have been made so that all necessary + intermediate calculations are performed. 
+ _fl: The cumulative frequency of all symbols that come before the symbol + decoded. + _fh: The cumulative frequency of all symbols up to and including the symbol + decoded. + Together with _fl, this defines the range [_fl,_fh) in which the value + returned above must fall. + _ft: The total frequency of the symbols in the alphabet the symbol decoded + was encoded in. + This must be the same as passed to the preceding call to ec_decode().*/ +void ec_dec_update(ec_dec *_this,unsigned _fl,unsigned _fh,unsigned _ft); + +/* Decode a bit that has a 1/(1<<_logp) probability of being a one */ +int ec_dec_bit_logp(ec_dec *_this,unsigned _logp); + +/*Decodes a symbol given an "inverse" CDF table. + No call to ec_dec_update() is necessary after this call. + _icdf: The "inverse" CDF, such that symbol s falls in the range + [s>0?ft-_icdf[s-1]:0,ft-_icdf[s]), where ft=1<<_ftb. + The values must be monotonically non-increasing, and the last value + must be 0. + _ftb: The number of bits of precision in the cumulative distribution. + Return: The decoded symbol s.*/ +int ec_dec_icdf(ec_dec *_this,const unsigned char *_icdf,unsigned _ftb); + +/*Extracts a raw unsigned integer with a non-power-of-2 range from the stream. + The bits must have been encoded with ec_enc_uint(). + No call to ec_dec_update() is necessary after this call. + _ft: The number of integers that can be decoded (one more than the max). + This must be at least one, and no more than 2**32-1. + Return: The decoded bits.*/ +opus_uint32 ec_dec_uint(ec_dec *_this,opus_uint32 _ft); + +/*Extracts a sequence of raw bits from the stream. + The bits must have been encoded with ec_enc_bits(). + No call to ec_dec_update() is necessary after this call. + _ftb: The number of bits to extract. + This must be between 0 and 25, inclusive. 
+ Return: The decoded bits.*/ +opus_uint32 ec_dec_bits(ec_dec *_this,unsigned _ftb); + +#endif diff --git a/drivers/opus/celt/entenc.c b/drivers/opus/celt/entenc.c new file mode 100644 index 0000000000..299329c63f --- /dev/null +++ b/drivers/opus/celt/entenc.c @@ -0,0 +1,294 @@ +/* Copyright (c) 2001-2011 Timothy B. Terriberry + Copyright (c) 2008-2009 Xiph.Org Foundation */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#if defined(OPUS_HAVE_CONFIG_H) +# include "opus_config.h" +#endif +#include "os_support.h" +#include "arch.h" +#include "entenc.h" +#include "mfrngcod.h" + +/*A range encoder. + See entdec.c and the references for implementation details \cite{Mar79,MNW98}. 
+ + @INPROCEEDINGS{Mar79, + author="Martin, G.N.N.", + title="Range encoding: an algorithm for removing redundancy from a digitised + message", + booktitle="Video \& Data Recording Conference", + year=1979, + address="Southampton", + month=Jul + } + @ARTICLE{MNW98, + author="Alistair Moffat and Radford Neal and Ian H. Witten", + title="Arithmetic Coding Revisited", + journal="{ACM} Transactions on Information Systems", + year=1998, + volume=16, + number=3, + pages="256--294", + month=Jul, + URL="http://www.stanford.edu/class/ee398/handouts/papers/Moffat98ArithmCoding.pdf" + }*/ + +static int ec_write_byte(ec_enc *_this,unsigned _value){ + if(_this->offs+_this->end_offs>=_this->storage)return -1; + _this->buf[_this->offs++]=(unsigned char)_value; + return 0; +} + +static int ec_write_byte_at_end(ec_enc *_this,unsigned _value){ + if(_this->offs+_this->end_offs>=_this->storage)return -1; + _this->buf[_this->storage-++(_this->end_offs)]=(unsigned char)_value; + return 0; +} + +/*Outputs a symbol, with a carry bit. + If there is a potential to propagate a carry over several symbols, they are + buffered until it can be determined whether or not an actual carry will + occur. + If the counter for the buffered symbols overflows, then the stream becomes + undecodable. + This gives a theoretical limit of a few billion symbols in a single packet on + 32-bit systems. + The alternative is to truncate the range in order to force a carry, but + requires similar carry tracking in the decoder, needlessly slowing it down.*/ +static void ec_enc_carry_out(ec_enc *_this,int _c){ + if(_c!=EC_SYM_MAX){ + /*No further carry propagation possible, flush buffer.*/ + int carry; + carry=_c>>EC_SYM_BITS; + /*Don't output a byte on the first write. 
+ This compare should be taken care of by branch-prediction thereafter.*/ + if(_this->rem>=0)_this->error|=ec_write_byte(_this,_this->rem+carry); + if(_this->ext>0){ + unsigned sym; + sym=(EC_SYM_MAX+carry)&EC_SYM_MAX; + do _this->error|=ec_write_byte(_this,sym); + while(--(_this->ext)>0); + } + _this->rem=_c&EC_SYM_MAX; + } + else _this->ext++; +} + +static void ec_enc_normalize(ec_enc *_this){ + /*If the range is too small, output some bits and rescale it.*/ + while(_this->rng<=EC_CODE_BOT){ + ec_enc_carry_out(_this,(int)(_this->val>>EC_CODE_SHIFT)); + /*Move the next-to-high-order symbol into the high-order position.*/ + _this->val=(_this->val<<EC_SYM_BITS)&(EC_CODE_TOP-1); + _this->rng<<=EC_SYM_BITS; + _this->nbits_total+=EC_SYM_BITS; + } +} + +void ec_enc_init(ec_enc *_this,unsigned char *_buf,opus_uint32 _size){ + _this->buf=_buf; + _this->end_offs=0; + _this->end_window=0; + _this->nend_bits=0; + /*This is the offset from which ec_tell() will subtract partial bits.*/ + _this->nbits_total=EC_CODE_BITS+1; + _this->offs=0; + _this->rng=EC_CODE_TOP; + _this->rem=-1; + _this->val=0; + _this->ext=0; + _this->storage=_size; + _this->error=0; +} + +void ec_encode(ec_enc *_this,unsigned _fl,unsigned _fh,unsigned _ft){ + opus_uint32 r; + r=_this->rng/_ft; + if(_fl>0){ + _this->val+=_this->rng-IMUL32(r,(_ft-_fl)); + _this->rng=IMUL32(r,(_fh-_fl)); + } + else _this->rng-=IMUL32(r,(_ft-_fh)); + ec_enc_normalize(_this); +} + +void ec_encode_bin(ec_enc *_this,unsigned _fl,unsigned _fh,unsigned _bits){ + opus_uint32 r; + r=_this->rng>>_bits; + if(_fl>0){ + _this->val+=_this->rng-IMUL32(r,((1U<<_bits)-_fl)); + _this->rng=IMUL32(r,(_fh-_fl)); + } + else _this->rng-=IMUL32(r,((1U<<_bits)-_fh)); + ec_enc_normalize(_this); +} + +/*The probability of having a "one" is 1/(1<<_logp).*/ +void ec_enc_bit_logp(ec_enc *_this,int _val,unsigned _logp){ + opus_uint32 r; + opus_uint32 s; + opus_uint32 l; + r=_this->rng; + l=_this->val; + s=r>>_logp; + r-=s; + if(_val)_this->val=l+r; + 
_this->rng=_val?s:r; + ec_enc_normalize(_this); +} + +void ec_enc_icdf(ec_enc *_this,int _s,const unsigned char *_icdf,unsigned _ftb){ + opus_uint32 r; + r=_this->rng>>_ftb; + if(_s>0){ + _this->val+=_this->rng-IMUL32(r,_icdf[_s-1]); + _this->rng=IMUL32(r,_icdf[_s-1]-_icdf[_s]); + } + else _this->rng-=IMUL32(r,_icdf[_s]); + ec_enc_normalize(_this); +} + +void ec_enc_uint(ec_enc *_this,opus_uint32 _fl,opus_uint32 _ft){ + unsigned ft; + unsigned fl; + int ftb; + /*In order to optimize EC_ILOG(), it is undefined for the value 0.*/ + celt_assert(_ft>1); + _ft--; + ftb=EC_ILOG(_ft); + if(ftb>EC_UINT_BITS){ + ftb-=EC_UINT_BITS; + ft=(_ft>>ftb)+1; + fl=(unsigned)(_fl>>ftb); + ec_encode(_this,fl,fl+1,ft); + ec_enc_bits(_this,_fl&(((opus_uint32)1<<ftb)-1U),ftb); + } + else ec_encode(_this,_fl,_fl+1,_ft+1); +} + +void ec_enc_bits(ec_enc *_this,opus_uint32 _fl,unsigned _bits){ + ec_window window; + int used; + window=_this->end_window; + used=_this->nend_bits; + celt_assert(_bits>0); + if(used+_bits>EC_WINDOW_SIZE){ + do{ + _this->error|=ec_write_byte_at_end(_this,(unsigned)window&EC_SYM_MAX); + window>>=EC_SYM_BITS; + used-=EC_SYM_BITS; + } + while(used>=EC_SYM_BITS); + } + window|=(ec_window)_fl<<used; + used+=_bits; + _this->end_window=window; + _this->nend_bits=used; + _this->nbits_total+=_bits; +} + +void ec_enc_patch_initial_bits(ec_enc *_this,unsigned _val,unsigned _nbits){ + int shift; + unsigned mask; + celt_assert(_nbits<=EC_SYM_BITS); + shift=EC_SYM_BITS-_nbits; + mask=((1<<_nbits)-1)<<shift; + if(_this->offs>0){ + /*The first byte has been finalized.*/ + _this->buf[0]=(unsigned char)((_this->buf[0]&~mask)|_val<<shift); + } + else if(_this->rem>=0){ + /*The first byte is still awaiting carry propagation.*/ + _this->rem=(_this->rem&~mask)|_val<<shift; + } + else if(_this->rng<=(EC_CODE_TOP>>_nbits)){ + /*The renormalization loop has never been run.*/ + _this->val=(_this->val&~((opus_uint32)mask<<EC_CODE_SHIFT))| + (opus_uint32)_val<<(EC_CODE_SHIFT+shift); + } + 
/*The encoder hasn't even encoded _nbits of data yet.*/ + else _this->error=-1; +} + +void ec_enc_shrink(ec_enc *_this,opus_uint32 _size){ + celt_assert(_this->offs+_this->end_offs<=_size); + OPUS_MOVE(_this->buf+_size-_this->end_offs, + _this->buf+_this->storage-_this->end_offs,_this->end_offs); + _this->storage=_size; +} + +void ec_enc_done(ec_enc *_this){ + ec_window window; + int used; + opus_uint32 msk; + opus_uint32 end; + int l; + /*We output the minimum number of bits that ensures that the symbols encoded + thus far will be decoded correctly regardless of the bits that follow.*/ + l=EC_CODE_BITS-EC_ILOG(_this->rng); + msk=(EC_CODE_TOP-1)>>l; + end=(_this->val+msk)&~msk; + if((end|msk)>=_this->val+_this->rng){ + l++; + msk>>=1; + end=(_this->val+msk)&~msk; + } + while(l>0){ + ec_enc_carry_out(_this,(int)(end>>EC_CODE_SHIFT)); + end=(end<<EC_SYM_BITS)&(EC_CODE_TOP-1); + l-=EC_SYM_BITS; + } + /*If we have a buffered byte flush it into the output buffer.*/ + if(_this->rem>=0||_this->ext>0)ec_enc_carry_out(_this,0); + /*If we have buffered extra bits, flush them as well.*/ + window=_this->end_window; + used=_this->nend_bits; + while(used>=EC_SYM_BITS){ + _this->error|=ec_write_byte_at_end(_this,(unsigned)window&EC_SYM_MAX); + window>>=EC_SYM_BITS; + used-=EC_SYM_BITS; + } + /*Clear any excess space and add any remaining extra bits to the last byte.*/ + if(!_this->error){ + OPUS_CLEAR(_this->buf+_this->offs, + _this->storage-_this->offs-_this->end_offs); + if(used>0){ + /*If there's no range coder data at all, give up.*/ + if(_this->end_offs>=_this->storage)_this->error=-1; + else{ + l=-l; + /*If we've busted, don't add too many extra bits to the last byte; it + would corrupt the range coder data, and that's more important.*/ + if(_this->offs+_this->end_offs>=_this->storage&&l<used){ + window&=(1<<l)-1; + _this->error=-1; + } + _this->buf[_this->storage-_this->end_offs-1]|=(unsigned char)window; + } + } + } +} diff --git a/drivers/opus/celt/entenc.h 
b/drivers/opus/celt/entenc.h new file mode 100644 index 0000000000..796bc4d572 --- /dev/null +++ b/drivers/opus/celt/entenc.h @@ -0,0 +1,110 @@ +/* Copyright (c) 2001-2011 Timothy B. Terriberry + Copyright (c) 2008-2009 Xiph.Org Foundation */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#if !defined(_entenc_H) +# define _entenc_H (1) +# include <stddef.h> +# include "entcode.h" + +/*Initializes the encoder. + _buf: The buffer to store output bytes in. + _size: The size of the buffer, in chars.*/ +void ec_enc_init(ec_enc *_this,unsigned char *_buf,opus_uint32 _size); +/*Encodes a symbol given its frequency information. + The frequency information must be discernable by the decoder, assuming it + has read only the previous symbols from the stream. 
+ It is allowable to change the frequency information, or even the entire + source alphabet, so long as the decoder can tell from the context of the + previously encoded information that it is supposed to do so as well. + _fl: The cumulative frequency of all symbols that come before the one to be + encoded. + _fh: The cumulative frequency of all symbols up to and including the one to + be encoded. + Together with _fl, this defines the range [_fl,_fh) in which the + decoded value will fall. + _ft: The sum of the frequencies of all the symbols*/ +void ec_encode(ec_enc *_this,unsigned _fl,unsigned _fh,unsigned _ft); + +/*Equivalent to ec_encode() with _ft==1<<_bits.*/ +void ec_encode_bin(ec_enc *_this,unsigned _fl,unsigned _fh,unsigned _bits); + +/* Encode a bit that has a 1/(1<<_logp) probability of being a one */ +void ec_enc_bit_logp(ec_enc *_this,int _val,unsigned _logp); + +/*Encodes a symbol given an "inverse" CDF table. + _s: The index of the symbol to encode. + _icdf: The "inverse" CDF, such that symbol _s falls in the range + [_s>0?ft-_icdf[_s-1]:0,ft-_icdf[_s]), where ft=1<<_ftb. + The values must be monotonically non-increasing, and the last value + must be 0. + _ftb: The number of bits of precision in the cumulative distribution.*/ +void ec_enc_icdf(ec_enc *_this,int _s,const unsigned char *_icdf,unsigned _ftb); + +/*Encodes a raw unsigned integer in the stream. + _fl: The integer to encode. + _ft: The number of integers that can be encoded (one more than the max). + This must be at least one, and no more than 2**32-1.*/ +void ec_enc_uint(ec_enc *_this,opus_uint32 _fl,opus_uint32 _ft); + +/*Encodes a sequence of raw bits in the stream. + _fl: The bits to encode. + _ftb: The number of bits to encode. + This must be between 1 and 25, inclusive.*/ +void ec_enc_bits(ec_enc *_this,opus_uint32 _fl,unsigned _ftb); + +/*Overwrites a few bits at the very start of an existing stream, after they + have already been encoded. 
+ This makes it possible to have a few flags up front, where it is easy for + decoders to access them without parsing the whole stream, even if their + values are not determined until late in the encoding process, without having + to buffer all the intermediate symbols in the encoder. + In order for this to work, at least _nbits bits must have already been + encoded using probabilities that are an exact power of two. + The encoder can verify the number of encoded bits is sufficient, but cannot + check this latter condition. + _val: The bits to encode (in the least _nbits significant bits). + They will be decoded in order from most-significant to least. + _nbits: The number of bits to overwrite. + This must be no more than 8.*/ +void ec_enc_patch_initial_bits(ec_enc *_this,unsigned _val,unsigned _nbits); + +/*Compacts the data to fit in the target size. + This moves up the raw bits at the end of the current buffer so they are at + the end of the new buffer size. + The caller must ensure that the amount of data that's already been written + will fit in the new size. + _size: The number of bytes in the new buffer. + This must be large enough to contain the bits already written, and + must be no larger than the existing size.*/ +void ec_enc_shrink(ec_enc *_this,opus_uint32 _size); + +/*Indicates that there are no more symbols to encode. + All remaining output bytes are flushed to the output buffer. 
+ ec_enc_init() must be called before the encoder can be used again.*/ +void ec_enc_done(ec_enc *_this); + +#endif diff --git a/drivers/opus/celt/fixed_debug.h b/drivers/opus/celt/fixed_debug.h new file mode 100644 index 0000000000..80bc94910f --- /dev/null +++ b/drivers/opus/celt/fixed_debug.h @@ -0,0 +1,773 @@ +/* Copyright (C) 2003-2008 Jean-Marc Valin + Copyright (C) 2007-2012 Xiph.Org Foundation */ +/** + @file fixed_debug.h + @brief Fixed-point operations with debugging +*/ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#ifndef FIXED_DEBUG_H +#define FIXED_DEBUG_H + +#include <stdio.h> +#include "opus_defines.h" + +#ifdef CELT_C +OPUS_EXPORT opus_int64 celt_mips=0; +#else +extern opus_int64 celt_mips; +#endif + +#define MULT16_16SU(a,b) ((opus_val32)(opus_val16)(a)*(opus_val32)(opus_uint16)(b)) +#define MULT32_32_Q31(a,b) ADD32(ADD32(SHL32(MULT16_16(SHR32((a),16),SHR((b),16)),1), SHR32(MULT16_16SU(SHR32((a),16),((b)&0x0000ffff)),15)), SHR32(MULT16_16SU(SHR32((b),16),((a)&0x0000ffff)),15)) + +/** 16x32 multiplication, followed by a 16-bit shift right. Results fits in 32 bits */ +#define MULT16_32_Q16(a,b) ADD32(MULT16_16((a),SHR32((b),16)), SHR32(MULT16_16SU((a),((b)&0x0000ffff)),16)) + +#define MULT16_32_P16(a,b) MULT16_32_PX(a,b,16) + +#define QCONST16(x,bits) ((opus_val16)(.5+(x)*(((opus_val32)1)<<(bits)))) +#define QCONST32(x,bits) ((opus_val32)(.5+(x)*(((opus_val32)1)<<(bits)))) + +#define VERIFY_SHORT(x) ((x)<=32767&&(x)>=-32768) +#define VERIFY_INT(x) ((x)<=2147483647LL&&(x)>=-2147483648LL) +#define VERIFY_UINT(x) ((x)<=(2147483647LLU<<1)) + +#define SHR(a,b) SHR32(a,b) +#define PSHR(a,b) PSHR32(a,b) + +static OPUS_INLINE short NEG16(int x) +{ + int res; + if (!VERIFY_SHORT(x)) + { + fprintf (stderr, "NEG16: input is not short: %d\n", (int)x); +#ifdef FIXED_DEBUG_ASSERT + celt_assert(0); +#endif + } + res = -x; + if (!VERIFY_SHORT(res)) + { + fprintf (stderr, "NEG16: output is not short: %d\n", (int)res); +#ifdef FIXED_DEBUG_ASSERT + celt_assert(0); +#endif + } + celt_mips++; + return res; +} +static OPUS_INLINE int NEG32(opus_int64 x) +{ + opus_int64 res; + if (!VERIFY_INT(x)) + { + fprintf (stderr, "NEG16: input is not int: %d\n", (int)x); +#ifdef FIXED_DEBUG_ASSERT + celt_assert(0); +#endif + } + res = -x; + if (!VERIFY_INT(res)) + { + fprintf (stderr, "NEG16: output is not int: %d\n", (int)res); +#ifdef FIXED_DEBUG_ASSERT + celt_assert(0); +#endif + } + celt_mips+=2; + return res; +} + +#define EXTRACT16(x) EXTRACT16_(x, __FILE__, __LINE__) +static OPUS_INLINE 
short EXTRACT16_(int x, char *file, int line) +{ + int res; + if (!VERIFY_SHORT(x)) + { + fprintf (stderr, "EXTRACT16: input is not short: %d in %s: line %d\n", x, file, line); +#ifdef FIXED_DEBUG_ASSERT + celt_assert(0); +#endif + } + res = x; + celt_mips++; + return res; +} + +#define EXTEND32(x) EXTEND32_(x, __FILE__, __LINE__) +static OPUS_INLINE int EXTEND32_(int x, char *file, int line) +{ + int res; + if (!VERIFY_SHORT(x)) + { + fprintf (stderr, "EXTEND32: input is not short: %d in %s: line %d\n", x, file, line); +#ifdef FIXED_DEBUG_ASSERT + celt_assert(0); +#endif + } + res = x; + celt_mips++; + return res; +} + +#define SHR16(a, shift) SHR16_(a, shift, __FILE__, __LINE__) +static OPUS_INLINE short SHR16_(int a, int shift, char *file, int line) +{ + int res; + if (!VERIFY_SHORT(a) || !VERIFY_SHORT(shift)) + { + fprintf (stderr, "SHR16: inputs are not short: %d >> %d in %s: line %d\n", a, shift, file, line); +#ifdef FIXED_DEBUG_ASSERT + celt_assert(0); +#endif + } + res = a>>shift; + if (!VERIFY_SHORT(res)) + { + fprintf (stderr, "SHR16: output is not short: %d in %s: line %d\n", res, file, line); +#ifdef FIXED_DEBUG_ASSERT + celt_assert(0); +#endif + } + celt_mips++; + return res; +} +#define SHL16(a, shift) SHL16_(a, shift, __FILE__, __LINE__) +static OPUS_INLINE short SHL16_(int a, int shift, char *file, int line) +{ + int res; + if (!VERIFY_SHORT(a) || !VERIFY_SHORT(shift)) + { + fprintf (stderr, "SHL16: inputs are not short: %d %d in %s: line %d\n", a, shift, file, line); +#ifdef FIXED_DEBUG_ASSERT + celt_assert(0); +#endif + } + res = a<<shift; + if (!VERIFY_SHORT(res)) + { + fprintf (stderr, "SHL16: output is not short: %d in %s: line %d\n", res, file, line); +#ifdef FIXED_DEBUG_ASSERT + celt_assert(0); +#endif + } + celt_mips++; + return res; +} + +static OPUS_INLINE int SHR32(opus_int64 a, int shift) +{ + opus_int64 res; + if (!VERIFY_INT(a) || !VERIFY_SHORT(shift)) + { + fprintf (stderr, "SHR32: inputs are not int: %d %d\n", (int)a, shift); +#ifdef 
FIXED_DEBUG_ASSERT + celt_assert(0); +#endif + } + res = a>>shift; + if (!VERIFY_INT(res)) + { + fprintf (stderr, "SHR32: output is not int: %d\n", (int)res); +#ifdef FIXED_DEBUG_ASSERT + celt_assert(0); +#endif + } + celt_mips+=2; + return res; +} +#define SHL32(a, shift) SHL32_(a, shift, __FILE__, __LINE__) +static OPUS_INLINE int SHL32_(opus_int64 a, int shift, char *file, int line) +{ + opus_int64 res; + if (!VERIFY_INT(a) || !VERIFY_SHORT(shift)) + { + fprintf (stderr, "SHL32: inputs are not int: %lld %d in %s: line %d\n", a, shift, file, line); +#ifdef FIXED_DEBUG_ASSERT + celt_assert(0); +#endif + } + res = a<<shift; + if (!VERIFY_INT(res)) + { + fprintf (stderr, "SHL32: output is not int: %lld<<%d = %lld in %s: line %d\n", a, shift, res, file, line); +#ifdef FIXED_DEBUG_ASSERT + celt_assert(0); +#endif + } + celt_mips+=2; + return res; +} + +#define PSHR32(a,shift) (celt_mips--,SHR32(ADD32((a),(((opus_val32)(1)<<((shift))>>1))),shift)) +#define VSHR32(a, shift) (((shift)>0) ? SHR32(a, shift) : SHL32(a, -(shift))) + +#define ROUND16(x,a) (celt_mips--,EXTRACT16(PSHR32((x),(a)))) +#define HALF16(x) (SHR16(x,1)) +#define HALF32(x) (SHR32(x,1)) + +//#define SHR(a,shift) ((a) >> (shift)) +//#define SHL(a,shift) ((a) << (shift)) + +#define ADD16(a, b) ADD16_(a, b, __FILE__, __LINE__) +static OPUS_INLINE short ADD16_(int a, int b, char *file, int line) +{ + int res; + if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b)) + { + fprintf (stderr, "ADD16: inputs are not short: %d %d in %s: line %d\n", a, b, file, line); +#ifdef FIXED_DEBUG_ASSERT + celt_assert(0); +#endif + } + res = a+b; + if (!VERIFY_SHORT(res)) + { + fprintf (stderr, "ADD16: output is not short: %d+%d=%d in %s: line %d\n", a,b,res, file, line); +#ifdef FIXED_DEBUG_ASSERT + celt_assert(0); +#endif + } + celt_mips++; + return res; +} + +#define SUB16(a, b) SUB16_(a, b, __FILE__, __LINE__) +static OPUS_INLINE short SUB16_(int a, int b, char *file, int line) +{ + int res; + if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b)) 
+ { + fprintf (stderr, "SUB16: inputs are not short: %d %d in %s: line %d\n", a, b, file, line); +#ifdef FIXED_DEBUG_ASSERT + celt_assert(0); +#endif + } + res = a-b; + if (!VERIFY_SHORT(res)) + { + fprintf (stderr, "SUB16: output is not short: %d in %s: line %d\n", res, file, line); +#ifdef FIXED_DEBUG_ASSERT + celt_assert(0); +#endif + } + celt_mips++; + return res; +} + +#define ADD32(a, b) ADD32_(a, b, __FILE__, __LINE__) +static OPUS_INLINE int ADD32_(opus_int64 a, opus_int64 b, char *file, int line) +{ + opus_int64 res; + if (!VERIFY_INT(a) || !VERIFY_INT(b)) + { + fprintf (stderr, "ADD32: inputs are not int: %d %d in %s: line %d\n", (int)a, (int)b, file, line); +#ifdef FIXED_DEBUG_ASSERT + celt_assert(0); +#endif + } + res = a+b; + if (!VERIFY_INT(res)) + { + fprintf (stderr, "ADD32: output is not int: %d in %s: line %d\n", (int)res, file, line); +#ifdef FIXED_DEBUG_ASSERT + celt_assert(0); +#endif + } + celt_mips+=2; + return res; +} + +#define SUB32(a, b) SUB32_(a, b, __FILE__, __LINE__) +static OPUS_INLINE int SUB32_(opus_int64 a, opus_int64 b, char *file, int line) +{ + opus_int64 res; + if (!VERIFY_INT(a) || !VERIFY_INT(b)) + { + fprintf (stderr, "SUB32: inputs are not int: %d %d in %s: line %d\n", (int)a, (int)b, file, line); +#ifdef FIXED_DEBUG_ASSERT + celt_assert(0); +#endif + } + res = a-b; + if (!VERIFY_INT(res)) + { + fprintf (stderr, "SUB32: output is not int: %d in %s: line %d\n", (int)res, file, line); +#ifdef FIXED_DEBUG_ASSERT + celt_assert(0); +#endif + } + celt_mips+=2; + return res; +} + +#undef UADD32 +#define UADD32(a, b) UADD32_(a, b, __FILE__, __LINE__) +static OPUS_INLINE unsigned int UADD32_(opus_uint64 a, opus_uint64 b, char *file, int line) +{ + opus_uint64 res; + if (!VERIFY_UINT(a) || !VERIFY_UINT(b)) + { + fprintf (stderr, "UADD32: inputs are not uint32: %llu %llu in %s: line %d\n", a, b, file, line); +#ifdef FIXED_DEBUG_ASSERT + celt_assert(0); +#endif + } + res = a+b; + if (!VERIFY_UINT(res)) + { + fprintf (stderr, "UADD32: 
output is not uint32: %llu in %s: line %d\n", res, file, line); +#ifdef FIXED_DEBUG_ASSERT + celt_assert(0); +#endif + } + celt_mips+=2; + return res; +} + +#undef USUB32 +#define USUB32(a, b) USUB32_(a, b, __FILE__, __LINE__) +static OPUS_INLINE unsigned int USUB32_(opus_uint64 a, opus_uint64 b, char *file, int line) +{ + opus_uint64 res; + if (!VERIFY_UINT(a) || !VERIFY_UINT(b)) + { + fprintf (stderr, "USUB32: inputs are not uint32: %llu %llu in %s: line %d\n", a, b, file, line); +#ifdef FIXED_DEBUG_ASSERT + celt_assert(0); +#endif + } + if (a<b) + { + fprintf (stderr, "USUB32: inputs underflow: %llu < %llu in %s: line %d\n", a, b, file, line); +#ifdef FIXED_DEBUG_ASSERT + celt_assert(0); +#endif + } + res = a-b; + if (!VERIFY_UINT(res)) + { + fprintf (stderr, "USUB32: output is not uint32: %llu - %llu = %llu in %s: line %d\n", a, b, res, file, line); +#ifdef FIXED_DEBUG_ASSERT + celt_assert(0); +#endif + } + celt_mips+=2; + return res; +} + +/* result fits in 16 bits */ +static OPUS_INLINE short MULT16_16_16(int a, int b) +{ + int res; + if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b)) + { + fprintf (stderr, "MULT16_16_16: inputs are not short: %d %d\n", a, b); +#ifdef FIXED_DEBUG_ASSERT + celt_assert(0); +#endif + } + res = a*b; + if (!VERIFY_SHORT(res)) + { + fprintf (stderr, "MULT16_16_16: output is not short: %d\n", res); +#ifdef FIXED_DEBUG_ASSERT + celt_assert(0); +#endif + } + celt_mips++; + return res; +} + +#define MULT16_16(a, b) MULT16_16_(a, b, __FILE__, __LINE__) +static OPUS_INLINE int MULT16_16_(int a, int b, char *file, int line) +{ + opus_int64 res; + if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b)) + { + fprintf (stderr, "MULT16_16: inputs are not short: %d %d in %s: line %d\n", a, b, file, line); +#ifdef FIXED_DEBUG_ASSERT + celt_assert(0); +#endif + } + res = ((opus_int64)a)*b; + if (!VERIFY_INT(res)) + { + fprintf (stderr, "MULT16_16: output is not int: %d in %s: line %d\n", (int)res, file, line); +#ifdef FIXED_DEBUG_ASSERT + celt_assert(0); +#endif + } + 
celt_mips++; + return res; +} + +#define MAC16_16(c,a,b) (celt_mips-=2,ADD32((c),MULT16_16((a),(b)))) + +#define MULT16_32_QX(a, b, Q) MULT16_32_QX_(a, b, Q, __FILE__, __LINE__) +static OPUS_INLINE int MULT16_32_QX_(int a, opus_int64 b, int Q, char *file, int line) +{ + opus_int64 res; + if (!VERIFY_SHORT(a) || !VERIFY_INT(b)) + { + fprintf (stderr, "MULT16_32_Q%d: inputs are not short+int: %d %d in %s: line %d\n", Q, (int)a, (int)b, file, line); +#ifdef FIXED_DEBUG_ASSERT + celt_assert(0); +#endif + } + if (ABS32(b)>=((opus_val32)(1)<<(15+Q))) + { + fprintf (stderr, "MULT16_32_Q%d: second operand too large: %d %d in %s: line %d\n", Q, (int)a, (int)b, file, line); +#ifdef FIXED_DEBUG_ASSERT + celt_assert(0); +#endif + } + res = (((opus_int64)a)*(opus_int64)b) >> Q; + if (!VERIFY_INT(res)) + { + fprintf (stderr, "MULT16_32_Q%d: output is not int: %d*%d=%d in %s: line %d\n", Q, (int)a, (int)b,(int)res, file, line); +#ifdef FIXED_DEBUG_ASSERT + celt_assert(0); +#endif + } + if (Q==15) + celt_mips+=3; + else + celt_mips+=4; + return res; +} + +#define MULT16_32_PX(a, b, Q) MULT16_32_PX_(a, b, Q, __FILE__, __LINE__) +static OPUS_INLINE int MULT16_32_PX_(int a, opus_int64 b, int Q, char *file, int line) +{ + opus_int64 res; + if (!VERIFY_SHORT(a) || !VERIFY_INT(b)) + { + fprintf (stderr, "MULT16_32_P%d: inputs are not short+int: %d %d in %s: line %d\n\n", Q, (int)a, (int)b, file, line); +#ifdef FIXED_DEBUG_ASSERT + celt_assert(0); +#endif + } + if (ABS32(b)>=((opus_int64)(1)<<(15+Q))) + { + fprintf (stderr, "MULT16_32_Q%d: second operand too large: %d %d in %s: line %d\n\n", Q, (int)a, (int)b,file, line); +#ifdef FIXED_DEBUG_ASSERT + celt_assert(0); +#endif + } + res = ((((opus_int64)a)*(opus_int64)b) + (((opus_val32)(1)<<Q)>>1))>> Q; + if (!VERIFY_INT(res)) + { + fprintf (stderr, "MULT16_32_P%d: output is not int: %d*%d=%d in %s: line %d\n\n", Q, (int)a, (int)b,(int)res, file, line); +#ifdef FIXED_DEBUG_ASSERT + celt_assert(0); +#endif + } + if (Q==15) + celt_mips+=4; + 
else + celt_mips+=5; + return res; +} + +#define MULT16_32_Q15(a,b) MULT16_32_QX(a,b,15) +#define MAC16_32_Q15(c,a,b) (celt_mips-=2,ADD32((c),MULT16_32_Q15((a),(b)))) + +static OPUS_INLINE int SATURATE(int a, int b) +{ + if (a>b) + a=b; + if (a<-b) + a = -b; + celt_mips+=3; + return a; +} + +static OPUS_INLINE opus_int16 SATURATE16(opus_int32 a) +{ + celt_mips+=3; + if (a>32767) + return 32767; + else if (a<-32768) + return -32768; + else return a; +} + +static OPUS_INLINE int MULT16_16_Q11_32(int a, int b) +{ + opus_int64 res; + if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b)) + { + fprintf (stderr, "MULT16_16_Q11: inputs are not short: %d %d\n", a, b); +#ifdef FIXED_DEBUG_ASSERT + celt_assert(0); +#endif + } + res = ((opus_int64)a)*b; + res >>= 11; + if (!VERIFY_INT(res)) + { + fprintf (stderr, "MULT16_16_Q11: output is not short: %d*%d=%d\n", (int)a, (int)b, (int)res); +#ifdef FIXED_DEBUG_ASSERT + celt_assert(0); +#endif + } + celt_mips+=3; + return res; +} +static OPUS_INLINE short MULT16_16_Q13(int a, int b) +{ + opus_int64 res; + if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b)) + { + fprintf (stderr, "MULT16_16_Q13: inputs are not short: %d %d\n", a, b); +#ifdef FIXED_DEBUG_ASSERT + celt_assert(0); +#endif + } + res = ((opus_int64)a)*b; + res >>= 13; + if (!VERIFY_SHORT(res)) + { + fprintf (stderr, "MULT16_16_Q13: output is not short: %d*%d=%d\n", a, b, (int)res); +#ifdef FIXED_DEBUG_ASSERT + celt_assert(0); +#endif + } + celt_mips+=3; + return res; +} +static OPUS_INLINE short MULT16_16_Q14(int a, int b) +{ + opus_int64 res; + if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b)) + { + fprintf (stderr, "MULT16_16_Q14: inputs are not short: %d %d\n", a, b); +#ifdef FIXED_DEBUG_ASSERT + celt_assert(0); +#endif + } + res = ((opus_int64)a)*b; + res >>= 14; + if (!VERIFY_SHORT(res)) + { + fprintf (stderr, "MULT16_16_Q14: output is not short: %d\n", (int)res); +#ifdef FIXED_DEBUG_ASSERT + celt_assert(0); +#endif + } + celt_mips+=3; + return res; +} + +#define MULT16_16_Q15(a, b) 
MULT16_16_Q15_(a, b, __FILE__, __LINE__) +static OPUS_INLINE short MULT16_16_Q15_(int a, int b, char *file, int line) +{ + opus_int64 res; + if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b)) + { + fprintf (stderr, "MULT16_16_Q15: inputs are not short: %d %d in %s: line %d\n", a, b, file, line); +#ifdef FIXED_DEBUG_ASSERT + celt_assert(0); +#endif + } + res = ((opus_int64)a)*b; + res >>= 15; + if (!VERIFY_SHORT(res)) + { + fprintf (stderr, "MULT16_16_Q15: output is not short: %d in %s: line %d\n", (int)res, file, line); +#ifdef FIXED_DEBUG_ASSERT + celt_assert(0); +#endif + } + celt_mips+=1; + return res; +} + +static OPUS_INLINE short MULT16_16_P13(int a, int b) +{ + opus_int64 res; + if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b)) + { + fprintf (stderr, "MULT16_16_P13: inputs are not short: %d %d\n", a, b); +#ifdef FIXED_DEBUG_ASSERT + celt_assert(0); +#endif + } + res = ((opus_int64)a)*b; + res += 4096; + if (!VERIFY_INT(res)) + { + fprintf (stderr, "MULT16_16_P13: overflow: %d*%d=%d\n", a, b, (int)res); +#ifdef FIXED_DEBUG_ASSERT + celt_assert(0); +#endif + } + res >>= 13; + if (!VERIFY_SHORT(res)) + { + fprintf (stderr, "MULT16_16_P13: output is not short: %d*%d=%d\n", a, b, (int)res); +#ifdef FIXED_DEBUG_ASSERT + celt_assert(0); +#endif + } + celt_mips+=4; + return res; +} +static OPUS_INLINE short MULT16_16_P14(int a, int b) +{ + opus_int64 res; + if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b)) + { + fprintf (stderr, "MULT16_16_P14: inputs are not short: %d %d\n", a, b); +#ifdef FIXED_DEBUG_ASSERT + celt_assert(0); +#endif + } + res = ((opus_int64)a)*b; + res += 8192; + if (!VERIFY_INT(res)) + { + fprintf (stderr, "MULT16_16_P14: overflow: %d*%d=%d\n", a, b, (int)res); +#ifdef FIXED_DEBUG_ASSERT + celt_assert(0); +#endif + } + res >>= 14; + if (!VERIFY_SHORT(res)) + { + fprintf (stderr, "MULT16_16_P14: output is not short: %d*%d=%d\n", a, b, (int)res); +#ifdef FIXED_DEBUG_ASSERT + celt_assert(0); +#endif + } + celt_mips+=4; + return res; +} +static OPUS_INLINE short 
MULT16_16_P15(int a, int b) +{ + opus_int64 res; + if (!VERIFY_SHORT(a) || !VERIFY_SHORT(b)) + { + fprintf (stderr, "MULT16_16_P15: inputs are not short: %d %d\n", a, b); +#ifdef FIXED_DEBUG_ASSERT + celt_assert(0); +#endif + } + res = ((opus_int64)a)*b; + res += 16384; + if (!VERIFY_INT(res)) + { + fprintf (stderr, "MULT16_16_P15: overflow: %d*%d=%d\n", a, b, (int)res); +#ifdef FIXED_DEBUG_ASSERT + celt_assert(0); +#endif + } + res >>= 15; + if (!VERIFY_SHORT(res)) + { + fprintf (stderr, "MULT16_16_P15: output is not short: %d*%d=%d\n", a, b, (int)res); +#ifdef FIXED_DEBUG_ASSERT + celt_assert(0); +#endif + } + celt_mips+=2; + return res; +} + +#define DIV32_16(a, b) DIV32_16_(a, b, __FILE__, __LINE__) + +static OPUS_INLINE int DIV32_16_(opus_int64 a, opus_int64 b, char *file, int line) +{ + opus_int64 res; + if (b==0) + { + fprintf(stderr, "DIV32_16: divide by zero: %d/%d in %s: line %d\n", (int)a, (int)b, file, line); +#ifdef FIXED_DEBUG_ASSERT + celt_assert(0); +#endif + return 0; + } + if (!VERIFY_INT(a) || !VERIFY_SHORT(b)) + { + fprintf (stderr, "DIV32_16: inputs are not int/short: %d %d in %s: line %d\n", (int)a, (int)b, file, line); +#ifdef FIXED_DEBUG_ASSERT + celt_assert(0); +#endif + } + res = a/b; + if (!VERIFY_SHORT(res)) + { + fprintf (stderr, "DIV32_16: output is not short: %d / %d = %d in %s: line %d\n", (int)a,(int)b,(int)res, file, line); + if (res>32767) + res = 32767; + if (res<-32768) + res = -32768; +#ifdef FIXED_DEBUG_ASSERT + celt_assert(0); +#endif + } + celt_mips+=35; + return res; +} + +#define DIV32(a, b) DIV32_(a, b, __FILE__, __LINE__) +static OPUS_INLINE int DIV32_(opus_int64 a, opus_int64 b, char *file, int line) +{ + opus_int64 res; + if (b==0) + { + fprintf(stderr, "DIV32: divide by zero: %d/%d in %s: line %d\n", (int)a, (int)b, file, line); +#ifdef FIXED_DEBUG_ASSERT + celt_assert(0); +#endif + return 0; + } + + if (!VERIFY_INT(a) || !VERIFY_INT(b)) + { + fprintf (stderr, "DIV32: inputs are not int/short: %d %d in %s: line %d\n", 
(int)a, (int)b, file, line); +#ifdef FIXED_DEBUG_ASSERT + celt_assert(0); +#endif + } + res = a/b; + if (!VERIFY_INT(res)) + { + fprintf (stderr, "DIV32: output is not int: %d in %s: line %d\n", (int)res, file, line); +#ifdef FIXED_DEBUG_ASSERT + celt_assert(0); +#endif + } + celt_mips+=70; + return res; +} + +#undef PRINT_MIPS +#define PRINT_MIPS(file) do {fprintf (file, "total complexity = %llu MIPS\n", celt_mips);} while (0); + +#endif diff --git a/drivers/opus/celt/fixed_generic.h b/drivers/opus/celt/fixed_generic.h new file mode 100644 index 0000000000..ecf018a244 --- /dev/null +++ b/drivers/opus/celt/fixed_generic.h @@ -0,0 +1,134 @@ +/* Copyright (C) 2007-2009 Xiph.Org Foundation + Copyright (C) 2003-2008 Jean-Marc Valin + Copyright (C) 2007-2008 CSIRO */ +/** + @file fixed_generic.h + @brief Generic fixed-point operations +*/ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef FIXED_GENERIC_H +#define FIXED_GENERIC_H + +/** Multiply a 16-bit signed value by a 16-bit unsigned value. The result is a 32-bit signed value */ +#define MULT16_16SU(a,b) ((opus_val32)(opus_val16)(a)*(opus_val32)(opus_uint16)(b)) + +/** 16x32 multiplication, followed by a 16-bit shift right. Results fits in 32 bits */ +#define MULT16_32_Q16(a,b) ADD32(MULT16_16((a),SHR((b),16)), SHR(MULT16_16SU((a),((b)&0x0000ffff)),16)) + +/** 16x32 multiplication, followed by a 16-bit shift right (round-to-nearest). Results fits in 32 bits */ +#define MULT16_32_P16(a,b) ADD32(MULT16_16((a),SHR((b),16)), PSHR(MULT16_16SU((a),((b)&0x0000ffff)),16)) + +/** 16x32 multiplication, followed by a 15-bit shift right. Results fits in 32 bits */ +#define MULT16_32_Q15(a,b) ADD32(SHL(MULT16_16((a),SHR((b),16)),1), SHR(MULT16_16SU((a),((b)&0x0000ffff)),15)) + +/** 32x32 multiplication, followed by a 31-bit shift right. 
Results fits in 32 bits */ +#define MULT32_32_Q31(a,b) ADD32(ADD32(SHL(MULT16_16(SHR((a),16),SHR((b),16)),1), SHR(MULT16_16SU(SHR((a),16),((b)&0x0000ffff)),15)), SHR(MULT16_16SU(SHR((b),16),((a)&0x0000ffff)),15)) + +/** Compile-time conversion of float constant to 16-bit value */ +#define QCONST16(x,bits) ((opus_val16)(.5+(x)*(((opus_val32)1)<<(bits)))) + +/** Compile-time conversion of float constant to 32-bit value */ +#define QCONST32(x,bits) ((opus_val32)(.5+(x)*(((opus_val32)1)<<(bits)))) + +/** Negate a 16-bit value */ +#define NEG16(x) (-(x)) +/** Negate a 32-bit value */ +#define NEG32(x) (-(x)) + +/** Change a 32-bit value into a 16-bit value. The value is assumed to fit in 16-bit, otherwise the result is undefined */ +#define EXTRACT16(x) ((opus_val16)(x)) +/** Change a 16-bit value into a 32-bit value */ +#define EXTEND32(x) ((opus_val32)(x)) + +/** Arithmetic shift-right of a 16-bit value */ +#define SHR16(a,shift) ((a) >> (shift)) +/** Arithmetic shift-left of a 16-bit value */ +#define SHL16(a,shift) ((opus_int16)((opus_uint16)(a)<<(shift))) +/** Arithmetic shift-right of a 32-bit value */ +#define SHR32(a,shift) ((a) >> (shift)) +/** Arithmetic shift-left of a 32-bit value */ +#define SHL32(a,shift) ((opus_int32)((opus_uint32)(a)<<(shift))) + +/** 32-bit arithmetic shift right with rounding-to-nearest instead of rounding down */ +#define PSHR32(a,shift) (SHR32((a)+((EXTEND32(1)<<((shift))>>1)),shift)) +/** 32-bit arithmetic shift right where the argument can be negative */ +#define VSHR32(a, shift) (((shift)>0) ? SHR32(a, shift) : SHL32(a, -(shift))) + +/** "RAW" macros, should not be used outside of this header file */ +#define SHR(a,shift) ((a) >> (shift)) +#define SHL(a,shift) SHL32(a,shift) +#define PSHR(a,shift) (SHR((a)+((EXTEND32(1)<<((shift))>>1)),shift)) +#define SATURATE(x,a) (((x)>(a) ? (a) : (x)<-(a) ? -(a) : (x))) + +#define SATURATE16(x) (EXTRACT16((x)>32767 ? 32767 : (x)<-32768 ? 
-32768 : (x))) + +/** Shift by a and round-to-neareast 32-bit value. Result is a 16-bit value */ +#define ROUND16(x,a) (EXTRACT16(PSHR32((x),(a)))) +/** Divide by two */ +#define HALF16(x) (SHR16(x,1)) +#define HALF32(x) (SHR32(x,1)) + +/** Add two 16-bit values */ +#define ADD16(a,b) ((opus_val16)((opus_val16)(a)+(opus_val16)(b))) +/** Subtract two 16-bit values */ +#define SUB16(a,b) ((opus_val16)(a)-(opus_val16)(b)) +/** Add two 32-bit values */ +#define ADD32(a,b) ((opus_val32)(a)+(opus_val32)(b)) +/** Subtract two 32-bit values */ +#define SUB32(a,b) ((opus_val32)(a)-(opus_val32)(b)) + +/** 16x16 multiplication where the result fits in 16 bits */ +#define MULT16_16_16(a,b) ((((opus_val16)(a))*((opus_val16)(b)))) + +/* (opus_val32)(opus_val16) gives TI compiler a hint that it's 16x16->32 multiply */ +/** 16x16 multiplication where the result fits in 32 bits */ +#define MULT16_16(a,b) (((opus_val32)(opus_val16)(a))*((opus_val32)(opus_val16)(b))) + +/** 16x16 multiply-add where the result fits in 32 bits */ +#define MAC16_16(c,a,b) (ADD32((c),MULT16_16((a),(b)))) +/** 16x32 multiply, followed by a 15-bit shift right and 32-bit add. + b must fit in 31 bits. + Result fits in 32 bits. */ +#define MAC16_32_Q15(c,a,b) ADD32(c,ADD32(MULT16_16((a),SHR((b),15)), SHR(MULT16_16((a),((b)&0x00007fff)),15))) + +#define MULT16_16_Q11_32(a,b) (SHR(MULT16_16((a),(b)),11)) +#define MULT16_16_Q11(a,b) (SHR(MULT16_16((a),(b)),11)) +#define MULT16_16_Q13(a,b) (SHR(MULT16_16((a),(b)),13)) +#define MULT16_16_Q14(a,b) (SHR(MULT16_16((a),(b)),14)) +#define MULT16_16_Q15(a,b) (SHR(MULT16_16((a),(b)),15)) + +#define MULT16_16_P13(a,b) (SHR(ADD32(4096,MULT16_16((a),(b))),13)) +#define MULT16_16_P14(a,b) (SHR(ADD32(8192,MULT16_16((a),(b))),14)) +#define MULT16_16_P15(a,b) (SHR(ADD32(16384,MULT16_16((a),(b))),15)) + +/** Divide a 32-bit value by a 16-bit value. 
Result fits in 16 bits */ +#define DIV32_16(a,b) ((opus_val16)(((opus_val32)(a))/((opus_val16)(b)))) + +/** Divide a 32-bit value by a 32-bit value. Result fits in 32 bits */ +#define DIV32(a,b) (((opus_val32)(a))/((opus_val32)(b))) + +#endif diff --git a/drivers/opus/celt/float_cast.h b/drivers/opus/celt/float_cast.h new file mode 100644 index 0000000000..ede6574860 --- /dev/null +++ b/drivers/opus/celt/float_cast.h @@ -0,0 +1,140 @@ +/* Copyright (C) 2001 Erik de Castro Lopo <erikd AT mega-nerd DOT com> */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +/* Version 1.1 */ + +#ifndef FLOAT_CAST_H +#define FLOAT_CAST_H + + +#include "arch.h" + +/*============================================================================ +** On Intel Pentium processors (especially PIII and probably P4), converting +** from float to int is very slow. To meet the C specs, the code produced by +** most C compilers targeting Pentium needs to change the FPU rounding mode +** before the float to int conversion is performed. +** +** Changing the FPU rounding mode causes the FPU pipeline to be flushed. It +** is this flushing of the pipeline which is so slow. +** +** Fortunately the ISO C99 specifications define the functions lrint, lrintf, +** llrint and llrintf which fix this problem as a side effect. +** +** On Unix-like systems, the configure process should have detected the +** presence of these functions. If they weren't found we have to replace them +** here with a standard C cast. +*/ + +/* +** The C99 prototypes for lrint and lrintf are as follows: +** +** long int lrintf (float x) ; +** long int lrint (double x) ; +*/ + +/* The presence of the required functions are detected during the configure +** process and the values HAVE_LRINT and HAVE_LRINTF are set accordingly in +** the config.h file. +*/ + +#if (HAVE_LRINTF) + +/* These defines enable functionality introduced with the 1999 ISO C +** standard. They must be defined before the inclusion of math.h to +** engage them. If optimisation is enabled, these functions will be +** inlined. With optimisation switched off, you have to link in the +** maths library using -lm. 
+*/ + +#define _ISOC9X_SOURCE 1 +#define _ISOC99_SOURCE 1 + +#define __USE_ISOC9X 1 +#define __USE_ISOC99 1 + +#include <math.h> +#define float2int(x) lrintf(x) + +#elif (defined(HAVE_LRINT)) + +#define _ISOC9X_SOURCE 1 +#define _ISOC99_SOURCE 1 + +#define __USE_ISOC9X 1 +#define __USE_ISOC99 1 + +#include <math.h> +#define float2int(x) lrint(x) + +#elif (defined(_MSC_VER) && _MSC_VER >= 1400) && (defined (WIN64) || defined (_WIN64)) + #include <xmmintrin.h> + + __inline long int float2int(float value) + { + return _mm_cvtss_si32(_mm_load_ss(&value)); + } +#elif (defined(_MSC_VER) && _MSC_VER >= 1400) && (defined (WIN32) || defined (_WIN32)) + #include <math.h> + + /* Win32 doesn't seem to have these functions. + ** Therefore implement OPUS_INLINE versions of these functions here. + */ + + __inline long int + float2int (float flt) + { int intgr; + + _asm + { fld flt + fistp intgr + } ; + + return intgr ; + } + +#else + +#if (defined(__GNUC__) && defined(__STDC__) && __STDC__ && __STDC_VERSION__ >= 199901L) + /* supported by gcc in C99 mode, but not by all other compilers */ + #warning "Don't have the functions lrint() and lrintf ()." + #warning "Replacing these functions with a standard C cast." +#endif /* __STDC_VERSION__ >= 199901L */ + #include <math.h> + #define float2int(flt) ((int)(floor(.5+flt))) +#endif + +#ifndef DISABLE_FLOAT_API +static OPUS_INLINE opus_int16 FLOAT2INT16(float x) +{ + x = x*CELT_SIG_SCALE; + x = MAX32(x, -32768); + x = MIN32(x, 32767); + return (opus_int16)float2int(x); +} +#endif /* DISABLE_FLOAT_API */ + +#endif /* FLOAT_CAST_H */ diff --git a/drivers/opus/celt/kiss_fft.c b/drivers/opus/celt/kiss_fft.c new file mode 100644 index 0000000000..333be975d1 --- /dev/null +++ b/drivers/opus/celt/kiss_fft.c @@ -0,0 +1,719 @@ +/*Copyright (c) 2003-2004, Mark Borgerding + Lots of modifications by Jean-Marc Valin + Copyright (c) 2005-2007, Xiph.Org Foundation + Copyright (c) 2008, Xiph.Org Foundation, CSIRO + + All rights reserved. 
+ + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE.*/ + +/* This code is originally from Mark Borgerding's KISS-FFT but has been + heavily modified to better suit Opus */ + +#ifndef SKIP_CONFIG_H +# ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +# endif +#endif + +#include "_kiss_fft_guts.h" +#include "arch.h" +#include "os_support.h" +#include "mathops.h" +#include "stack_alloc.h" + +/* The guts header contains all the multiplication and addition macros that are defined for + complex numbers. It also delares the kf_ internal functions. 
+*/ + +static void kf_bfly2( + kiss_fft_cpx * Fout, + const size_t fstride, + const kiss_fft_state *st, + int m, + int N, + int mm + ) +{ + kiss_fft_cpx * Fout2; + const kiss_twiddle_cpx * tw1; + int i,j; + kiss_fft_cpx * Fout_beg = Fout; + for (i=0;i<N;i++) + { + Fout = Fout_beg + i*mm; + Fout2 = Fout + m; + tw1 = st->twiddles; + for(j=0;j<m;j++) + { + kiss_fft_cpx t; + Fout->r = SHR32(Fout->r, 1);Fout->i = SHR32(Fout->i, 1); + Fout2->r = SHR32(Fout2->r, 1);Fout2->i = SHR32(Fout2->i, 1); + C_MUL (t, *Fout2 , *tw1); + tw1 += fstride; + C_SUB( *Fout2 , *Fout , t ); + C_ADDTO( *Fout , t ); + ++Fout2; + ++Fout; + } + } +} + +static void ki_bfly2( + kiss_fft_cpx * Fout, + const size_t fstride, + const kiss_fft_state *st, + int m, + int N, + int mm + ) +{ + kiss_fft_cpx * Fout2; + const kiss_twiddle_cpx * tw1; + kiss_fft_cpx t; + int i,j; + kiss_fft_cpx * Fout_beg = Fout; + for (i=0;i<N;i++) + { + Fout = Fout_beg + i*mm; + Fout2 = Fout + m; + tw1 = st->twiddles; + for(j=0;j<m;j++) + { + C_MULC (t, *Fout2 , *tw1); + tw1 += fstride; + C_SUB( *Fout2 , *Fout , t ); + C_ADDTO( *Fout , t ); + ++Fout2; + ++Fout; + } + } +} + +static void kf_bfly4( + kiss_fft_cpx * Fout, + const size_t fstride, + const kiss_fft_state *st, + int m, + int N, + int mm + ) +{ + const kiss_twiddle_cpx *tw1,*tw2,*tw3; + kiss_fft_cpx scratch[6]; + const size_t m2=2*m; + const size_t m3=3*m; + int i, j; + + kiss_fft_cpx * Fout_beg = Fout; + for (i=0;i<N;i++) + { + Fout = Fout_beg + i*mm; + tw3 = tw2 = tw1 = st->twiddles; + for (j=0;j<m;j++) + { + C_MUL4(scratch[0],Fout[m] , *tw1 ); + C_MUL4(scratch[1],Fout[m2] , *tw2 ); + C_MUL4(scratch[2],Fout[m3] , *tw3 ); + + Fout->r = PSHR32(Fout->r, 2); + Fout->i = PSHR32(Fout->i, 2); + C_SUB( scratch[5] , *Fout, scratch[1] ); + C_ADDTO(*Fout, scratch[1]); + C_ADD( scratch[3] , scratch[0] , scratch[2] ); + C_SUB( scratch[4] , scratch[0] , scratch[2] ); + C_SUB( Fout[m2], *Fout, scratch[3] ); + tw1 += fstride; + tw2 += fstride*2; + tw3 += fstride*3; + C_ADDTO( 
*Fout , scratch[3] ); + + Fout[m].r = scratch[5].r + scratch[4].i; + Fout[m].i = scratch[5].i - scratch[4].r; + Fout[m3].r = scratch[5].r - scratch[4].i; + Fout[m3].i = scratch[5].i + scratch[4].r; + ++Fout; + } + } +} + +static void ki_bfly4( + kiss_fft_cpx * Fout, + const size_t fstride, + const kiss_fft_state *st, + int m, + int N, + int mm + ) +{ + const kiss_twiddle_cpx *tw1,*tw2,*tw3; + kiss_fft_cpx scratch[6]; + const size_t m2=2*m; + const size_t m3=3*m; + int i, j; + + kiss_fft_cpx * Fout_beg = Fout; + for (i=0;i<N;i++) + { + Fout = Fout_beg + i*mm; + tw3 = tw2 = tw1 = st->twiddles; + for (j=0;j<m;j++) + { + C_MULC(scratch[0],Fout[m] , *tw1 ); + C_MULC(scratch[1],Fout[m2] , *tw2 ); + C_MULC(scratch[2],Fout[m3] , *tw3 ); + + C_SUB( scratch[5] , *Fout, scratch[1] ); + C_ADDTO(*Fout, scratch[1]); + C_ADD( scratch[3] , scratch[0] , scratch[2] ); + C_SUB( scratch[4] , scratch[0] , scratch[2] ); + C_SUB( Fout[m2], *Fout, scratch[3] ); + tw1 += fstride; + tw2 += fstride*2; + tw3 += fstride*3; + C_ADDTO( *Fout , scratch[3] ); + + Fout[m].r = scratch[5].r - scratch[4].i; + Fout[m].i = scratch[5].i + scratch[4].r; + Fout[m3].r = scratch[5].r + scratch[4].i; + Fout[m3].i = scratch[5].i - scratch[4].r; + ++Fout; + } + } +} + +#ifndef RADIX_TWO_ONLY + +static void kf_bfly3( + kiss_fft_cpx * Fout, + const size_t fstride, + const kiss_fft_state *st, + int m, + int N, + int mm + ) +{ + int i; + size_t k; + const size_t m2 = 2*m; + const kiss_twiddle_cpx *tw1,*tw2; + kiss_fft_cpx scratch[5]; + kiss_twiddle_cpx epi3; + + kiss_fft_cpx * Fout_beg = Fout; + epi3 = st->twiddles[fstride*m]; + for (i=0;i<N;i++) + { + Fout = Fout_beg + i*mm; + tw1=tw2=st->twiddles; + k=m; + do { + C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3); + + C_MUL(scratch[1],Fout[m] , *tw1); + C_MUL(scratch[2],Fout[m2] , *tw2); + + C_ADD(scratch[3],scratch[1],scratch[2]); + C_SUB(scratch[0],scratch[1],scratch[2]); + tw1 += fstride; + tw2 += fstride*2; + + Fout[m].r = Fout->r - 
HALF_OF(scratch[3].r); + Fout[m].i = Fout->i - HALF_OF(scratch[3].i); + + C_MULBYSCALAR( scratch[0] , epi3.i ); + + C_ADDTO(*Fout,scratch[3]); + + Fout[m2].r = Fout[m].r + scratch[0].i; + Fout[m2].i = Fout[m].i - scratch[0].r; + + Fout[m].r -= scratch[0].i; + Fout[m].i += scratch[0].r; + + ++Fout; + } while(--k); + } +} + +static void ki_bfly3( + kiss_fft_cpx * Fout, + const size_t fstride, + const kiss_fft_state *st, + int m, + int N, + int mm + ) +{ + int i, k; + const size_t m2 = 2*m; + const kiss_twiddle_cpx *tw1,*tw2; + kiss_fft_cpx scratch[5]; + kiss_twiddle_cpx epi3; + + kiss_fft_cpx * Fout_beg = Fout; + epi3 = st->twiddles[fstride*m]; + for (i=0;i<N;i++) + { + Fout = Fout_beg + i*mm; + tw1=tw2=st->twiddles; + k=m; + do{ + + C_MULC(scratch[1],Fout[m] , *tw1); + C_MULC(scratch[2],Fout[m2] , *tw2); + + C_ADD(scratch[3],scratch[1],scratch[2]); + C_SUB(scratch[0],scratch[1],scratch[2]); + tw1 += fstride; + tw2 += fstride*2; + + Fout[m].r = Fout->r - HALF_OF(scratch[3].r); + Fout[m].i = Fout->i - HALF_OF(scratch[3].i); + + C_MULBYSCALAR( scratch[0] , -epi3.i ); + + C_ADDTO(*Fout,scratch[3]); + + Fout[m2].r = Fout[m].r + scratch[0].i; + Fout[m2].i = Fout[m].i - scratch[0].r; + + Fout[m].r -= scratch[0].i; + Fout[m].i += scratch[0].r; + + ++Fout; + }while(--k); + } +} + +static void kf_bfly5( + kiss_fft_cpx * Fout, + const size_t fstride, + const kiss_fft_state *st, + int m, + int N, + int mm + ) +{ + kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; + int i, u; + kiss_fft_cpx scratch[13]; + const kiss_twiddle_cpx * twiddles = st->twiddles; + const kiss_twiddle_cpx *tw; + kiss_twiddle_cpx ya,yb; + kiss_fft_cpx * Fout_beg = Fout; + + ya = twiddles[fstride*m]; + yb = twiddles[fstride*2*m]; + tw=st->twiddles; + + for (i=0;i<N;i++) + { + Fout = Fout_beg + i*mm; + Fout0=Fout; + Fout1=Fout0+m; + Fout2=Fout0+2*m; + Fout3=Fout0+3*m; + Fout4=Fout0+4*m; + + for ( u=0; u<m; ++u ) { + C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( 
*Fout4,5); + scratch[0] = *Fout0; + + C_MUL(scratch[1] ,*Fout1, tw[u*fstride]); + C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]); + C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]); + C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]); + + C_ADD( scratch[7],scratch[1],scratch[4]); + C_SUB( scratch[10],scratch[1],scratch[4]); + C_ADD( scratch[8],scratch[2],scratch[3]); + C_SUB( scratch[9],scratch[2],scratch[3]); + + Fout0->r += scratch[7].r + scratch[8].r; + Fout0->i += scratch[7].i + scratch[8].i; + + scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r); + scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r); + + scratch[6].r = S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i); + scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i); + + C_SUB(*Fout1,scratch[5],scratch[6]); + C_ADD(*Fout4,scratch[5],scratch[6]); + + scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r); + scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r); + scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i); + scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i); + + C_ADD(*Fout2,scratch[11],scratch[12]); + C_SUB(*Fout3,scratch[11],scratch[12]); + + ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4; + } + } +} + +static void ki_bfly5( + kiss_fft_cpx * Fout, + const size_t fstride, + const kiss_fft_state *st, + int m, + int N, + int mm + ) +{ + kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; + int i, u; + kiss_fft_cpx scratch[13]; + const kiss_twiddle_cpx * twiddles = st->twiddles; + const kiss_twiddle_cpx *tw; + kiss_twiddle_cpx ya,yb; + kiss_fft_cpx * Fout_beg = Fout; + + ya = twiddles[fstride*m]; + yb = twiddles[fstride*2*m]; + tw=st->twiddles; + + for (i=0;i<N;i++) + { + Fout = Fout_beg + i*mm; + Fout0=Fout; + Fout1=Fout0+m; + Fout2=Fout0+2*m; + Fout3=Fout0+3*m; + Fout4=Fout0+4*m; + + for ( u=0; u<m; ++u ) { + scratch[0] = *Fout0; + + 
C_MULC(scratch[1] ,*Fout1, tw[u*fstride]); + C_MULC(scratch[2] ,*Fout2, tw[2*u*fstride]); + C_MULC(scratch[3] ,*Fout3, tw[3*u*fstride]); + C_MULC(scratch[4] ,*Fout4, tw[4*u*fstride]); + + C_ADD( scratch[7],scratch[1],scratch[4]); + C_SUB( scratch[10],scratch[1],scratch[4]); + C_ADD( scratch[8],scratch[2],scratch[3]); + C_SUB( scratch[9],scratch[2],scratch[3]); + + Fout0->r += scratch[7].r + scratch[8].r; + Fout0->i += scratch[7].i + scratch[8].i; + + scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r); + scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r); + + scratch[6].r = -S_MUL(scratch[10].i,ya.i) - S_MUL(scratch[9].i,yb.i); + scratch[6].i = S_MUL(scratch[10].r,ya.i) + S_MUL(scratch[9].r,yb.i); + + C_SUB(*Fout1,scratch[5],scratch[6]); + C_ADD(*Fout4,scratch[5],scratch[6]); + + scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r); + scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r); + scratch[12].r = S_MUL(scratch[10].i,yb.i) - S_MUL(scratch[9].i,ya.i); + scratch[12].i = -S_MUL(scratch[10].r,yb.i) + S_MUL(scratch[9].r,ya.i); + + C_ADD(*Fout2,scratch[11],scratch[12]); + C_SUB(*Fout3,scratch[11],scratch[12]); + + ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4; + } + } +} + +#endif + + +#ifdef CUSTOM_MODES + +static +void compute_bitrev_table( + int Fout, + opus_int16 *f, + const size_t fstride, + int in_stride, + opus_int16 * factors, + const kiss_fft_state *st + ) +{ + const int p=*factors++; /* the radix */ + const int m=*factors++; /* stage's fft length/p */ + + /*printf ("fft %d %d %d %d %d %d\n", p*m, m, p, s2, fstride*in_stride, N);*/ + if (m==1) + { + int j; + for (j=0;j<p;j++) + { + *f = Fout+j; + f += fstride*in_stride; + } + } else { + int j; + for (j=0;j<p;j++) + { + compute_bitrev_table( Fout , f, fstride*p, in_stride, factors,st); + f += fstride*in_stride; + Fout += m; + } + } +} + +/* facbuf is populated by p1,m1,p2,m2, ... 
+ where + p[i] * m[i] = m[i-1] + m0 = n */ +static +int kf_factor(int n,opus_int16 * facbuf) +{ + int p=4; + + /*factor out powers of 4, powers of 2, then any remaining primes */ + do { + while (n % p) { + switch (p) { + case 4: p = 2; break; + case 2: p = 3; break; + default: p += 2; break; + } + if (p>32000 || (opus_int32)p*(opus_int32)p > n) + p = n; /* no more factors, skip to end */ + } + n /= p; +#ifdef RADIX_TWO_ONLY + if (p!=2 && p != 4) +#else + if (p>5) +#endif + { + return 0; + } + *facbuf++ = p; + *facbuf++ = n; + } while (n > 1); + return 1; +} + +static void compute_twiddles(kiss_twiddle_cpx *twiddles, int nfft) +{ + int i; +#ifdef OPUS_FIXED_POINT + for (i=0;i<nfft;++i) { + opus_val32 phase = -i; + kf_cexp2(twiddles+i, DIV32(SHL32(phase,17),nfft)); + } +#else + for (i=0;i<nfft;++i) { + const double pi=3.14159265358979323846264338327; + double phase = ( -2*pi /nfft ) * i; + kf_cexp(twiddles+i, phase ); + } +#endif +} + +/* + * + * Allocates all necessary storage space for the fft and ifft. + * The return value is a contiguous block of memory. As such, + * It can be freed with free(). 
+ * */ +kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem, const kiss_fft_state *base) +{ + kiss_fft_state *st=NULL; + size_t memneeded = sizeof(struct kiss_fft_state); /* twiddle factors*/ + + if ( lenmem==NULL ) { + st = ( kiss_fft_state*)KISS_FFT_MALLOC( memneeded ); + }else{ + if (mem != NULL && *lenmem >= memneeded) + st = (kiss_fft_state*)mem; + *lenmem = memneeded; + } + if (st) { + opus_int16 *bitrev; + kiss_twiddle_cpx *twiddles; + + st->nfft=nfft; +#ifndef OPUS_FIXED_POINT + st->scale = 1.f/nfft; +#endif + if (base != NULL) + { + st->twiddles = base->twiddles; + st->shift = 0; + while (nfft<<st->shift != base->nfft && st->shift < 32) + st->shift++; + if (st->shift>=32) + goto fail; + } else { + st->twiddles = twiddles = (kiss_twiddle_cpx*)KISS_FFT_MALLOC(sizeof(kiss_twiddle_cpx)*nfft); + compute_twiddles(twiddles, nfft); + st->shift = -1; + } + if (!kf_factor(nfft,st->factors)) + { + goto fail; + } + + /* bitrev */ + st->bitrev = bitrev = (opus_int16*)KISS_FFT_MALLOC(sizeof(opus_int16)*nfft); + if (st->bitrev==NULL) + goto fail; + compute_bitrev_table(0, bitrev, 1,1, st->factors,st); + } + return st; +fail: + opus_fft_free(st); + return NULL; +} + +kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem ) +{ + return opus_fft_alloc_twiddles(nfft, mem, lenmem, NULL); +} + +void opus_fft_free(const kiss_fft_state *cfg) +{ + if (cfg) + { + opus_free((opus_int16*)cfg->bitrev); + if (cfg->shift < 0) + opus_free((kiss_twiddle_cpx*)cfg->twiddles); + opus_free((kiss_fft_state*)cfg); + } +} + +#endif /* CUSTOM_MODES */ + +void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) +{ + int m2, m; + int p; + int L; + int fstride[MAXFACTORS]; + int i; + int shift; + + /* st->shift can be -1 */ + shift = st->shift>0 ? 
st->shift : 0; + + celt_assert2 (fin != fout, "In-place FFT not supported"); + /* Bit-reverse the input */ + for (i=0;i<st->nfft;i++) + { + fout[st->bitrev[i]] = fin[i]; +#ifndef OPUS_FIXED_POINT + fout[st->bitrev[i]].r *= st->scale; + fout[st->bitrev[i]].i *= st->scale; +#endif + } + + fstride[0] = 1; + L=0; + do { + p = st->factors[2*L]; + m = st->factors[2*L+1]; + fstride[L+1] = fstride[L]*p; + L++; + } while(m!=1); + m = st->factors[2*L-1]; + for (i=L-1;i>=0;i--) + { + if (i!=0) + m2 = st->factors[2*i-1]; + else + m2 = 1; + switch (st->factors[2*i]) + { + case 2: + kf_bfly2(fout,fstride[i]<<shift,st,m, fstride[i], m2); + break; + case 4: + kf_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2); + break; + #ifndef RADIX_TWO_ONLY + case 3: + kf_bfly3(fout,fstride[i]<<shift,st,m, fstride[i], m2); + break; + case 5: + kf_bfly5(fout,fstride[i]<<shift,st,m, fstride[i], m2); + break; + #endif + } + m = m2; + } +} + +void opus_ifft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) +{ + int m2, m; + int p; + int L; + int fstride[MAXFACTORS]; + int i; + int shift; + + /* st->shift can be -1 */ + shift = st->shift>0 ? 
st->shift : 0; + celt_assert2 (fin != fout, "In-place FFT not supported"); + /* Bit-reverse the input */ + for (i=0;i<st->nfft;i++) + fout[st->bitrev[i]] = fin[i]; + + fstride[0] = 1; + L=0; + do { + p = st->factors[2*L]; + m = st->factors[2*L+1]; + fstride[L+1] = fstride[L]*p; + L++; + } while(m!=1); + m = st->factors[2*L-1]; + for (i=L-1;i>=0;i--) + { + if (i!=0) + m2 = st->factors[2*i-1]; + else + m2 = 1; + switch (st->factors[2*i]) + { + case 2: + ki_bfly2(fout,fstride[i]<<shift,st,m, fstride[i], m2); + break; + case 4: + ki_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2); + break; +#ifndef RADIX_TWO_ONLY + case 3: + ki_bfly3(fout,fstride[i]<<shift,st,m, fstride[i], m2); + break; + case 5: + ki_bfly5(fout,fstride[i]<<shift,st,m, fstride[i], m2); + break; +#endif + } + m = m2; + } +} + diff --git a/drivers/opus/celt/kiss_fft.h b/drivers/opus/celt/kiss_fft.h new file mode 100644 index 0000000000..aa22b3a419 --- /dev/null +++ b/drivers/opus/celt/kiss_fft.h @@ -0,0 +1,139 @@ +/*Copyright (c) 2003-2004, Mark Borgerding + Lots of modifications by Jean-Marc Valin + Copyright (c) 2005-2007, Xiph.Org Foundation + Copyright (c) 2008, Xiph.Org Foundation, CSIRO + + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE.*/ + +#ifndef KISS_FFT_H +#define KISS_FFT_H + +#include <stdlib.h> +#include <math.h> +#include "arch.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef USE_SIMD +# include <xmmintrin.h> +# define kiss_fft_scalar __m128 +#define KISS_FFT_MALLOC(nbytes) memalign(16,nbytes) +#else +#define KISS_FFT_MALLOC opus_alloc +#endif + +#ifdef OPUS_FIXED_POINT +#include "arch.h" + +# define kiss_fft_scalar opus_int32 +# define kiss_twiddle_scalar opus_int16 + + +#else +# ifndef kiss_fft_scalar +/* default is float */ +# define kiss_fft_scalar float +# define kiss_twiddle_scalar float +# define KF_SUFFIX _celt_single +# endif +#endif + +typedef struct { + kiss_fft_scalar r; + kiss_fft_scalar i; +}kiss_fft_cpx; + +typedef struct { + kiss_twiddle_scalar r; + kiss_twiddle_scalar i; +}kiss_twiddle_cpx; + +#define MAXFACTORS 8 +/* e.g. an fft of length 128 has 4 factors + as far as kissfft is concerned + 4*4*4*2 + */ + +typedef struct kiss_fft_state{ + int nfft; +#ifndef OPUS_FIXED_POINT + kiss_fft_scalar scale; +#endif + int shift; + opus_int16 factors[2*MAXFACTORS]; + const opus_int16 *bitrev; + const kiss_twiddle_cpx *twiddles; +} kiss_fft_state; + +/*typedef struct kiss_fft_state* kiss_fft_cfg;*/ + +/** + * opus_fft_alloc + * + * Initialize a FFT (or IFFT) algorithm's cfg/state buffer. 
+ * + * typical usage: kiss_fft_cfg mycfg=opus_fft_alloc(1024,0,NULL,NULL); + * + * The return value from fft_alloc is a cfg buffer used internally + * by the fft routine or NULL. + * + * If lenmem is NULL, then opus_fft_alloc will allocate a cfg buffer using malloc. + * The returned value should be free()d when done to avoid memory leaks. + * + * The state can be placed in a user supplied buffer 'mem': + * If lenmem is not NULL and mem is not NULL and *lenmem is large enough, + * then the function places the cfg in mem and the size used in *lenmem + * and returns mem. + * + * If lenmem is not NULL and ( mem is NULL or *lenmem is not large enough), + * then the function returns NULL and places the minimum cfg + * buffer size in *lenmem. + * */ + +kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem, const kiss_fft_state *base); + +kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem); + +/** + * opus_fft(cfg,in_out_buf) + * + * Perform an FFT on a complex input buffer. + * for a forward FFT, + * fin should be f[0] , f[1] , ... ,f[nfft-1] + * fout will be F[0] , F[1] , ... 
,F[nfft-1] + * Note that each element is complex and can be accessed like + f[k].r and f[k].i + * */ +void opus_fft(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout); +void opus_ifft(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout); + +void opus_fft_free(const kiss_fft_state *cfg); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/drivers/opus/celt/laplace.c b/drivers/opus/celt/laplace.c new file mode 100644 index 0000000000..c6d293f298 --- /dev/null +++ b/drivers/opus/celt/laplace.c @@ -0,0 +1,134 @@ +/* Copyright (c) 2007 CSIRO + Copyright (c) 2007-2009 Xiph.Org Foundation + Written by Jean-Marc Valin */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "laplace.h" +#include "mathops.h" + +/* The minimum probability of an energy delta (out of 32768). */ +#define LAPLACE_LOG_MINP (0) +#define LAPLACE_MINP (1<<LAPLACE_LOG_MINP) +/* The minimum number of guaranteed representable energy deltas (in one + direction). */ +#define LAPLACE_NMIN (16) + +/* When called, decay is positive and at most 11456. */ +static unsigned ec_laplace_get_freq1(unsigned fs0, int decay) +{ + unsigned ft; + ft = 32768 - LAPLACE_MINP*(2*LAPLACE_NMIN) - fs0; + return ft*(opus_int32)(16384-decay)>>15; +} + +void ec_laplace_encode(ec_enc *enc, int *value, unsigned fs, int decay) +{ + unsigned fl; + int val = *value; + fl = 0; + if (val) + { + int s; + int i; + s = -(val<0); + val = (val+s)^s; + fl = fs; + fs = ec_laplace_get_freq1(fs, decay); + /* Search the decaying part of the PDF.*/ + for (i=1; fs > 0 && i < val; i++) + { + fs *= 2; + fl += fs+2*LAPLACE_MINP; + fs = (fs*(opus_int32)decay)>>15; + } + /* Everything beyond that has probability LAPLACE_MINP. */ + if (!fs) + { + int di; + int ndi_max; + ndi_max = (32768-fl+LAPLACE_MINP-1)>>LAPLACE_LOG_MINP; + ndi_max = (ndi_max-s)>>1; + di = IMIN(val - i, ndi_max - 1); + fl += (2*di+1+s)*LAPLACE_MINP; + fs = IMIN(LAPLACE_MINP, 32768-fl); + *value = (i+di+s)^s; + } + else + { + fs += LAPLACE_MINP; + fl += fs&~s; + } + celt_assert(fl+fs<=32768); + celt_assert(fs>0); + } + ec_encode_bin(enc, fl, fl+fs, 15); +} + +int ec_laplace_decode(ec_dec *dec, unsigned fs, int decay) +{ + int val=0; + unsigned fl; + unsigned fm; + fm = ec_decode_bin(dec, 15); + fl = 0; + if (fm >= fs) + { + val++; + fl = fs; + fs = ec_laplace_get_freq1(fs, decay)+LAPLACE_MINP; + /* Search the decaying part of the PDF.*/ + while(fs > LAPLACE_MINP && fm >= fl+2*fs) + { + fs *= 2; + fl += fs; + fs = ((fs-2*LAPLACE_MINP)*(opus_int32)decay)>>15; + fs += LAPLACE_MINP; + val++; + } + /* Everything beyond that has probability LAPLACE_MINP. 
*/ + if (fs <= LAPLACE_MINP) + { + int di; + di = (fm-fl)>>(LAPLACE_LOG_MINP+1); + val += di; + fl += 2*di*LAPLACE_MINP; + } + if (fm < fl+fs) + val = -val; + else + fl += fs; + } + celt_assert(fl<32768); + celt_assert(fs>0); + celt_assert(fl<=fm); + celt_assert(fm<IMIN(fl+fs,32768)); + ec_dec_update(dec, fl, IMIN(fl+fs,32768), 32768); + return val; +} diff --git a/drivers/opus/celt/laplace.h b/drivers/opus/celt/laplace.h new file mode 100644 index 0000000000..46c14b5da5 --- /dev/null +++ b/drivers/opus/celt/laplace.h @@ -0,0 +1,48 @@ +/* Copyright (c) 2007 CSIRO + Copyright (c) 2007-2009 Xiph.Org Foundation + Written by Jean-Marc Valin */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#include "entenc.h" +#include "entdec.h" + +/** Encode a value that is assumed to be the realisation of a + Laplace-distributed random process + @param enc Entropy encoder state + @param value Value to encode + @param fs Probability of 0, multiplied by 32768 + @param decay Probability of the value +/- 1, multiplied by 16384 +*/ +void ec_laplace_encode(ec_enc *enc, int *value, unsigned fs, int decay); + +/** Decode a value that is assumed to be the realisation of a + Laplace-distributed random process + @param dec Entropy decoder state + @param fs Probability of 0, multiplied by 32768 + @param decay Probability of the value +/- 1, multiplied by 16384 + @return Value decoded + */ +int ec_laplace_decode(ec_dec *dec, unsigned fs, int decay); diff --git a/drivers/opus/celt/mathops.c b/drivers/opus/celt/mathops.c new file mode 100644 index 0000000000..49be746d8c --- /dev/null +++ b/drivers/opus/celt/mathops.c @@ -0,0 +1,208 @@ +/* Copyright (c) 2002-2008 Jean-Marc Valin + Copyright (c) 2007-2008 CSIRO + Copyright (c) 2007-2009 Xiph.Org Foundation + Written by Jean-Marc Valin */ +/** + @file mathops.h + @brief Various math functions +*/ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "mathops.h" + +/*Compute floor(sqrt(_val)) with exact arithmetic. + This has been tested on all possible 32-bit inputs.*/ +unsigned isqrt32(opus_uint32 _val){ + unsigned b; + unsigned g; + int bshift; + /*Uses the second method from + http://www.azillionmonkeys.com/qed/sqroot.html + The main idea is to search for the largest binary digit b such that + (g+b)*(g+b) <= _val, and add it to the solution g.*/ + g=0; + bshift=(EC_ILOG(_val)-1)>>1; + b=1U<<bshift; + do{ + opus_uint32 t; + t=(((opus_uint32)g<<1)+b)<<bshift; + if(t<=_val){ + g+=b; + _val-=t; + } + b>>=1; + bshift--; + } + while(bshift>=0); + return g; +} + +#ifdef OPUS_FIXED_POINT + +opus_val32 frac_div32(opus_val32 a, opus_val32 b) +{ + opus_val16 rcp; + opus_val32 result, rem; + int shift = celt_ilog2(b)-29; + a = VSHR32(a,shift); + b = VSHR32(b,shift); + /* 16-bit reciprocal */ + rcp = ROUND16(celt_rcp(ROUND16(b,16)),3); + result = MULT16_32_Q15(rcp, a); + rem = PSHR32(a,2)-MULT32_32_Q31(result, b); + result = ADD32(result, SHL32(MULT16_32_Q15(rcp, rem),2)); + if (result >= 536870912) /* 2^29 */ + return 2147483647; /* 2^31 - 1 */ + else if (result <= -536870912) /* -2^29 */ + return -2147483647; /* -2^31 */ + else + return SHL32(result, 2); +} + +/** Reciprocal sqrt approximation in the range [0.25,1) (Q16 in, Q14 out) */ +opus_val16 celt_rsqrt_norm(opus_val32 x) +{ + opus_val16 n; + opus_val16 r; + 
opus_val16 r2; + opus_val16 y; + /* Range of n is [-16384,32767] ([-0.5,1) in Q15). */ + n = x-32768; + /* Get a rough initial guess for the root. + The optimal minimax quadratic approximation (using relative error) is + r = 1.437799046117536+n*(-0.823394375837328+n*0.4096419668459485). + Coefficients here, and the final result r, are Q14.*/ + r = ADD16(23557, MULT16_16_Q15(n, ADD16(-13490, MULT16_16_Q15(n, 6713)))); + /* We want y = x*r*r-1 in Q15, but x is 32-bit Q16 and r is Q14. + We can compute the result from n and r using Q15 multiplies with some + adjustment, carefully done to avoid overflow. + Range of y is [-1564,1594]. */ + r2 = MULT16_16_Q15(r, r); + y = SHL16(SUB16(ADD16(MULT16_16_Q15(r2, n), r2), 16384), 1); + /* Apply a 2nd-order Householder iteration: r += r*y*(y*0.375-0.5). + This yields the Q14 reciprocal square root of the Q16 x, with a maximum + relative error of 1.04956E-4, a (relative) RMSE of 2.80979E-5, and a + peak absolute error of 2.26591/16384. */ + return ADD16(r, MULT16_16_Q15(r, MULT16_16_Q15(y, + SUB16(MULT16_16_Q15(y, 12288), 16384)))); +} + +/** Sqrt approximation (QX input, QX/2 output) */ +opus_val32 celt_sqrt(opus_val32 x) +{ + int k; + opus_val16 n; + opus_val32 rt; + static const opus_val16 C[5] = {23175, 11561, -3011, 1699, -664}; + if (x==0) + return 0; + else if (x>=1073741824) + return 32767; + k = (celt_ilog2(x)>>1)-7; + x = VSHR32(x, 2*k); + n = x-32768; + rt = ADD16(C[0], MULT16_16_Q15(n, ADD16(C[1], MULT16_16_Q15(n, ADD16(C[2], + MULT16_16_Q15(n, ADD16(C[3], MULT16_16_Q15(n, (C[4]))))))))); + rt = VSHR32(rt,7-k); + return rt; +} + +#define L1 32767 +#define L2 -7651 +#define L3 8277 +#define L4 -626 + +static OPUS_INLINE opus_val16 _celt_cos_pi_2(opus_val16 x) +{ + opus_val16 x2; + + x2 = MULT16_16_P15(x,x); + return ADD16(1,MIN16(32766,ADD32(SUB16(L1,x2), MULT16_16_P15(x2, ADD32(L2, MULT16_16_P15(x2, ADD32(L3, MULT16_16_P15(L4, x2 + )))))))); +} + +#undef L1 +#undef L2 +#undef L3 +#undef L4 + +opus_val16 
celt_cos_norm(opus_val32 x) +{ + x = x&0x0001ffff; + if (x>SHL32(EXTEND32(1), 16)) + x = SUB32(SHL32(EXTEND32(1), 17),x); + if (x&0x00007fff) + { + if (x<SHL32(EXTEND32(1), 15)) + { + return _celt_cos_pi_2(EXTRACT16(x)); + } else { + return NEG32(_celt_cos_pi_2(EXTRACT16(65536-x))); + } + } else { + if (x&0x0000ffff) + return 0; + else if (x&0x0001ffff) + return -32767; + else + return 32767; + } +} + +/** Reciprocal approximation (Q15 input, Q16 output) */ +opus_val32 celt_rcp(opus_val32 x) +{ + int i; + opus_val16 n; + opus_val16 r; + celt_assert2(x>0, "celt_rcp() only defined for positive values"); + i = celt_ilog2(x); + /* n is Q15 with range [0,1). */ + n = VSHR32(x,i-15)-32768; + /* Start with a linear approximation: + r = 1.8823529411764706-0.9411764705882353*n. + The coefficients and the result are Q14 in the range [15420,30840].*/ + r = ADD16(30840, MULT16_16_Q15(-15420, n)); + /* Perform two Newton iterations: + r -= r*((r*n)-1.Q15) + = r*((r*n)+(r-1.Q15)). */ + r = SUB16(r, MULT16_16_Q15(r, + ADD16(MULT16_16_Q15(r, n), ADD16(r, -32768)))); + /* We subtract an extra 1 in the second iteration to avoid overflow; it also + neatly compensates for truncation error in the rest of the process. */ + r = SUB16(r, ADD16(1, MULT16_16_Q15(r, + ADD16(MULT16_16_Q15(r, n), ADD16(r, -32768))))); + /* r is now the Q15 solution to 2/(n+1), with a maximum relative error + of 7.05346E-5, a (relative) RMSE of 2.14418E-5, and a peak absolute + error of 1.24665/32768. 
*/ + return VSHR32(EXTEND32(r),i-16); +} + +#endif diff --git a/drivers/opus/celt/mathops.h b/drivers/opus/celt/mathops.h new file mode 100644 index 0000000000..4a6bc539bc --- /dev/null +++ b/drivers/opus/celt/mathops.h @@ -0,0 +1,258 @@ +/* Copyright (c) 2002-2008 Jean-Marc Valin + Copyright (c) 2007-2008 CSIRO + Copyright (c) 2007-2009 Xiph.Org Foundation + Written by Jean-Marc Valin */ +/** + @file mathops.h + @brief Various math functions +*/ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef MATHOPS_H +#define MATHOPS_H + +#include "arch.h" +#include "entcode.h" +#include "os_support.h" + +/* Multiplies two 16-bit fractional values. 
Bit-exactness of this macro is important */
#define FRAC_MUL16(a,b) ((16384+((opus_int32)(opus_int16)(a)*(opus_int16)(b)))>>15)

unsigned isqrt32(opus_uint32 _val);

#ifndef OVERRIDE_CELT_MAXABS16
/* Largest absolute value among x[0..len-1], returned as an opus_val32.
   Tracks the running maximum and minimum separately and returns
   max(maxval, -minval); returns 0 when len <= 0. */
static OPUS_INLINE opus_val32 celt_maxabs16(const opus_val16 *x, int len)
{
   int i;
   opus_val16 maxval = 0;
   opus_val16 minval = 0;
   for (i=0;i<len;i++)
   {
      maxval = MAX16(maxval, x[i]);
      minval = MIN16(minval, x[i]);
   }
   return MAX32(EXTEND32(maxval),-EXTEND32(minval));
}
#endif

#ifndef OVERRIDE_CELT_MAXABS32
#ifdef OPUS_FIXED_POINT
/* Same as celt_maxabs16() but for 32-bit samples (fixed-point builds only). */
static OPUS_INLINE opus_val32 celt_maxabs32(const opus_val32 *x, int len)
{
   int i;
   opus_val32 maxval = 0;
   opus_val32 minval = 0;
   for (i=0;i<len;i++)
   {
      maxval = MAX32(maxval, x[i]);
      minval = MIN32(minval, x[i]);
   }
   return MAX32(maxval, -minval);
}
#else
/* In non-fixed-point builds the 16-bit version is reused. */
#define celt_maxabs32(x,len) celt_maxabs16(x,len)
#endif
#endif


#ifndef OPUS_FIXED_POINT

/* Floating-point build: the math helpers map directly onto libm / plain
   arithmetic. */
#define PI 3.141592653f
#define celt_sqrt(x) ((float)sqrt(x))
#define celt_rsqrt(x) (1.f/celt_sqrt(x))
#define celt_rsqrt_norm(x) (celt_rsqrt(x))
#define celt_cos_norm(x) ((float)cos((.5f*PI)*(x)))
#define celt_rcp(x) (1.f/(x))
#define celt_div(a,b) ((a)/(b))
#define frac_div32(a,b) ((float)(a)/(b))

#ifdef FLOAT_APPROX

/* Note: This assumes radix-2 floating point with the exponent at bits 23..30 and an offset of 127
         denorm, +/- inf and NaN are *not* handled */

/** Base-2 log approximation (log2(x)). */
static OPUS_INLINE float celt_log2(float x)
{
   int integer;
   float frac;
   union {
      float f;
      opus_uint32 i;
   } in;
   in.f = x;
   /* Extract the unbiased exponent, then remove it from the bit pattern so
      that in.f becomes the mantissa alone. */
   integer = (in.i>>23)-127;
   /* NOTE(review): `integer` is negative for x < 1, so `integer<<23` is a left
      shift of a negative int — confirm this is acceptable on all targets. */
   in.i -= integer<<23;
   /* Cubic polynomial in (mantissa - 1.5). */
   frac = in.f - 1.5f;
   frac = -0.41445418f + frac*(0.95909232f
          + frac*(-0.33951290f + frac*0.16541097f));
   return 1+integer+frac;
}

/** Base-2 exponential approximation (2^x). */
static OPUS_INLINE float celt_exp2(float x)
{
   int integer;
   float frac;
   union {
      float f;
      opus_uint32 i;
   } res;
   integer = floor(x);
   /* Inputs below 2^-50 are flushed to zero. */
   if (integer < -50)
      return 0;
   frac = x-integer;
   /* K0 = 1, K1 = log(2), K2 = 3-4*log(2), K3 = 3*log(2) - 2 */
   res.f = 0.99992522f + frac * (0.69583354f
           + frac * (0.22606716f + 0.078024523f*frac));
   /* Add the integer part straight into the exponent field and clear the
      sign bit.
      NOTE(review): `integer` may be negative here (down to -50), so
      `integer<<23` is a left shift of a negative int — confirm. */
   res.i = (res.i + (integer<<23)) & 0x7fffffff;
   return res.f;
}

#else
#define celt_log2(x) ((float)(1.442695040888963387*log(x)))
#define celt_exp2(x) ((float)exp(0.6931471805599453094*(x)))
#endif

#endif

#ifdef OPUS_FIXED_POINT

#include "os_support.h"

#ifndef OVERRIDE_CELT_ILOG2
/** Integer log in base2. Undefined for zero and negative numbers */
static OPUS_INLINE opus_int16 celt_ilog2(opus_int32 x)
{
   celt_assert2(x>0, "celt_ilog2() only defined for strictly positive numbers");
   return EC_ILOG(x)-1;
}
#endif


/** Integer log in base2. Defined for zero, but not for negative numbers */
static OPUS_INLINE opus_int16 celt_zlog2(opus_val32 x)
{
   /* Non-positive inputs (including negative ones) map to 0 rather than
      reaching celt_ilog2(). */
   return x <= 0 ? 0 : celt_ilog2(x);
}

opus_val16 celt_rsqrt_norm(opus_val32 x);

opus_val32 celt_sqrt(opus_val32 x);

opus_val16 celt_cos_norm(opus_val32 x);

/** Base-2 logarithm approximation (log2(x)). (Q14 input, Q10 output) */
static OPUS_INLINE opus_val16 celt_log2(opus_val32 x)
{
   int i;
   opus_val16 n, frac;
   /* -0.41509302963303146, 0.9609890551383969, -0.31836011537636605,
       0.15530808010959576, -0.08556153059057618 */
   static const opus_val16 C[5] = {-6801+(1<<(13-DB_SHIFT)), 15746, -5217, 2545, -1401};
   /* log2(0) is reported as the most negative value used here. */
   if (x==0)
      return -32767;
   i = celt_ilog2(x);
   /* Normalise x so its top bit sits at bit 15, then centre it by removing
      32768+16384 before evaluating the polynomial C[]. */
   n = VSHR32(x,i-15)-32768-16384;
   frac = ADD16(C[0], MULT16_16_Q15(n, ADD16(C[1], MULT16_16_Q15(n, ADD16(C[2], MULT16_16_Q15(n, ADD16(C[3], MULT16_16_Q15(n, C[4]))))))));
   return SHL16(i-13,DB_SHIFT)+SHR16(frac,14-DB_SHIFT);
}

/*
 K0 = 1
 K1 = log(2)
 K2 = 3-4*log(2)
 K3 = 3*log(2) - 2
*/
#define D0 16383
#define D1 22804
#define D2 14819
#define D3 10204

/* Polynomial (coefficients D0..D3) for the fractional part of celt_exp2();
   the argument is scaled up by 4 bits before evaluation. */
static OPUS_INLINE opus_val32 celt_exp2_frac(opus_val16 x)
{
   opus_val16 frac;
   frac = SHL16(x, 4);
   return ADD16(D0, MULT16_16_Q15(frac, ADD16(D1, MULT16_16_Q15(frac, ADD16(D2 , MULT16_16_Q15(D3,frac))))));
}
/** Base-2 exponential approximation (2^x). (Q10 input, Q16 output) */
static OPUS_INLINE opus_val32 celt_exp2(opus_val16 x)
{
   int integer;
   opus_val16 frac;
   integer = SHR16(x,10);
   /* Saturate for large exponents and flush to zero for very small ones. */
   if (integer>14)
      return 0x7f000000;
   else if (integer < -15)
      return 0;
   frac = celt_exp2_frac(x-SHL16(integer,10));
   return VSHR32(EXTEND32(frac), -integer-2);
}

opus_val32 celt_rcp(opus_val32 x);

#define celt_div(a,b) MULT32_32_Q31((opus_val32)(a),celt_rcp(b))

opus_val32 frac_div32(opus_val32 a, opus_val32 b);

#define M1 32767
#define M2 -21
#define M3 -11943
#define M4 4936

/* Atan approximation using a 4th order polynomial. Input is in Q15 format
   and normalized by pi/4. Output is in Q15 format */
static OPUS_INLINE opus_val16 celt_atan01(opus_val16 x)
{
   return MULT16_16_P15(x, ADD32(M1, MULT16_16_P15(x, ADD32(M2, MULT16_16_P15(x, ADD32(M3, MULT16_16_P15(M4, x)))))));
}

#undef M1
#undef M2
#undef M3
#undef M4

/* atan2() approximation valid for positive input values */
static OPUS_INLINE opus_val16 celt_atan2p(opus_val16 y, opus_val16 x)
{
   /* Always divide the smaller operand by the larger one so the ratio fed to
      celt_atan01() stays within its input range (clamped to 32767); the
      second branch subtracts the result from the constant 25736. */
   if (y < x)
   {
      opus_val32 arg;
      arg = celt_div(SHL32(EXTEND32(y),15),x);
      if (arg >= 32767)
         arg = 32767;
      return SHR16(celt_atan01(EXTRACT16(arg)),1);
   } else {
      opus_val32 arg;
      arg = celt_div(SHL32(EXTEND32(x),15),y);
      if (arg >= 32767)
         arg = 32767;
      return 25736-SHR16(celt_atan01(EXTRACT16(arg)),1);
   }
}

#endif /* OPUS_FIXED_POINT */
#endif /* MATHOPS_H */
diff --git a/drivers/opus/celt/mdct.c b/drivers/opus/celt/mdct.c
new file mode 100644
index 0000000000..d08d026fac
--- /dev/null
+++ b/drivers/opus/celt/mdct.c
@@ -0,0 +1,311 @@
 /* Copyright (c) 2007-2008 CSIRO
   Copyright (c) 2007-2008 Xiph.Org Foundation
   Written by Jean-Marc Valin */
/*
   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:

   - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.

   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/* This is a simple MDCT implementation that uses an N/4 complex FFT + to do most of the work. It should be relatively straightforward to + plug in pretty much any FFT here. + + This replaces the Vorbis FFT (and uses the exact same API), which + was a bit too messy and that was ending up duplicating code + (might as well use the same FFT everywhere). + + The algorithm is similar to (and inspired from) Fabrice Bellard's + MDCT implementation in FFMPEG, but has differences in signs, ordering + and scaling in many places. 
*/

#ifndef SKIP_CONFIG_H
#ifdef OPUS_HAVE_CONFIG_H
#include "opus_config.h"
#endif
#endif

#include "mdct.h"
#include "kiss_fft.h"
#include "_kiss_fft_guts.h"
#include <math.h>
#include "os_support.h"
#include "mathops.h"
#include "stack_alloc.h"

#ifdef CUSTOM_MODES

/* Initialise an MDCT lookup for transform size N and shifts 0..maxshift.
   Allocates one FFT state of size (N>>2)>>i per shift i (states for i>0 are
   built from the twiddles of state 0) and a table of N/4+1 cosine values.
   Returns 1 on success and 0 on allocation failure.
   NOTE(review): the `return 0` paths do not release FFT states allocated in
   earlier iterations; presumably the caller cleans up with clt_mdct_clear()
   - confirm.
   NOTE(review): celt_mdct_lookup declares kfft[4] in mdct.h, so maxshift is
   assumed to be at most 3; that is not checked here. */
int clt_mdct_init(celt_mdct_lookup *l,int N, int maxshift)
{
   int i;
   int N4;
   kiss_twiddle_scalar *trig;
#if defined(OPUS_FIXED_POINT)
   int N2=N>>1;
#endif
   l->n = N;
   N4 = N>>2;
   l->maxshift = maxshift;
   for (i=0;i<=maxshift;i++)
   {
      if (i==0)
         l->kfft[i] = opus_fft_alloc(N>>2>>i, 0, 0);
      else
         l->kfft[i] = opus_fft_alloc_twiddles(N>>2>>i, 0, 0, l->kfft[0]);
#ifndef ENABLE_TI_DSPLIB55
      if (l->kfft[i]==NULL)
         return 0;
#endif
   }
   l->trig = trig = (kiss_twiddle_scalar*)opus_alloc((N4+1)*sizeof(kiss_twiddle_scalar));
   if (l->trig==NULL)
     return 0;
   /* We have enough points that sine isn't necessary */
#if defined(OPUS_FIXED_POINT)
   for (i=0;i<=N4;i++)
      trig[i] = TRIG_UPSCALE*celt_cos_norm(DIV32(ADD32(SHL32(EXTEND32(i),17),N2),N));
#else
   for (i=0;i<=N4;i++)
      trig[i] = (kiss_twiddle_scalar)cos(2*PI*i/N);
#endif
   return 1;
}

/* Release the FFT states (indices 0..maxshift) and the trig table owned by
   the lookup. */
void clt_mdct_clear(celt_mdct_lookup *l)
{
   int i;
   for (i=0;i<=l->maxshift;i++)
      opus_fft_free(l->kfft[i]);
   opus_free((kiss_twiddle_scalar*)l->trig);
}

#endif /* CUSTOM_MODES */

/* Forward MDCT trashes the input array */
/* The effective transform size is N = l->n >> shift and the FFT state used is
   l->kfft[shift]. N/2 output coefficients are written to `out`, spaced
   `stride` elements apart. `window` supplies the `overlap` window
   coefficients applied while folding the input. */
void clt_mdct_forward(const celt_mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,
      const opus_val16 *window, int overlap, int shift, int stride)
{
   int i;
   int N, N2, N4;
   kiss_twiddle_scalar sine;
   VARDECL(kiss_fft_scalar, f);
   VARDECL(kiss_fft_scalar, f2);
   SAVE_STACK;
   N = l->n;
   N >>= shift;
   N2 = N>>1;
   N4 = N>>2;
   /* f holds the folded/pre-rotated input, f2 the FFT output. */
   ALLOC(f, N2, kiss_fft_scalar);
   ALLOC(f2, N2, kiss_fft_scalar);
   /* sin(x) ~= x here */
#ifdef OPUS_FIXED_POINT
   sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N;
#else
   sine = (kiss_twiddle_scalar)2*PI*(.125f)/N;
#endif

   /* Consider the input to be composed of four blocks: [a, b, c, d] */
   /* Window, shuffle, fold */
   {
      /* Temp pointers to make it really clear to the compiler what we're doing */
      const kiss_fft_scalar * OPUS_RESTRICT xp1 = in+(overlap>>1);
      const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+N2-1+(overlap>>1);
      kiss_fft_scalar * OPUS_RESTRICT yp = f;
      const opus_val16 * OPUS_RESTRICT wp1 = window+(overlap>>1);
      const opus_val16 * OPUS_RESTRICT wp2 = window+(overlap>>1)-1;
      /* First (overlap+3)>>2 pairs: windowed fold. */
      for(i=0;i<((overlap+3)>>2);i++)
      {
         /* Real part arranged as -d-cR, Imag part arranged as -b+aR*/
         *yp++ = MULT16_32_Q15(*wp2, xp1[N2]) + MULT16_32_Q15(*wp1,*xp2);
         *yp++ = MULT16_32_Q15(*wp1, *xp1)    - MULT16_32_Q15(*wp2, xp2[-N2]);
         xp1+=2;
         xp2-=2;
         wp1+=2;
         wp2-=2;
      }
      wp1 = window;
      wp2 = window+overlap-1;
      /* Middle pairs: no window is applied, samples are copied straight
         through. */
      for(;i<N4-((overlap+3)>>2);i++)
      {
         /* Real part arranged as a-bR, Imag part arranged as -c-dR */
         *yp++ = *xp2;
         *yp++ = *xp1;
         xp1+=2;
         xp2-=2;
      }
      /* Remaining pairs: windowed fold, walking the window from both ends. */
      for(;i<N4;i++)
      {
         /* Real part arranged as a-bR, Imag part arranged as -c-dR */
         *yp++ =  -MULT16_32_Q15(*wp1, xp1[-N2]) + MULT16_32_Q15(*wp2, *xp2);
         *yp++ = MULT16_32_Q15(*wp2, *xp1)     + MULT16_32_Q15(*wp1, xp2[N2]);
         xp1+=2;
         xp2-=2;
         wp1+=2;
         wp2-=2;
      }
   }
   /* Pre-rotation */
   {
      kiss_fft_scalar * OPUS_RESTRICT yp = f;
      const kiss_twiddle_scalar *t = &l->trig[0];
      for(i=0;i<N4;i++)
      {
         kiss_fft_scalar re, im, yr, yi;
         re = yp[0];
         im = yp[1];
         /* The trig table is indexed with <<shift so smaller transforms
            reuse the table built for the full size. */
         yr = -S_MUL(re,t[i<<shift])  -  S_MUL(im,t[(N4-i)<<shift]);
         yi = -S_MUL(im,t[i<<shift])  +  S_MUL(re,t[(N4-i)<<shift]);
         /* works because the cos is nearly one */
         *yp++ = yr + S_MUL(yi,sine);
         *yp++ = yi - S_MUL(yr,sine);
      }
   }

   /* N/4 complex FFT, down-scales by 4/N */
   opus_fft(l->kfft[shift], (kiss_fft_cpx *)f, (kiss_fft_cpx *)f2);

   /* Post-rotate */
   {
      /* Temp pointers to make it really clear to the compiler what we're doing */
      const kiss_fft_scalar * OPUS_RESTRICT fp = f2;
      kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
      kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1);
      const kiss_twiddle_scalar *t = &l->trig[0];
      /* Temp pointers to make it really clear to the compiler what we're doing */
      /* yp1 walks forward from the first output and yp2 backward from the
         last, each skipping every other (strided) slot. */
      for(i=0;i<N4;i++)
      {
         kiss_fft_scalar yr, yi;
         yr = S_MUL(fp[1],t[(N4-i)<<shift]) + S_MUL(fp[0],t[i<<shift]);
         yi = S_MUL(fp[0],t[(N4-i)<<shift]) - S_MUL(fp[1],t[i<<shift]);
         /* works because the cos is nearly one */
         *yp1 = yr - S_MUL(yi,sine);
         *yp2 = yi + S_MUL(yr,sine);;
         fp += 2;
         yp1 += 2*stride;
         yp2 -= 2*stride;
      }
   }
   RESTORE_STACK;
}

/* Backward (inverse) MDCT. The effective size is N = l->n >> shift and the
   FFT state used is l->kfft[shift]. N/2 input coefficients are read from
   `in`, spaced `stride` elements apart. The inverse FFT writes directly into
   `out` starting at offset overlap>>1, and the first `overlap` samples of
   `out` are then combined with the window in place. */
void clt_mdct_backward(const celt_mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,
      const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride)
{
   int i;
   int N, N2, N4;
   kiss_twiddle_scalar sine;
   VARDECL(kiss_fft_scalar, f2);
   SAVE_STACK;
   N = l->n;
   N >>= shift;
   N2 = N>>1;
   N4 = N>>2;
   ALLOC(f2, N2, kiss_fft_scalar);
   /* sin(x) ~= x here */
#ifdef OPUS_FIXED_POINT
   sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N;
#else
   sine = (kiss_twiddle_scalar)2*PI*(.125f)/N;
#endif

   /* Pre-rotate */
   {
      /* Temp pointers to make it really clear to the compiler what we're doing */
      const kiss_fft_scalar * OPUS_RESTRICT xp1 = in;
      const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+stride*(N2-1);
      kiss_fft_scalar * OPUS_RESTRICT yp = f2;
      const kiss_twiddle_scalar *t = &l->trig[0];
      /* xp1 reads forward from the first coefficient and xp2 backward from
         the last, each skipping every other (strided) slot. */
      for(i=0;i<N4;i++)
      {
         kiss_fft_scalar yr, yi;
         yr = -S_MUL(*xp2, t[i<<shift]) + S_MUL(*xp1,t[(N4-i)<<shift]);
         yi =  -S_MUL(*xp2, t[(N4-i)<<shift]) - S_MUL(*xp1,t[i<<shift]);
         /* works because the cos is nearly one */
         *yp++ = yr - S_MUL(yi,sine);
         *yp++ = yi + S_MUL(yr,sine);
         xp1+=2*stride;
         xp2-=2*stride;
      }
   }

   /* Inverse N/4 complex FFT. This one should *not* downscale even in fixed-point */
   opus_ifft(l->kfft[shift], (kiss_fft_cpx *)f2, (kiss_fft_cpx *)(out+(overlap>>1)));

   /* Post-rotate and de-shuffle from both ends of the buffer at once to make
      it in-place. */
   {
      kiss_fft_scalar * OPUS_RESTRICT yp0 = out+(overlap>>1);
      kiss_fft_scalar * OPUS_RESTRICT yp1 = out+(overlap>>1)+N2-2;
      const kiss_twiddle_scalar *t = &l->trig[0];
      /* Loop to (N4+1)>>1 to handle odd N4. When N4 is odd, the
         middle pair will be computed twice. */
      for(i=0;i<(N4+1)>>1;i++)
      {
         kiss_fft_scalar re, im, yr, yi;
         kiss_twiddle_scalar t0, t1;
         re = yp0[0];
         im = yp0[1];
         t0 = t[i<<shift];
         t1 = t[(N4-i)<<shift];
         /* We'd scale up by 2 here, but instead it's done when mixing the windows */
         yr = S_MUL(re,t0) - S_MUL(im,t1);
         yi = S_MUL(im,t0) + S_MUL(re,t1);
         /* Read the pair at the far end before the stores below overwrite
            it. */
         re = yp1[0];
         im = yp1[1];
         /* works because the cos is nearly one */
         yp0[0] = -(yr - S_MUL(yi,sine));
         yp1[1] = yi + S_MUL(yr,sine);

         t0 = t[(N4-i-1)<<shift];
         t1 = t[(i+1)<<shift];
         /* We'd scale up by 2 here, but instead it's done when mixing the windows */
         yr = S_MUL(re,t0) - S_MUL(im,t1);
         yi = S_MUL(im,t0) + S_MUL(re,t1);
         /* works because the cos is nearly one */
         yp1[0] = -(yr - S_MUL(yi,sine));
         yp0[1] = yi + S_MUL(yr,sine);
         yp0 += 2;
         yp1 -= 2;
      }
   }

   /* Mirror on both sides for TDAC */
   {
      kiss_fft_scalar * OPUS_RESTRICT xp1 = out+overlap-1;
      kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
      const opus_val16 * OPUS_RESTRICT wp1 = window;
      const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1;

      /* Combine sample pairs taken from the two ends of the overlap region,
         weighting them with window coefficients taken from opposite ends. */
      for(i = 0; i < overlap/2; i++)
      {
         kiss_fft_scalar x1, x2;
         x1 = *xp1;
         x2 = *yp1;
         *yp1++ = MULT16_32_Q15(*wp2, x2) - MULT16_32_Q15(*wp1, x1);
         *xp1-- = MULT16_32_Q15(*wp1, x2) + MULT16_32_Q15(*wp2, x1);
         wp1++;
         wp2--;
      }
   }
   RESTORE_STACK;
}
diff --git a/drivers/opus/celt/mdct.h b/drivers/opus/celt/mdct.h
new file mode 100644
index 0000000000..4e7a199246
--- /dev/null
+++ b/drivers/opus/celt/mdct.h
@@ -0,0 +1,70 @@
/* Copyright (c) 2007-2008 CSIRO
   Copyright (c) 2007-2008 Xiph.Org Foundation
   Written by Jean-Marc Valin */
/*
   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided
that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/* This is a simple MDCT implementation that uses an N/4 complex FFT + to do most of the work. It should be relatively straightforward to + plug in pretty much any FFT here. + + This replaces the Vorbis FFT (and uses the exact same API), which + was a bit too messy and that was ending up duplicating code + (might as well use the same FFT everywhere). + + The algorithm is similar to (and inspired from) Fabrice Bellard's + MDCT implementation in FFMPEG, but has differences in signs, ordering + and scaling in many places. 
+*/ + +#ifndef MDCT_H +#define MDCT_H + +#include "opus_defines.h" +#include "kiss_fft.h" +#include "arch.h" + +typedef struct { + int n; + int maxshift; + const kiss_fft_state *kfft[4]; + const kiss_twiddle_scalar * OPUS_RESTRICT trig; +} celt_mdct_lookup; + +int clt_mdct_init(celt_mdct_lookup *l,int N, int maxshift); +void clt_mdct_clear(celt_mdct_lookup *l); + +/** Compute a forward MDCT and scale by 4/N, trashes the input array */ +void clt_mdct_forward(const celt_mdct_lookup *l, kiss_fft_scalar *in, + kiss_fft_scalar * OPUS_RESTRICT out, + const opus_val16 *window, int overlap, int shift, int stride); + +/** Compute a backward MDCT (no scaling) and performs weighted overlap-add + (scales implicitly by 1/2) */ +void clt_mdct_backward(const celt_mdct_lookup *l, kiss_fft_scalar *in, + kiss_fft_scalar * OPUS_RESTRICT out, + const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride); + +#endif diff --git a/drivers/opus/celt/mfrngcod.h b/drivers/opus/celt/mfrngcod.h new file mode 100644 index 0000000000..809152a59a --- /dev/null +++ b/drivers/opus/celt/mfrngcod.h @@ -0,0 +1,48 @@ +/* Copyright (c) 2001-2008 Timothy B. Terriberry + Copyright (c) 2008-2009 Xiph.Org Foundation */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#if !defined(_mfrngcode_H) +# define _mfrngcode_H (1) +# include "entcode.h" + +/*Constants used by the entropy encoder/decoder.*/ + +/*The number of bits to output at a time.*/ +# define EC_SYM_BITS (8) +/*The total number of bits in each of the state registers.*/ +# define EC_CODE_BITS (32) +/*The maximum symbol value.*/ +# define EC_SYM_MAX ((1U<<EC_SYM_BITS)-1) +/*Bits to shift by to move a symbol into the high-order position.*/ +# define EC_CODE_SHIFT (EC_CODE_BITS-EC_SYM_BITS-1) +/*Carry bit of the high-order range symbol.*/ +# define EC_CODE_TOP (((opus_uint32)1U)<<(EC_CODE_BITS-1)) +/*Low-order bit of the high-order range symbol.*/ +# define EC_CODE_BOT (EC_CODE_TOP>>EC_SYM_BITS) +/*The number of bits available for the last, partial symbol in the code field.*/ +# define EC_CODE_EXTRA ((EC_CODE_BITS-2)%EC_SYM_BITS+1) +#endif diff --git a/drivers/opus/celt/modes.c b/drivers/opus/celt/modes.c new file mode 100644 index 0000000000..3794074aaa --- /dev/null +++ b/drivers/opus/celt/modes.c @@ -0,0 +1,438 @@ +/* Copyright (c) 2007-2008 CSIRO + Copyright (c) 2007-2009 Xiph.Org Foundation + Copyright (c) 2008 Gregory Maxwell + Written by Jean-Marc Valin and Gregory Maxwell */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the 
following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "celt.h" +#include "opus_modes.h" +#include "rate.h" +#include "os_support.h" +#include "stack_alloc.h" +#include "quant_bands.h" + +static const opus_int16 eband5ms[] = { +/*0 200 400 600 800 1k 1.2 1.4 1.6 2k 2.4 2.8 3.2 4k 4.8 5.6 6.8 8k 9.6 12k 15.6 */ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 34, 40, 48, 60, 78, 100 +}; + +/* Alternate tuning (partially derived from Vorbis) */ +#define BITALLOC_SIZE 11 +/* Bit allocation table in units of 1/32 bit/sample (0.1875 dB SNR) */ +static const unsigned char band_allocation[] = { +/*0 200 400 600 800 1k 1.2 1.4 1.6 2k 2.4 2.8 3.2 4k 4.8 5.6 6.8 8k 9.6 12k 15.6 */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 90, 80, 75, 69, 63, 56, 49, 40, 34, 29, 20, 18, 10, 0, 0, 0, 0, 0, 0, 0, 0, +110,100, 90, 84, 78, 71, 65, 58, 51, 45, 39, 32, 26, 20, 12, 0, 0, 0, 0, 0, 0, +118,110,103, 93, 86, 80, 75, 70, 65, 59, 53, 47, 40, 31, 23, 15, 4, 0, 0, 0, 0, +126,119,112,104, 95, 89, 83, 78, 72, 66, 60, 54, 47, 39, 32, 25, 17, 12, 1, 0, 0, +134,127,120,114,103, 97, 91, 85, 78, 72, 66, 60, 54, 47, 41, 35, 29, 23, 16, 10, 1, +144,137,130,124,113,107,101, 95, 88, 82, 76, 70, 64, 57, 51, 45, 39, 33, 26, 15, 1, +152,145,138,132,123,117,111,105, 98, 92, 86, 80, 74, 67, 61, 55, 49, 43, 36, 20, 1, +162,155,148,142,133,127,121,115,108,102, 96, 90, 84, 77, 71, 65, 59, 53, 46, 30, 1, +172,165,158,152,143,137,131,125,118,112,106,100, 94, 87, 81, 75, 69, 63, 56, 45, 20, +200,200,200,200,200,200,200,200,198,193,188,183,178,173,168,163,158,153,148,129,104, +}; + +#ifndef CUSTOM_MODES_ONLY + #ifdef OPUS_FIXED_POINT + #include "static_modes_fixed.h" + #else + #include "static_modes_float.h" + #endif +#endif /* CUSTOM_MODES_ONLY */ + +#ifndef M_PI +#define M_PI 3.141592653 +#endif + +#ifdef CUSTOM_MODES + +/* Defining 25 critical bands for the full 0-20 kHz audio bandwidth + Taken from 
http://ccrma.stanford.edu/~jos/bbt/Bark_Frequency_Scale.html */ +#define BARK_BANDS 25 +static const opus_int16 bark_freq[BARK_BANDS+1] = { + 0, 100, 200, 300, 400, + 510, 630, 770, 920, 1080, + 1270, 1480, 1720, 2000, 2320, + 2700, 3150, 3700, 4400, 5300, + 6400, 7700, 9500, 12000, 15500, + 20000}; + +static opus_int16 *compute_ebands(opus_int32 Fs, int frame_size, int res, int *nbEBands) +{ + opus_int16 *eBands; + int i, j, lin, low, high, nBark, offset=0; + + /* All modes that have 2.5 ms short blocks use the same definition */ + if (Fs == 400*(opus_int32)frame_size) + { + *nbEBands = sizeof(eband5ms)/sizeof(eband5ms[0])-1; + eBands = opus_alloc(sizeof(opus_int16)*(*nbEBands+1)); + for (i=0;i<*nbEBands+1;i++) + eBands[i] = eband5ms[i]; + return eBands; + } + /* Find the number of critical bands supported by our sampling rate */ + for (nBark=1;nBark<BARK_BANDS;nBark++) + if (bark_freq[nBark+1]*2 >= Fs) + break; + + /* Find where the linear part ends (i.e. where the spacing is more than min_width */ + for (lin=0;lin<nBark;lin++) + if (bark_freq[lin+1]-bark_freq[lin] >= res) + break; + + low = (bark_freq[lin]+res/2)/res; + high = nBark-lin; + *nbEBands = low+high; + eBands = opus_alloc(sizeof(opus_int16)*(*nbEBands+2)); + + if (eBands==NULL) + return NULL; + + /* Linear spacing (min_width) */ + for (i=0;i<low;i++) + eBands[i] = i; + if (low>0) + offset = eBands[low-1]*res - bark_freq[lin-1]; + /* Spacing follows critical bands */ + for (i=0;i<high;i++) + { + int target = bark_freq[lin+i]; + /* Round to an even value */ + eBands[i+low] = (target+offset/2+res)/(2*res)*2; + offset = eBands[i+low]*res - target; + } + /* Enforce the minimum spacing at the boundary */ + for (i=0;i<*nbEBands;i++) + if (eBands[i] < i) + eBands[i] = i; + /* Round to an even value */ + eBands[*nbEBands] = (bark_freq[nBark]+res)/(2*res)*2; + if (eBands[*nbEBands] > frame_size) + eBands[*nbEBands] = frame_size; + for (i=1;i<*nbEBands-1;i++) + { + if (eBands[i+1]-eBands[i] < 
eBands[i]-eBands[i-1]) + { + eBands[i] -= (2*eBands[i]-eBands[i-1]-eBands[i+1])/2; + } + } + /* Remove any empty bands. */ + for (i=j=0;i<*nbEBands;i++) + if(eBands[i+1]>eBands[j]) + eBands[++j]=eBands[i+1]; + *nbEBands=j; + + for (i=1;i<*nbEBands;i++) + { + /* Every band must be smaller than the last band. */ + celt_assert(eBands[i]-eBands[i-1]<=eBands[*nbEBands]-eBands[*nbEBands-1]); + /* Each band must be no larger than twice the size of the previous one. */ + celt_assert(eBands[i+1]-eBands[i]<=2*(eBands[i]-eBands[i-1])); + } + + return eBands; +} + +static void compute_allocation_table(CELTMode *mode) +{ + int i, j; + unsigned char *allocVectors; + int maxBands = sizeof(eband5ms)/sizeof(eband5ms[0])-1; + + mode->nbAllocVectors = BITALLOC_SIZE; + allocVectors = opus_alloc(sizeof(unsigned char)*(BITALLOC_SIZE*mode->nbEBands)); + if (allocVectors==NULL) + return; + + /* Check for standard mode */ + if (mode->Fs == 400*(opus_int32)mode->shortMdctSize) + { + for (i=0;i<BITALLOC_SIZE*mode->nbEBands;i++) + allocVectors[i] = band_allocation[i]; + mode->allocVectors = allocVectors; + return; + } + /* If not the standard mode, interpolate */ + /* Compute per-codec-band allocation from per-critical-band matrix */ + for (i=0;i<BITALLOC_SIZE;i++) + { + for (j=0;j<mode->nbEBands;j++) + { + int k; + for (k=0;k<maxBands;k++) + { + if (400*(opus_int32)eband5ms[k] > mode->eBands[j]*(opus_int32)mode->Fs/mode->shortMdctSize) + break; + } + if (k>maxBands-1) + allocVectors[i*mode->nbEBands+j] = band_allocation[i*maxBands + maxBands-1]; + else { + opus_int32 a0, a1; + a1 = mode->eBands[j]*(opus_int32)mode->Fs/mode->shortMdctSize - 400*(opus_int32)eband5ms[k-1]; + a0 = 400*(opus_int32)eband5ms[k] - mode->eBands[j]*(opus_int32)mode->Fs/mode->shortMdctSize; + allocVectors[i*mode->nbEBands+j] = (a0*band_allocation[i*maxBands+k-1] + + a1*band_allocation[i*maxBands+k])/(a0+a1); + } + } + } + + /*printf ("\n"); + for (i=0;i<BITALLOC_SIZE;i++) + { + for (j=0;j<mode->nbEBands;j++) + printf 
("%d ", allocVectors[i*mode->nbEBands+j]); + printf ("\n"); + } + exit(0);*/ + + mode->allocVectors = allocVectors; +} + +#endif /* CUSTOM_MODES */ + +CELTMode *opus_custom_mode_create(opus_int32 Fs, int frame_size, int *error) +{ + int i; +#ifdef CUSTOM_MODES + CELTMode *mode=NULL; + int res; + opus_val16 *window; + opus_int16 *logN; + int LM; + ALLOC_STACK; +#if !defined(VAR_ARRAYS) && !defined(USE_ALLOCA) + if (global_stack==NULL) + goto failure; +#endif +#endif + +#ifndef CUSTOM_MODES_ONLY + for (i=0;i<TOTAL_MODES;i++) + { + int j; + for (j=0;j<4;j++) + { + if (Fs == static_mode_list[i]->Fs && + (frame_size<<j) == static_mode_list[i]->shortMdctSize*static_mode_list[i]->nbShortMdcts) + { + if (error) + *error = OPUS_OK; + return (CELTMode*)static_mode_list[i]; + } + } + } +#endif /* CUSTOM_MODES_ONLY */ + +#ifndef CUSTOM_MODES + if (error) + *error = OPUS_BAD_ARG; + return NULL; +#else + + /* The good thing here is that permutation of the arguments will automatically be invalid */ + + if (Fs < 8000 || Fs > 96000) + { + if (error) + *error = OPUS_BAD_ARG; + return NULL; + } + if (frame_size < 40 || frame_size > 1024 || frame_size%2!=0) + { + if (error) + *error = OPUS_BAD_ARG; + return NULL; + } + /* Frames of less than 1ms are not supported. */ + if ((opus_int32)frame_size*1000 < Fs) + { + if (error) + *error = OPUS_BAD_ARG; + return NULL; + } + + if ((opus_int32)frame_size*75 >= Fs && (frame_size%16)==0) + { + LM = 3; + } else if ((opus_int32)frame_size*150 >= Fs && (frame_size%8)==0) + { + LM = 2; + } else if ((opus_int32)frame_size*300 >= Fs && (frame_size%4)==0) + { + LM = 1; + } else + { + LM = 0; + } + + /* Shorts longer than 3.3ms are not supported. */ + if ((opus_int32)(frame_size>>LM)*300 > Fs) + { + if (error) + *error = OPUS_BAD_ARG; + return NULL; + } + + mode = opus_alloc(sizeof(CELTMode)); + if (mode==NULL) + goto failure; + mode->Fs = Fs; + + /* Pre/de-emphasis depends on sampling rate. 
The "standard" pre-emphasis + is defined as A(z) = 1 - 0.85*z^-1 at 48 kHz. Other rates should + approximate that. */ + if(Fs < 12000) /* 8 kHz */ + { + mode->preemph[0] = QCONST16(0.3500061035f, 15); + mode->preemph[1] = -QCONST16(0.1799926758f, 15); + mode->preemph[2] = QCONST16(0.2719968125f, SIG_SHIFT); /* exact 1/preemph[3] */ + mode->preemph[3] = QCONST16(3.6765136719f, 13); + } else if(Fs < 24000) /* 16 kHz */ + { + mode->preemph[0] = QCONST16(0.6000061035f, 15); + mode->preemph[1] = -QCONST16(0.1799926758f, 15); + mode->preemph[2] = QCONST16(0.4424998650f, SIG_SHIFT); /* exact 1/preemph[3] */ + mode->preemph[3] = QCONST16(2.2598876953f, 13); + } else if(Fs < 40000) /* 32 kHz */ + { + mode->preemph[0] = QCONST16(0.7799987793f, 15); + mode->preemph[1] = -QCONST16(0.1000061035f, 15); + mode->preemph[2] = QCONST16(0.7499771125f, SIG_SHIFT); /* exact 1/preemph[3] */ + mode->preemph[3] = QCONST16(1.3333740234f, 13); + } else /* 48 kHz */ + { + mode->preemph[0] = QCONST16(0.8500061035f, 15); + mode->preemph[1] = QCONST16(0.0f, 15); + mode->preemph[2] = QCONST16(1.f, SIG_SHIFT); + mode->preemph[3] = QCONST16(1.f, 13); + } + + mode->maxLM = LM; + mode->nbShortMdcts = 1<<LM; + mode->shortMdctSize = frame_size/mode->nbShortMdcts; + res = (mode->Fs+mode->shortMdctSize)/(2*mode->shortMdctSize); + + mode->eBands = compute_ebands(Fs, mode->shortMdctSize, res, &mode->nbEBands); + if (mode->eBands==NULL) + goto failure; +#if !defined(SMALL_FOOTPRINT) + /* Make sure we don't allocate a band larger than our PVQ table. + 208 should be enough, but let's be paranoid. 
*/ + if ((mode->eBands[mode->nbEBands] - mode->eBands[mode->nbEBands-1])<<LM > + 208) { + goto failure; + } +#endif + + mode->effEBands = mode->nbEBands; + while (mode->eBands[mode->effEBands] > mode->shortMdctSize) + mode->effEBands--; + + /* Overlap must be divisible by 4 */ + mode->overlap = ((mode->shortMdctSize>>2)<<2); + + compute_allocation_table(mode); + if (mode->allocVectors==NULL) + goto failure; + + window = (opus_val16*)opus_alloc(mode->overlap*sizeof(opus_val16)); + if (window==NULL) + goto failure; + +#ifndef OPUS_FIXED_POINT + for (i=0;i<mode->overlap;i++) + window[i] = Q15ONE*sin(.5*M_PI* sin(.5*M_PI*(i+.5)/mode->overlap) * sin(.5*M_PI*(i+.5)/mode->overlap)); +#else + for (i=0;i<mode->overlap;i++) + window[i] = MIN32(32767,floor(.5+32768.*sin(.5*M_PI* sin(.5*M_PI*(i+.5)/mode->overlap) * sin(.5*M_PI*(i+.5)/mode->overlap)))); +#endif + mode->window = window; + + logN = (opus_int16*)opus_alloc(mode->nbEBands*sizeof(opus_int16)); + if (logN==NULL) + goto failure; + + for (i=0;i<mode->nbEBands;i++) + logN[i] = log2_frac(mode->eBands[i+1]-mode->eBands[i], BITRES); + mode->logN = logN; + + compute_pulse_cache(mode, mode->maxLM); + + if (clt_mdct_init(&mode->mdct, 2*mode->shortMdctSize*mode->nbShortMdcts, + mode->maxLM) == 0) + goto failure; + + if (error) + *error = OPUS_OK; + + return mode; +failure: + if (error) + *error = OPUS_ALLOC_FAIL; + if (mode!=NULL) + opus_custom_mode_destroy(mode); + return NULL; +#endif /* !CUSTOM_MODES */ +} + +#ifdef CUSTOM_MODES +void opus_custom_mode_destroy(CELTMode *mode) +{ + if (mode == NULL) + return; +#ifndef CUSTOM_MODES_ONLY + { + int i; + for (i=0;i<TOTAL_MODES;i++) + { + if (mode == static_mode_list[i]) + { + return; + } + } + } +#endif /* CUSTOM_MODES_ONLY */ + opus_free((opus_int16*)mode->eBands); + opus_free((opus_int16*)mode->allocVectors); + + opus_free((opus_val16*)mode->window); + opus_free((opus_int16*)mode->logN); + + opus_free((opus_int16*)mode->cache.index); + opus_free((unsigned 
char*)mode->cache.bits); + opus_free((unsigned char*)mode->cache.caps); + clt_mdct_clear(&mode->mdct); + + opus_free((CELTMode *)mode); +} +#endif diff --git a/drivers/opus/celt/opus_custom_demo.c b/drivers/opus/celt/opus_custom_demo.c new file mode 100644 index 0000000000..8c7f58b6e6 --- /dev/null +++ b/drivers/opus/celt/opus_custom_demo.c @@ -0,0 +1,210 @@ +/* Copyright (c) 2007-2008 CSIRO + Copyright (c) 2007-2009 Xiph.Org Foundation + Written by Jean-Marc Valin */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "opus_custom.h" +#include "arch.h" +#include <stdio.h> +#include <stdlib.h> +#include <math.h> +#include <string.h> + +#define MAX_PACKET 1275 + +int main(int argc, char *argv[]) +{ + int err; + char *inFile, *outFile; + FILE *fin, *fout; + OpusCustomMode *mode=NULL; + OpusCustomEncoder *enc; + OpusCustomDecoder *dec; + int len; + opus_int32 frame_size, channels, rate; + int bytes_per_packet; + unsigned char data[MAX_PACKET]; + int complexity; +#if !(defined (OPUS_FIXED_POINT) && !defined(CUSTOM_MODES)) && defined(RESYNTH) + int i; + double rmsd = 0; +#endif + int count = 0; + opus_int32 skip; + opus_int16 *in, *out; + if (argc != 9 && argc != 8 && argc != 7) + { + fprintf (stderr, "Usage: test_opus_custom <rate> <channels> <frame size> " + " <bytes per packet> [<complexity> [packet loss rate]] " + "<input> <output>\n"); + return 1; + } + + rate = (opus_int32)atol(argv[1]); + channels = atoi(argv[2]); + frame_size = atoi(argv[3]); + mode = opus_custom_mode_create(rate, frame_size, NULL); + if (mode == NULL) + { + fprintf(stderr, "failed to create a mode\n"); + return 1; + } + + bytes_per_packet = atoi(argv[4]); + if (bytes_per_packet < 0 || bytes_per_packet > MAX_PACKET) + { + fprintf (stderr, "bytes per packet must be between 0 and %d\n", + MAX_PACKET); + return 1; + } + + inFile = argv[argc-2]; + fin = fopen(inFile, "rb"); + if (!fin) + { + fprintf (stderr, "Could not open input file %s\n", argv[argc-2]); + return 1; + } + outFile = argv[argc-1]; + fout = fopen(outFile, "wb+"); + if (!fout) + { + fprintf (stderr, "Could not open output file %s\n", argv[argc-1]); + fclose(fin); + return 1; + } + + enc = opus_custom_encoder_create(mode, channels, &err); + if (err != 0) + { + fprintf(stderr, "Failed to create the encoder: %s\n", opus_strerror(err)); + fclose(fin); + fclose(fout); + return 1; + } + dec = opus_custom_decoder_create(mode, channels, &err); + if (err != 0) + { + 
fprintf(stderr, "Failed to create the decoder: %s\n", opus_strerror(err)); + fclose(fin); + fclose(fout); + return 1; + } + opus_custom_decoder_ctl(dec, OPUS_GET_LOOKAHEAD(&skip)); + + if (argc>7) + { + complexity=atoi(argv[5]); + opus_custom_encoder_ctl(enc,OPUS_SET_COMPLEXITY(complexity)); + } + + in = (opus_int16*)malloc(frame_size*channels*sizeof(opus_int16)); + out = (opus_int16*)malloc(frame_size*channels*sizeof(opus_int16)); + + while (!feof(fin)) + { + int ret; + err = fread(in, sizeof(short), frame_size*channels, fin); + if (feof(fin)) + break; + len = opus_custom_encode(enc, in, frame_size, data, bytes_per_packet); + if (len <= 0) + fprintf (stderr, "opus_custom_encode() failed: %s\n", opus_strerror(len)); + + /* This is for simulating bit errors */ +#if 0 + int errors = 0; + int eid = 0; + /* This simulates random bit error */ + for (i=0;i<len*8;i++) + { + if (rand()%atoi(argv[8])==0) + { + if (i<64) + { + errors++; + eid = i; + } + data[i/8] ^= 1<<(7-(i%8)); + } + } + if (errors == 1) + data[eid/8] ^= 1<<(7-(eid%8)); + else if (errors%2 == 1) + data[rand()%8] ^= 1<<rand()%8; +#endif + +#if 1 /* Set to zero to use the encoder's output instead */ + /* This is to simulate packet loss */ + if (argc==9 && rand()%1000<atoi(argv[argc-3])) + /*if (errors && (errors%2==0))*/ + ret = opus_custom_decode(dec, NULL, len, out, frame_size); + else + ret = opus_custom_decode(dec, data, len, out, frame_size); + if (ret < 0) + fprintf(stderr, "opus_custom_decode() failed: %s\n", opus_strerror(ret)); +#else + for (i=0;i<ret*channels;i++) + out[i] = in[i]; +#endif +#if !(defined (OPUS_FIXED_POINT) && !defined(CUSTOM_MODES)) && defined(RESYNTH) + for (i=0;i<ret*channels;i++) + { + rmsd += (in[i]-out[i])*1.0*(in[i]-out[i]); + /*out[i] -= in[i];*/ + } +#endif + count++; + fwrite(out+skip*channels, sizeof(short), (ret-skip)*channels, fout); + skip = 0; + } + PRINT_MIPS(stderr); + + opus_custom_encoder_destroy(enc); + opus_custom_decoder_destroy(dec); + fclose(fin); + 
fclose(fout); + opus_custom_mode_destroy(mode); + free(in); + free(out); +#if !(defined (OPUS_FIXED_POINT) && !defined(CUSTOM_MODES)) && defined(RESYNTH) + if (rmsd > 0) + { + rmsd = sqrt(rmsd/(1.0*frame_size*channels*count)); + fprintf (stderr, "Error: encoder doesn't match decoder\n"); + fprintf (stderr, "RMS mismatch is %f\n", rmsd); + return 1; + } else { + fprintf (stderr, "Encoder matches decoder!!\n"); + } +#endif + return 0; +} + diff --git a/drivers/opus/celt/opus_modes.h b/drivers/opus/celt/opus_modes.h new file mode 100644 index 0000000000..a1df46265e --- /dev/null +++ b/drivers/opus/celt/opus_modes.h @@ -0,0 +1,83 @@ +/* Copyright (c) 2007-2008 CSIRO + Copyright (c) 2007-2009 Xiph.Org Foundation + Copyright (c) 2008 Gregory Maxwell + Written by Jean-Marc Valin and Gregory Maxwell */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef OPUS_MODES_H
+#define OPUS_MODES_H
+
+#include "opus_types.h"
+#include "celt.h"
+#include "arch.h"
+#include "mdct.h"
+#include "entenc.h"
+#include "entdec.h"
+
+#define MAX_PERIOD 1024
+
+#ifndef OVERLAP
+#define OVERLAP(mode) ((mode)->overlap)
+#endif
+
+/* NOTE(review): struct OpusCustomMode below has no mdctSize member, so any
+   expansion of FRAMESIZE() would not compile. It looks like an unused
+   leftover -- confirm before relying on it. */
+#ifndef FRAMESIZE
+#define FRAMESIZE(mode) ((mode)->mdctSize)
+#endif
+
+/* Pulse cache tables of a mode; the three arrays are owned by the mode and
+   released by opus_custom_mode_destroy() for custom modes. */
+typedef struct {
+   int size;
+   const opus_int16 *index;
+   const unsigned char *bits;
+   const unsigned char *caps;
+} PulseCache;
+
+/** Mode definition (opaque)
+ @brief Mode definition
+ */
+struct OpusCustomMode {
+   opus_int32 Fs;          /**< Sampling rate the mode was created for */
+   int          overlap;   /**< Number of entries in window; shortMdctSize rounded down to a multiple of 4 for custom modes */
+
+   int          nbEBands;  /**< Number of bands; eBands holds nbEBands+1 edges */
+   int          effEBands; /**< Highest band index whose edge does not exceed shortMdctSize */
+   opus_val16    preemph[4];
+   const opus_int16   *eBands;   /**< Definition for each "pseudo-critical band" */
+
+   int         maxLM;          /**< log2 of nbShortMdcts */
+   int         nbShortMdcts;   /**< Number of short blocks per frame (1<<maxLM) */
+   int         shortMdctSize;  /**< Frame size divided by nbShortMdcts */
+
+   int          nbAllocVectors; /**< Number of lines in the matrix below */
+   const unsigned char   *allocVectors;   /**< Number of bits in each band for several rates */
+   const opus_int16 *logN;   /**< Per-band log2_frac() of the band width, nbEBands entries */
+
+   const opus_val16 *window; /**< Overlap window, overlap entries */
+   celt_mdct_lookup mdct;
+   PulseCache cache;
+};
+
+
+#endif
diff --git a/drivers/opus/celt/os_support.h b/drivers/opus/celt/os_support.h new file mode 100644 index 0000000000..5e47e3cff9 --- /dev/null +++ b/drivers/opus/celt/os_support.h @@ -0,0 +1,92 @@ +/* Copyright (C) 2007 Jean-Marc Valin + + File: os_support.h + This is the (tiny) OS abstraction layer. Aside from math.h, this is the + only place where system headers are allowed. 
+ + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, + INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef OS_SUPPORT_H +#define OS_SUPPORT_H + +#ifdef CUSTOM_SUPPORT +# include "custom_support.h" +#endif + +#include "opus_types.h" +#include "opus_defines.h" + +#include <string.h> +#include <stdio.h> +#include <stdlib.h> + +/** Opus wrapper for malloc(). 
To do your own dynamic allocation, all you need to do is replace this function and opus_free */
+#ifndef OVERRIDE_OPUS_ALLOC
+static OPUS_INLINE void *opus_alloc (size_t size)
+{
+   return malloc(size);
+}
+#endif
+
+/** Same as opus_alloc(), except that the area is only needed inside a CELT call (might cause problem with wideband though) */
+#ifndef OVERRIDE_OPUS_ALLOC_SCRATCH
+static OPUS_INLINE void *opus_alloc_scratch (size_t size)
+{
+   /* Scratch space doesn't need to be cleared */
+   return opus_alloc(size);
+}
+#endif
+
+/** Opus wrapper for free(). To do your own dynamic allocation, all you need to do is replace this function and opus_alloc */
+#ifndef OVERRIDE_OPUS_FREE
+static OPUS_INLINE void opus_free (void *ptr)
+{
+   free(ptr);
+}
+#endif
+
+/** Copy n elements (n*sizeof(*dst) bytes) of memory from src to dst. The 0* term provides compile-time type checking */
+#ifndef OVERRIDE_OPUS_COPY
+#define OPUS_COPY(dst, src, n) (memcpy((dst), (src), (n)*sizeof(*(dst)) + 0*((dst)-(src)) ))
+#endif
+
+/** Copy n elements (n*sizeof(*dst) bytes) of memory from src to dst, allowing overlapping regions. The 0* term
+    provides compile-time type checking */
+#ifndef OVERRIDE_OPUS_MOVE
+#define OPUS_MOVE(dst, src, n) (memmove((dst), (src), (n)*sizeof(*(dst)) + 0*((dst)-(src)) ))
+#endif
+
+/** Set n elements of dst to zero, starting at address dst */
+#ifndef OVERRIDE_OPUS_CLEAR
+#define OPUS_CLEAR(dst, n) (memset((dst), 0, (n)*sizeof(*(dst))))
+#endif
+
+/*#ifdef __GNUC__
+#pragma GCC poison printf sprintf
+#pragma GCC poison malloc free realloc calloc
+#endif*/
+
+#endif /* OS_SUPPORT_H */
+
diff --git a/drivers/opus/celt/pitch.c b/drivers/opus/celt/pitch.c new file mode 100644 index 0000000000..48cd02fb2b --- /dev/null +++ b/drivers/opus/celt/pitch.c @@ -0,0 +1,537 @@ +/* Copyright (c) 2007-2008 CSIRO + Copyright (c) 2007-2009 Xiph.Org Foundation + Written by Jean-Marc Valin */ +/** + @file pitch.c + @brief Pitch analysis + */ + +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "pitch.h" +#include "os_support.h" +#include "opus_modes.h" +#include "stack_alloc.h" +#include "mathops.h" +#include "celt_lpc.h" + +static void find_best_pitch(opus_val32 *xcorr, opus_val16 *y, int len, + int max_pitch, int *best_pitch +#ifdef OPUS_FIXED_POINT + , int yshift, opus_val32 maxcorr +#endif + ) +{ + int i, j; + opus_val32 Syy=1; + opus_val16 best_num[2]; + opus_val32 best_den[2]; +#ifdef OPUS_FIXED_POINT + int xshift; + + xshift = celt_ilog2(maxcorr)-14; +#endif + + best_num[0] = -1; + best_num[1] = -1; + best_den[0] = 0; + best_den[1] = 0; + best_pitch[0] = 0; + best_pitch[1] = 1; + for (j=0;j<len;j++) + Syy = ADD32(Syy, SHR32(MULT16_16(y[j],y[j]), yshift)); + for (i=0;i<max_pitch;i++) + { + if (xcorr[i]>0) + { + opus_val16 num; + opus_val32 xcorr16; + xcorr16 = EXTRACT16(VSHR32(xcorr[i], xshift)); +#ifndef OPUS_FIXED_POINT + /* Considering the range of xcorr16, this should avoid both underflows + and overflows (inf) when squaring xcorr16 */ + xcorr16 *= 1e-12f; +#endif + num = MULT16_16_Q15(xcorr16,xcorr16); + if (MULT16_32_Q15(num,best_den[1]) > MULT16_32_Q15(best_num[1],Syy)) + { + if (MULT16_32_Q15(num,best_den[0]) > MULT16_32_Q15(best_num[0],Syy)) + { + best_num[1] = best_num[0]; + best_den[1] = best_den[0]; + best_pitch[1] = best_pitch[0]; + best_num[0] = num; + best_den[0] = Syy; + best_pitch[0] = i; + } else { + best_num[1] = num; + best_den[1] = 
Syy; + best_pitch[1] = i; + } + } + } + Syy += SHR32(MULT16_16(y[i+len],y[i+len]),yshift) - SHR32(MULT16_16(y[i],y[i]),yshift); + Syy = MAX32(1, Syy); + } +} + +static void celt_fir5(const opus_val16 *x, + const opus_val16 *num, + opus_val16 *y, + int N, + opus_val16 *mem) +{ + int i; + opus_val16 num0, num1, num2, num3, num4; + opus_val32 mem0, mem1, mem2, mem3, mem4; + num0=num[0]; + num1=num[1]; + num2=num[2]; + num3=num[3]; + num4=num[4]; + mem0=mem[0]; + mem1=mem[1]; + mem2=mem[2]; + mem3=mem[3]; + mem4=mem[4]; + for (i=0;i<N;i++) + { + opus_val32 sum = SHL32(EXTEND32(x[i]), SIG_SHIFT); + sum = MAC16_16(sum,num0,mem0); + sum = MAC16_16(sum,num1,mem1); + sum = MAC16_16(sum,num2,mem2); + sum = MAC16_16(sum,num3,mem3); + sum = MAC16_16(sum,num4,mem4); + mem4 = mem3; + mem3 = mem2; + mem2 = mem1; + mem1 = mem0; + mem0 = x[i]; + y[i] = ROUND16(sum, SIG_SHIFT); + } + mem[0]=mem0; + mem[1]=mem1; + mem[2]=mem2; + mem[3]=mem3; + mem[4]=mem4; +} + + +void pitch_downsample(celt_sig * OPUS_RESTRICT x[], opus_val16 * OPUS_RESTRICT x_lp, + int len, int C, int arch) +{ + int i; + opus_val32 ac[5]; + opus_val16 tmp=Q15ONE; + opus_val16 lpc[4], mem[5]={0,0,0,0,0}; + opus_val16 lpc2[5]; + opus_val16 c1 = QCONST16(.8f,15); +#ifdef OPUS_FIXED_POINT + int shift; + opus_val32 maxabs = celt_maxabs32(x[0], len); + if (C==2) + { + opus_val32 maxabs_1 = celt_maxabs32(x[1], len); + maxabs = MAX32(maxabs, maxabs_1); + } + if (maxabs<1) + maxabs=1; + shift = celt_ilog2(maxabs)-10; + if (shift<0) + shift=0; + if (C==2) + shift++; +#endif + for (i=1;i<len>>1;i++) + x_lp[i] = SHR32(HALF32(HALF32(x[0][(2*i-1)]+x[0][(2*i+1)])+x[0][2*i]), shift); + x_lp[0] = SHR32(HALF32(HALF32(x[0][1])+x[0][0]), shift); + if (C==2) + { + for (i=1;i<len>>1;i++) + x_lp[i] += SHR32(HALF32(HALF32(x[1][(2*i-1)]+x[1][(2*i+1)])+x[1][2*i]), shift); + x_lp[0] += SHR32(HALF32(HALF32(x[1][1])+x[1][0]), shift); + } + + _celt_autocorr(x_lp, ac, NULL, 0, + 4, len>>1, arch); + + /* Noise floor -40 dB */ +#ifdef 
OPUS_FIXED_POINT + ac[0] += SHR32(ac[0],13); +#else + ac[0] *= 1.0001f; +#endif + /* Lag windowing */ + for (i=1;i<=4;i++) + { + /*ac[i] *= exp(-.5*(2*M_PI*.002*i)*(2*M_PI*.002*i));*/ +#ifdef OPUS_FIXED_POINT + ac[i] -= MULT16_32_Q15(2*i*i, ac[i]); +#else + ac[i] -= ac[i]*(.008f*i)*(.008f*i); +#endif + } + + _celt_lpc(lpc, ac, 4); + for (i=0;i<4;i++) + { + tmp = MULT16_16_Q15(QCONST16(.9f,15), tmp); + lpc[i] = MULT16_16_Q15(lpc[i], tmp); + } + /* Add a zero */ + lpc2[0] = lpc[0] + QCONST16(.8f,SIG_SHIFT); + lpc2[1] = lpc[1] + MULT16_16_Q15(c1,lpc[0]); + lpc2[2] = lpc[2] + MULT16_16_Q15(c1,lpc[1]); + lpc2[3] = lpc[3] + MULT16_16_Q15(c1,lpc[2]); + lpc2[4] = MULT16_16_Q15(c1,lpc[3]); + celt_fir5(x_lp, lpc2, x_lp, len>>1, mem); +} + +#if 0 /* This is a simple version of the pitch correlation that should work + well on DSPs like Blackfin and TI C5x/C6x */ + +#ifdef OPUS_FIXED_POINT +opus_val32 +#else +void +#endif +celt_pitch_xcorr(opus_val16 *x, opus_val16 *y, opus_val32 *xcorr, int len, int max_pitch) +{ + int i, j; +#ifdef OPUS_FIXED_POINT + opus_val32 maxcorr=1; +#endif + for (i=0;i<max_pitch;i++) + { + opus_val32 sum = 0; + for (j=0;j<len;j++) + sum = MAC16_16(sum, x[j],y[i+j]); + xcorr[i] = sum; +#ifdef OPUS_FIXED_POINT + maxcorr = MAX32(maxcorr, sum); +#endif + } +#ifdef OPUS_FIXED_POINT + return maxcorr; +#endif +} + +#else /* Unrolled version of the pitch correlation -- runs faster on x86 and ARM */ + +#ifdef OPUS_FIXED_POINT +opus_val32 +#else +void +#endif +celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y, opus_val32 *xcorr, int len, int max_pitch) +{ + int i,j; + /*The EDSP version requires that max_pitch is at least 1, and that _x is + 32-bit aligned. 
+ Since it's hard to put asserts in assembly, put them here.*/ + celt_assert(max_pitch>0); + celt_assert((((unsigned char *)_x-(unsigned char *)NULL)&3)==0); +#ifdef OPUS_FIXED_POINT + opus_val32 maxcorr=1; +#endif + for (i=0;i<max_pitch-3;i+=4) + { + opus_val32 sum[4]={0,0,0,0}; + xcorr_kernel(_x, _y+i, sum, len); + xcorr[i]=sum[0]; + xcorr[i+1]=sum[1]; + xcorr[i+2]=sum[2]; + xcorr[i+3]=sum[3]; +#ifdef OPUS_FIXED_POINT + sum[0] = MAX32(sum[0], sum[1]); + sum[2] = MAX32(sum[2], sum[3]); + sum[0] = MAX32(sum[0], sum[2]); + maxcorr = MAX32(maxcorr, sum[0]); +#endif + } + /* In case max_pitch isn't a multiple of 4, do non-unrolled version. */ + for (;i<max_pitch;i++) + { + opus_val32 sum = 0; + for (j=0;j<len;j++) + sum = MAC16_16(sum, _x[j],_y[i+j]); + xcorr[i] = sum; +#ifdef OPUS_FIXED_POINT + maxcorr = MAX32(maxcorr, sum); +#endif + } +#ifdef OPUS_FIXED_POINT + return maxcorr; +#endif +} + +#endif +void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTRICT y, + int len, int max_pitch, int *pitch, int arch) +{ + int i, j; + int lag; + int best_pitch[2]={0,0}; + VARDECL(opus_val16, x_lp4); + VARDECL(opus_val16, y_lp4); + VARDECL(opus_val32, xcorr); +#ifdef OPUS_FIXED_POINT + opus_val32 maxcorr; + opus_val32 xmax, ymax; + int shift=0; +#endif + int offset; + + SAVE_STACK; + + celt_assert(len>0); + celt_assert(max_pitch>0); + lag = len+max_pitch; + + ALLOC(x_lp4, len>>2, opus_val16); + ALLOC(y_lp4, lag>>2, opus_val16); + ALLOC(xcorr, max_pitch>>1, opus_val32); + + /* Downsample by 2 again */ + for (j=0;j<len>>2;j++) + x_lp4[j] = x_lp[2*j]; + for (j=0;j<lag>>2;j++) + y_lp4[j] = y[2*j]; + +#ifdef OPUS_FIXED_POINT + xmax = celt_maxabs16(x_lp4, len>>2); + ymax = celt_maxabs16(y_lp4, lag>>2); + shift = celt_ilog2(MAX32(1, MAX32(xmax, ymax)))-11; + if (shift>0) + { + for (j=0;j<len>>2;j++) + x_lp4[j] = SHR16(x_lp4[j], shift); + for (j=0;j<lag>>2;j++) + y_lp4[j] = SHR16(y_lp4[j], shift); + /* Use double the shift for a MAC */ + shift *= 2; + } else { 
+ shift = 0; + } +#endif + + /* Coarse search with 4x decimation */ + +#ifdef OPUS_FIXED_POINT + maxcorr = +#endif + celt_pitch_xcorr(x_lp4, y_lp4, xcorr, len>>2, max_pitch>>2, arch); + + find_best_pitch(xcorr, y_lp4, len>>2, max_pitch>>2, best_pitch +#ifdef OPUS_FIXED_POINT + , 0, maxcorr +#endif + ); + + /* Finer search with 2x decimation */ +#ifdef OPUS_FIXED_POINT + maxcorr=1; +#endif + for (i=0;i<max_pitch>>1;i++) + { + opus_val32 sum=0; + xcorr[i] = 0; + if (abs(i-2*best_pitch[0])>2 && abs(i-2*best_pitch[1])>2) + continue; + for (j=0;j<len>>1;j++) + sum += SHR32(MULT16_16(x_lp[j],y[i+j]), shift); + xcorr[i] = MAX32(-1, sum); +#ifdef OPUS_FIXED_POINT + maxcorr = MAX32(maxcorr, sum); +#endif + } + find_best_pitch(xcorr, y, len>>1, max_pitch>>1, best_pitch +#ifdef OPUS_FIXED_POINT + , shift+1, maxcorr +#endif + ); + + /* Refine by pseudo-interpolation */ + if (best_pitch[0]>0 && best_pitch[0]<(max_pitch>>1)-1) + { + opus_val32 a, b, c; + a = xcorr[best_pitch[0]-1]; + b = xcorr[best_pitch[0]]; + c = xcorr[best_pitch[0]+1]; + if ((c-a) > MULT16_32_Q15(QCONST16(.7f,15),b-a)) + offset = 1; + else if ((a-c) > MULT16_32_Q15(QCONST16(.7f,15),b-c)) + offset = -1; + else + offset = 0; + } else { + offset = 0; + } + *pitch = 2*best_pitch[0]-offset; + + RESTORE_STACK; +} + +static const int second_check[16] = {0, 0, 3, 2, 3, 2, 5, 2, 3, 2, 3, 2, 5, 2, 3, 2}; +opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod, + int N, int *T0_, int prev_period, opus_val16 prev_gain) +{ + int k, i, T, T0; + opus_val16 g, g0; + opus_val16 pg; + opus_val32 xy,xx,yy,xy2; + opus_val32 xcorr[3]; + opus_val32 best_xy, best_yy; + int offset; + int minperiod0; + VARDECL(opus_val32, yy_lookup); + SAVE_STACK; + + minperiod0 = minperiod; + maxperiod /= 2; + minperiod /= 2; + *T0_ /= 2; + prev_period /= 2; + N /= 2; + x += maxperiod; + if (*T0_>=maxperiod) + *T0_=maxperiod-1; + + T = T0 = *T0_; + ALLOC(yy_lookup, maxperiod+1, opus_val32); + dual_inner_prod(x, x, x-T0, N, &xx, &xy); 
+ yy_lookup[0] = xx; + yy=xx; + for (i=1;i<=maxperiod;i++) + { + yy = yy+MULT16_16(x[-i],x[-i])-MULT16_16(x[N-i],x[N-i]); + yy_lookup[i] = MAX32(0, yy); + } + yy = yy_lookup[T0]; + best_xy = xy; + best_yy = yy; +#ifdef OPUS_FIXED_POINT + { + opus_val32 x2y2; + int sh, t; + x2y2 = 1+HALF32(MULT32_32_Q31(xx,yy)); + sh = celt_ilog2(x2y2)>>1; + t = VSHR32(x2y2, 2*(sh-7)); + g = g0 = VSHR32(MULT16_32_Q15(celt_rsqrt_norm(t), xy),sh+1); + } +#else + g = g0 = xy/celt_sqrt(1+xx*yy); +#endif + /* Look for any pitch at T/k */ + for (k=2;k<=15;k++) + { + int T1, T1b; + opus_val16 g1; + opus_val16 cont=0; + opus_val16 thresh; + T1 = (2*T0+k)/(2*k); + if (T1 < minperiod) + break; + /* Look for another strong correlation at T1b */ + if (k==2) + { + if (T1+T0>maxperiod) + T1b = T0; + else + T1b = T0+T1; + } else + { + T1b = (2*second_check[k]*T0+k)/(2*k); + } + dual_inner_prod(x, &x[-T1], &x[-T1b], N, &xy, &xy2); + xy += xy2; + yy = yy_lookup[T1] + yy_lookup[T1b]; +#ifdef OPUS_FIXED_POINT + { + opus_val32 x2y2; + int sh, t; + x2y2 = 1+MULT32_32_Q31(xx,yy); + sh = celt_ilog2(x2y2)>>1; + t = VSHR32(x2y2, 2*(sh-7)); + g1 = VSHR32(MULT16_32_Q15(celt_rsqrt_norm(t), xy),sh+1); + } +#else + g1 = xy/celt_sqrt(1+2.f*xx*1.f*yy); +#endif + if (abs(T1-prev_period)<=1) + cont = prev_gain; + else if (abs(T1-prev_period)<=2 && 5*k*k < T0) + cont = HALF32(prev_gain); + else + cont = 0; + thresh = MAX16(QCONST16(.3f,15), MULT16_16_Q15(QCONST16(.7f,15),g0)-cont); + /* Bias against very high pitch (very short period) to avoid false-positives + due to short-term correlation */ + if (T1<3*minperiod) + thresh = MAX16(QCONST16(.4f,15), MULT16_16_Q15(QCONST16(.85f,15),g0)-cont); + else if (T1<2*minperiod) + thresh = MAX16(QCONST16(.5f,15), MULT16_16_Q15(QCONST16(.9f,15),g0)-cont); + if (g1 > thresh) + { + best_xy = xy; + best_yy = yy; + T = T1; + g = g1; + } + } + best_xy = MAX32(0, best_xy); + if (best_yy <= best_xy) + pg = Q15ONE; + else + pg = SHR32(frac_div32(best_xy,best_yy+1),16); + + for 
(k=0;k<3;k++) + { + int T1 = T+k-1; + xy = 0; + for (i=0;i<N;i++) + xy = MAC16_16(xy, x[i], x[i-T1]); + xcorr[k] = xy; + } + if ((xcorr[2]-xcorr[0]) > MULT16_32_Q15(QCONST16(.7f,15),xcorr[1]-xcorr[0])) + offset = 1; + else if ((xcorr[0]-xcorr[2]) > MULT16_32_Q15(QCONST16(.7f,15),xcorr[1]-xcorr[2])) + offset = -1; + else + offset = 0; + if (pg > g) + pg = g; + *T0_ = 2*T+offset; + + if (*T0_<minperiod0) + *T0_=minperiod0; + RESTORE_STACK; + return pg; +} diff --git a/drivers/opus/celt/pitch.h b/drivers/opus/celt/pitch.h new file mode 100644 index 0000000000..3a7d305425 --- /dev/null +++ b/drivers/opus/celt/pitch.h @@ -0,0 +1,173 @@ +/* Copyright (c) 2007-2008 CSIRO + Copyright (c) 2007-2009 Xiph.Org Foundation + Written by Jean-Marc Valin */ +/** + @file pitch.h + @brief Pitch analysis + */ + +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef PITCH_H +#define PITCH_H + +#include "opus_modes.h" +#include "cpu_support.h" + +#if defined(__SSE__) && !defined(OPUS_FIXED_POINT) +#include "x86/pitch_sse.h" +#endif + +#if defined(OPUS_ARM_ASM) && defined(OPUS_FIXED_POINT) +# include "arm/pitch_arm.h" +#endif + +void pitch_downsample(celt_sig * OPUS_RESTRICT x[], opus_val16 * OPUS_RESTRICT x_lp, + int len, int C, int arch); + +void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTRICT y, + int len, int max_pitch, int *pitch, int arch); + +opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod, + int N, int *T0, int prev_period, opus_val16 prev_gain); + +/* OPT: This is the kernel you really want to optimize. It gets used a lot + by the prefilter and by the PLC. 
*/ +#ifndef OVERRIDE_XCORR_KERNEL +static OPUS_INLINE void xcorr_kernel(const opus_val16 * x, const opus_val16 * y, opus_val32 sum[4], int len) +{ + int j; + opus_val16 y_0, y_1, y_2, y_3; + celt_assert(len>=3); + y_3=0; /* gcc doesn't realize that y_3 can't be used uninitialized */ + y_0=*y++; + y_1=*y++; + y_2=*y++; + for (j=0;j<len-3;j+=4) + { + opus_val16 tmp; + tmp = *x++; + y_3=*y++; + sum[0] = MAC16_16(sum[0],tmp,y_0); + sum[1] = MAC16_16(sum[1],tmp,y_1); + sum[2] = MAC16_16(sum[2],tmp,y_2); + sum[3] = MAC16_16(sum[3],tmp,y_3); + tmp=*x++; + y_0=*y++; + sum[0] = MAC16_16(sum[0],tmp,y_1); + sum[1] = MAC16_16(sum[1],tmp,y_2); + sum[2] = MAC16_16(sum[2],tmp,y_3); + sum[3] = MAC16_16(sum[3],tmp,y_0); + tmp=*x++; + y_1=*y++; + sum[0] = MAC16_16(sum[0],tmp,y_2); + sum[1] = MAC16_16(sum[1],tmp,y_3); + sum[2] = MAC16_16(sum[2],tmp,y_0); + sum[3] = MAC16_16(sum[3],tmp,y_1); + tmp=*x++; + y_2=*y++; + sum[0] = MAC16_16(sum[0],tmp,y_3); + sum[1] = MAC16_16(sum[1],tmp,y_0); + sum[2] = MAC16_16(sum[2],tmp,y_1); + sum[3] = MAC16_16(sum[3],tmp,y_2); + } + if (j++<len) + { + opus_val16 tmp = *x++; + y_3=*y++; + sum[0] = MAC16_16(sum[0],tmp,y_0); + sum[1] = MAC16_16(sum[1],tmp,y_1); + sum[2] = MAC16_16(sum[2],tmp,y_2); + sum[3] = MAC16_16(sum[3],tmp,y_3); + } + if (j++<len) + { + opus_val16 tmp=*x++; + y_0=*y++; + sum[0] = MAC16_16(sum[0],tmp,y_1); + sum[1] = MAC16_16(sum[1],tmp,y_2); + sum[2] = MAC16_16(sum[2],tmp,y_3); + sum[3] = MAC16_16(sum[3],tmp,y_0); + } + if (j<len) + { + opus_val16 tmp=*x++; + y_1=*y++; + sum[0] = MAC16_16(sum[0],tmp,y_2); + sum[1] = MAC16_16(sum[1],tmp,y_3); + sum[2] = MAC16_16(sum[2],tmp,y_0); + sum[3] = MAC16_16(sum[3],tmp,y_1); + } +} +#endif /* OVERRIDE_XCORR_KERNEL */ + +#ifndef OVERRIDE_DUAL_INNER_PROD +static OPUS_INLINE void dual_inner_prod(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02, + int N, opus_val32 *xy1, opus_val32 *xy2) +{ + int i; + opus_val32 xy01=0; + opus_val32 xy02=0; + for (i=0;i<N;i++) + { + xy01 = 
MAC16_16(xy01, x[i], y01[i]); + xy02 = MAC16_16(xy02, x[i], y02[i]); + } + *xy1 = xy01; + *xy2 = xy02; +} +#endif + +#ifdef OPUS_FIXED_POINT +opus_val32 +#else +void +#endif +celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y, + opus_val32 *xcorr, int len, int max_pitch); + +#if !defined(OVERRIDE_PITCH_XCORR) +/*Is run-time CPU detection enabled on this platform?*/ +# if defined(OPUS_HAVE_RTCD) +extern +# if defined(OPUS_FIXED_POINT) +opus_val32 +# else +void +# endif +(*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *, + const opus_val16 *, opus_val32 *, int, int); + +# define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \ + ((*CELT_PITCH_XCORR_IMPL[(arch)&OPUS_ARCHMASK])(_x, _y, \ + xcorr, len, max_pitch)) +# else +# define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \ + ((void)(arch),celt_pitch_xcorr_c(_x, _y, xcorr, len, max_pitch)) +# endif +#endif + +#endif diff --git a/drivers/opus/celt/quant_bands.c b/drivers/opus/celt/quant_bands.c new file mode 100644 index 0000000000..0a170e850d --- /dev/null +++ b/drivers/opus/celt/quant_bands.c @@ -0,0 +1,556 @@ +/* Copyright (c) 2007-2008 CSIRO + Copyright (c) 2007-2009 Xiph.Org Foundation + Written by Jean-Marc Valin */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "quant_bands.h" +#include "laplace.h" +#include <math.h> +#include "os_support.h" +#include "arch.h" +#include "mathops.h" +#include "stack_alloc.h" +#include "rate.h" + +#ifdef OPUS_FIXED_POINT +/* Mean energy in each band quantized in Q4 */ +const signed char eMeans[25] = { + 103,100, 92, 85, 81, + 77, 72, 70, 78, 75, + 73, 71, 78, 74, 69, + 72, 70, 74, 76, 71, + 60, 60, 60, 60, 60 +}; +#else +/* Mean energy in each band quantized in Q4 and converted back to float */ +const opus_val16 eMeans[25] = { + 6.437500f, 6.250000f, 5.750000f, 5.312500f, 5.062500f, + 4.812500f, 4.500000f, 4.375000f, 4.875000f, 4.687500f, + 4.562500f, 4.437500f, 4.875000f, 4.625000f, 4.312500f, + 4.500000f, 4.375000f, 4.625000f, 4.750000f, 4.437500f, + 3.750000f, 3.750000f, 3.750000f, 3.750000f, 3.750000f +}; +#endif +/* prediction coefficients: 0.9, 0.8, 0.65, 0.5 */ +#ifdef OPUS_FIXED_POINT +static const opus_val16 pred_coef[4] = {29440, 26112, 21248, 16384}; +static const opus_val16 beta_coef[4] = {30147, 22282, 12124, 6554}; +static const opus_val16 beta_intra = 4915; +#else +static const opus_val16 pred_coef[4] = {29440/32768., 26112/32768., 21248/32768., 16384/32768.}; +static const opus_val16 beta_coef[4] = {30147/32768., 22282/32768., 12124/32768., 6554/32768.}; +static const opus_val16 beta_intra = 4915/32768.; +#endif + +/*Parameters of the Laplace-like probability models used for the 
coarse energy. + There is one pair of parameters for each frame size, prediction type + (inter/intra), and band number. + The first number of each pair is the probability of 0, and the second is the + decay rate, both in Q8 precision.*/ +static const unsigned char e_prob_model[4][2][42] = { + /*120 sample frames.*/ + { + /*Inter*/ + { + 72, 127, 65, 129, 66, 128, 65, 128, 64, 128, 62, 128, 64, 128, + 64, 128, 92, 78, 92, 79, 92, 78, 90, 79, 116, 41, 115, 40, + 114, 40, 132, 26, 132, 26, 145, 17, 161, 12, 176, 10, 177, 11 + }, + /*Intra*/ + { + 24, 179, 48, 138, 54, 135, 54, 132, 53, 134, 56, 133, 55, 132, + 55, 132, 61, 114, 70, 96, 74, 88, 75, 88, 87, 74, 89, 66, + 91, 67, 100, 59, 108, 50, 120, 40, 122, 37, 97, 43, 78, 50 + } + }, + /*240 sample frames.*/ + { + /*Inter*/ + { + 83, 78, 84, 81, 88, 75, 86, 74, 87, 71, 90, 73, 93, 74, + 93, 74, 109, 40, 114, 36, 117, 34, 117, 34, 143, 17, 145, 18, + 146, 19, 162, 12, 165, 10, 178, 7, 189, 6, 190, 8, 177, 9 + }, + /*Intra*/ + { + 23, 178, 54, 115, 63, 102, 66, 98, 69, 99, 74, 89, 71, 91, + 73, 91, 78, 89, 86, 80, 92, 66, 93, 64, 102, 59, 103, 60, + 104, 60, 117, 52, 123, 44, 138, 35, 133, 31, 97, 38, 77, 45 + } + }, + /*480 sample frames.*/ + { + /*Inter*/ + { + 61, 90, 93, 60, 105, 42, 107, 41, 110, 45, 116, 38, 113, 38, + 112, 38, 124, 26, 132, 27, 136, 19, 140, 20, 155, 14, 159, 16, + 158, 18, 170, 13, 177, 10, 187, 8, 192, 6, 175, 9, 159, 10 + }, + /*Intra*/ + { + 21, 178, 59, 110, 71, 86, 75, 85, 84, 83, 91, 66, 88, 73, + 87, 72, 92, 75, 98, 72, 105, 58, 107, 54, 115, 52, 114, 55, + 112, 56, 129, 51, 132, 40, 150, 33, 140, 29, 98, 35, 77, 42 + } + }, + /*960 sample frames.*/ + { + /*Inter*/ + { + 42, 121, 96, 66, 108, 43, 111, 40, 117, 44, 123, 32, 120, 36, + 119, 33, 127, 33, 134, 34, 139, 21, 147, 23, 152, 20, 158, 25, + 154, 26, 166, 21, 173, 16, 184, 13, 184, 10, 150, 13, 139, 15 + }, + /*Intra*/ + { + 22, 178, 63, 114, 74, 82, 84, 83, 92, 82, 103, 62, 96, 72, + 96, 67, 101, 73, 107, 72, 113, 55, 118, 52, 
125, 52, 118, 52, + 117, 55, 135, 49, 137, 39, 157, 32, 145, 29, 97, 33, 77, 40 + } + } +}; + +static const unsigned char small_energy_icdf[3]={2,1,0}; + +static opus_val32 loss_distortion(const opus_val16 *eBands, opus_val16 *oldEBands, int start, int end, int len, int C) +{ + int c, i; + opus_val32 dist = 0; + c=0; do { + for (i=start;i<end;i++) + { + opus_val16 d = SUB16(SHR16(eBands[i+c*len], 3), SHR16(oldEBands[i+c*len], 3)); + dist = MAC16_16(dist, d,d); + } + } while (++c<C); + return MIN32(200,SHR32(dist,2*DB_SHIFT-6)); +} + +static int quant_coarse_energy_impl(const CELTMode *m, int start, int end, + const opus_val16 *eBands, opus_val16 *oldEBands, + opus_int32 budget, opus_int32 tell, + const unsigned char *prob_model, opus_val16 *error, ec_enc *enc, + int C, int LM, int intra, opus_val16 max_decay, int lfe) +{ + int i, c; + int badness = 0; + opus_val32 prev[2] = {0,0}; + opus_val16 coef; + opus_val16 beta; + + if (tell+3 <= budget) + ec_enc_bit_logp(enc, intra, 3); + if (intra) + { + coef = 0; + beta = beta_intra; + } else { + beta = beta_coef[LM]; + coef = pred_coef[LM]; + } + + /* Encode at a fixed coarse resolution */ + for (i=start;i<end;i++) + { + c=0; + do { + int bits_left; + int qi, qi0; + opus_val32 q; + opus_val16 x; + opus_val32 f, tmp; + opus_val16 oldE; + opus_val16 decay_bound; + x = eBands[i+c*m->nbEBands]; + oldE = MAX16(-QCONST16(9.f,DB_SHIFT), oldEBands[i+c*m->nbEBands]); +#ifdef OPUS_FIXED_POINT + f = SHL32(EXTEND32(x),7) - PSHR32(MULT16_16(coef,oldE), 8) - prev[c]; + /* Rounding to nearest integer here is really important! */ + qi = (f+QCONST32(.5f,DB_SHIFT+7))>>(DB_SHIFT+7); + decay_bound = EXTRACT16(MAX32(-QCONST16(28.f,DB_SHIFT), + SUB32((opus_val32)oldEBands[i+c*m->nbEBands],max_decay))); +#else + f = x-coef*oldE-prev[c]; + /* Rounding to nearest integer here is really important! 
*/ + qi = (int)floor(.5f+f); + decay_bound = MAX16(-QCONST16(28.f,DB_SHIFT), oldEBands[i+c*m->nbEBands]) - max_decay; +#endif + /* Prevent the energy from going down too quickly (e.g. for bands + that have just one bin) */ + if (qi < 0 && x < decay_bound) + { + qi += (int)SHR16(SUB16(decay_bound,x), DB_SHIFT); + if (qi > 0) + qi = 0; + } + qi0 = qi; + /* If we don't have enough bits to encode all the energy, just assume + something safe. */ + tell = ec_tell(enc); + bits_left = budget-tell-3*C*(end-i); + if (i!=start && bits_left < 30) + { + if (bits_left < 24) + qi = IMIN(1, qi); + if (bits_left < 16) + qi = IMAX(-1, qi); + } + if (lfe && i>=2) + qi = IMIN(qi, 0); + if (budget-tell >= 15) + { + int pi; + pi = 2*IMIN(i,20); + ec_laplace_encode(enc, &qi, + prob_model[pi]<<7, prob_model[pi+1]<<6); + } + else if(budget-tell >= 2) + { + qi = IMAX(-1, IMIN(qi, 1)); + ec_enc_icdf(enc, 2*qi^-(qi<0), small_energy_icdf, 2); + } + else if(budget-tell >= 1) + { + qi = IMIN(0, qi); + ec_enc_bit_logp(enc, -qi, 1); + } + else + qi = -1; + error[i+c*m->nbEBands] = PSHR32(f,7) - SHL16(qi,DB_SHIFT); + badness += abs(qi0-qi); + q = (opus_val32)SHL32(EXTEND32(qi),DB_SHIFT); + + tmp = PSHR32(MULT16_16(coef,oldE),8) + prev[c] + SHL32(q,7); +#ifdef OPUS_FIXED_POINT + tmp = MAX32(-QCONST32(28.f, DB_SHIFT+7), tmp); +#endif + oldEBands[i+c*m->nbEBands] = PSHR32(tmp, 7); + prev[c] = prev[c] + SHL32(q,7) - MULT16_16(beta,PSHR32(q,8)); + } while (++c < C); + } + return lfe ? 
0 : badness; +} + +void quant_coarse_energy(const CELTMode *m, int start, int end, int effEnd, + const opus_val16 *eBands, opus_val16 *oldEBands, opus_uint32 budget, + opus_val16 *error, ec_enc *enc, int C, int LM, int nbAvailableBytes, + int force_intra, opus_val32 *delayedIntra, int two_pass, int loss_rate, int lfe) +{ + int intra; + opus_val16 max_decay; + VARDECL(opus_val16, oldEBands_intra); + VARDECL(opus_val16, error_intra); + ec_enc enc_start_state; + opus_uint32 tell; + int badness1=0; + opus_int32 intra_bias; + opus_val32 new_distortion; + SAVE_STACK; + + intra = force_intra || (!two_pass && *delayedIntra>2*C*(end-start) && nbAvailableBytes > (end-start)*C); + intra_bias = (opus_int32)((budget**delayedIntra*loss_rate)/(C*512)); + new_distortion = loss_distortion(eBands, oldEBands, start, effEnd, m->nbEBands, C); + + tell = ec_tell(enc); + if (tell+3 > budget) + two_pass = intra = 0; + + max_decay = QCONST16(16.f,DB_SHIFT); + if (end-start>10) + { +#ifdef OPUS_FIXED_POINT + max_decay = MIN32(max_decay, SHL32(EXTEND32(nbAvailableBytes),DB_SHIFT-3)); +#else + max_decay = MIN32(max_decay, .125f*nbAvailableBytes); +#endif + } + if (lfe) + max_decay=3; + enc_start_state = *enc; + + ALLOC(oldEBands_intra, C*m->nbEBands, opus_val16); + ALLOC(error_intra, C*m->nbEBands, opus_val16); + OPUS_COPY(oldEBands_intra, oldEBands, C*m->nbEBands); + + if (two_pass || intra) + { + badness1 = quant_coarse_energy_impl(m, start, end, eBands, oldEBands_intra, budget, + tell, e_prob_model[LM][1], error_intra, enc, C, LM, 1, max_decay, lfe); + } + + if (!intra) + { + unsigned char *intra_buf; + ec_enc enc_intra_state; + opus_int32 tell_intra; + opus_uint32 nstart_bytes; + opus_uint32 nintra_bytes; + opus_uint32 save_bytes; + int badness2; + VARDECL(unsigned char, intra_bits); + + tell_intra = ec_tell_frac(enc); + + enc_intra_state = *enc; + + nstart_bytes = ec_range_bytes(&enc_start_state); + nintra_bytes = ec_range_bytes(&enc_intra_state); + intra_buf = 
ec_get_buffer(&enc_intra_state) + nstart_bytes; + save_bytes = nintra_bytes-nstart_bytes; + if (save_bytes == 0) + save_bytes = ALLOC_NONE; + ALLOC(intra_bits, save_bytes, unsigned char); + /* Copy bits from intra bit-stream */ + OPUS_COPY(intra_bits, intra_buf, nintra_bytes - nstart_bytes); + + *enc = enc_start_state; + + badness2 = quant_coarse_energy_impl(m, start, end, eBands, oldEBands, budget, + tell, e_prob_model[LM][intra], error, enc, C, LM, 0, max_decay, lfe); + + if (two_pass && (badness1 < badness2 || (badness1 == badness2 && ((opus_int32)ec_tell_frac(enc))+intra_bias > tell_intra))) + { + *enc = enc_intra_state; + /* Copy intra bits to bit-stream */ + OPUS_COPY(intra_buf, intra_bits, nintra_bytes - nstart_bytes); + OPUS_COPY(oldEBands, oldEBands_intra, C*m->nbEBands); + OPUS_COPY(error, error_intra, C*m->nbEBands); + intra = 1; + } + } else { + OPUS_COPY(oldEBands, oldEBands_intra, C*m->nbEBands); + OPUS_COPY(error, error_intra, C*m->nbEBands); + } + + if (intra) + *delayedIntra = new_distortion; + else + *delayedIntra = ADD32(MULT16_32_Q15(MULT16_16_Q15(pred_coef[LM], pred_coef[LM]),*delayedIntra), + new_distortion); + + RESTORE_STACK; +} + +void quant_fine_energy(const CELTMode *m, int start, int end, opus_val16 *oldEBands, opus_val16 *error, int *fine_quant, ec_enc *enc, int C) +{ + int i, c; + + /* Encode finer resolution */ + for (i=start;i<end;i++) + { + opus_int16 frac = 1<<fine_quant[i]; + if (fine_quant[i] <= 0) + continue; + c=0; + do { + int q2; + opus_val16 offset; +#ifdef OPUS_FIXED_POINT + /* Has to be without rounding */ + q2 = (error[i+c*m->nbEBands]+QCONST16(.5f,DB_SHIFT))>>(DB_SHIFT-fine_quant[i]); +#else + q2 = (int)floor((error[i+c*m->nbEBands]+.5f)*frac); +#endif + if (q2 > frac-1) + q2 = frac-1; + if (q2<0) + q2 = 0; + ec_enc_bits(enc, q2, fine_quant[i]); +#ifdef OPUS_FIXED_POINT + offset = SUB16(SHR32(SHL32(EXTEND32(q2),DB_SHIFT)+QCONST16(.5f,DB_SHIFT),fine_quant[i]),QCONST16(.5f,DB_SHIFT)); +#else + offset = 
(q2+.5f)*(1<<(14-fine_quant[i]))*(1.f/16384) - .5f; +#endif + oldEBands[i+c*m->nbEBands] += offset; + error[i+c*m->nbEBands] -= offset; + /*printf ("%f ", error[i] - offset);*/ + } while (++c < C); + } +} + +void quant_energy_finalise(const CELTMode *m, int start, int end, opus_val16 *oldEBands, opus_val16 *error, int *fine_quant, int *fine_priority, int bits_left, ec_enc *enc, int C) +{ + int i, prio, c; + + /* Use up the remaining bits */ + for (prio=0;prio<2;prio++) + { + for (i=start;i<end && bits_left>=C ;i++) + { + if (fine_quant[i] >= MAX_FINE_BITS || fine_priority[i]!=prio) + continue; + c=0; + do { + int q2; + opus_val16 offset; + q2 = error[i+c*m->nbEBands]<0 ? 0 : 1; + ec_enc_bits(enc, q2, 1); +#ifdef OPUS_FIXED_POINT + offset = SHR16(SHL16(q2,DB_SHIFT)-QCONST16(.5f,DB_SHIFT),fine_quant[i]+1); +#else + offset = (q2-.5f)*(1<<(14-fine_quant[i]-1))*(1.f/16384); +#endif + oldEBands[i+c*m->nbEBands] += offset; + bits_left--; + } while (++c < C); + } + } +} + +void unquant_coarse_energy(const CELTMode *m, int start, int end, opus_val16 *oldEBands, int intra, ec_dec *dec, int C, int LM) +{ + const unsigned char *prob_model = e_prob_model[LM][intra]; + int i, c; + opus_val32 prev[2] = {0, 0}; + opus_val16 coef; + opus_val16 beta; + opus_int32 budget; + opus_int32 tell; + + if (intra) + { + coef = 0; + beta = beta_intra; + } else { + beta = beta_coef[LM]; + coef = pred_coef[LM]; + } + + budget = dec->storage*8; + + /* Decode at a fixed coarse resolution */ + for (i=start;i<end;i++) + { + c=0; + do { + int qi; + opus_val32 q; + opus_val32 tmp; + /* It would be better to express this invariant as a + test on C at function entry, but that isn't enough + to make the static analyzer happy. 
*/ + celt_assert(c<2); + tell = ec_tell(dec); + if(budget-tell>=15) + { + int pi; + pi = 2*IMIN(i,20); + qi = ec_laplace_decode(dec, + prob_model[pi]<<7, prob_model[pi+1]<<6); + } + else if(budget-tell>=2) + { + qi = ec_dec_icdf(dec, small_energy_icdf, 2); + qi = (qi>>1)^-(qi&1); + } + else if(budget-tell>=1) + { + qi = -ec_dec_bit_logp(dec, 1); + } + else + qi = -1; + q = (opus_val32)SHL32(EXTEND32(qi),DB_SHIFT); + + oldEBands[i+c*m->nbEBands] = MAX16(-QCONST16(9.f,DB_SHIFT), oldEBands[i+c*m->nbEBands]); + tmp = PSHR32(MULT16_16(coef,oldEBands[i+c*m->nbEBands]),8) + prev[c] + SHL32(q,7); +#ifdef OPUS_FIXED_POINT + tmp = MAX32(-QCONST32(28.f, DB_SHIFT+7), tmp); +#endif + oldEBands[i+c*m->nbEBands] = PSHR32(tmp, 7); + prev[c] = prev[c] + SHL32(q,7) - MULT16_16(beta,PSHR32(q,8)); + } while (++c < C); + } +} + +void unquant_fine_energy(const CELTMode *m, int start, int end, opus_val16 *oldEBands, int *fine_quant, ec_dec *dec, int C) +{ + int i, c; + /* Decode finer resolution */ + for (i=start;i<end;i++) + { + if (fine_quant[i] <= 0) + continue; + c=0; + do { + int q2; + opus_val16 offset; + q2 = ec_dec_bits(dec, fine_quant[i]); +#ifdef OPUS_FIXED_POINT + offset = SUB16(SHR32(SHL32(EXTEND32(q2),DB_SHIFT)+QCONST16(.5f,DB_SHIFT),fine_quant[i]),QCONST16(.5f,DB_SHIFT)); +#else + offset = (q2+.5f)*(1<<(14-fine_quant[i]))*(1.f/16384) - .5f; +#endif + oldEBands[i+c*m->nbEBands] += offset; + } while (++c < C); + } +} + +void unquant_energy_finalise(const CELTMode *m, int start, int end, opus_val16 *oldEBands, int *fine_quant, int *fine_priority, int bits_left, ec_dec *dec, int C) +{ + int i, prio, c; + + /* Use up the remaining bits */ + for (prio=0;prio<2;prio++) + { + for (i=start;i<end && bits_left>=C ;i++) + { + if (fine_quant[i] >= MAX_FINE_BITS || fine_priority[i]!=prio) + continue; + c=0; + do { + int q2; + opus_val16 offset; + q2 = ec_dec_bits(dec, 1); +#ifdef OPUS_FIXED_POINT + offset = SHR16(SHL16(q2,DB_SHIFT)-QCONST16(.5f,DB_SHIFT),fine_quant[i]+1); +#else + offset 
= (q2-.5f)*(1<<(14-fine_quant[i]-1))*(1.f/16384); +#endif + oldEBands[i+c*m->nbEBands] += offset; + bits_left--; + } while (++c < C); + } + } +} + +void amp2Log2(const CELTMode *m, int effEnd, int end, + celt_ener *bandE, opus_val16 *bandLogE, int C) +{ + int c, i; + c=0; + do { + for (i=0;i<effEnd;i++) + bandLogE[i+c*m->nbEBands] = + celt_log2(SHL32(bandE[i+c*m->nbEBands],2)) + - SHL16((opus_val16)eMeans[i],6); + for (i=effEnd;i<end;i++) + bandLogE[c*m->nbEBands+i] = -QCONST16(14.f,DB_SHIFT); + } while (++c < C); +} diff --git a/drivers/opus/celt/quant_bands.h b/drivers/opus/celt/quant_bands.h new file mode 100644 index 0000000000..840df8723f --- /dev/null +++ b/drivers/opus/celt/quant_bands.h @@ -0,0 +1,66 @@ +/* Copyright (c) 2007-2008 CSIRO + Copyright (c) 2007-2009 Xiph.Org Foundation + Written by Jean-Marc Valin */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef QUANT_BANDS +#define QUANT_BANDS + +#include "arch.h" +#include "opus_modes.h" +#include "entenc.h" +#include "entdec.h" +#include "mathops.h" + +#ifdef OPUS_FIXED_POINT +extern const signed char eMeans[25]; +#else +extern const opus_val16 eMeans[25]; +#endif + +void amp2Log2(const CELTMode *m, int effEnd, int end, + celt_ener *bandE, opus_val16 *bandLogE, int C); + +void log2Amp(const CELTMode *m, int start, int end, + celt_ener *eBands, const opus_val16 *oldEBands, int C); + +void quant_coarse_energy(const CELTMode *m, int start, int end, int effEnd, + const opus_val16 *eBands, opus_val16 *oldEBands, opus_uint32 budget, + opus_val16 *error, ec_enc *enc, int C, int LM, + int nbAvailableBytes, int force_intra, opus_val32 *delayedIntra, + int two_pass, int loss_rate, int lfe); + +void quant_fine_energy(const CELTMode *m, int start, int end, opus_val16 *oldEBands, opus_val16 *error, int *fine_quant, ec_enc *enc, int C); + +void quant_energy_finalise(const CELTMode *m, int start, int end, opus_val16 *oldEBands, opus_val16 *error, int *fine_quant, int *fine_priority, int bits_left, ec_enc *enc, int C); + +void unquant_coarse_energy(const CELTMode *m, int start, int end, opus_val16 *oldEBands, int intra, ec_dec *dec, int C, int LM); + +void unquant_fine_energy(const CELTMode *m, int start, int end, opus_val16 *oldEBands, int *fine_quant, ec_dec *dec, int C); + +void unquant_energy_finalise(const CELTMode *m, int start, int end, 
opus_val16 *oldEBands, int *fine_quant, int *fine_priority, int bits_left, ec_dec *dec, int C); + +#endif /* QUANT_BANDS */ diff --git a/drivers/opus/celt/rate.c b/drivers/opus/celt/rate.c new file mode 100644 index 0000000000..cca585ad95 --- /dev/null +++ b/drivers/opus/celt/rate.c @@ -0,0 +1,638 @@ +/* Copyright (c) 2007-2008 CSIRO + Copyright (c) 2007-2009 Xiph.Org Foundation + Written by Jean-Marc Valin */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include <math.h> +#include "opus_modes.h" +#include "cwrs.h" +#include "arch.h" +#include "os_support.h" + +#include "entcode.h" +#include "rate.h" + +static const unsigned char LOG2_FRAC_TABLE[24]={ + 0, + 8,13, + 16,19,21,23, + 24,26,27,28,29,30,31,32, + 32,33,34,34,35,36,36,37,37 +}; + +#ifdef CUSTOM_MODES + +/*Determines if V(N,K) fits in a 32-bit unsigned integer. + N and K are themselves limited to 15 bits.*/ +static int fits_in32(int _n, int _k) +{ + static const opus_int16 maxN[15] = { + 32767, 32767, 32767, 1476, 283, 109, 60, 40, + 29, 24, 20, 18, 16, 14, 13}; + static const opus_int16 maxK[15] = { + 32767, 32767, 32767, 32767, 1172, 238, 95, 53, + 36, 27, 22, 18, 16, 15, 13}; + if (_n>=14) + { + if (_k>=14) + return 0; + else + return _n <= maxN[_k]; + } else { + return _k <= maxK[_n]; + } +} + +void compute_pulse_cache(CELTMode *m, int LM) +{ + int C; + int i; + int j; + int curr=0; + int nbEntries=0; + int entryN[100], entryK[100], entryI[100]; + const opus_int16 *eBands = m->eBands; + PulseCache *cache = &m->cache; + opus_int16 *cindex; + unsigned char *bits; + unsigned char *cap; + + cindex = (opus_int16 *)opus_alloc(sizeof(cache->index[0])*m->nbEBands*(LM+2)); + cache->index = cindex; + + /* Scan for all unique band sizes */ + for (i=0;i<=LM+1;i++) + { + for (j=0;j<m->nbEBands;j++) + { + int k; + int N = (eBands[j+1]-eBands[j])<<i>>1; + cindex[i*m->nbEBands+j] = -1; + /* Find other bands that have the same size */ + for (k=0;k<=i;k++) + { + int n; + for (n=0;n<m->nbEBands && (k!=i || n<j);n++) + { + if (N == (eBands[n+1]-eBands[n])<<k>>1) + { + cindex[i*m->nbEBands+j] = cindex[k*m->nbEBands+n]; + break; + } + } + } + if (cache->index[i*m->nbEBands+j] == -1 && N!=0) + { + int K; + entryN[nbEntries] = N; + K = 0; + while (fits_in32(N,get_pulses(K+1)) && K<MAX_PSEUDO) + K++; + entryK[nbEntries] = K; + cindex[i*m->nbEBands+j] = curr; + entryI[nbEntries] = curr; + + curr += K+1; + 
nbEntries++; + } + } + } + bits = (unsigned char *)opus_alloc(sizeof(unsigned char)*curr); + cache->bits = bits; + cache->size = curr; + /* Compute the cache for all unique sizes */ + for (i=0;i<nbEntries;i++) + { + unsigned char *ptr = bits+entryI[i]; + opus_int16 tmp[MAX_PULSES+1]; + get_required_bits(tmp, entryN[i], get_pulses(entryK[i]), BITRES); + for (j=1;j<=entryK[i];j++) + ptr[j] = tmp[get_pulses(j)]-1; + ptr[0] = entryK[i]; + } + + /* Compute the maximum rate for each band at which we'll reliably use as + many bits as we ask for. */ + cache->caps = cap = (unsigned char *)opus_alloc(sizeof(cache->caps[0])*(LM+1)*2*m->nbEBands); + for (i=0;i<=LM;i++) + { + for (C=1;C<=2;C++) + { + for (j=0;j<m->nbEBands;j++) + { + int N0; + int max_bits; + N0 = m->eBands[j+1]-m->eBands[j]; + /* N=1 bands only have a sign bit and fine bits. */ + if (N0<<i == 1) + max_bits = C*(1+MAX_FINE_BITS)<<BITRES; + else + { + const unsigned char *pcache; + opus_int32 num; + opus_int32 den; + int LM0; + int N; + int offset; + int ndof; + int qb; + int k; + LM0 = 0; + /* Even-sized bands bigger than N=2 can be split one more time. + As of commit 44203907 all bands >1 are even, including custom modes.*/ + if (N0 > 2) + { + N0>>=1; + LM0--; + } + /* N0=1 bands can't be split down to N<2. */ + else if (N0 <= 1) + { + LM0=IMIN(i,1); + N0<<=LM0; + } + /* Compute the cost for the lowest-level PVQ of a fully split + band. */ + pcache = bits + cindex[(LM0+1)*m->nbEBands+j]; + max_bits = pcache[pcache[0]]+1; + /* Add in the cost of coding regular splits. */ + N = N0; + for(k=0;k<i-LM0;k++){ + max_bits <<= 1; + /* Offset the number of qtheta bits by log2(N)/2 + + QTHETA_OFFSET compared to their "fair share" of + total/N */ + offset = ((m->logN[j]+((LM0+k)<<BITRES))>>1)-QTHETA_OFFSET; + /* The number of qtheta bits we'll allocate if the remainder + is to be max_bits. + The average measured cost for theta is 0.89701 times qb, + approximated here as 459/512. 
*/ + num=459*(opus_int32)((2*N-1)*offset+max_bits); + den=((opus_int32)(2*N-1)<<9)-459; + qb = IMIN((num+(den>>1))/den, 57); + celt_assert(qb >= 0); + max_bits += qb; + N <<= 1; + } + /* Add in the cost of a stereo split, if necessary. */ + if (C==2) + { + max_bits <<= 1; + offset = ((m->logN[j]+(i<<BITRES))>>1)-(N==2?QTHETA_OFFSET_TWOPHASE:QTHETA_OFFSET); + ndof = 2*N-1-(N==2); + /* The average measured cost for theta with the step PDF is + 0.95164 times qb, approximated here as 487/512. */ + num = (N==2?512:487)*(opus_int32)(max_bits+ndof*offset); + den = ((opus_int32)ndof<<9)-(N==2?512:487); + qb = IMIN((num+(den>>1))/den, (N==2?64:61)); + celt_assert(qb >= 0); + max_bits += qb; + } + /* Add the fine bits we'll use. */ + /* Compensate for the extra DoF in stereo */ + ndof = C*N + ((C==2 && N>2) ? 1 : 0); + /* Offset the number of fine bits by log2(N)/2 + FINE_OFFSET + compared to their "fair share" of total/N */ + offset = ((m->logN[j] + (i<<BITRES))>>1)-FINE_OFFSET; + /* N=2 is the only point that doesn't match the curve */ + if (N==2) + offset += 1<<BITRES>>2; + /* The number of fine bits we'll allocate if the remainder is + to be max_bits. 
*/ + num = max_bits+ndof*offset; + den = (ndof-1)<<BITRES; + qb = IMIN((num+(den>>1))/den, MAX_FINE_BITS); + celt_assert(qb >= 0); + max_bits += C*qb<<BITRES; + } + max_bits = (4*max_bits/(C*((m->eBands[j+1]-m->eBands[j])<<i)))-64; + celt_assert(max_bits >= 0); + celt_assert(max_bits < 256); + *cap++ = (unsigned char)max_bits; + } + } + } +} + +#endif /* CUSTOM_MODES */ + +#define ALLOC_STEPS 6 + +static OPUS_INLINE int interp_bits2pulses(const CELTMode *m, int start, int end, int skip_start, + const int *bits1, const int *bits2, const int *thresh, const int *cap, opus_int32 total, opus_int32 *_balance, + int skip_rsv, int *intensity, int intensity_rsv, int *dual_stereo, int dual_stereo_rsv, int *bits, + int *ebits, int *fine_priority, int C, int LM, ec_ctx *ec, int encode, int prev, int signalBandwidth) +{ + opus_int32 psum; + int lo, hi; + int i, j; + int logM; + int stereo; + int codedBands=-1; + int alloc_floor; + opus_int32 left, percoeff; + int done; + opus_int32 balance; + SAVE_STACK; + + alloc_floor = C<<BITRES; + stereo = C>1; + + logM = LM<<BITRES; + lo = 0; + hi = 1<<ALLOC_STEPS; + for (i=0;i<ALLOC_STEPS;i++) + { + int mid = (lo+hi)>>1; + psum = 0; + done = 0; + for (j=end;j-->start;) + { + int tmp = bits1[j] + (mid*(opus_int32)bits2[j]>>ALLOC_STEPS); + if (tmp >= thresh[j] || done) + { + done = 1; + /* Don't allocate more than we can actually use */ + psum += IMIN(tmp, cap[j]); + } else { + if (tmp >= alloc_floor) + psum += alloc_floor; + } + } + if (psum > total) + hi = mid; + else + lo = mid; + } + psum = 0; + /*printf ("interp bisection gave %d\n", lo);*/ + done = 0; + for (j=end;j-->start;) + { + int tmp = bits1[j] + (lo*bits2[j]>>ALLOC_STEPS); + if (tmp < thresh[j] && !done) + { + if (tmp >= alloc_floor) + tmp = alloc_floor; + else + tmp = 0; + } else + done = 1; + /* Don't allocate more than we can actually use */ + tmp = IMIN(tmp, cap[j]); + bits[j] = tmp; + psum += tmp; + } + + /* Decide which bands to skip, working backwards from the end. 
*/ + for (codedBands=end;;codedBands--) + { + int band_width; + int band_bits; + int rem; + j = codedBands-1; + /* Never skip the first band, nor a band that has been boosted by + dynalloc. + In the first case, we'd be coding a bit to signal we're going to waste + all the other bits. + In the second case, we'd be coding a bit to redistribute all the bits + we just signaled should be concentrated in this band. */ + if (j<=skip_start) + { + /* Give the bit we reserved to end skipping back. */ + total += skip_rsv; + break; + } + /*Figure out how many left-over bits we would be adding to this band. + This can include bits we've stolen back from higher, skipped bands.*/ + left = total-psum; + percoeff = left/(m->eBands[codedBands]-m->eBands[start]); + left -= (m->eBands[codedBands]-m->eBands[start])*percoeff; + rem = IMAX(left-(m->eBands[j]-m->eBands[start]),0); + band_width = m->eBands[codedBands]-m->eBands[j]; + band_bits = (int)(bits[j] + percoeff*band_width + rem); + /*Only code a skip decision if we're above the threshold for this band. + Otherwise it is force-skipped.
+ This ensures that we have enough bits to code the skip flag.*/ + if (band_bits >= IMAX(thresh[j], alloc_floor+(1<<BITRES))) + { + if (encode) + { + /*This if() block is the only part of the allocation function that + is not a mandatory part of the bitstream: any bands we choose to + skip here must be explicitly signaled.*/ + /*Choose a threshold with some hysteresis to keep bands from + fluctuating in and out.*/ +#ifdef FUZZING + if ((rand()&0x1) == 0) +#else + if (codedBands<=start+2 || (band_bits > ((j<prev?7:9)*band_width<<LM<<BITRES)>>4 && j<=signalBandwidth)) +#endif + { + ec_enc_bit_logp(ec, 1, 1); + break; + } + ec_enc_bit_logp(ec, 0, 1); + } else if (ec_dec_bit_logp(ec, 1)) { + break; + } + /*We used a bit to skip this band.*/ + psum += 1<<BITRES; + band_bits -= 1<<BITRES; + } + /*Reclaim the bits originally allocated to this band.*/ + psum -= bits[j]+intensity_rsv; + if (intensity_rsv > 0) + intensity_rsv = LOG2_FRAC_TABLE[j-start]; + psum += intensity_rsv; + if (band_bits >= alloc_floor) + { + /*If we have enough for a fine energy bit per channel, use it.*/ + psum += alloc_floor; + bits[j] = alloc_floor; + } else { + /*Otherwise this band gets nothing at all.*/ + bits[j] = 0; + } + } + + celt_assert(codedBands > start); + /* Code the intensity and dual stereo parameters. 
*/ + if (intensity_rsv > 0) + { + if (encode) + { + *intensity = IMIN(*intensity, codedBands); + ec_enc_uint(ec, *intensity-start, codedBands+1-start); + } + else + *intensity = start+ec_dec_uint(ec, codedBands+1-start); + } + else + *intensity = 0; + if (*intensity <= start) + { + total += dual_stereo_rsv; + dual_stereo_rsv = 0; + } + if (dual_stereo_rsv > 0) + { + if (encode) + ec_enc_bit_logp(ec, *dual_stereo, 1); + else + *dual_stereo = ec_dec_bit_logp(ec, 1); + } + else + *dual_stereo = 0; + + /* Allocate the remaining bits */ + left = total-psum; + percoeff = left/(m->eBands[codedBands]-m->eBands[start]); + left -= (m->eBands[codedBands]-m->eBands[start])*percoeff; + for (j=start;j<codedBands;j++) + bits[j] += ((int)percoeff*(m->eBands[j+1]-m->eBands[j])); + for (j=start;j<codedBands;j++) + { + int tmp = (int)IMIN(left, m->eBands[j+1]-m->eBands[j]); + bits[j] += tmp; + left -= tmp; + } + /*for (j=0;j<end;j++)printf("%d ", bits[j]);printf("\n");*/ + + balance = 0; + for (j=start;j<codedBands;j++) + { + int N0, N, den; + int offset; + int NClogN; + opus_int32 excess, bit; + + celt_assert(bits[j] >= 0); + N0 = m->eBands[j+1]-m->eBands[j]; + N=N0<<LM; + bit = (opus_int32)bits[j]+balance; + + if (N>1) + { + excess = MAX32(bit-cap[j],0); + bits[j] = bit-excess; + + /* Compensate for the extra DoF in stereo */ + den=(C*N+ ((C==2 && N>2 && !*dual_stereo && j<*intensity) ? 
1 : 0)); + + NClogN = den*(m->logN[j] + logM); + + /* Offset for the number of fine bits by log2(N)/2 + FINE_OFFSET + compared to their "fair share" of total/N */ + offset = (NClogN>>1)-den*FINE_OFFSET; + + /* N=2 is the only point that doesn't match the curve */ + if (N==2) + offset += den<<BITRES>>2; + + /* Changing the offset for allocating the second and third + fine energy bit */ + if (bits[j] + offset < den*2<<BITRES) + offset += NClogN>>2; + else if (bits[j] + offset < den*3<<BITRES) + offset += NClogN>>3; + + /* Divide with rounding */ + ebits[j] = IMAX(0, (bits[j] + offset + (den<<(BITRES-1))) / (den<<BITRES)); + + /* Make sure not to bust */ + if (C*ebits[j] > (bits[j]>>BITRES)) + ebits[j] = bits[j] >> stereo >> BITRES; + + /* More than that is useless because that's about as far as PVQ can go */ + ebits[j] = IMIN(ebits[j], MAX_FINE_BITS); + + /* If we rounded down or capped this band, make it a candidate for the + final fine energy pass */ + fine_priority[j] = ebits[j]*(den<<BITRES) >= bits[j]+offset; + + /* Remove the allocated fine bits; the rest are assigned to PVQ */ + bits[j] -= C*ebits[j]<<BITRES; + + } else { + /* For N=1, all bits go to fine energy except for a single sign bit */ + excess = MAX32(0,bit-(C<<BITRES)); + bits[j] = bit-excess; + ebits[j] = 0; + fine_priority[j] = 1; + } + + /* Fine energy can't take advantage of the re-balancing in + quant_all_bands(). + Instead, do the re-balancing here.*/ + if(excess > 0) + { + int extra_fine; + int extra_bits; + extra_fine = IMIN(excess>>(stereo+BITRES),MAX_FINE_BITS-ebits[j]); + ebits[j] += extra_fine; + extra_bits = extra_fine*C<<BITRES; + fine_priority[j] = extra_bits >= excess-balance; + excess -= extra_bits; + } + balance = excess; + + celt_assert(bits[j] >= 0); + celt_assert(ebits[j] >= 0); + } + /* Save any remaining bits over the cap for the rebalancing in + quant_all_bands(). */ + *_balance = balance; + + /* The skipped bands use all their bits for fine energy. 
*/ + for (;j<end;j++) + { + ebits[j] = bits[j] >> stereo >> BITRES; + celt_assert(C*ebits[j]<<BITRES == bits[j]); + bits[j] = 0; + fine_priority[j] = ebits[j]<1; + } + RESTORE_STACK; + return codedBands; +} + +int compute_allocation(const CELTMode *m, int start, int end, const int *offsets, const int *cap, int alloc_trim, int *intensity, int *dual_stereo, + opus_int32 total, opus_int32 *balance, int *pulses, int *ebits, int *fine_priority, int C, int LM, ec_ctx *ec, int encode, int prev, int signalBandwidth) +{ + int lo, hi, len, j; + int codedBands; + int skip_start; + int skip_rsv; + int intensity_rsv; + int dual_stereo_rsv; + VARDECL(int, bits1); + VARDECL(int, bits2); + VARDECL(int, thresh); + VARDECL(int, trim_offset); + SAVE_STACK; + + total = IMAX(total, 0); + len = m->nbEBands; + skip_start = start; + /* Reserve a bit to signal the end of manually skipped bands. */ + skip_rsv = total >= 1<<BITRES ? 1<<BITRES : 0; + total -= skip_rsv; + /* Reserve bits for the intensity and dual stereo parameters. */ + intensity_rsv = dual_stereo_rsv = 0; + if (C==2) + { + intensity_rsv = LOG2_FRAC_TABLE[end-start]; + if (intensity_rsv>total) + intensity_rsv = 0; + else + { + total -= intensity_rsv; + dual_stereo_rsv = total>=1<<BITRES ? 
1<<BITRES : 0; + total -= dual_stereo_rsv; + } + } + ALLOC(bits1, len, int); + ALLOC(bits2, len, int); + ALLOC(thresh, len, int); + ALLOC(trim_offset, len, int); + + for (j=start;j<end;j++) + { + /* Below this threshold, we're sure not to allocate any PVQ bits */ + thresh[j] = IMAX((C)<<BITRES, (3*(m->eBands[j+1]-m->eBands[j])<<LM<<BITRES)>>4); + /* Tilt of the allocation curve */ + trim_offset[j] = C*(m->eBands[j+1]-m->eBands[j])*(alloc_trim-5-LM)*(end-j-1) + *(1<<(LM+BITRES))>>6; + /* Giving less resolution to single-coefficient bands because they get + more benefit from having one coarse value per coefficient*/ + if ((m->eBands[j+1]-m->eBands[j])<<LM==1) + trim_offset[j] -= C<<BITRES; + } + lo = 1; + hi = m->nbAllocVectors - 1; + do + { + int done = 0; + int psum = 0; + int mid = (lo+hi) >> 1; + for (j=end;j-->start;) + { + int bitsj; + int N = m->eBands[j+1]-m->eBands[j]; + bitsj = C*N*m->allocVectors[mid*len+j]<<LM>>2; + if (bitsj > 0) + bitsj = IMAX(0, bitsj + trim_offset[j]); + bitsj += offsets[j]; + if (bitsj >= thresh[j] || done) + { + done = 1; + /* Don't allocate more than we can actually use */ + psum += IMIN(bitsj, cap[j]); + } else { + if (bitsj >= C<<BITRES) + psum += C<<BITRES; + } + } + if (psum > total) + hi = mid - 1; + else + lo = mid + 1; + /*printf ("lo = %d, hi = %d\n", lo, hi);*/ + } + while (lo <= hi); + hi = lo--; + /*printf ("interp between %d and %d\n", lo, hi);*/ + for (j=start;j<end;j++) + { + int bits1j, bits2j; + int N = m->eBands[j+1]-m->eBands[j]; + bits1j = C*N*m->allocVectors[lo*len+j]<<LM>>2; + bits2j = hi>=m->nbAllocVectors ? 
+ cap[j] : C*N*m->allocVectors[hi*len+j]<<LM>>2; + if (bits1j > 0) + bits1j = IMAX(0, bits1j + trim_offset[j]); + if (bits2j > 0) + bits2j = IMAX(0, bits2j + trim_offset[j]); + if (lo > 0) + bits1j += offsets[j]; + bits2j += offsets[j]; + if (offsets[j]>0) + skip_start = j; + bits2j = IMAX(0,bits2j-bits1j); + bits1[j] = bits1j; + bits2[j] = bits2j; + } + codedBands = interp_bits2pulses(m, start, end, skip_start, bits1, bits2, thresh, cap, + total, balance, skip_rsv, intensity, intensity_rsv, dual_stereo, dual_stereo_rsv, + pulses, ebits, fine_priority, C, LM, ec, encode, prev, signalBandwidth); + RESTORE_STACK; + return codedBands; +} + diff --git a/drivers/opus/celt/rate.h b/drivers/opus/celt/rate.h new file mode 100644 index 0000000000..7ced23ea09 --- /dev/null +++ b/drivers/opus/celt/rate.h @@ -0,0 +1,101 @@ +/* Copyright (c) 2007-2008 CSIRO + Copyright (c) 2007-2009 Xiph.Org Foundation + Written by Jean-Marc Valin */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef RATE_H +#define RATE_H + +#define MAX_PSEUDO 40 +#define LOG_MAX_PSEUDO 6 + +#define MAX_PULSES 128 + +#define MAX_FINE_BITS 8 + +#define FINE_OFFSET 21 +#define QTHETA_OFFSET 4 +#define QTHETA_OFFSET_TWOPHASE 16 + +#include "cwrs.h" +#include "opus_modes.h" + +void compute_pulse_cache(CELTMode *m, int LM); + +static OPUS_INLINE int get_pulses(int i) +{ + return i<8 ? i : (8 + (i&7)) << ((i>>3)-1); +} + +static OPUS_INLINE int bits2pulses(const CELTMode *m, int band, int LM, int bits) +{ + int i; + int lo, hi; + const unsigned char *cache; + + LM++; + cache = m->cache.bits + m->cache.index[LM*m->nbEBands+band]; + + lo = 0; + hi = cache[0]; + bits--; + for (i=0;i<LOG_MAX_PSEUDO;i++) + { + int mid = (lo+hi+1)>>1; + /* OPT: Make sure this is implemented with a conditional move */ + if ((int)cache[mid] >= bits) + hi = mid; + else + lo = mid; + } + if (bits- (lo == 0 ? -1 : (int)cache[lo]) <= (int)cache[hi]-bits) + return lo; + else + return hi; +} + +static OPUS_INLINE int pulses2bits(const CELTMode *m, int band, int LM, int pulses) +{ + const unsigned char *cache; + + LM++; + cache = m->cache.bits + m->cache.index[LM*m->nbEBands+band]; + return pulses == 0 ? 0 : cache[pulses]+1; +} + +/** Compute the pulse allocation, i.e. how many pulses will go in each + * band. 
+ @param m mode + @param offsets Requested increase or decrease in the number of bits for + each band + @param total Total bit budget to allocate (in units of 1/(1<<BITRES) bit) + @param pulses Number of pulses per band (returned) + @return Number of coded bands (bands from this index up to end are skipped) +*/ +int compute_allocation(const CELTMode *m, int start, int end, const int *offsets, const int *cap, int alloc_trim, int *intensity, int *dual_stero, + opus_int32 total, opus_int32 *balance, int *pulses, int *ebits, int *fine_priority, int C, int LM, ec_ctx *ec, int encode, int prev, int signalBandwidth); + +#endif diff --git a/drivers/opus/celt/stack_alloc.h b/drivers/opus/celt/stack_alloc.h new file mode 100644 index 0000000000..d500c4dab9 --- /dev/null +++ b/drivers/opus/celt/stack_alloc.h @@ -0,0 +1,182 @@ +/* Copyright (C) 2002-2003 Jean-Marc Valin + Copyright (C) 2007-2009 Xiph.Org Foundation */ +/** + @file stack_alloc.h + @brief Temporary memory allocation on stack +*/ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef STACK_ALLOC_H +#define STACK_ALLOC_H + +#include "opus_types.h" +#include "opus_defines.h" + +#if (!defined (VAR_ARRAYS) && !defined (USE_ALLOCA) && !defined (NONTHREADSAFE_PSEUDOSTACK)) +#define VAR_ARRAYS +#endif + +#ifdef USE_ALLOCA +# ifdef WIN32 +# include <malloc.h> +# else +# ifdef OPUS_HAVE_ALLOCA_H +# include <alloca.h> +# else +# ifdef __linux__ +# include <alloca.h> +# else +# include <stdlib.h> +# endif +# endif +# endif +#endif + +/** + * @def ALIGN(stack, size) + * + * Aligns the stack to a 'size' boundary + * + * @param stack Stack + * @param size New size boundary + */ + +/** + * @def PUSH(stack, size, type) + * + * Allocates 'size' elements of type 'type' on the stack + * + * @param stack Stack + * @param size Number of elements + * @param type Type of element + */ + +/** + * @def VARDECL(type, var) + * + * Declare variable 'var' (elements of type 'type') on stack + * + * @param var Variable to declare + */ + +/** + * @def ALLOC(var, size, type) + * + * Allocate 'size' elements of 'type' on stack + * + * @param var Name of variable to allocate + * @param size Number of elements + * @param type Type of element + */ + +#if defined(VAR_ARRAYS) + +#define VARDECL(type, var) +#define ALLOC(var, size, type) type var[size] +#define SAVE_STACK +#define RESTORE_STACK +#define ALLOC_STACK +/* C99 does not allow VLAs of size zero */ +#define ALLOC_NONE 1 + +#elif defined(USE_ALLOCA) + +#define VARDECL(type, var) type *var + +# ifdef WIN32 +# define ALLOC(var,
size, type) var = ((type*)_alloca(sizeof(type)*(size))) +# else +# define ALLOC(var, size, type) var = ((type*)alloca(sizeof(type)*(size))) +# endif + +#define SAVE_STACK +#define RESTORE_STACK +#define ALLOC_STACK +#define ALLOC_NONE 0 + +#else + +#ifdef CELT_C +char *global_stack=0; +#else +extern char *global_stack; +#endif /* CELT_C */ + +#ifdef ENABLE_VALGRIND + +#include <valgrind/memcheck.h> + +#ifdef CELT_C +char *global_stack_top=0; +#else +extern char *global_stack_top; +#endif /* CELT_C */ + +#define ALIGN(stack, size) ((stack) += ((size) - (long)(stack)) & ((size) - 1)) +#define PUSH(stack, size, type) (VALGRIND_MAKE_MEM_NOACCESS(stack, global_stack_top-stack),ALIGN((stack),sizeof(type)/sizeof(char)),VALGRIND_MAKE_MEM_UNDEFINED(stack, ((size)*sizeof(type)/sizeof(char))),(stack)+=(2*(size)*sizeof(type)/sizeof(char)),(type*)((stack)-(2*(size)*sizeof(type)/sizeof(char)))) +#define RESTORE_STACK ((global_stack = _saved_stack),VALGRIND_MAKE_MEM_NOACCESS(global_stack, global_stack_top-global_stack)) +#define ALLOC_STACK char *_saved_stack; ((global_stack = (global_stack==0) ? ((global_stack_top=opus_alloc_scratch(GLOBAL_STACK_SIZE*2)+(GLOBAL_STACK_SIZE*2))-(GLOBAL_STACK_SIZE*2)) : global_stack),VALGRIND_MAKE_MEM_NOACCESS(global_stack, global_stack_top-global_stack)); _saved_stack = global_stack; + +#else + +#define ALIGN(stack, size) ((stack) += ((size) - (long)(stack)) & ((size) - 1)) +#define PUSH(stack, size, type) (ALIGN((stack),sizeof(type)/sizeof(char)),(stack)+=(size)*(sizeof(type)/sizeof(char)),(type*)((stack)-(size)*(sizeof(type)/sizeof(char)))) +#define RESTORE_STACK (global_stack = _saved_stack) +#define ALLOC_STACK char *_saved_stack; (global_stack = (global_stack==0) ? 
opus_alloc_scratch(GLOBAL_STACK_SIZE) : global_stack); _saved_stack = global_stack; + +#endif /* ENABLE_VALGRIND */ + +#include "os_support.h" +#define VARDECL(type, var) type *var +#define ALLOC(var, size, type) var = PUSH(global_stack, size, type) +#define SAVE_STACK char *_saved_stack = global_stack; +#define ALLOC_NONE 0 + +#endif /* VAR_ARRAYS */ + + +#ifdef ENABLE_VALGRIND + +#include <valgrind/memcheck.h> +#define OPUS_CHECK_ARRAY(ptr, len) VALGRIND_CHECK_MEM_IS_DEFINED(ptr, len*sizeof(*ptr)) +#define OPUS_CHECK_VALUE(value) VALGRIND_CHECK_VALUE_IS_DEFINED(value) +#define OPUS_CHECK_ARRAY_COND(ptr, len) VALGRIND_CHECK_MEM_IS_DEFINED(ptr, len*sizeof(*ptr)) +#define OPUS_CHECK_VALUE_COND(value) VALGRIND_CHECK_VALUE_IS_DEFINED(value) +#define OPUS_PRINT_INT(value) do {fprintf(stderr, #value " = %d at %s:%d\n", value, __FILE__, __LINE__);}while(0) +#define OPUS_FPRINTF fprintf + +#else + +static OPUS_INLINE int _opus_false(void) {return 0;} +#define OPUS_CHECK_ARRAY(ptr, len) _opus_false() +#define OPUS_CHECK_VALUE(value) _opus_false() +#define OPUS_PRINT_INT(value) do{}while(0) +#define OPUS_FPRINTF (void) + +#endif + + +#endif /* STACK_ALLOC_H */ diff --git a/drivers/opus/celt/static_modes_fixed.h b/drivers/opus/celt/static_modes_fixed.h new file mode 100644 index 0000000000..d23e2a66f5 --- /dev/null +++ b/drivers/opus/celt/static_modes_fixed.h @@ -0,0 +1,595 @@ +/* The contents of this file was automatically generated by dump_modes.c + with arguments: 48000 960 + It contains static definitions for some pre-defined modes. 
*/ +#include "opus_modes.h" +#include "rate.h" + +#ifndef DEF_WINDOW120 +#define DEF_WINDOW120 +static const opus_val16 window120[120] = { +2, 20, 55, 108, 178, +266, 372, 494, 635, 792, +966, 1157, 1365, 1590, 1831, +2089, 2362, 2651, 2956, 3276, +3611, 3961, 4325, 4703, 5094, +5499, 5916, 6346, 6788, 7241, +7705, 8179, 8663, 9156, 9657, +10167, 10684, 11207, 11736, 12271, +12810, 13353, 13899, 14447, 14997, +15547, 16098, 16648, 17197, 17744, +18287, 18827, 19363, 19893, 20418, +20936, 21447, 21950, 22445, 22931, +23407, 23874, 24330, 24774, 25208, +25629, 26039, 26435, 26819, 27190, +27548, 27893, 28224, 28541, 28845, +29135, 29411, 29674, 29924, 30160, +30384, 30594, 30792, 30977, 31151, +31313, 31463, 31602, 31731, 31849, +31958, 32057, 32148, 32229, 32303, +32370, 32429, 32481, 32528, 32568, +32604, 32634, 32661, 32683, 32701, +32717, 32729, 32740, 32748, 32754, +32758, 32762, 32764, 32766, 32767, +32767, 32767, 32767, 32767, 32767, +}; +#endif + +#ifndef DEF_LOGN400 +#define DEF_LOGN400 +static const opus_int16 logN400[21] = { +0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 16, 16, 16, 21, 21, 24, 29, 34, 36, }; +#endif + +#ifndef DEF_PULSE_CACHE50 +#define DEF_PULSE_CACHE50 +static const opus_int16 cache_index50[105] = { +-1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 41, 41, 41, +82, 82, 123, 164, 200, 222, 0, 0, 0, 0, 0, 0, 0, 0, 41, +41, 41, 41, 123, 123, 123, 164, 164, 240, 266, 283, 295, 41, 41, 41, +41, 41, 41, 41, 41, 123, 123, 123, 123, 240, 240, 240, 266, 266, 305, +318, 328, 336, 123, 123, 123, 123, 123, 123, 123, 123, 240, 240, 240, 240, +305, 305, 305, 318, 318, 343, 351, 358, 364, 240, 240, 240, 240, 240, 240, +240, 240, 305, 305, 305, 305, 343, 343, 343, 351, 351, 370, 376, 382, 387, +}; +static const unsigned char cache_bits50[392] = { +40, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, +7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, +7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 40, 15, 23, 28, +31, 34, 36, 38, 39, 41, 42, 43, 44, 45, 46, 47, 47, 49, 50, +51, 52, 53, 54, 55, 
55, 57, 58, 59, 60, 61, 62, 63, 63, 65, +66, 67, 68, 69, 70, 71, 71, 40, 20, 33, 41, 48, 53, 57, 61, +64, 66, 69, 71, 73, 75, 76, 78, 80, 82, 85, 87, 89, 91, 92, +94, 96, 98, 101, 103, 105, 107, 108, 110, 112, 114, 117, 119, 121, 123, +124, 126, 128, 40, 23, 39, 51, 60, 67, 73, 79, 83, 87, 91, 94, +97, 100, 102, 105, 107, 111, 115, 118, 121, 124, 126, 129, 131, 135, 139, +142, 145, 148, 150, 153, 155, 159, 163, 166, 169, 172, 174, 177, 179, 35, +28, 49, 65, 78, 89, 99, 107, 114, 120, 126, 132, 136, 141, 145, 149, +153, 159, 165, 171, 176, 180, 185, 189, 192, 199, 205, 211, 216, 220, 225, +229, 232, 239, 245, 251, 21, 33, 58, 79, 97, 112, 125, 137, 148, 157, +166, 174, 182, 189, 195, 201, 207, 217, 227, 235, 243, 251, 17, 35, 63, +86, 106, 123, 139, 152, 165, 177, 187, 197, 206, 214, 222, 230, 237, 250, +25, 31, 55, 75, 91, 105, 117, 128, 138, 146, 154, 161, 168, 174, 180, +185, 190, 200, 208, 215, 222, 229, 235, 240, 245, 255, 16, 36, 65, 89, +110, 128, 144, 159, 173, 185, 196, 207, 217, 226, 234, 242, 250, 11, 41, +74, 103, 128, 151, 172, 191, 209, 225, 241, 255, 9, 43, 79, 110, 138, +163, 186, 207, 227, 246, 12, 39, 71, 99, 123, 144, 164, 182, 198, 214, +228, 241, 253, 9, 44, 81, 113, 142, 168, 192, 214, 235, 255, 7, 49, +90, 127, 160, 191, 220, 247, 6, 51, 95, 134, 170, 203, 234, 7, 47, +87, 123, 155, 184, 212, 237, 6, 52, 97, 137, 174, 208, 240, 5, 57, +106, 151, 192, 231, 5, 59, 111, 158, 202, 243, 5, 55, 103, 147, 187, +224, 5, 60, 113, 161, 206, 248, 4, 65, 122, 175, 224, 4, 67, 127, +182, 234, }; +static const unsigned char cache_caps50[168] = { +224, 224, 224, 224, 224, 224, 224, 224, 160, 160, 160, 160, 185, 185, 185, +178, 178, 168, 134, 61, 37, 224, 224, 224, 224, 224, 224, 224, 224, 240, +240, 240, 240, 207, 207, 207, 198, 198, 183, 144, 66, 40, 160, 160, 160, +160, 160, 160, 160, 160, 185, 185, 185, 185, 193, 193, 193, 183, 183, 172, +138, 64, 38, 240, 240, 240, 240, 240, 240, 240, 240, 207, 207, 207, 207, +204, 204, 204, 193, 193, 180, 143, 66, 40, 
185, 185, 185, 185, 185, 185, +185, 185, 193, 193, 193, 193, 193, 193, 193, 183, 183, 172, 138, 65, 39, +207, 207, 207, 207, 207, 207, 207, 207, 204, 204, 204, 204, 201, 201, 201, +188, 188, 176, 141, 66, 40, 193, 193, 193, 193, 193, 193, 193, 193, 193, +193, 193, 193, 194, 194, 194, 184, 184, 173, 139, 65, 39, 204, 204, 204, +204, 204, 204, 204, 204, 201, 201, 201, 201, 198, 198, 198, 187, 187, 175, +140, 66, 40, }; +#endif + +#ifndef FFT_TWIDDLES48000_960 +#define FFT_TWIDDLES48000_960 +static const kiss_twiddle_cpx fft_twiddles48000_960[480] = { +{32767, 0}, {32766, -429}, +{32757, -858}, {32743, -1287}, +{32724, -1715}, {32698, -2143}, +{32667, -2570}, {32631, -2998}, +{32588, -3425}, {32541, -3851}, +{32488, -4277}, {32429, -4701}, +{32364, -5125}, {32295, -5548}, +{32219, -5971}, {32138, -6393}, +{32051, -6813}, {31960, -7231}, +{31863, -7650}, {31760, -8067}, +{31652, -8481}, {31539, -8895}, +{31419, -9306}, {31294, -9716}, +{31165, -10126}, {31030, -10532}, +{30889, -10937}, {30743, -11340}, +{30592, -11741}, {30436, -12141}, +{30274, -12540}, {30107, -12935}, +{29936, -13328}, {29758, -13718}, +{29577, -14107}, {29390, -14493}, +{29197, -14875}, {29000, -15257}, +{28797, -15635}, {28590, -16010}, +{28379, -16384}, {28162, -16753}, +{27940, -17119}, {27714, -17484}, +{27482, -17845}, {27246, -18205}, +{27006, -18560}, {26760, -18911}, +{26510, -19260}, {26257, -19606}, +{25997, -19947}, {25734, -20286}, +{25466, -20621}, {25194, -20952}, +{24918, -21281}, {24637, -21605}, +{24353, -21926}, {24063, -22242}, +{23770, -22555}, {23473, -22865}, +{23171, -23171}, {22866, -23472}, +{22557, -23769}, {22244, -24063}, +{21927, -24352}, {21606, -24636}, +{21282, -24917}, {20954, -25194}, +{20622, -25465}, {20288, -25733}, +{19949, -25997}, {19607, -26255}, +{19261, -26509}, {18914, -26760}, +{18561, -27004}, {18205, -27246}, +{17846, -27481}, {17485, -27713}, +{17122, -27940}, {16755, -28162}, +{16385, -28378}, {16012, -28590}, +{15636, -28797}, {15258, -28999}, 
+{14878, -29197}, {14494, -29389}, +{14108, -29576}, {13720, -29757}, +{13329, -29934}, {12937, -30107}, +{12540, -30274}, {12142, -30435}, +{11744, -30592}, {11342, -30743}, +{10939, -30889}, {10534, -31030}, +{10127, -31164}, {9718, -31294}, +{9307, -31418}, {8895, -31537}, +{8482, -31652}, {8067, -31759}, +{7650, -31862}, {7233, -31960}, +{6815, -32051}, {6393, -32138}, +{5973, -32219}, {5549, -32294}, +{5127, -32364}, {4703, -32429}, +{4278, -32487}, {3852, -32541}, +{3426, -32588}, {2999, -32630}, +{2572, -32667}, {2144, -32698}, +{1716, -32724}, {1287, -32742}, +{860, -32757}, {430, -32766}, +{0, -32767}, {-429, -32766}, +{-858, -32757}, {-1287, -32743}, +{-1715, -32724}, {-2143, -32698}, +{-2570, -32667}, {-2998, -32631}, +{-3425, -32588}, {-3851, -32541}, +{-4277, -32488}, {-4701, -32429}, +{-5125, -32364}, {-5548, -32295}, +{-5971, -32219}, {-6393, -32138}, +{-6813, -32051}, {-7231, -31960}, +{-7650, -31863}, {-8067, -31760}, +{-8481, -31652}, {-8895, -31539}, +{-9306, -31419}, {-9716, -31294}, +{-10126, -31165}, {-10532, -31030}, +{-10937, -30889}, {-11340, -30743}, +{-11741, -30592}, {-12141, -30436}, +{-12540, -30274}, {-12935, -30107}, +{-13328, -29936}, {-13718, -29758}, +{-14107, -29577}, {-14493, -29390}, +{-14875, -29197}, {-15257, -29000}, +{-15635, -28797}, {-16010, -28590}, +{-16384, -28379}, {-16753, -28162}, +{-17119, -27940}, {-17484, -27714}, +{-17845, -27482}, {-18205, -27246}, +{-18560, -27006}, {-18911, -26760}, +{-19260, -26510}, {-19606, -26257}, +{-19947, -25997}, {-20286, -25734}, +{-20621, -25466}, {-20952, -25194}, +{-21281, -24918}, {-21605, -24637}, +{-21926, -24353}, {-22242, -24063}, +{-22555, -23770}, {-22865, -23473}, +{-23171, -23171}, {-23472, -22866}, +{-23769, -22557}, {-24063, -22244}, +{-24352, -21927}, {-24636, -21606}, +{-24917, -21282}, {-25194, -20954}, +{-25465, -20622}, {-25733, -20288}, +{-25997, -19949}, {-26255, -19607}, +{-26509, -19261}, {-26760, -18914}, +{-27004, -18561}, {-27246, -18205}, +{-27481, -17846}, 
{-27713, -17485}, +{-27940, -17122}, {-28162, -16755}, +{-28378, -16385}, {-28590, -16012}, +{-28797, -15636}, {-28999, -15258}, +{-29197, -14878}, {-29389, -14494}, +{-29576, -14108}, {-29757, -13720}, +{-29934, -13329}, {-30107, -12937}, +{-30274, -12540}, {-30435, -12142}, +{-30592, -11744}, {-30743, -11342}, +{-30889, -10939}, {-31030, -10534}, +{-31164, -10127}, {-31294, -9718}, +{-31418, -9307}, {-31537, -8895}, +{-31652, -8482}, {-31759, -8067}, +{-31862, -7650}, {-31960, -7233}, +{-32051, -6815}, {-32138, -6393}, +{-32219, -5973}, {-32294, -5549}, +{-32364, -5127}, {-32429, -4703}, +{-32487, -4278}, {-32541, -3852}, +{-32588, -3426}, {-32630, -2999}, +{-32667, -2572}, {-32698, -2144}, +{-32724, -1716}, {-32742, -1287}, +{-32757, -860}, {-32766, -430}, +{-32767, 0}, {-32766, 429}, +{-32757, 858}, {-32743, 1287}, +{-32724, 1715}, {-32698, 2143}, +{-32667, 2570}, {-32631, 2998}, +{-32588, 3425}, {-32541, 3851}, +{-32488, 4277}, {-32429, 4701}, +{-32364, 5125}, {-32295, 5548}, +{-32219, 5971}, {-32138, 6393}, +{-32051, 6813}, {-31960, 7231}, +{-31863, 7650}, {-31760, 8067}, +{-31652, 8481}, {-31539, 8895}, +{-31419, 9306}, {-31294, 9716}, +{-31165, 10126}, {-31030, 10532}, +{-30889, 10937}, {-30743, 11340}, +{-30592, 11741}, {-30436, 12141}, +{-30274, 12540}, {-30107, 12935}, +{-29936, 13328}, {-29758, 13718}, +{-29577, 14107}, {-29390, 14493}, +{-29197, 14875}, {-29000, 15257}, +{-28797, 15635}, {-28590, 16010}, +{-28379, 16384}, {-28162, 16753}, +{-27940, 17119}, {-27714, 17484}, +{-27482, 17845}, {-27246, 18205}, +{-27006, 18560}, {-26760, 18911}, +{-26510, 19260}, {-26257, 19606}, +{-25997, 19947}, {-25734, 20286}, +{-25466, 20621}, {-25194, 20952}, +{-24918, 21281}, {-24637, 21605}, +{-24353, 21926}, {-24063, 22242}, +{-23770, 22555}, {-23473, 22865}, +{-23171, 23171}, {-22866, 23472}, +{-22557, 23769}, {-22244, 24063}, +{-21927, 24352}, {-21606, 24636}, +{-21282, 24917}, {-20954, 25194}, +{-20622, 25465}, {-20288, 25733}, +{-19949, 25997}, {-19607, 
26255}, +{-19261, 26509}, {-18914, 26760}, +{-18561, 27004}, {-18205, 27246}, +{-17846, 27481}, {-17485, 27713}, +{-17122, 27940}, {-16755, 28162}, +{-16385, 28378}, {-16012, 28590}, +{-15636, 28797}, {-15258, 28999}, +{-14878, 29197}, {-14494, 29389}, +{-14108, 29576}, {-13720, 29757}, +{-13329, 29934}, {-12937, 30107}, +{-12540, 30274}, {-12142, 30435}, +{-11744, 30592}, {-11342, 30743}, +{-10939, 30889}, {-10534, 31030}, +{-10127, 31164}, {-9718, 31294}, +{-9307, 31418}, {-8895, 31537}, +{-8482, 31652}, {-8067, 31759}, +{-7650, 31862}, {-7233, 31960}, +{-6815, 32051}, {-6393, 32138}, +{-5973, 32219}, {-5549, 32294}, +{-5127, 32364}, {-4703, 32429}, +{-4278, 32487}, {-3852, 32541}, +{-3426, 32588}, {-2999, 32630}, +{-2572, 32667}, {-2144, 32698}, +{-1716, 32724}, {-1287, 32742}, +{-860, 32757}, {-430, 32766}, +{0, 32767}, {429, 32766}, +{858, 32757}, {1287, 32743}, +{1715, 32724}, {2143, 32698}, +{2570, 32667}, {2998, 32631}, +{3425, 32588}, {3851, 32541}, +{4277, 32488}, {4701, 32429}, +{5125, 32364}, {5548, 32295}, +{5971, 32219}, {6393, 32138}, +{6813, 32051}, {7231, 31960}, +{7650, 31863}, {8067, 31760}, +{8481, 31652}, {8895, 31539}, +{9306, 31419}, {9716, 31294}, +{10126, 31165}, {10532, 31030}, +{10937, 30889}, {11340, 30743}, +{11741, 30592}, {12141, 30436}, +{12540, 30274}, {12935, 30107}, +{13328, 29936}, {13718, 29758}, +{14107, 29577}, {14493, 29390}, +{14875, 29197}, {15257, 29000}, +{15635, 28797}, {16010, 28590}, +{16384, 28379}, {16753, 28162}, +{17119, 27940}, {17484, 27714}, +{17845, 27482}, {18205, 27246}, +{18560, 27006}, {18911, 26760}, +{19260, 26510}, {19606, 26257}, +{19947, 25997}, {20286, 25734}, +{20621, 25466}, {20952, 25194}, +{21281, 24918}, {21605, 24637}, +{21926, 24353}, {22242, 24063}, +{22555, 23770}, {22865, 23473}, +{23171, 23171}, {23472, 22866}, +{23769, 22557}, {24063, 22244}, +{24352, 21927}, {24636, 21606}, +{24917, 21282}, {25194, 20954}, +{25465, 20622}, {25733, 20288}, +{25997, 19949}, {26255, 19607}, +{26509, 19261}, 
{26760, 18914}, +{27004, 18561}, {27246, 18205}, +{27481, 17846}, {27713, 17485}, +{27940, 17122}, {28162, 16755}, +{28378, 16385}, {28590, 16012}, +{28797, 15636}, {28999, 15258}, +{29197, 14878}, {29389, 14494}, +{29576, 14108}, {29757, 13720}, +{29934, 13329}, {30107, 12937}, +{30274, 12540}, {30435, 12142}, +{30592, 11744}, {30743, 11342}, +{30889, 10939}, {31030, 10534}, +{31164, 10127}, {31294, 9718}, +{31418, 9307}, {31537, 8895}, +{31652, 8482}, {31759, 8067}, +{31862, 7650}, {31960, 7233}, +{32051, 6815}, {32138, 6393}, +{32219, 5973}, {32294, 5549}, +{32364, 5127}, {32429, 4703}, +{32487, 4278}, {32541, 3852}, +{32588, 3426}, {32630, 2999}, +{32667, 2572}, {32698, 2144}, +{32724, 1716}, {32742, 1287}, +{32757, 860}, {32766, 430}, +}; +#ifndef FFT_BITREV480 +#define FFT_BITREV480 +static const opus_int16 fft_bitrev480[480] = { +0, 120, 240, 360, 30, 150, 270, 390, 60, 180, 300, 420, 90, 210, 330, +450, 15, 135, 255, 375, 45, 165, 285, 405, 75, 195, 315, 435, 105, 225, +345, 465, 5, 125, 245, 365, 35, 155, 275, 395, 65, 185, 305, 425, 95, +215, 335, 455, 20, 140, 260, 380, 50, 170, 290, 410, 80, 200, 320, 440, +110, 230, 350, 470, 10, 130, 250, 370, 40, 160, 280, 400, 70, 190, 310, +430, 100, 220, 340, 460, 25, 145, 265, 385, 55, 175, 295, 415, 85, 205, +325, 445, 115, 235, 355, 475, 1, 121, 241, 361, 31, 151, 271, 391, 61, +181, 301, 421, 91, 211, 331, 451, 16, 136, 256, 376, 46, 166, 286, 406, +76, 196, 316, 436, 106, 226, 346, 466, 6, 126, 246, 366, 36, 156, 276, +396, 66, 186, 306, 426, 96, 216, 336, 456, 21, 141, 261, 381, 51, 171, +291, 411, 81, 201, 321, 441, 111, 231, 351, 471, 11, 131, 251, 371, 41, +161, 281, 401, 71, 191, 311, 431, 101, 221, 341, 461, 26, 146, 266, 386, +56, 176, 296, 416, 86, 206, 326, 446, 116, 236, 356, 476, 2, 122, 242, +362, 32, 152, 272, 392, 62, 182, 302, 422, 92, 212, 332, 452, 17, 137, +257, 377, 47, 167, 287, 407, 77, 197, 317, 437, 107, 227, 347, 467, 7, +127, 247, 367, 37, 157, 277, 397, 67, 187, 307, 427, 97, 217, 
337, 457, +22, 142, 262, 382, 52, 172, 292, 412, 82, 202, 322, 442, 112, 232, 352, +472, 12, 132, 252, 372, 42, 162, 282, 402, 72, 192, 312, 432, 102, 222, +342, 462, 27, 147, 267, 387, 57, 177, 297, 417, 87, 207, 327, 447, 117, +237, 357, 477, 3, 123, 243, 363, 33, 153, 273, 393, 63, 183, 303, 423, +93, 213, 333, 453, 18, 138, 258, 378, 48, 168, 288, 408, 78, 198, 318, +438, 108, 228, 348, 468, 8, 128, 248, 368, 38, 158, 278, 398, 68, 188, +308, 428, 98, 218, 338, 458, 23, 143, 263, 383, 53, 173, 293, 413, 83, +203, 323, 443, 113, 233, 353, 473, 13, 133, 253, 373, 43, 163, 283, 403, +73, 193, 313, 433, 103, 223, 343, 463, 28, 148, 268, 388, 58, 178, 298, +418, 88, 208, 328, 448, 118, 238, 358, 478, 4, 124, 244, 364, 34, 154, +274, 394, 64, 184, 304, 424, 94, 214, 334, 454, 19, 139, 259, 379, 49, +169, 289, 409, 79, 199, 319, 439, 109, 229, 349, 469, 9, 129, 249, 369, +39, 159, 279, 399, 69, 189, 309, 429, 99, 219, 339, 459, 24, 144, 264, +384, 54, 174, 294, 414, 84, 204, 324, 444, 114, 234, 354, 474, 14, 134, +254, 374, 44, 164, 284, 404, 74, 194, 314, 434, 104, 224, 344, 464, 29, +149, 269, 389, 59, 179, 299, 419, 89, 209, 329, 449, 119, 239, 359, 479, +}; +#endif + +#ifndef FFT_BITREV240 +#define FFT_BITREV240 +static const opus_int16 fft_bitrev240[240] = { +0, 60, 120, 180, 15, 75, 135, 195, 30, 90, 150, 210, 45, 105, 165, +225, 5, 65, 125, 185, 20, 80, 140, 200, 35, 95, 155, 215, 50, 110, +170, 230, 10, 70, 130, 190, 25, 85, 145, 205, 40, 100, 160, 220, 55, +115, 175, 235, 1, 61, 121, 181, 16, 76, 136, 196, 31, 91, 151, 211, +46, 106, 166, 226, 6, 66, 126, 186, 21, 81, 141, 201, 36, 96, 156, +216, 51, 111, 171, 231, 11, 71, 131, 191, 26, 86, 146, 206, 41, 101, +161, 221, 56, 116, 176, 236, 2, 62, 122, 182, 17, 77, 137, 197, 32, +92, 152, 212, 47, 107, 167, 227, 7, 67, 127, 187, 22, 82, 142, 202, +37, 97, 157, 217, 52, 112, 172, 232, 12, 72, 132, 192, 27, 87, 147, +207, 42, 102, 162, 222, 57, 117, 177, 237, 3, 63, 123, 183, 18, 78, +138, 198, 33, 93, 153, 213, 
48, 108, 168, 228, 8, 68, 128, 188, 23, +83, 143, 203, 38, 98, 158, 218, 53, 113, 173, 233, 13, 73, 133, 193, +28, 88, 148, 208, 43, 103, 163, 223, 58, 118, 178, 238, 4, 64, 124, +184, 19, 79, 139, 199, 34, 94, 154, 214, 49, 109, 169, 229, 9, 69, +129, 189, 24, 84, 144, 204, 39, 99, 159, 219, 54, 114, 174, 234, 14, +74, 134, 194, 29, 89, 149, 209, 44, 104, 164, 224, 59, 119, 179, 239, +}; +#endif + +#ifndef FFT_BITREV120 +#define FFT_BITREV120 +static const opus_int16 fft_bitrev120[120] = { +0, 30, 60, 90, 15, 45, 75, 105, 5, 35, 65, 95, 20, 50, 80, +110, 10, 40, 70, 100, 25, 55, 85, 115, 1, 31, 61, 91, 16, 46, +76, 106, 6, 36, 66, 96, 21, 51, 81, 111, 11, 41, 71, 101, 26, +56, 86, 116, 2, 32, 62, 92, 17, 47, 77, 107, 7, 37, 67, 97, +22, 52, 82, 112, 12, 42, 72, 102, 27, 57, 87, 117, 3, 33, 63, +93, 18, 48, 78, 108, 8, 38, 68, 98, 23, 53, 83, 113, 13, 43, +73, 103, 28, 58, 88, 118, 4, 34, 64, 94, 19, 49, 79, 109, 9, +39, 69, 99, 24, 54, 84, 114, 14, 44, 74, 104, 29, 59, 89, 119, +}; +#endif + +#ifndef FFT_BITREV60 +#define FFT_BITREV60 +static const opus_int16 fft_bitrev60[60] = { +0, 15, 30, 45, 5, 20, 35, 50, 10, 25, 40, 55, 1, 16, 31, +46, 6, 21, 36, 51, 11, 26, 41, 56, 2, 17, 32, 47, 7, 22, +37, 52, 12, 27, 42, 57, 3, 18, 33, 48, 8, 23, 38, 53, 13, +28, 43, 58, 4, 19, 34, 49, 9, 24, 39, 54, 14, 29, 44, 59, +}; +#endif + +#ifndef FFT_STATE48000_960_0 +#define FFT_STATE48000_960_0 +static const kiss_fft_state fft_state48000_960_0 = { +480, /* nfft */ +-1, /* shift */ +{4, 120, 4, 30, 2, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, }, /* factors */ +fft_bitrev480, /* bitrev */ +fft_twiddles48000_960, /* bitrev */ +}; +#endif + +#ifndef FFT_STATE48000_960_1 +#define FFT_STATE48000_960_1 +static const kiss_fft_state fft_state48000_960_1 = { +240, /* nfft */ +1, /* shift */ +{4, 60, 4, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */ +fft_bitrev240, /* bitrev */ +fft_twiddles48000_960, /* bitrev */ +}; +#endif + +#ifndef FFT_STATE48000_960_2 +#define 
FFT_STATE48000_960_2 +static const kiss_fft_state fft_state48000_960_2 = { +120, /* nfft */ +2, /* shift */ +{4, 30, 2, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */ +fft_bitrev120, /* bitrev */ +fft_twiddles48000_960, /* bitrev */ +}; +#endif + +#ifndef FFT_STATE48000_960_3 +#define FFT_STATE48000_960_3 +static const kiss_fft_state fft_state48000_960_3 = { +60, /* nfft */ +3, /* shift */ +{4, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */ +fft_bitrev60, /* bitrev */ +fft_twiddles48000_960, /* bitrev */ +}; +#endif + +#endif + +#ifndef MDCT_TWIDDLES960 +#define MDCT_TWIDDLES960 +static const opus_val16 mdct_twiddles960[481] = { +32767, 32767, 32767, 32767, 32766, +32763, 32762, 32759, 32757, 32753, +32751, 32747, 32743, 32738, 32733, +32729, 32724, 32717, 32711, 32705, +32698, 32690, 32683, 32676, 32667, +32658, 32650, 32640, 32631, 32620, +32610, 32599, 32588, 32577, 32566, +32554, 32541, 32528, 32515, 32502, +32487, 32474, 32459, 32444, 32429, +32413, 32397, 32381, 32364, 32348, +32331, 32313, 32294, 32277, 32257, +32239, 32219, 32200, 32180, 32159, +32138, 32118, 32096, 32074, 32051, +32029, 32006, 31984, 31960, 31936, +31912, 31888, 31863, 31837, 31812, +31786, 31760, 31734, 31707, 31679, +31652, 31624, 31596, 31567, 31539, +31508, 31479, 31450, 31419, 31388, +31357, 31326, 31294, 31262, 31230, +31198, 31164, 31131, 31097, 31063, +31030, 30994, 30959, 30924, 30889, +30853, 30816, 30779, 30743, 30705, +30668, 30629, 30592, 30553, 30515, +30475, 30435, 30396, 30356, 30315, +30274, 30233, 30191, 30149, 30107, +30065, 30022, 29979, 29936, 29891, +29847, 29803, 29758, 29713, 29668, +29622, 29577, 29529, 29483, 29436, +29390, 29341, 29293, 29246, 29197, +29148, 29098, 29050, 29000, 28949, +28899, 28848, 28797, 28746, 28694, +28642, 28590, 28537, 28485, 28432, +28378, 28324, 28271, 28217, 28162, +28106, 28051, 27995, 27940, 27884, +27827, 27770, 27713, 27657, 27598, +27540, 27481, 27423, 27365, 27305, +27246, 27187, 27126, 27066, 27006, 
+26945, 26883, 26822, 26760, 26698, +26636, 26574, 26510, 26448, 26383, +26320, 26257, 26191, 26127, 26062, +25997, 25931, 25866, 25800, 25734, +25667, 25601, 25533, 25466, 25398, +25330, 25262, 25194, 25125, 25056, +24987, 24917, 24848, 24778, 24707, +24636, 24566, 24495, 24424, 24352, +24280, 24208, 24135, 24063, 23990, +23917, 23842, 23769, 23695, 23622, +23546, 23472, 23398, 23322, 23246, +23171, 23095, 23018, 22942, 22866, +22788, 22711, 22634, 22557, 22478, +22400, 22322, 22244, 22165, 22085, +22006, 21927, 21846, 21766, 21687, +21606, 21524, 21443, 21363, 21282, +21199, 21118, 21035, 20954, 20870, +20788, 20705, 20621, 20538, 20455, +20371, 20286, 20202, 20118, 20034, +19947, 19863, 19777, 19692, 19606, +19520, 19434, 19347, 19260, 19174, +19088, 18999, 18911, 18825, 18737, +18648, 18560, 18472, 18384, 18294, +18205, 18116, 18025, 17936, 17846, +17757, 17666, 17576, 17485, 17395, +17303, 17212, 17122, 17030, 16937, +16846, 16755, 16662, 16569, 16477, +16385, 16291, 16198, 16105, 16012, +15917, 15824, 15730, 15636, 15541, +15447, 15352, 15257, 15162, 15067, +14973, 14875, 14781, 14685, 14589, +14493, 14396, 14300, 14204, 14107, +14010, 13914, 13815, 13718, 13621, +13524, 13425, 13328, 13230, 13133, +13033, 12935, 12836, 12738, 12638, +12540, 12441, 12341, 12241, 12142, +12044, 11943, 11843, 11744, 11643, +11542, 11442, 11342, 11241, 11139, +11039, 10939, 10836, 10736, 10635, +10534, 10431, 10330, 10228, 10127, +10024, 9921, 9820, 9718, 9614, +9512, 9410, 9306, 9204, 9101, +8998, 8895, 8791, 8689, 8585, +8481, 8377, 8274, 8171, 8067, +7962, 7858, 7753, 7650, 7545, +7441, 7336, 7231, 7129, 7023, +6917, 6813, 6709, 6604, 6498, +6393, 6288, 6182, 6077, 5973, +5867, 5760, 5656, 5549, 5445, +5339, 5232, 5127, 5022, 4914, +4809, 4703, 4596, 4490, 4384, +4278, 4171, 4065, 3958, 3852, +3745, 3640, 3532, 3426, 3318, +3212, 3106, 2998, 2891, 2786, +2679, 2570, 2465, 2358, 2251, +2143, 2037, 1929, 1823, 1715, +1609, 1501, 1393, 1287, 1180, +1073, 964, 858, 751, 644, 
+535, 429, 322, 214, 107, +0, }; +#endif + +static const CELTMode mode48000_960_120 = { +48000, /* Fs */ +120, /* overlap */ +21, /* nbEBands */ +21, /* effEBands */ +{27853, 0, 4096, 8192, }, /* preemph */ +eband5ms, /* eBands */ +3, /* maxLM */ +8, /* nbShortMdcts */ +120, /* shortMdctSize */ +11, /* nbAllocVectors */ +band_allocation, /* allocVectors */ +logN400, /* logN */ +window120, /* window */ +{1920, 3, {&fft_state48000_960_0, &fft_state48000_960_1, &fft_state48000_960_2, &fft_state48000_960_3, }, mdct_twiddles960}, /* mdct */ +{392, cache_index50, cache_bits50, cache_caps50}, /* cache */ +}; + +/* List of all the available modes */ +#define TOTAL_MODES 1 +static const CELTMode * const static_mode_list[TOTAL_MODES] = { +&mode48000_960_120, +}; diff --git a/drivers/opus/celt/static_modes_float.h b/drivers/opus/celt/static_modes_float.h new file mode 100644 index 0000000000..fe6bb4c8a3 --- /dev/null +++ b/drivers/opus/celt/static_modes_float.h @@ -0,0 +1,599 @@ +/* The contents of this file was automatically generated by dump_modes.c + with arguments: 48000 960 + It contains static definitions for some pre-defined modes. 
*/ +#include "opus_modes.h" +#include "rate.h" + +#ifndef DEF_WINDOW120 +#define DEF_WINDOW120 +static const opus_val16 window120[120] = { +6.7286966e-05f, 0.00060551348f, 0.0016815970f, 0.0032947962f, 0.0054439943f, +0.0081276923f, 0.011344001f, 0.015090633f, 0.019364886f, 0.024163635f, +0.029483315f, 0.035319905f, 0.041668911f, 0.048525347f, 0.055883718f, +0.063737999f, 0.072081616f, 0.080907428f, 0.090207705f, 0.099974111f, +0.11019769f, 0.12086883f, 0.13197729f, 0.14351214f, 0.15546177f, +0.16781389f, 0.18055550f, 0.19367290f, 0.20715171f, 0.22097682f, +0.23513243f, 0.24960208f, 0.26436860f, 0.27941419f, 0.29472040f, +0.31026818f, 0.32603788f, 0.34200931f, 0.35816177f, 0.37447407f, +0.39092462f, 0.40749142f, 0.42415215f, 0.44088423f, 0.45766484f, +0.47447104f, 0.49127978f, 0.50806798f, 0.52481261f, 0.54149077f, +0.55807973f, 0.57455701f, 0.59090049f, 0.60708841f, 0.62309951f, +0.63891306f, 0.65450896f, 0.66986776f, 0.68497077f, 0.69980010f, +0.71433873f, 0.72857055f, 0.74248043f, 0.75605424f, 0.76927895f, +0.78214257f, 0.79463430f, 0.80674445f, 0.81846456f, 0.82978733f, +0.84070669f, 0.85121779f, 0.86131698f, 0.87100183f, 0.88027111f, +0.88912479f, 0.89756398f, 0.90559094f, 0.91320904f, 0.92042270f, +0.92723738f, 0.93365955f, 0.93969656f, 0.94535671f, 0.95064907f, +0.95558353f, 0.96017067f, 0.96442171f, 0.96834849f, 0.97196334f, +0.97527906f, 0.97830883f, 0.98106616f, 0.98356480f, 0.98581869f, +0.98784191f, 0.98964856f, 0.99125274f, 0.99266849f, 0.99390969f, +0.99499004f, 0.99592297f, 0.99672162f, 0.99739874f, 0.99796667f, +0.99843728f, 0.99882195f, 0.99913147f, 0.99937606f, 0.99956527f, +0.99970802f, 0.99981248f, 0.99988613f, 0.99993565f, 0.99996697f, +0.99998518f, 0.99999457f, 0.99999859f, 0.99999982f, 1.0000000f, +}; +#endif + +#ifndef DEF_LOGN400 +#define DEF_LOGN400 +static const opus_int16 logN400[21] = { +0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 16, 16, 16, 21, 21, 24, 29, 34, 36, }; +#endif + +#ifndef DEF_PULSE_CACHE50 +#define DEF_PULSE_CACHE50 +static 
const opus_int16 cache_index50[105] = { +-1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 41, 41, 41, +82, 82, 123, 164, 200, 222, 0, 0, 0, 0, 0, 0, 0, 0, 41, +41, 41, 41, 123, 123, 123, 164, 164, 240, 266, 283, 295, 41, 41, 41, +41, 41, 41, 41, 41, 123, 123, 123, 123, 240, 240, 240, 266, 266, 305, +318, 328, 336, 123, 123, 123, 123, 123, 123, 123, 123, 240, 240, 240, 240, +305, 305, 305, 318, 318, 343, 351, 358, 364, 240, 240, 240, 240, 240, 240, +240, 240, 305, 305, 305, 305, 343, 343, 343, 351, 351, 370, 376, 382, 387, +}; +static const unsigned char cache_bits50[392] = { +40, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, +7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, +7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 40, 15, 23, 28, +31, 34, 36, 38, 39, 41, 42, 43, 44, 45, 46, 47, 47, 49, 50, +51, 52, 53, 54, 55, 55, 57, 58, 59, 60, 61, 62, 63, 63, 65, +66, 67, 68, 69, 70, 71, 71, 40, 20, 33, 41, 48, 53, 57, 61, +64, 66, 69, 71, 73, 75, 76, 78, 80, 82, 85, 87, 89, 91, 92, +94, 96, 98, 101, 103, 105, 107, 108, 110, 112, 114, 117, 119, 121, 123, +124, 126, 128, 40, 23, 39, 51, 60, 67, 73, 79, 83, 87, 91, 94, +97, 100, 102, 105, 107, 111, 115, 118, 121, 124, 126, 129, 131, 135, 139, +142, 145, 148, 150, 153, 155, 159, 163, 166, 169, 172, 174, 177, 179, 35, +28, 49, 65, 78, 89, 99, 107, 114, 120, 126, 132, 136, 141, 145, 149, +153, 159, 165, 171, 176, 180, 185, 189, 192, 199, 205, 211, 216, 220, 225, +229, 232, 239, 245, 251, 21, 33, 58, 79, 97, 112, 125, 137, 148, 157, +166, 174, 182, 189, 195, 201, 207, 217, 227, 235, 243, 251, 17, 35, 63, +86, 106, 123, 139, 152, 165, 177, 187, 197, 206, 214, 222, 230, 237, 250, +25, 31, 55, 75, 91, 105, 117, 128, 138, 146, 154, 161, 168, 174, 180, +185, 190, 200, 208, 215, 222, 229, 235, 240, 245, 255, 16, 36, 65, 89, +110, 128, 144, 159, 173, 185, 196, 207, 217, 226, 234, 242, 250, 11, 41, +74, 103, 128, 151, 172, 191, 209, 225, 241, 255, 9, 43, 79, 110, 138, +163, 186, 207, 227, 246, 12, 39, 71, 99, 123, 144, 164, 182, 198, 214, +228, 241, 253, 9, 44, 
81, 113, 142, 168, 192, 214, 235, 255, 7, 49, +90, 127, 160, 191, 220, 247, 6, 51, 95, 134, 170, 203, 234, 7, 47, +87, 123, 155, 184, 212, 237, 6, 52, 97, 137, 174, 208, 240, 5, 57, +106, 151, 192, 231, 5, 59, 111, 158, 202, 243, 5, 55, 103, 147, 187, +224, 5, 60, 113, 161, 206, 248, 4, 65, 122, 175, 224, 4, 67, 127, +182, 234, }; +static const unsigned char cache_caps50[168] = { +224, 224, 224, 224, 224, 224, 224, 224, 160, 160, 160, 160, 185, 185, 185, +178, 178, 168, 134, 61, 37, 224, 224, 224, 224, 224, 224, 224, 224, 240, +240, 240, 240, 207, 207, 207, 198, 198, 183, 144, 66, 40, 160, 160, 160, +160, 160, 160, 160, 160, 185, 185, 185, 185, 193, 193, 193, 183, 183, 172, +138, 64, 38, 240, 240, 240, 240, 240, 240, 240, 240, 207, 207, 207, 207, +204, 204, 204, 193, 193, 180, 143, 66, 40, 185, 185, 185, 185, 185, 185, +185, 185, 193, 193, 193, 193, 193, 193, 193, 183, 183, 172, 138, 65, 39, +207, 207, 207, 207, 207, 207, 207, 207, 204, 204, 204, 204, 201, 201, 201, +188, 188, 176, 141, 66, 40, 193, 193, 193, 193, 193, 193, 193, 193, 193, +193, 193, 193, 194, 194, 194, 184, 184, 173, 139, 65, 39, 204, 204, 204, +204, 204, 204, 204, 204, 201, 201, 201, 201, 198, 198, 198, 187, 187, 175, +140, 66, 40, }; +#endif + +#ifndef FFT_TWIDDLES48000_960 +#define FFT_TWIDDLES48000_960 +static const kiss_twiddle_cpx fft_twiddles48000_960[480] = { +{1.0000000f, -0.0000000f}, {0.99991433f, -0.013089596f}, +{0.99965732f, -0.026176948f}, {0.99922904f, -0.039259816f}, +{0.99862953f, -0.052335956f}, {0.99785892f, -0.065403129f}, +{0.99691733f, -0.078459096f}, {0.99580493f, -0.091501619f}, +{0.99452190f, -0.10452846f}, {0.99306846f, -0.11753740f}, +{0.99144486f, -0.13052619f}, {0.98965139f, -0.14349262f}, +{0.98768834f, -0.15643447f}, {0.98555606f, -0.16934950f}, +{0.98325491f, -0.18223553f}, {0.98078528f, -0.19509032f}, +{0.97814760f, -0.20791169f}, {0.97534232f, -0.22069744f}, +{0.97236992f, -0.23344536f}, {0.96923091f, -0.24615329f}, +{0.96592583f, -0.25881905f}, {0.96245524f, 
-0.27144045f}, +{0.95881973f, -0.28401534f}, {0.95501994f, -0.29654157f}, +{0.95105652f, -0.30901699f}, {0.94693013f, -0.32143947f}, +{0.94264149f, -0.33380686f}, {0.93819134f, -0.34611706f}, +{0.93358043f, -0.35836795f}, {0.92880955f, -0.37055744f}, +{0.92387953f, -0.38268343f}, {0.91879121f, -0.39474386f}, +{0.91354546f, -0.40673664f}, {0.90814317f, -0.41865974f}, +{0.90258528f, -0.43051110f}, {0.89687274f, -0.44228869f}, +{0.89100652f, -0.45399050f}, {0.88498764f, -0.46561452f}, +{0.87881711f, -0.47715876f}, {0.87249601f, -0.48862124f}, +{0.86602540f, -0.50000000f}, {0.85940641f, -0.51129309f}, +{0.85264016f, -0.52249856f}, {0.84572782f, -0.53361452f}, +{0.83867057f, -0.54463904f}, {0.83146961f, -0.55557023f}, +{0.82412619f, -0.56640624f}, {0.81664156f, -0.57714519f}, +{0.80901699f, -0.58778525f}, {0.80125381f, -0.59832460f}, +{0.79335334f, -0.60876143f}, {0.78531693f, -0.61909395f}, +{0.77714596f, -0.62932039f}, {0.76884183f, -0.63943900f}, +{0.76040597f, -0.64944805f}, {0.75183981f, -0.65934582f}, +{0.74314483f, -0.66913061f}, {0.73432251f, -0.67880075f}, +{0.72537437f, -0.68835458f}, {0.71630194f, -0.69779046f}, +{0.70710678f, -0.70710678f}, {0.69779046f, -0.71630194f}, +{0.68835458f, -0.72537437f}, {0.67880075f, -0.73432251f}, +{0.66913061f, -0.74314483f}, {0.65934582f, -0.75183981f}, +{0.64944805f, -0.76040597f}, {0.63943900f, -0.76884183f}, +{0.62932039f, -0.77714596f}, {0.61909395f, -0.78531693f}, +{0.60876143f, -0.79335334f}, {0.59832460f, -0.80125381f}, +{0.58778525f, -0.80901699f}, {0.57714519f, -0.81664156f}, +{0.56640624f, -0.82412619f}, {0.55557023f, -0.83146961f}, +{0.54463904f, -0.83867057f}, {0.53361452f, -0.84572782f}, +{0.52249856f, -0.85264016f}, {0.51129309f, -0.85940641f}, +{0.50000000f, -0.86602540f}, {0.48862124f, -0.87249601f}, +{0.47715876f, -0.87881711f}, {0.46561452f, -0.88498764f}, +{0.45399050f, -0.89100652f}, {0.44228869f, -0.89687274f}, +{0.43051110f, -0.90258528f}, {0.41865974f, -0.90814317f}, +{0.40673664f, -0.91354546f}, 
{0.39474386f, -0.91879121f}, +{0.38268343f, -0.92387953f}, {0.37055744f, -0.92880955f}, +{0.35836795f, -0.93358043f}, {0.34611706f, -0.93819134f}, +{0.33380686f, -0.94264149f}, {0.32143947f, -0.94693013f}, +{0.30901699f, -0.95105652f}, {0.29654157f, -0.95501994f}, +{0.28401534f, -0.95881973f}, {0.27144045f, -0.96245524f}, +{0.25881905f, -0.96592583f}, {0.24615329f, -0.96923091f}, +{0.23344536f, -0.97236992f}, {0.22069744f, -0.97534232f}, +{0.20791169f, -0.97814760f}, {0.19509032f, -0.98078528f}, +{0.18223553f, -0.98325491f}, {0.16934950f, -0.98555606f}, +{0.15643447f, -0.98768834f}, {0.14349262f, -0.98965139f}, +{0.13052619f, -0.99144486f}, {0.11753740f, -0.99306846f}, +{0.10452846f, -0.99452190f}, {0.091501619f, -0.99580493f}, +{0.078459096f, -0.99691733f}, {0.065403129f, -0.99785892f}, +{0.052335956f, -0.99862953f}, {0.039259816f, -0.99922904f}, +{0.026176948f, -0.99965732f}, {0.013089596f, -0.99991433f}, +{6.1230318e-17f, -1.0000000f}, {-0.013089596f, -0.99991433f}, +{-0.026176948f, -0.99965732f}, {-0.039259816f, -0.99922904f}, +{-0.052335956f, -0.99862953f}, {-0.065403129f, -0.99785892f}, +{-0.078459096f, -0.99691733f}, {-0.091501619f, -0.99580493f}, +{-0.10452846f, -0.99452190f}, {-0.11753740f, -0.99306846f}, +{-0.13052619f, -0.99144486f}, {-0.14349262f, -0.98965139f}, +{-0.15643447f, -0.98768834f}, {-0.16934950f, -0.98555606f}, +{-0.18223553f, -0.98325491f}, {-0.19509032f, -0.98078528f}, +{-0.20791169f, -0.97814760f}, {-0.22069744f, -0.97534232f}, +{-0.23344536f, -0.97236992f}, {-0.24615329f, -0.96923091f}, +{-0.25881905f, -0.96592583f}, {-0.27144045f, -0.96245524f}, +{-0.28401534f, -0.95881973f}, {-0.29654157f, -0.95501994f}, +{-0.30901699f, -0.95105652f}, {-0.32143947f, -0.94693013f}, +{-0.33380686f, -0.94264149f}, {-0.34611706f, -0.93819134f}, +{-0.35836795f, -0.93358043f}, {-0.37055744f, -0.92880955f}, +{-0.38268343f, -0.92387953f}, {-0.39474386f, -0.91879121f}, +{-0.40673664f, -0.91354546f}, {-0.41865974f, -0.90814317f}, +{-0.43051110f, -0.90258528f}, 
{-0.44228869f, -0.89687274f}, +{-0.45399050f, -0.89100652f}, {-0.46561452f, -0.88498764f}, +{-0.47715876f, -0.87881711f}, {-0.48862124f, -0.87249601f}, +{-0.50000000f, -0.86602540f}, {-0.51129309f, -0.85940641f}, +{-0.52249856f, -0.85264016f}, {-0.53361452f, -0.84572782f}, +{-0.54463904f, -0.83867057f}, {-0.55557023f, -0.83146961f}, +{-0.56640624f, -0.82412619f}, {-0.57714519f, -0.81664156f}, +{-0.58778525f, -0.80901699f}, {-0.59832460f, -0.80125381f}, +{-0.60876143f, -0.79335334f}, {-0.61909395f, -0.78531693f}, +{-0.62932039f, -0.77714596f}, {-0.63943900f, -0.76884183f}, +{-0.64944805f, -0.76040597f}, {-0.65934582f, -0.75183981f}, +{-0.66913061f, -0.74314483f}, {-0.67880075f, -0.73432251f}, +{-0.68835458f, -0.72537437f}, {-0.69779046f, -0.71630194f}, +{-0.70710678f, -0.70710678f}, {-0.71630194f, -0.69779046f}, +{-0.72537437f, -0.68835458f}, {-0.73432251f, -0.67880075f}, +{-0.74314483f, -0.66913061f}, {-0.75183981f, -0.65934582f}, +{-0.76040597f, -0.64944805f}, {-0.76884183f, -0.63943900f}, +{-0.77714596f, -0.62932039f}, {-0.78531693f, -0.61909395f}, +{-0.79335334f, -0.60876143f}, {-0.80125381f, -0.59832460f}, +{-0.80901699f, -0.58778525f}, {-0.81664156f, -0.57714519f}, +{-0.82412619f, -0.56640624f}, {-0.83146961f, -0.55557023f}, +{-0.83867057f, -0.54463904f}, {-0.84572782f, -0.53361452f}, +{-0.85264016f, -0.52249856f}, {-0.85940641f, -0.51129309f}, +{-0.86602540f, -0.50000000f}, {-0.87249601f, -0.48862124f}, +{-0.87881711f, -0.47715876f}, {-0.88498764f, -0.46561452f}, +{-0.89100652f, -0.45399050f}, {-0.89687274f, -0.44228869f}, +{-0.90258528f, -0.43051110f}, {-0.90814317f, -0.41865974f}, +{-0.91354546f, -0.40673664f}, {-0.91879121f, -0.39474386f}, +{-0.92387953f, -0.38268343f}, {-0.92880955f, -0.37055744f}, +{-0.93358043f, -0.35836795f}, {-0.93819134f, -0.34611706f}, +{-0.94264149f, -0.33380686f}, {-0.94693013f, -0.32143947f}, +{-0.95105652f, -0.30901699f}, {-0.95501994f, -0.29654157f}, +{-0.95881973f, -0.28401534f}, {-0.96245524f, -0.27144045f}, +{-0.96592583f, 
-0.25881905f}, {-0.96923091f, -0.24615329f}, +{-0.97236992f, -0.23344536f}, {-0.97534232f, -0.22069744f}, +{-0.97814760f, -0.20791169f}, {-0.98078528f, -0.19509032f}, +{-0.98325491f, -0.18223553f}, {-0.98555606f, -0.16934950f}, +{-0.98768834f, -0.15643447f}, {-0.98965139f, -0.14349262f}, +{-0.99144486f, -0.13052619f}, {-0.99306846f, -0.11753740f}, +{-0.99452190f, -0.10452846f}, {-0.99580493f, -0.091501619f}, +{-0.99691733f, -0.078459096f}, {-0.99785892f, -0.065403129f}, +{-0.99862953f, -0.052335956f}, {-0.99922904f, -0.039259816f}, +{-0.99965732f, -0.026176948f}, {-0.99991433f, -0.013089596f}, +{-1.0000000f, -1.2246064e-16f}, {-0.99991433f, 0.013089596f}, +{-0.99965732f, 0.026176948f}, {-0.99922904f, 0.039259816f}, +{-0.99862953f, 0.052335956f}, {-0.99785892f, 0.065403129f}, +{-0.99691733f, 0.078459096f}, {-0.99580493f, 0.091501619f}, +{-0.99452190f, 0.10452846f}, {-0.99306846f, 0.11753740f}, +{-0.99144486f, 0.13052619f}, {-0.98965139f, 0.14349262f}, +{-0.98768834f, 0.15643447f}, {-0.98555606f, 0.16934950f}, +{-0.98325491f, 0.18223553f}, {-0.98078528f, 0.19509032f}, +{-0.97814760f, 0.20791169f}, {-0.97534232f, 0.22069744f}, +{-0.97236992f, 0.23344536f}, {-0.96923091f, 0.24615329f}, +{-0.96592583f, 0.25881905f}, {-0.96245524f, 0.27144045f}, +{-0.95881973f, 0.28401534f}, {-0.95501994f, 0.29654157f}, +{-0.95105652f, 0.30901699f}, {-0.94693013f, 0.32143947f}, +{-0.94264149f, 0.33380686f}, {-0.93819134f, 0.34611706f}, +{-0.93358043f, 0.35836795f}, {-0.92880955f, 0.37055744f}, +{-0.92387953f, 0.38268343f}, {-0.91879121f, 0.39474386f}, +{-0.91354546f, 0.40673664f}, {-0.90814317f, 0.41865974f}, +{-0.90258528f, 0.43051110f}, {-0.89687274f, 0.44228869f}, +{-0.89100652f, 0.45399050f}, {-0.88498764f, 0.46561452f}, +{-0.87881711f, 0.47715876f}, {-0.87249601f, 0.48862124f}, +{-0.86602540f, 0.50000000f}, {-0.85940641f, 0.51129309f}, +{-0.85264016f, 0.52249856f}, {-0.84572782f, 0.53361452f}, +{-0.83867057f, 0.54463904f}, {-0.83146961f, 0.55557023f}, +{-0.82412619f, 0.56640624f}, 
{-0.81664156f, 0.57714519f}, +{-0.80901699f, 0.58778525f}, {-0.80125381f, 0.59832460f}, +{-0.79335334f, 0.60876143f}, {-0.78531693f, 0.61909395f}, +{-0.77714596f, 0.62932039f}, {-0.76884183f, 0.63943900f}, +{-0.76040597f, 0.64944805f}, {-0.75183981f, 0.65934582f}, +{-0.74314483f, 0.66913061f}, {-0.73432251f, 0.67880075f}, +{-0.72537437f, 0.68835458f}, {-0.71630194f, 0.69779046f}, +{-0.70710678f, 0.70710678f}, {-0.69779046f, 0.71630194f}, +{-0.68835458f, 0.72537437f}, {-0.67880075f, 0.73432251f}, +{-0.66913061f, 0.74314483f}, {-0.65934582f, 0.75183981f}, +{-0.64944805f, 0.76040597f}, {-0.63943900f, 0.76884183f}, +{-0.62932039f, 0.77714596f}, {-0.61909395f, 0.78531693f}, +{-0.60876143f, 0.79335334f}, {-0.59832460f, 0.80125381f}, +{-0.58778525f, 0.80901699f}, {-0.57714519f, 0.81664156f}, +{-0.56640624f, 0.82412619f}, {-0.55557023f, 0.83146961f}, +{-0.54463904f, 0.83867057f}, {-0.53361452f, 0.84572782f}, +{-0.52249856f, 0.85264016f}, {-0.51129309f, 0.85940641f}, +{-0.50000000f, 0.86602540f}, {-0.48862124f, 0.87249601f}, +{-0.47715876f, 0.87881711f}, {-0.46561452f, 0.88498764f}, +{-0.45399050f, 0.89100652f}, {-0.44228869f, 0.89687274f}, +{-0.43051110f, 0.90258528f}, {-0.41865974f, 0.90814317f}, +{-0.40673664f, 0.91354546f}, {-0.39474386f, 0.91879121f}, +{-0.38268343f, 0.92387953f}, {-0.37055744f, 0.92880955f}, +{-0.35836795f, 0.93358043f}, {-0.34611706f, 0.93819134f}, +{-0.33380686f, 0.94264149f}, {-0.32143947f, 0.94693013f}, +{-0.30901699f, 0.95105652f}, {-0.29654157f, 0.95501994f}, +{-0.28401534f, 0.95881973f}, {-0.27144045f, 0.96245524f}, +{-0.25881905f, 0.96592583f}, {-0.24615329f, 0.96923091f}, +{-0.23344536f, 0.97236992f}, {-0.22069744f, 0.97534232f}, +{-0.20791169f, 0.97814760f}, {-0.19509032f, 0.98078528f}, +{-0.18223553f, 0.98325491f}, {-0.16934950f, 0.98555606f}, +{-0.15643447f, 0.98768834f}, {-0.14349262f, 0.98965139f}, +{-0.13052619f, 0.99144486f}, {-0.11753740f, 0.99306846f}, +{-0.10452846f, 0.99452190f}, {-0.091501619f, 0.99580493f}, +{-0.078459096f, 
0.99691733f}, {-0.065403129f, 0.99785892f}, +{-0.052335956f, 0.99862953f}, {-0.039259816f, 0.99922904f}, +{-0.026176948f, 0.99965732f}, {-0.013089596f, 0.99991433f}, +{-1.8369095e-16f, 1.0000000f}, {0.013089596f, 0.99991433f}, +{0.026176948f, 0.99965732f}, {0.039259816f, 0.99922904f}, +{0.052335956f, 0.99862953f}, {0.065403129f, 0.99785892f}, +{0.078459096f, 0.99691733f}, {0.091501619f, 0.99580493f}, +{0.10452846f, 0.99452190f}, {0.11753740f, 0.99306846f}, +{0.13052619f, 0.99144486f}, {0.14349262f, 0.98965139f}, +{0.15643447f, 0.98768834f}, {0.16934950f, 0.98555606f}, +{0.18223553f, 0.98325491f}, {0.19509032f, 0.98078528f}, +{0.20791169f, 0.97814760f}, {0.22069744f, 0.97534232f}, +{0.23344536f, 0.97236992f}, {0.24615329f, 0.96923091f}, +{0.25881905f, 0.96592583f}, {0.27144045f, 0.96245524f}, +{0.28401534f, 0.95881973f}, {0.29654157f, 0.95501994f}, +{0.30901699f, 0.95105652f}, {0.32143947f, 0.94693013f}, +{0.33380686f, 0.94264149f}, {0.34611706f, 0.93819134f}, +{0.35836795f, 0.93358043f}, {0.37055744f, 0.92880955f}, +{0.38268343f, 0.92387953f}, {0.39474386f, 0.91879121f}, +{0.40673664f, 0.91354546f}, {0.41865974f, 0.90814317f}, +{0.43051110f, 0.90258528f}, {0.44228869f, 0.89687274f}, +{0.45399050f, 0.89100652f}, {0.46561452f, 0.88498764f}, +{0.47715876f, 0.87881711f}, {0.48862124f, 0.87249601f}, +{0.50000000f, 0.86602540f}, {0.51129309f, 0.85940641f}, +{0.52249856f, 0.85264016f}, {0.53361452f, 0.84572782f}, +{0.54463904f, 0.83867057f}, {0.55557023f, 0.83146961f}, +{0.56640624f, 0.82412619f}, {0.57714519f, 0.81664156f}, +{0.58778525f, 0.80901699f}, {0.59832460f, 0.80125381f}, +{0.60876143f, 0.79335334f}, {0.61909395f, 0.78531693f}, +{0.62932039f, 0.77714596f}, {0.63943900f, 0.76884183f}, +{0.64944805f, 0.76040597f}, {0.65934582f, 0.75183981f}, +{0.66913061f, 0.74314483f}, {0.67880075f, 0.73432251f}, +{0.68835458f, 0.72537437f}, {0.69779046f, 0.71630194f}, +{0.70710678f, 0.70710678f}, {0.71630194f, 0.69779046f}, +{0.72537437f, 0.68835458f}, {0.73432251f, 0.67880075f}, 
+{0.74314483f, 0.66913061f}, {0.75183981f, 0.65934582f}, +{0.76040597f, 0.64944805f}, {0.76884183f, 0.63943900f}, +{0.77714596f, 0.62932039f}, {0.78531693f, 0.61909395f}, +{0.79335334f, 0.60876143f}, {0.80125381f, 0.59832460f}, +{0.80901699f, 0.58778525f}, {0.81664156f, 0.57714519f}, +{0.82412619f, 0.56640624f}, {0.83146961f, 0.55557023f}, +{0.83867057f, 0.54463904f}, {0.84572782f, 0.53361452f}, +{0.85264016f, 0.52249856f}, {0.85940641f, 0.51129309f}, +{0.86602540f, 0.50000000f}, {0.87249601f, 0.48862124f}, +{0.87881711f, 0.47715876f}, {0.88498764f, 0.46561452f}, +{0.89100652f, 0.45399050f}, {0.89687274f, 0.44228869f}, +{0.90258528f, 0.43051110f}, {0.90814317f, 0.41865974f}, +{0.91354546f, 0.40673664f}, {0.91879121f, 0.39474386f}, +{0.92387953f, 0.38268343f}, {0.92880955f, 0.37055744f}, +{0.93358043f, 0.35836795f}, {0.93819134f, 0.34611706f}, +{0.94264149f, 0.33380686f}, {0.94693013f, 0.32143947f}, +{0.95105652f, 0.30901699f}, {0.95501994f, 0.29654157f}, +{0.95881973f, 0.28401534f}, {0.96245524f, 0.27144045f}, +{0.96592583f, 0.25881905f}, {0.96923091f, 0.24615329f}, +{0.97236992f, 0.23344536f}, {0.97534232f, 0.22069744f}, +{0.97814760f, 0.20791169f}, {0.98078528f, 0.19509032f}, +{0.98325491f, 0.18223553f}, {0.98555606f, 0.16934950f}, +{0.98768834f, 0.15643447f}, {0.98965139f, 0.14349262f}, +{0.99144486f, 0.13052619f}, {0.99306846f, 0.11753740f}, +{0.99452190f, 0.10452846f}, {0.99580493f, 0.091501619f}, +{0.99691733f, 0.078459096f}, {0.99785892f, 0.065403129f}, +{0.99862953f, 0.052335956f}, {0.99922904f, 0.039259816f}, +{0.99965732f, 0.026176948f}, {0.99991433f, 0.013089596f}, +}; +#ifndef FFT_BITREV480 +#define FFT_BITREV480 +static const opus_int16 fft_bitrev480[480] = { +0, 120, 240, 360, 30, 150, 270, 390, 60, 180, 300, 420, 90, 210, 330, +450, 15, 135, 255, 375, 45, 165, 285, 405, 75, 195, 315, 435, 105, 225, +345, 465, 5, 125, 245, 365, 35, 155, 275, 395, 65, 185, 305, 425, 95, +215, 335, 455, 20, 140, 260, 380, 50, 170, 290, 410, 80, 200, 320, 440, +110, 230, 
350, 470, 10, 130, 250, 370, 40, 160, 280, 400, 70, 190, 310, +430, 100, 220, 340, 460, 25, 145, 265, 385, 55, 175, 295, 415, 85, 205, +325, 445, 115, 235, 355, 475, 1, 121, 241, 361, 31, 151, 271, 391, 61, +181, 301, 421, 91, 211, 331, 451, 16, 136, 256, 376, 46, 166, 286, 406, +76, 196, 316, 436, 106, 226, 346, 466, 6, 126, 246, 366, 36, 156, 276, +396, 66, 186, 306, 426, 96, 216, 336, 456, 21, 141, 261, 381, 51, 171, +291, 411, 81, 201, 321, 441, 111, 231, 351, 471, 11, 131, 251, 371, 41, +161, 281, 401, 71, 191, 311, 431, 101, 221, 341, 461, 26, 146, 266, 386, +56, 176, 296, 416, 86, 206, 326, 446, 116, 236, 356, 476, 2, 122, 242, +362, 32, 152, 272, 392, 62, 182, 302, 422, 92, 212, 332, 452, 17, 137, +257, 377, 47, 167, 287, 407, 77, 197, 317, 437, 107, 227, 347, 467, 7, +127, 247, 367, 37, 157, 277, 397, 67, 187, 307, 427, 97, 217, 337, 457, +22, 142, 262, 382, 52, 172, 292, 412, 82, 202, 322, 442, 112, 232, 352, +472, 12, 132, 252, 372, 42, 162, 282, 402, 72, 192, 312, 432, 102, 222, +342, 462, 27, 147, 267, 387, 57, 177, 297, 417, 87, 207, 327, 447, 117, +237, 357, 477, 3, 123, 243, 363, 33, 153, 273, 393, 63, 183, 303, 423, +93, 213, 333, 453, 18, 138, 258, 378, 48, 168, 288, 408, 78, 198, 318, +438, 108, 228, 348, 468, 8, 128, 248, 368, 38, 158, 278, 398, 68, 188, +308, 428, 98, 218, 338, 458, 23, 143, 263, 383, 53, 173, 293, 413, 83, +203, 323, 443, 113, 233, 353, 473, 13, 133, 253, 373, 43, 163, 283, 403, +73, 193, 313, 433, 103, 223, 343, 463, 28, 148, 268, 388, 58, 178, 298, +418, 88, 208, 328, 448, 118, 238, 358, 478, 4, 124, 244, 364, 34, 154, +274, 394, 64, 184, 304, 424, 94, 214, 334, 454, 19, 139, 259, 379, 49, +169, 289, 409, 79, 199, 319, 439, 109, 229, 349, 469, 9, 129, 249, 369, +39, 159, 279, 399, 69, 189, 309, 429, 99, 219, 339, 459, 24, 144, 264, +384, 54, 174, 294, 414, 84, 204, 324, 444, 114, 234, 354, 474, 14, 134, +254, 374, 44, 164, 284, 404, 74, 194, 314, 434, 104, 224, 344, 464, 29, +149, 269, 389, 59, 179, 299, 419, 89, 209, 329, 
449, 119, 239, 359, 479, +}; +#endif + +#ifndef FFT_BITREV240 +#define FFT_BITREV240 +static const opus_int16 fft_bitrev240[240] = { +0, 60, 120, 180, 15, 75, 135, 195, 30, 90, 150, 210, 45, 105, 165, +225, 5, 65, 125, 185, 20, 80, 140, 200, 35, 95, 155, 215, 50, 110, +170, 230, 10, 70, 130, 190, 25, 85, 145, 205, 40, 100, 160, 220, 55, +115, 175, 235, 1, 61, 121, 181, 16, 76, 136, 196, 31, 91, 151, 211, +46, 106, 166, 226, 6, 66, 126, 186, 21, 81, 141, 201, 36, 96, 156, +216, 51, 111, 171, 231, 11, 71, 131, 191, 26, 86, 146, 206, 41, 101, +161, 221, 56, 116, 176, 236, 2, 62, 122, 182, 17, 77, 137, 197, 32, +92, 152, 212, 47, 107, 167, 227, 7, 67, 127, 187, 22, 82, 142, 202, +37, 97, 157, 217, 52, 112, 172, 232, 12, 72, 132, 192, 27, 87, 147, +207, 42, 102, 162, 222, 57, 117, 177, 237, 3, 63, 123, 183, 18, 78, +138, 198, 33, 93, 153, 213, 48, 108, 168, 228, 8, 68, 128, 188, 23, +83, 143, 203, 38, 98, 158, 218, 53, 113, 173, 233, 13, 73, 133, 193, +28, 88, 148, 208, 43, 103, 163, 223, 58, 118, 178, 238, 4, 64, 124, +184, 19, 79, 139, 199, 34, 94, 154, 214, 49, 109, 169, 229, 9, 69, +129, 189, 24, 84, 144, 204, 39, 99, 159, 219, 54, 114, 174, 234, 14, +74, 134, 194, 29, 89, 149, 209, 44, 104, 164, 224, 59, 119, 179, 239, +}; +#endif + +#ifndef FFT_BITREV120 +#define FFT_BITREV120 +static const opus_int16 fft_bitrev120[120] = { +0, 30, 60, 90, 15, 45, 75, 105, 5, 35, 65, 95, 20, 50, 80, +110, 10, 40, 70, 100, 25, 55, 85, 115, 1, 31, 61, 91, 16, 46, +76, 106, 6, 36, 66, 96, 21, 51, 81, 111, 11, 41, 71, 101, 26, +56, 86, 116, 2, 32, 62, 92, 17, 47, 77, 107, 7, 37, 67, 97, +22, 52, 82, 112, 12, 42, 72, 102, 27, 57, 87, 117, 3, 33, 63, +93, 18, 48, 78, 108, 8, 38, 68, 98, 23, 53, 83, 113, 13, 43, +73, 103, 28, 58, 88, 118, 4, 34, 64, 94, 19, 49, 79, 109, 9, +39, 69, 99, 24, 54, 84, 114, 14, 44, 74, 104, 29, 59, 89, 119, +}; +#endif + +#ifndef FFT_BITREV60 +#define FFT_BITREV60 +static const opus_int16 fft_bitrev60[60] = { +0, 15, 30, 45, 5, 20, 35, 50, 10, 25, 40, 55, 1, 
16, 31, +46, 6, 21, 36, 51, 11, 26, 41, 56, 2, 17, 32, 47, 7, 22, +37, 52, 12, 27, 42, 57, 3, 18, 33, 48, 8, 23, 38, 53, 13, +28, 43, 58, 4, 19, 34, 49, 9, 24, 39, 54, 14, 29, 44, 59, +}; +#endif + +#ifndef FFT_STATE48000_960_0 +#define FFT_STATE48000_960_0 +static const kiss_fft_state fft_state48000_960_0 = { +480, /* nfft */ +0.002083333f, /* scale */ +-1, /* shift */ +{4, 120, 4, 30, 2, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, }, /* factors */ +fft_bitrev480, /* bitrev */ +fft_twiddles48000_960, /* bitrev */ +}; +#endif + +#ifndef FFT_STATE48000_960_1 +#define FFT_STATE48000_960_1 +static const kiss_fft_state fft_state48000_960_1 = { +240, /* nfft */ +0.004166667f, /* scale */ +1, /* shift */ +{4, 60, 4, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */ +fft_bitrev240, /* bitrev */ +fft_twiddles48000_960, /* bitrev */ +}; +#endif + +#ifndef FFT_STATE48000_960_2 +#define FFT_STATE48000_960_2 +static const kiss_fft_state fft_state48000_960_2 = { +120, /* nfft */ +0.008333333f, /* scale */ +2, /* shift */ +{4, 30, 2, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */ +fft_bitrev120, /* bitrev */ +fft_twiddles48000_960, /* bitrev */ +}; +#endif + +#ifndef FFT_STATE48000_960_3 +#define FFT_STATE48000_960_3 +static const kiss_fft_state fft_state48000_960_3 = { +60, /* nfft */ +0.016666667f, /* scale */ +3, /* shift */ +{4, 15, 3, 5, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */ +fft_bitrev60, /* bitrev */ +fft_twiddles48000_960, /* bitrev */ +}; +#endif + +#endif + +#ifndef MDCT_TWIDDLES960 +#define MDCT_TWIDDLES960 +static const opus_val16 mdct_twiddles960[481] = { +1.0000000f, 0.99999465f, 0.99997858f, 0.99995181f, 0.99991433f, +0.99986614f, 0.99980724f, 0.99973764f, 0.99965732f, 0.99956631f, +0.99946459f, 0.99935216f, 0.99922904f, 0.99909521f, 0.99895068f, +0.99879546f, 0.99862953f, 0.99845292f, 0.99826561f, 0.99806761f, +0.99785892f, 0.99763955f, 0.99740949f, 0.99716875f, 0.99691733f, +0.99665524f, 0.99638247f, 0.99609903f, 0.99580493f, 0.99550016f, 
+0.99518473f, 0.99485864f, 0.99452190f, 0.99417450f, 0.99381646f, +0.99344778f, 0.99306846f, 0.99267850f, 0.99227791f, 0.99186670f, +0.99144486f, 0.99101241f, 0.99056934f, 0.99011566f, 0.98965139f, +0.98917651f, 0.98869104f, 0.98819498f, 0.98768834f, 0.98717112f, +0.98664333f, 0.98610497f, 0.98555606f, 0.98499659f, 0.98442657f, +0.98384600f, 0.98325491f, 0.98265328f, 0.98204113f, 0.98141846f, +0.98078528f, 0.98014159f, 0.97948742f, 0.97882275f, 0.97814760f, +0.97746197f, 0.97676588f, 0.97605933f, 0.97534232f, 0.97461487f, +0.97387698f, 0.97312866f, 0.97236992f, 0.97160077f, 0.97082121f, +0.97003125f, 0.96923091f, 0.96842019f, 0.96759909f, 0.96676764f, +0.96592582f, 0.96507367f, 0.96421118f, 0.96333837f, 0.96245523f, +0.96156180f, 0.96065806f, 0.95974403f, 0.95881973f, 0.95788517f, +0.95694034f, 0.95598526f, 0.95501995f, 0.95404440f, 0.95305864f, +0.95206267f, 0.95105651f, 0.95004016f, 0.94901364f, 0.94797697f, +0.94693013f, 0.94587315f, 0.94480604f, 0.94372882f, 0.94264149f, +0.94154406f, 0.94043656f, 0.93931897f, 0.93819133f, 0.93705365f, +0.93590592f, 0.93474818f, 0.93358042f, 0.93240268f, 0.93121493f, +0.93001722f, 0.92880955f, 0.92759193f, 0.92636438f, 0.92512690f, +0.92387953f, 0.92262225f, 0.92135509f, 0.92007809f, 0.91879121f, +0.91749449f, 0.91618795f, 0.91487161f, 0.91354545f, 0.91220952f, +0.91086382f, 0.90950836f, 0.90814316f, 0.90676824f, 0.90538363f, +0.90398929f, 0.90258528f, 0.90117161f, 0.89974828f, 0.89831532f, +0.89687273f, 0.89542055f, 0.89395877f, 0.89248742f, 0.89100652f, +0.88951606f, 0.88801610f, 0.88650661f, 0.88498764f, 0.88345918f, +0.88192125f, 0.88037390f, 0.87881711f, 0.87725090f, 0.87567531f, +0.87409035f, 0.87249599f, 0.87089232f, 0.86927933f, 0.86765699f, +0.86602540f, 0.86438453f, 0.86273437f, 0.86107503f, 0.85940641f, +0.85772862f, 0.85604161f, 0.85434547f, 0.85264014f, 0.85092572f, +0.84920218f, 0.84746955f, 0.84572781f, 0.84397704f, 0.84221721f, +0.84044838f, 0.83867056f, 0.83688375f, 0.83508799f, 0.83328325f, +0.83146961f, 
0.82964704f, 0.82781562f, 0.82597530f, 0.82412620f, +0.82226820f, 0.82040144f, 0.81852589f, 0.81664154f, 0.81474847f, +0.81284665f, 0.81093620f, 0.80901698f, 0.80708914f, 0.80515262f, +0.80320752f, 0.80125378f, 0.79929149f, 0.79732067f, 0.79534125f, +0.79335335f, 0.79135691f, 0.78935204f, 0.78733867f, 0.78531691f, +0.78328674f, 0.78124818f, 0.77920122f, 0.77714595f, 0.77508232f, +0.77301043f, 0.77093026f, 0.76884183f, 0.76674517f, 0.76464026f, +0.76252720f, 0.76040593f, 0.75827656f, 0.75613907f, 0.75399349f, +0.75183978f, 0.74967807f, 0.74750833f, 0.74533054f, 0.74314481f, +0.74095112f, 0.73874950f, 0.73653993f, 0.73432251f, 0.73209718f, +0.72986405f, 0.72762307f, 0.72537438f, 0.72311787f, 0.72085359f, +0.71858162f, 0.71630192f, 0.71401459f, 0.71171956f, 0.70941701f, +0.70710677f, 0.70478900f, 0.70246363f, 0.70013079f, 0.69779041f, +0.69544260f, 0.69308738f, 0.69072466f, 0.68835458f, 0.68597709f, +0.68359229f, 0.68120013f, 0.67880072f, 0.67639404f, 0.67398011f, +0.67155892f, 0.66913059f, 0.66669509f, 0.66425240f, 0.66180265f, +0.65934581f, 0.65688191f, 0.65441092f, 0.65193298f, 0.64944801f, +0.64695613f, 0.64445727f, 0.64195160f, 0.63943902f, 0.63691954f, +0.63439328f, 0.63186019f, 0.62932037f, 0.62677377f, 0.62422055f, +0.62166055f, 0.61909394f, 0.61652065f, 0.61394081f, 0.61135435f, +0.60876139f, 0.60616195f, 0.60355593f, 0.60094349f, 0.59832457f, +0.59569929f, 0.59306758f, 0.59042957f, 0.58778523f, 0.58513460f, +0.58247766f, 0.57981452f, 0.57714518f, 0.57446961f, 0.57178793f, +0.56910013f, 0.56640624f, 0.56370623f, 0.56100023f, 0.55828818f, +0.55557020f, 0.55284627f, 0.55011641f, 0.54738067f, 0.54463901f, +0.54189157f, 0.53913828f, 0.53637921f, 0.53361450f, 0.53084398f, +0.52806787f, 0.52528601f, 0.52249852f, 0.51970543f, 0.51690688f, +0.51410279f, 0.51129310f, 0.50847793f, 0.50565732f, 0.50283139f, +0.49999997f, 0.49716321f, 0.49432122f, 0.49147383f, 0.48862118f, +0.48576340f, 0.48290042f, 0.48003216f, 0.47715876f, 0.47428025f, +0.47139677f, 0.46850813f, 
0.46561448f, 0.46271584f, 0.45981235f, +0.45690383f, 0.45399042f, 0.45107214f, 0.44814915f, 0.44522124f, +0.44228868f, 0.43935137f, 0.43640926f, 0.43346247f, 0.43051104f, +0.42755511f, 0.42459449f, 0.42162932f, 0.41865964f, 0.41568558f, +0.41270697f, 0.40972393f, 0.40673661f, 0.40374494f, 0.40074884f, +0.39774844f, 0.39474390f, 0.39173501f, 0.38872193f, 0.38570469f, +0.38268343f, 0.37965796f, 0.37662842f, 0.37359496f, 0.37055739f, +0.36751585f, 0.36447038f, 0.36142122f, 0.35836797f, 0.35531089f, +0.35225000f, 0.34918544f, 0.34611704f, 0.34304493f, 0.33996926f, +0.33688983f, 0.33380680f, 0.33072019f, 0.32763015f, 0.32453650f, +0.32143936f, 0.31833890f, 0.31523503f, 0.31212767f, 0.30901696f, +0.30590306f, 0.30278577f, 0.29966524f, 0.29654150f, 0.29341470f, +0.29028464f, 0.28715147f, 0.28401522f, 0.28087605f, 0.27773376f, +0.27458861f, 0.27144052f, 0.26828940f, 0.26513541f, 0.26197859f, +0.25881907f, 0.25565666f, 0.25249152f, 0.24932367f, 0.24615327f, +0.24298012f, 0.23980436f, 0.23662604f, 0.23344530f, 0.23026206f, +0.22707623f, 0.22388809f, 0.22069744f, 0.21750443f, 0.21430908f, +0.21111156f, 0.20791165f, 0.20470953f, 0.20150520f, 0.19829884f, +0.19509024f, 0.19187955f, 0.18866692f, 0.18545227f, 0.18223552f, +0.17901681f, 0.17579631f, 0.17257380f, 0.16934945f, 0.16612328f, +0.16289546f, 0.15966577f, 0.15643437f, 0.15320141f, 0.14996669f, +0.14673037f, 0.14349260f, 0.14025329f, 0.13701235f, 0.13376995f, +0.13052612f, 0.12728101f, 0.12403442f, 0.12078650f, 0.11753740f, +0.11428693f, 0.11103523f, 0.10778234f, 0.10452842f, 0.10127326f, +0.098017137f, 0.094759842f, 0.091501652f, 0.088242363f, 0.084982129f, +0.081721103f, 0.078459084f, 0.075196224f, 0.071932560f, 0.068668243f, +0.065403073f, 0.062137201f, 0.058870665f, 0.055603617f, 0.052335974f, +0.049067651f, 0.045798921f, 0.042529582f, 0.039259788f, 0.035989573f, +0.032719092f, 0.029448142f, 0.026176876f, 0.022905329f, 0.019633657f, +0.016361655f, 0.013089478f, 0.0098171604f, 0.0065449764f, 0.0032724839f, 
+-4.3711390e-08f, }; +#endif + +static const CELTMode mode48000_960_120 = { +48000, /* Fs */ +120, /* overlap */ +21, /* nbEBands */ +21, /* effEBands */ +{0.85000610f, 0.0000000f, 1.0000000f, 1.0000000f, }, /* preemph */ +eband5ms, /* eBands */ +3, /* maxLM */ +8, /* nbShortMdcts */ +120, /* shortMdctSize */ +11, /* nbAllocVectors */ +band_allocation, /* allocVectors */ +logN400, /* logN */ +window120, /* window */ +{1920, 3, {&fft_state48000_960_0, &fft_state48000_960_1, &fft_state48000_960_2, &fft_state48000_960_3, }, mdct_twiddles960}, /* mdct */ +{392, cache_index50, cache_bits50, cache_caps50}, /* cache */ +}; + +/* List of all the available modes */ +#define TOTAL_MODES 1 +static const CELTMode * const static_mode_list[TOTAL_MODES] = { +&mode48000_960_120, +}; diff --git a/drivers/opus/celt/tests/test_unit_cwrs32.c b/drivers/opus/celt/tests/test_unit_cwrs32.c new file mode 100644 index 0000000000..9cf124336a --- /dev/null +++ b/drivers/opus/celt/tests/test_unit_cwrs32.c @@ -0,0 +1,161 @@ +/* Copyright (c) 2008-2011 Xiph.Org Foundation, Mozilla Corporation, + Gregory Maxwell + Written by Jean-Marc Valin, Gregory Maxwell, and Timothy B. Terriberry */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include <stdio.h> +#include <string.h> + +#ifndef CUSTOM_MODES +#define CUSTOM_MODES +#else +#define TEST_CUSTOM_MODES +#endif + +#define CELT_C +#include "stack_alloc.h" +#include "entenc.c" +#include "entdec.c" +#include "entcode.c" +#include "cwrs.c" +#include "mathops.c" +#include "rate.h" + +#define NMAX (240) +#define KMAX (128) + +#ifdef TEST_CUSTOM_MODES + +#define NDIMS (44) +static const int pn[NDIMS]={ + 2, 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 16, 18, 20, 22, + 24, 26, 28, 30, 32, 36, 40, 44, 48, + 52, 56, 60, 64, 72, 80, 88, 96, 104, + 112, 120, 128, 144, 160, 176, 192, 208 +}; +static const int pkmax[NDIMS]={ + 128, 128, 128, 128, 88, 52, 36, 26, 22, + 18, 16, 15, 13, 12, 12, 11, 10, 9, + 9, 8, 8, 7, 7, 7, 7, 6, 6, + 6, 6, 6, 5, 5, 5, 5, 5, 5, + 4, 4, 4, 4, 4, 4, 4, 4 +}; + +#else /* TEST_CUSTOM_MODES */ + +#define NDIMS (22) +static const int pn[NDIMS]={ + 2, 3, 4, 6, 8, 9, 11, 12, 16, + 18, 22, 24, 32, 36, 44, 48, 64, 72, + 88, 96, 144, 176 +}; +static const int pkmax[NDIMS]={ + 128, 128, 128, 88, 36, 26, 18, 16, 12, + 11, 9, 9, 7, 7, 6, 6, 5, 5, + 5, 5, 4, 4 +}; + +#endif + +int main(void){ + int t; + int n; + ALLOC_STACK; + for(t=0;t<NDIMS;t++){ + int pseudo; + n=pn[t]; + for(pseudo=1;pseudo<41;pseudo++) + { + int k; +#if defined(SMALL_FOOTPRINT) + opus_uint32 uu[KMAX+2U]; +#endif + opus_uint32 inc; + opus_uint32 nc; + opus_uint32 i; + 
k=get_pulses(pseudo); + if (k>pkmax[t])break; + printf("Testing CWRS with N=%i, K=%i...\n",n,k); +#if defined(SMALL_FOOTPRINT) + nc=ncwrs_urow(n,k,uu); +#else + nc=CELT_PVQ_V(n,k); +#endif + inc=nc/20000; + if(inc<1)inc=1; + for(i=0;i<nc;i+=inc){ +#if defined(SMALL_FOOTPRINT) + opus_uint32 u[KMAX+2U]; +#endif + int y[NMAX]; + int sy; + opus_uint32 v; + opus_uint32 ii; + int j; +#if defined(SMALL_FOOTPRINT) + memcpy(u,uu,(k+2U)*sizeof(*u)); + cwrsi(n,k,i,y,u); +#else + cwrsi(n,k,i,y); +#endif + sy=0; + for(j=0;j<n;j++)sy+=ABS(y[j]); + if(sy!=k){ + fprintf(stderr,"N=%d Pulse count mismatch in cwrsi (%d!=%d).\n", + n,sy,k); + return 99; + } + /*printf("%6u of %u:",i,nc); + for(j=0;j<n;j++)printf(" %+3i",y[j]); + printf(" ->");*/ +#if defined(SMALL_FOOTPRINT) + ii=icwrs(n,k,&v,y,u); +#else + ii=icwrs(n,y); + v=CELT_PVQ_V(n,k); +#endif + if(ii!=i){ + fprintf(stderr,"Combination-index mismatch (%lu!=%lu).\n", + (long)ii,(long)i); + return 1; + } + if(v!=nc){ + fprintf(stderr,"Combination count mismatch (%lu!=%lu).\n", + (long)v,(long)nc); + return 2; + } + /*printf(" %6u\n",i);*/ + } + /*printf("\n");*/ + } + } + return 0; +} diff --git a/drivers/opus/celt/tests/test_unit_dft.c b/drivers/opus/celt/tests/test_unit_dft.c new file mode 100644 index 0000000000..4a00013b2a --- /dev/null +++ b/drivers/opus/celt/tests/test_unit_dft.c @@ -0,0 +1,164 @@ +/* Copyright (c) 2008 Xiph.Org Foundation + Written by Jean-Marc Valin */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#define SKIP_CONFIG_H + +#ifndef CUSTOM_MODES +#define CUSTOM_MODES +#endif + +#include <stdio.h> + +#define CELT_C +#include "stack_alloc.h" +#include "kiss_fft.h" +#include "kiss_fft.c" +#include "mathops.c" +#include "entcode.c" + + +#ifndef M_PI +#define M_PI 3.141592653 +#endif + +int ret = 0; + +void check(kiss_fft_cpx * in,kiss_fft_cpx * out,int nfft,int isinverse) +{ + int bin,k; + double errpow=0,sigpow=0, snr; + + for (bin=0;bin<nfft;++bin) { + double ansr = 0; + double ansi = 0; + double difr; + double difi; + + for (k=0;k<nfft;++k) { + double phase = -2*M_PI*bin*k/nfft; + double re = cos(phase); + double im = sin(phase); + if (isinverse) + im = -im; + + if (!isinverse) + { + re /= nfft; + im /= nfft; + } + + ansr += in[k].r * re - in[k].i * im; + ansi += in[k].r * im + in[k].i * re; + } + /*printf ("%d %d ", (int)ansr, (int)ansi);*/ + difr = ansr - out[bin].r; + difi = ansi - out[bin].i; + errpow += difr*difr + difi*difi; + sigpow += ansr*ansr+ansi*ansi; + } + snr = 10*log10(sigpow/errpow); + printf("nfft=%d inverse=%d,snr = %f\n",nfft,isinverse,snr ); + if (snr<60) { + printf( "** poor snr: %f ** \n", snr); + ret = 1; 
+ } +} + +void test1d(int nfft,int isinverse) +{ + size_t buflen = sizeof(kiss_fft_cpx)*nfft; + + kiss_fft_cpx * in = (kiss_fft_cpx*)malloc(buflen); + kiss_fft_cpx * out= (kiss_fft_cpx*)malloc(buflen); + kiss_fft_state *cfg = opus_fft_alloc(nfft,0,0); + int k; + + for (k=0;k<nfft;++k) { + in[k].r = (rand() % 32767) - 16384; + in[k].i = (rand() % 32767) - 16384; + } + + for (k=0;k<nfft;++k) { + in[k].r *= 32768; + in[k].i *= 32768; + } + + if (isinverse) + { + for (k=0;k<nfft;++k) { + in[k].r /= nfft; + in[k].i /= nfft; + } + } + + /*for (k=0;k<nfft;++k) printf("%d %d ", in[k].r, in[k].i);printf("\n");*/ + + if (isinverse) + opus_ifft(cfg,in,out); + else + opus_fft(cfg,in,out); + + /*for (k=0;k<nfft;++k) printf("%d %d ", out[k].r, out[k].i);printf("\n");*/ + + check(in,out,nfft,isinverse); + + free(in); + free(out); + free(cfg); +} + +int main(int argc,char ** argv) +{ + ALLOC_STACK; + if (argc>1) { + int k; + for (k=1;k<argc;++k) { + test1d(atoi(argv[k]),0); + test1d(atoi(argv[k]),1); + } + }else{ + test1d(32,0); + test1d(32,1); + test1d(128,0); + test1d(128,1); + test1d(256,0); + test1d(256,1); +#ifndef RADIX_TWO_ONLY + test1d(36,0); + test1d(36,1); + test1d(50,0); + test1d(50,1); + test1d(120,0); + test1d(120,1); +#endif + } + return ret; +} diff --git a/drivers/opus/celt/tests/test_unit_entropy.c b/drivers/opus/celt/tests/test_unit_entropy.c new file mode 100644 index 0000000000..62268b1564 --- /dev/null +++ b/drivers/opus/celt/tests/test_unit_entropy.c @@ -0,0 +1,382 @@ +/* Copyright (c) 2007-2011 Xiph.Org Foundation, Mozilla Corporation, + Gregory Maxwell + Written by Jean-Marc Valin, Gregory Maxwell, and Timothy B. Terriberry */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include <stdlib.h> +#include <stdio.h> +#include <math.h> +#include <time.h> +#include "entcode.h" +#include "entenc.h" +#include "entdec.h" +#include <string.h> + +#include "entenc.c" +#include "entdec.c" +#include "entcode.c" + +#ifndef M_LOG2E +# define M_LOG2E 1.4426950408889634074 +#endif +#define DATA_SIZE 10000000 +#define DATA_SIZE2 10000 + +int main(int _argc,char **_argv){ + ec_enc enc; + ec_dec dec; + long nbits; + long nbits2; + double entropy; + int ft; + int ftb; + int sz; + int i; + int ret; + unsigned int sym; + unsigned int seed; + unsigned char *ptr; + const char *env_seed; + ret=0; + entropy=0; + if (_argc > 2) { + fprintf(stderr, "Usage: %s [<seed>]\n", _argv[0]); + return 1; + } + env_seed = getenv("SEED"); + if (_argc > 1) + seed = atoi(_argv[1]); + else if (env_seed) + seed = atoi(env_seed); + else + seed = time(NULL); + /*Testing encoding of raw bit values.*/ + ptr = (unsigned char 
*)malloc(DATA_SIZE); + ec_enc_init(&enc,ptr, DATA_SIZE); + for(ft=2;ft<1024;ft++){ + for(i=0;i<ft;i++){ + entropy+=log(ft)*M_LOG2E; + ec_enc_uint(&enc,i,ft); + } + } + /*Testing encoding of raw bit values.*/ + for(ftb=1;ftb<16;ftb++){ + for(i=0;i<(1<<ftb);i++){ + entropy+=ftb; + nbits=ec_tell(&enc); + ec_enc_bits(&enc,i,ftb); + nbits2=ec_tell(&enc); + if(nbits2-nbits!=ftb){ + fprintf(stderr,"Used %li bits to encode %i bits directly.\n", + nbits2-nbits,ftb); + ret=-1; + } + } + } + nbits=ec_tell_frac(&enc); + ec_enc_done(&enc); + fprintf(stderr, + "Encoded %0.2lf bits of entropy to %0.2lf bits (%0.3lf%% wasted).\n", + entropy,ldexp(nbits,-3),100*(nbits-ldexp(entropy,3))/nbits); + fprintf(stderr,"Packed to %li bytes.\n",(long)ec_range_bytes(&enc)); + ec_dec_init(&dec,ptr,DATA_SIZE); + for(ft=2;ft<1024;ft++){ + for(i=0;i<ft;i++){ + sym=ec_dec_uint(&dec,ft); + if(sym!=(unsigned)i){ + fprintf(stderr,"Decoded %i instead of %i with ft of %i.\n",sym,i,ft); + ret=-1; + } + } + } + for(ftb=1;ftb<16;ftb++){ + for(i=0;i<(1<<ftb);i++){ + sym=ec_dec_bits(&dec,ftb); + if(sym!=(unsigned)i){ + fprintf(stderr,"Decoded %i instead of %i with ftb of %i.\n",sym,i,ftb); + ret=-1; + } + } + } + nbits2=ec_tell_frac(&dec); + if(nbits!=nbits2){ + fprintf(stderr, + "Reported number of bits used was %0.2lf, should be %0.2lf.\n", + ldexp(nbits2,-3),ldexp(nbits,-3)); + ret=-1; + } + /*Testing an encoder bust prefers range coder data over raw bits. + This isn't a general guarantee, will only work for data that is buffered in + the encoder state and not yet stored in the user buffer, and should never + get used in practice. 
+ It's mostly here for code coverage completeness.*/ + /*Start with a 16-bit buffer.*/ + ec_enc_init(&enc,ptr,2); + /*Write 7 raw bits.*/ + ec_enc_bits(&enc,0x55,7); + /*Write 12.3 bits of range coder data.*/ + ec_enc_uint(&enc,1,2); + ec_enc_uint(&enc,1,3); + ec_enc_uint(&enc,1,4); + ec_enc_uint(&enc,1,5); + ec_enc_uint(&enc,2,6); + ec_enc_uint(&enc,6,7); + ec_enc_done(&enc); + ec_dec_init(&dec,ptr,2); + if(!enc.error + /*The raw bits should have been overwritten by the range coder data.*/ + ||ec_dec_bits(&dec,7)!=0x05 + /*And all the range coder data should have been encoded correctly.*/ + ||ec_dec_uint(&dec,2)!=1 + ||ec_dec_uint(&dec,3)!=1 + ||ec_dec_uint(&dec,4)!=1 + ||ec_dec_uint(&dec,5)!=1 + ||ec_dec_uint(&dec,6)!=2 + ||ec_dec_uint(&dec,7)!=6){ + fprintf(stderr,"Encoder bust overwrote range coder data with raw bits.\n"); + ret=-1; + } + srand(seed); + fprintf(stderr,"Testing random streams... Random seed: %u (%.4X)\n", seed, rand() % 65536); + for(i=0;i<409600;i++){ + unsigned *data; + unsigned *tell; + unsigned tell_bits; + int j; + int zeros; + ft=rand()/((RAND_MAX>>(rand()%11U))+1U)+10; + sz=rand()/((RAND_MAX>>(rand()%9U))+1U); + data=(unsigned *)malloc(sz*sizeof(*data)); + tell=(unsigned *)malloc((sz+1)*sizeof(*tell)); + ec_enc_init(&enc,ptr,DATA_SIZE2); + zeros = rand()%13==0; + tell[0]=ec_tell_frac(&enc); + for(j=0;j<sz;j++){ + if (zeros) + data[j]=0; + else + data[j]=rand()%ft; + ec_enc_uint(&enc,data[j],ft); + tell[j+1]=ec_tell_frac(&enc); + } + if (rand()%2==0) + while(ec_tell(&enc)%8 != 0) + ec_enc_uint(&enc, rand()%2, 2); + tell_bits = ec_tell(&enc); + ec_enc_done(&enc); + if(tell_bits!=(unsigned)ec_tell(&enc)){ + fprintf(stderr,"ec_tell() changed after ec_enc_done(): %i instead of %i (Random seed: %u)\n", + ec_tell(&enc),tell_bits,seed); + ret=-1; + } + if ((tell_bits+7)/8 < ec_range_bytes(&enc)) + { + fprintf (stderr, "ec_tell() lied, there's %i bytes instead of %d (Random seed: %u)\n", + ec_range_bytes(&enc), (tell_bits+7)/8,seed); + ret=-1; + } 
+ ec_dec_init(&dec,ptr,DATA_SIZE2); + if(ec_tell_frac(&dec)!=tell[0]){ + fprintf(stderr, + "Tell mismatch between encoder and decoder at symbol %i: %i instead of %i (Random seed: %u).\n", + 0,ec_tell_frac(&dec),tell[0],seed); + } + for(j=0;j<sz;j++){ + sym=ec_dec_uint(&dec,ft); + if(sym!=data[j]){ + fprintf(stderr, + "Decoded %i instead of %i with ft of %i at position %i of %i (Random seed: %u).\n", + sym,data[j],ft,j,sz,seed); + ret=-1; + } + if(ec_tell_frac(&dec)!=tell[j+1]){ + fprintf(stderr, + "Tell mismatch between encoder and decoder at symbol %i: %i instead of %i (Random seed: %u).\n", + j+1,ec_tell_frac(&dec),tell[j+1],seed); + } + } + free(tell); + free(data); + } + /*Test compatibility between multiple different encode/decode routines.*/ + for(i=0;i<409600;i++){ + unsigned *logp1; + unsigned *data; + unsigned *tell; + unsigned *enc_method; + int j; + sz=rand()/((RAND_MAX>>(rand()%9U))+1U); + logp1=(unsigned *)malloc(sz*sizeof(*logp1)); + data=(unsigned *)malloc(sz*sizeof(*data)); + tell=(unsigned *)malloc((sz+1)*sizeof(*tell)); + enc_method=(unsigned *)malloc(sz*sizeof(*enc_method)); + ec_enc_init(&enc,ptr,DATA_SIZE2); + tell[0]=ec_tell_frac(&enc); + for(j=0;j<sz;j++){ + data[j]=rand()/((RAND_MAX>>1)+1); + logp1[j]=(rand()%15)+1; + enc_method[j]=rand()/((RAND_MAX>>2)+1); + switch(enc_method[j]){ + case 0:{ + ec_encode(&enc,data[j]?(1<<logp1[j])-1:0, + (1<<logp1[j])-(data[j]?0:1),1<<logp1[j]); + }break; + case 1:{ + ec_encode_bin(&enc,data[j]?(1<<logp1[j])-1:0, + (1<<logp1[j])-(data[j]?0:1),logp1[j]); + }break; + case 2:{ + ec_enc_bit_logp(&enc,data[j],logp1[j]); + }break; + case 3:{ + unsigned char icdf[2]; + icdf[0]=1; + icdf[1]=0; + ec_enc_icdf(&enc,data[j],icdf,logp1[j]); + }break; + } + tell[j+1]=ec_tell_frac(&enc); + } + ec_enc_done(&enc); + if((ec_tell(&enc)+7U)/8U<ec_range_bytes(&enc)){ + fprintf(stderr,"tell() lied, there's %i bytes instead of %d (Random seed: %u)\n", + ec_range_bytes(&enc),(ec_tell(&enc)+7)/8,seed); + ret=-1; + } + 
ec_dec_init(&dec,ptr,DATA_SIZE2); + if(ec_tell_frac(&dec)!=tell[0]){ + fprintf(stderr, + "Tell mismatch between encoder and decoder at symbol %i: %i instead of %i (Random seed: %u).\n", + 0,ec_tell_frac(&dec),tell[0],seed); + } + for(j=0;j<sz;j++){ + int fs; + int dec_method; + dec_method=rand()/((RAND_MAX>>2)+1); + switch(dec_method){ + case 0:{ + fs=ec_decode(&dec,1<<logp1[j]); + sym=fs>=(1<<logp1[j])-1; + ec_dec_update(&dec,sym?(1<<logp1[j])-1:0, + (1<<logp1[j])-(sym?0:1),1<<logp1[j]); + }break; + case 1:{ + fs=ec_decode_bin(&dec,logp1[j]); + sym=fs>=(1<<logp1[j])-1; + ec_dec_update(&dec,sym?(1<<logp1[j])-1:0, + (1<<logp1[j])-(sym?0:1),1<<logp1[j]); + }break; + case 2:{ + sym=ec_dec_bit_logp(&dec,logp1[j]); + }break; + case 3:{ + unsigned char icdf[2]; + icdf[0]=1; + icdf[1]=0; + sym=ec_dec_icdf(&dec,icdf,logp1[j]); + }break; + } + if(sym!=data[j]){ + fprintf(stderr, + "Decoded %i instead of %i with logp1 of %i at position %i of %i (Random seed: %u).\n", + sym,data[j],logp1[j],j,sz,seed); + fprintf(stderr,"Encoding method: %i, decoding method: %i\n", + enc_method[j],dec_method); + ret=-1; + } + if(ec_tell_frac(&dec)!=tell[j+1]){ + fprintf(stderr, + "Tell mismatch between encoder and decoder at symbol %i: %i instead of %i (Random seed: %u).\n", + j+1,ec_tell_frac(&dec),tell[j+1],seed); + } + } + free(enc_method); + free(tell); + free(data); + free(logp1); + } + ec_enc_init(&enc,ptr,DATA_SIZE2); + ec_enc_bit_logp(&enc,0,1); + ec_enc_bit_logp(&enc,0,1); + ec_enc_bit_logp(&enc,0,1); + ec_enc_bit_logp(&enc,0,1); + ec_enc_bit_logp(&enc,0,2); + ec_enc_patch_initial_bits(&enc,3,2); + if(enc.error){ + fprintf(stderr,"patch_initial_bits failed"); + ret=-1; + } + ec_enc_patch_initial_bits(&enc,0,5); + if(!enc.error){ + fprintf(stderr,"patch_initial_bits didn't fail when it should have"); + ret=-1; + } + ec_enc_done(&enc); + if(ec_range_bytes(&enc)!=1||ptr[0]!=192){ + fprintf(stderr,"Got %d when expecting 192 for patch_initial_bits",ptr[0]); + ret=-1; + } + 
ec_enc_init(&enc,ptr,DATA_SIZE2); + ec_enc_bit_logp(&enc,0,1); + ec_enc_bit_logp(&enc,0,1); + ec_enc_bit_logp(&enc,1,6); + ec_enc_bit_logp(&enc,0,2); + ec_enc_patch_initial_bits(&enc,0,2); + if(enc.error){ + fprintf(stderr,"patch_initial_bits failed"); + ret=-1; + } + ec_enc_done(&enc); + if(ec_range_bytes(&enc)!=2||ptr[0]!=63){ + fprintf(stderr,"Got %d when expecting 63 for patch_initial_bits",ptr[0]); + ret=-1; + } + ec_enc_init(&enc,ptr,2); + ec_enc_bit_logp(&enc,0,2); + for(i=0;i<48;i++){ + ec_enc_bits(&enc,0,1); + } + ec_enc_done(&enc); + if(!enc.error){ + fprintf(stderr,"Raw bits overfill didn't fail when it should have"); + ret=-1; + } + ec_enc_init(&enc,ptr,2); + for(i=0;i<17;i++){ + ec_enc_bits(&enc,0,1); + } + ec_enc_done(&enc); + if(!enc.error){ + fprintf(stderr,"17 raw bits encoded in two bytes"); + ret=-1; + } + free(ptr); + return ret; +} diff --git a/drivers/opus/celt/tests/test_unit_laplace.c b/drivers/opus/celt/tests/test_unit_laplace.c new file mode 100644 index 0000000000..af7d471045 --- /dev/null +++ b/drivers/opus/celt/tests/test_unit_laplace.c @@ -0,0 +1,92 @@ +/* Copyright (c) 2008-2011 Xiph.Org Foundation, Mozilla Corporation + Written by Jean-Marc Valin and Timothy B. Terriberry */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include <stdio.h> +#include <stdlib.h> +#include "laplace.h" +#define CELT_C +#include "stack_alloc.h" + +#include "entenc.c" +#include "entdec.c" +#include "entcode.c" +#include "laplace.c" + +#define DATA_SIZE 40000 + +int ec_laplace_get_start_freq(int decay) +{ + opus_uint32 ft = 32768 - LAPLACE_MINP*(2*LAPLACE_NMIN+1); + int fs = (ft*(16384-decay))/(16384+decay); + return fs+LAPLACE_MINP; +} + +int main(void) +{ + int i; + int ret = 0; + ec_enc enc; + ec_dec dec; + unsigned char *ptr; + int val[10000], decay[10000]; + ALLOC_STACK; + ptr = (unsigned char *)malloc(DATA_SIZE); + ec_enc_init(&enc,ptr,DATA_SIZE); + + val[0] = 3; decay[0] = 6000; + val[1] = 0; decay[1] = 5800; + val[2] = -1; decay[2] = 5600; + for (i=3;i<10000;i++) + { + val[i] = rand()%15-7; + decay[i] = rand()%11000+5000; + } + for (i=0;i<10000;i++) + ec_laplace_encode(&enc, &val[i], + ec_laplace_get_start_freq(decay[i]), decay[i]); + + ec_enc_done(&enc); + + ec_dec_init(&dec,ec_get_buffer(&enc),ec_range_bytes(&enc)); + + for (i=0;i<10000;i++) + { + int d = ec_laplace_decode(&dec, + ec_laplace_get_start_freq(decay[i]), decay[i]); + if (d != val[i]) + { + fprintf (stderr, "Got %d instead of %d\n", d, val[i]); + ret = 1; + } + } + + return ret; +} diff --git a/drivers/opus/celt/tests/test_unit_mathops.c b/drivers/opus/celt/tests/test_unit_mathops.c new file mode 100644 index 0000000000..36d6a4bfb4 --- /dev/null 
+++ b/drivers/opus/celt/tests/test_unit_mathops.c @@ -0,0 +1,275 @@ +/* Copyright (c) 2008-2011 Xiph.Org Foundation, Mozilla Corporation, + Gregory Maxwell + Written by Jean-Marc Valin, Gregory Maxwell, and Timothy B. Terriberry */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#ifndef CUSTOM_MODES +#define CUSTOM_MODES +#endif + +#define CELT_C + +#include "mathops.c" +#include "entenc.c" +#include "entdec.c" +#include "entcode.c" +#include "bands.c" +#include "quant_bands.c" +#include "laplace.c" +#include "vq.c" +#include "cwrs.c" +#include <stdio.h> +#include <math.h> + +#ifdef OPUS_FIXED_POINT +#define WORD "%d" +#else +#define WORD "%f" +#endif + +int ret = 0; + +void testdiv(void) +{ + opus_int32 i; + for (i=1;i<=327670;i++) + { + double prod; + opus_val32 val; + val = celt_rcp(i); +#ifdef OPUS_FIXED_POINT + prod = (1./32768./65526.)*val*i; +#else + prod = val*i; +#endif + if (fabs(prod-1) > .00025) + { + fprintf (stderr, "div failed: 1/%d="WORD" (product = %f)\n", i, val, prod); + ret = 1; + } + } +} + +void testsqrt(void) +{ + opus_int32 i; + for (i=1;i<=1000000000;i++) + { + double ratio; + opus_val16 val; + val = celt_sqrt(i); + ratio = val/sqrt(i); + if (fabs(ratio - 1) > .0005 && fabs(val-sqrt(i)) > 2) + { + fprintf (stderr, "sqrt failed: sqrt(%d)="WORD" (ratio = %f)\n", i, val, ratio); + ret = 1; + } + i+= i>>10; + } +} + +void testbitexactcos(void) +{ + int i; + opus_int32 min_d,max_d,last,chk; + chk=max_d=0; + last=min_d=32767; + for(i=64;i<=16320;i++) + { + opus_int32 d; + opus_int32 q=bitexact_cos(i); + chk ^= q*i; + d = last - q; + if (d>max_d)max_d=d; + if (d<min_d)min_d=d; + last = q; + } + if ((chk!=89408644)||(max_d!=5)||(min_d!=0)||(bitexact_cos(64)!=32767)|| + (bitexact_cos(16320)!=200)||(bitexact_cos(8192)!=23171)) + { + fprintf (stderr, "bitexact_cos failed\n"); + ret = 1; + } +} + +void testbitexactlog2tan(void) +{ + int i,fail; + opus_int32 min_d,max_d,last,chk; + fail=chk=max_d=0; + last=min_d=15059; + for(i=64;i<8193;i++) + { + opus_int32 d; + opus_int32 mid=bitexact_cos(i); + opus_int32 side=bitexact_cos(16384-i); + opus_int32 q=bitexact_log2tan(mid,side); + chk ^= q*i; + d = last - q; + if (q!=-1*bitexact_log2tan(side,mid)) + fail = 1; + 
if (d>max_d)max_d=d; + if (d<min_d)min_d=d; + last = q; + } + if ((chk!=15821257)||(max_d!=61)||(min_d!=-2)||fail|| + (bitexact_log2tan(32767,200)!=15059)||(bitexact_log2tan(30274,12540)!=2611)|| + (bitexact_log2tan(23171,23171)!=0)) + { + fprintf (stderr, "bitexact_log2tan failed\n"); + ret = 1; + } +} + +#ifndef OPUS_FIXED_POINT +void testlog2(void) +{ + float x; + for (x=0.001;x<1677700.0;x+=(x/8.0)) + { + float error = fabs((1.442695040888963387*log(x))-celt_log2(x)); + if (error>0.0009) + { + fprintf (stderr, "celt_log2 failed: fabs((1.442695040888963387*log(x))-celt_log2(x))>0.001 (x = %f, error = %f)\n", x,error); + ret = 1; + } + } +} + +void testexp2(void) +{ + float x; + for (x=-11.0;x<24.0;x+=0.0007) + { + float error = fabs(x-(1.442695040888963387*log(celt_exp2(x)))); + if (error>0.0002) + { + fprintf (stderr, "celt_exp2 failed: fabs(x-(1.442695040888963387*log(celt_exp2(x))))>0.0005 (x = %f, error = %f)\n", x,error); + ret = 1; + } + } +} + +void testexp2log2(void) +{ + float x; + for (x=-11.0;x<24.0;x+=0.0007) + { + float error = fabs(x-(celt_log2(celt_exp2(x)))); + if (error>0.001) + { + fprintf (stderr, "celt_log2/celt_exp2 failed: fabs(x-(celt_log2(celt_exp2(x))))>0.001 (x = %f, error = %f)\n", x,error); + ret = 1; + } + } +} +#else +void testlog2(void) +{ + opus_val32 x; + for (x=8;x<1073741824;x+=(x>>3)) + { + float error = fabs((1.442695040888963387*log(x/16384.0))-celt_log2(x)/1024.0); + if (error>0.003) + { + fprintf (stderr, "celt_log2 failed: x = %ld, error = %f\n", (long)x,error); + ret = 1; + } + } +} + +void testexp2(void) +{ + opus_val16 x; + for (x=-32768;x<15360;x++) + { + float error1 = fabs(x/1024.0-(1.442695040888963387*log(celt_exp2(x)/65536.0))); + float error2 = fabs(exp(0.6931471805599453094*x/1024.0)-celt_exp2(x)/65536.0); + if (error1>0.0002&&error2>0.00004) + { + fprintf (stderr, "celt_exp2 failed: x = "WORD", error1 = %f, error2 = %f\n", x,error1,error2); + ret = 1; + } + } +} + +void testexp2log2(void) +{ + opus_val32 x; + 
for (x=8;x<65536;x+=(x>>3)) + { + float error = fabs(x-0.25*celt_exp2(celt_log2(x)))/16384; + if (error>0.004) + { + fprintf (stderr, "celt_log2/celt_exp2 failed: fabs(x-(celt_exp2(celt_log2(x))))>0.001 (x = %ld, error = %f)\n", (long)x,error); + ret = 1; + } + } +} + +void testilog2(void) +{ + opus_val32 x; + for (x=1;x<=268435455;x+=127) + { + opus_val32 lg; + opus_val32 y; + + lg = celt_ilog2(x); + if (lg<0 || lg>=31) + { + printf("celt_ilog2 failed: 0<=celt_ilog2(x)<31 (x = %d, celt_ilog2(x) = %d)\n",x,lg); + ret = 1; + } + y = 1<<lg; + + if (x<y || (x>>1)>=y) + { + printf("celt_ilog2 failed: 2**celt_ilog2(x)<=x<2**(celt_ilog2(x)+1) (x = %d, 2**celt_ilog2(x) = %d)\n",x,y); + ret = 1; + } + } +} +#endif + +int main(void) +{ + testbitexactcos(); + testbitexactlog2tan(); + testdiv(); + testsqrt(); + testlog2(); + testexp2(); + testexp2log2(); +#ifdef OPUS_FIXED_POINT + testilog2(); +#endif + return ret; +} diff --git a/drivers/opus/celt/tests/test_unit_mdct.c b/drivers/opus/celt/tests/test_unit_mdct.c new file mode 100644 index 0000000000..e3b5eec11c --- /dev/null +++ b/drivers/opus/celt/tests/test_unit_mdct.c @@ -0,0 +1,210 @@ +/* Copyright (c) 2008-2011 Xiph.Org Foundation + Written by Jean-Marc Valin */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#define SKIP_CONFIG_H + +#ifndef CUSTOM_MODES +#define CUSTOM_MODES +#endif + +#include <stdio.h> + +#define CELT_C +#include "mdct.h" +#include "stack_alloc.h" + +#include "kiss_fft.c" +#include "mdct.c" +#include "mathops.c" +#include "entcode.c" + +#ifndef M_PI +#define M_PI 3.141592653 +#endif + +int ret = 0; +void check(kiss_fft_scalar * in,kiss_fft_scalar * out,int nfft,int isinverse) +{ + int bin,k; + double errpow=0,sigpow=0; + double snr; + for (bin=0;bin<nfft/2;++bin) { + double ansr = 0; + double difr; + + for (k=0;k<nfft;++k) { + double phase = 2*M_PI*(k+.5+.25*nfft)*(bin+.5)/nfft; + double re = cos(phase); + + re /= nfft/4; + + ansr += in[k] * re; + } + /*printf ("%f %f\n", ansr, out[bin]);*/ + difr = ansr - out[bin]; + errpow += difr*difr; + sigpow += ansr*ansr; + } + snr = 10*log10(sigpow/errpow); + printf("nfft=%d inverse=%d,snr = %f\n",nfft,isinverse,snr ); + if (snr<60) { + printf( "** poor snr: %f **\n", snr); + ret = 1; + } +} + +void check_inv(kiss_fft_scalar * in,kiss_fft_scalar * out,int nfft,int isinverse) +{ + int bin,k; + double errpow=0,sigpow=0; + double snr; + for (bin=0;bin<nfft;++bin) { + double ansr = 0; + double difr; + + for (k=0;k<nfft/2;++k) { + double phase = 2*M_PI*(bin+.5+.25*nfft)*(k+.5)/nfft; + double re = cos(phase); + + /*re *= 2;*/ + + ansr += in[k] * re; + } + /*printf ("%f %f\n", ansr, out[bin]);*/ + difr = ansr - out[bin]; + errpow 
+= difr*difr; + sigpow += ansr*ansr; + } + snr = 10*log10(sigpow/errpow); + printf("nfft=%d inverse=%d,snr = %f\n",nfft,isinverse,snr ); + if (snr<60) { + printf( "** poor snr: %f **\n", snr); + ret = 1; + } +} + + +void test1d(int nfft,int isinverse) +{ + celt_mdct_lookup cfg; + size_t buflen = sizeof(kiss_fft_scalar)*nfft; + + kiss_fft_scalar * in = (kiss_fft_scalar*)malloc(buflen); + kiss_fft_scalar * in_copy = (kiss_fft_scalar*)malloc(buflen); + kiss_fft_scalar * out= (kiss_fft_scalar*)malloc(buflen); + opus_val16 * window= (opus_val16*)malloc(sizeof(opus_val16)*nfft/2); + int k; + + clt_mdct_init(&cfg, nfft, 0); + for (k=0;k<nfft;++k) { + in[k] = (rand() % 32768) - 16384; + } + + for (k=0;k<nfft/2;++k) { + window[k] = Q15ONE; + } + for (k=0;k<nfft;++k) { + in[k] *= 32768; + } + + if (isinverse) + { + for (k=0;k<nfft;++k) { + in[k] /= nfft; + } + } + + for (k=0;k<nfft;++k) + in_copy[k] = in[k]; + /*for (k=0;k<nfft;++k) printf("%d %d ", in[k].r, in[k].i);printf("\n");*/ + + if (isinverse) + { + for (k=0;k<nfft;++k) + out[k] = 0; + clt_mdct_backward(&cfg,in,out, window, nfft/2, 0, 1); + /* apply TDAC because clt_mdct_backward() no longer does that */ + for (k=0;k<nfft/4;++k) + out[nfft-k-1] = out[nfft/2+k]; + check_inv(in,out,nfft,isinverse); + } else { + clt_mdct_forward(&cfg,in,out,window, nfft/2, 0, 1); + check(in_copy,out,nfft,isinverse); + } + /*for (k=0;k<nfft;++k) printf("%d %d ", out[k].r, out[k].i);printf("\n");*/ + + + free(in); + free(out); + clt_mdct_clear(&cfg); +} + +int main(int argc,char ** argv) +{ + ALLOC_STACK; + if (argc>1) { + int k; + for (k=1;k<argc;++k) { + test1d(atoi(argv[k]),0); + test1d(atoi(argv[k]),1); + } + }else{ + test1d(32,0); + test1d(32,1); + test1d(256,0); + test1d(256,1); + test1d(512,0); + test1d(512,1); + test1d(1024,0); + test1d(1024,1); + test1d(2048,0); + test1d(2048,1); +#ifndef RADIX_TWO_ONLY + test1d(36,0); + test1d(36,1); + test1d(40,0); + test1d(40,1); + test1d(60,0); + test1d(60,1); + test1d(120,0); + 
test1d(120,1); + test1d(240,0); + test1d(240,1); + test1d(480,0); + test1d(480,1); + test1d(960,0); + test1d(960,1); + test1d(1920,0); + test1d(1920,1); +#endif + } + return ret; +} diff --git a/drivers/opus/celt/tests/test_unit_rotation.c b/drivers/opus/celt/tests/test_unit_rotation.c new file mode 100644 index 0000000000..c12cc3f02f --- /dev/null +++ b/drivers/opus/celt/tests/test_unit_rotation.c @@ -0,0 +1,90 @@ +/* Copyright (c) 2008-2011 Xiph.Org Foundation + Written by Jean-Marc Valin */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#ifndef CUSTOM_MODES +#define CUSTOM_MODES +#endif + +#define CELT_C + +#include <stdio.h> +#include <stdlib.h> +#include "vq.c" +#include "cwrs.c" +#include "entcode.c" +#include "entenc.c" +#include "entdec.c" +#include "mathops.c" +#include "bands.h" +#include <math.h> +#define MAX_SIZE 100 + +int ret=0; +void test_rotation(int N, int K) +{ + int i; + double err = 0, ener = 0, snr, snr0; + opus_val16 x0[MAX_SIZE]; + opus_val16 x1[MAX_SIZE]; + for (i=0;i<N;i++) + x1[i] = x0[i] = rand()%32767-16384; + exp_rotation(x1, N, 1, 1, K, SPREAD_NORMAL); + for (i=0;i<N;i++) + { + err += (x0[i]-(double)x1[i])*(x0[i]-(double)x1[i]); + ener += x0[i]*(double)x0[i]; + } + snr0 = 20*log10(ener/err); + err = ener = 0; + exp_rotation(x1, N, -1, 1, K, SPREAD_NORMAL); + for (i=0;i<N;i++) + { + err += (x0[i]-(double)x1[i])*(x0[i]-(double)x1[i]); + ener += x0[i]*(double)x0[i]; + } + snr = 20*log10(ener/err); + printf ("SNR for size %d (%d pulses) is %f (was %f without inverse)\n", N, K, snr, snr0); + if (snr < 60 || snr0 > 20) + { + fprintf(stderr, "FAIL!\n"); + ret = 1; + } +} + +int main(void) +{ + ALLOC_STACK; + test_rotation(15, 3); + test_rotation(23, 5); + test_rotation(50, 3); + test_rotation(80, 1); + return ret; +} diff --git a/drivers/opus/celt/tests/test_unit_types.c b/drivers/opus/celt/tests/test_unit_types.c new file mode 100644 index 0000000000..29e671067f --- /dev/null +++ b/drivers/opus/celt/tests/test_unit_types.c @@ -0,0 +1,50 @@ +/* Copyright (c) 2008-2011 Xiph.Org Foundation + Written by Jean-Marc Valin */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "opus_types.h" +#include <stdio.h> + +int main(void) +{ + opus_int16 i = 1; + i <<= 14; + if (i>>14 != 1) + { + fprintf(stderr, "opus_int16 isn't 16 bits\n"); + return 1; + } + if (sizeof(opus_int16)*2 != sizeof(opus_int32)) + { + fprintf(stderr, "16*2 != 32\n"); + return 1; + } + return 0; +} diff --git a/drivers/opus/celt/vq.c b/drivers/opus/celt/vq.c new file mode 100644 index 0000000000..20b0b82728 --- /dev/null +++ b/drivers/opus/celt/vq.c @@ -0,0 +1,415 @@ +/* Copyright (c) 2007-2008 CSIRO + Copyright (c) 2007-2009 Xiph.Org Foundation + Written by Jean-Marc Valin */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "mathops.h" +#include "cwrs.h" +#include "vq.h" +#include "arch.h" +#include "os_support.h" +#include "bands.h" +#include "rate.h" + +static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s) +{ + int i; + celt_norm *Xptr; + Xptr = X; + for (i=0;i<len-stride;i++) + { + celt_norm x1, x2; + x1 = Xptr[0]; + x2 = Xptr[stride]; + Xptr[stride] = EXTRACT16(SHR32(MULT16_16(c,x2) + MULT16_16(s,x1), 15)); + *Xptr++ = EXTRACT16(SHR32(MULT16_16(c,x1) - MULT16_16(s,x2), 15)); + } + Xptr = &X[len-2*stride-1]; + for (i=len-2*stride-1;i>=0;i--) + { + celt_norm x1, x2; + x1 = Xptr[0]; + x2 = Xptr[stride]; + Xptr[stride] = EXTRACT16(SHR32(MULT16_16(c,x2) + MULT16_16(s,x1), 15)); + *Xptr-- = EXTRACT16(SHR32(MULT16_16(c,x1) - MULT16_16(s,x2), 15)); + } +} + +static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread) +{ + static const int SPREAD_FACTOR[3]={15,10,5}; + int i; 
+ opus_val16 c, s; + opus_val16 gain, theta; + int stride2=0; + int factor; + + if (2*K>=len || spread==SPREAD_NONE) + return; + factor = SPREAD_FACTOR[spread-1]; + + gain = celt_div((opus_val32)MULT16_16(Q15_ONE,len),(opus_val32)(len+factor*K)); + theta = HALF16(MULT16_16_Q15(gain,gain)); + + c = celt_cos_norm(EXTEND32(theta)); + s = celt_cos_norm(EXTEND32(SUB16(Q15ONE,theta))); /* sin(theta) */ + + if (len>=8*stride) + { + stride2 = 1; + /* This is just a simple (equivalent) way of computing sqrt(len/stride) with rounding. + It's basically incrementing long as (stride2+0.5)^2 < len/stride. */ + while ((stride2*stride2+stride2)*stride + (stride>>2) < len) + stride2++; + } + /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for + extract_collapse_mask().*/ + len /= stride; + for (i=0;i<stride;i++) + { + if (dir < 0) + { + if (stride2) + exp_rotation1(X+i*len, len, stride2, s, c); + exp_rotation1(X+i*len, len, 1, c, s); + } else { + exp_rotation1(X+i*len, len, 1, c, -s); + if (stride2) + exp_rotation1(X+i*len, len, stride2, s, -c); + } + } +} + +/** Takes the pitch vector and the decoded residual vector, computes the gain + that will give ||p+g*y||=1 and mixes the residual with the pitch. 
/* Builds a bit mask telling which of the B equal-sized blocks of iy contain
   at least one non-zero pulse.
   iy: pulse vector of N entries, viewed as B consecutive blocks of N/B.
   N:  total number of entries.
   B:  number of blocks; B<=1 always yields 1.
   Returns a mask whose bit i is set when block i holds a non-zero entry. */
static unsigned extract_collapse_mask(int *iy, int N, int B)
{
   unsigned collapse_mask;
   int N0;
   int i;
   if (B<=1)
      return 1;
   /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for
      exp_rotation().*/
   N0 = N/B;
   collapse_mask = 0;
   i=0; do {
      int j;
      unsigned any_pulse = 0;
      j=0; do {
         any_pulse |= (unsigned)iy[i*N0+j];
      } while (++j<N0);
      /* Shift as unsigned: shifting a signed 1 into bit 31 is undefined. */
      collapse_mask |= (unsigned)(any_pulse!=0)<<i;
   } while (++i<B);
   return collapse_mask;
}
allocated. 64 is an approximation of infinity here. */ + if (!(sum > EPSILON && sum < 64)) +#endif + { + X[0] = QCONST16(1.f,14); + j=1; do + X[j]=0; + while (++j<N); + sum = QCONST16(1.f,14); + } + rcp = EXTRACT16(MULT16_32_Q16(K-1, celt_rcp(sum))); + j=0; do { +#ifdef OPUS_FIXED_POINT + /* It's really important to round *towards zero* here */ + iy[j] = MULT16_16_Q15(X[j],rcp); +#else + iy[j] = (int)floor(rcp*X[j]); +#endif + y[j] = (celt_norm)iy[j]; + yy = MAC16_16(yy, y[j],y[j]); + xy = MAC16_16(xy, X[j],y[j]); + y[j] *= 2; + pulsesLeft -= iy[j]; + } while (++j<N); + } + celt_assert2(pulsesLeft>=1, "Allocated too many pulses in the quick pass"); + + /* This should never happen, but just in case it does (e.g. on silence) + we fill the first bin with pulses. */ +#ifdef OPUS_FIXED_POINT_DEBUG + celt_assert2(pulsesLeft<=N+3, "Not enough pulses in the quick pass"); +#endif + if (pulsesLeft > N+3) + { + opus_val16 tmp = (opus_val16)pulsesLeft; + yy = MAC16_16(yy, tmp, tmp); + yy = MAC16_16(yy, tmp, y[0]); + iy[0] += pulsesLeft; + pulsesLeft=0; + } + + s = 1; + for (i=0;i<pulsesLeft;i++) + { + int best_id; + opus_val32 best_num = -VERY_LARGE16; + opus_val16 best_den = 0; +#ifdef OPUS_FIXED_POINT + int rshift; +#endif +#ifdef OPUS_FIXED_POINT + rshift = 1+celt_ilog2(K-pulsesLeft+i+1); +#endif + best_id = 0; + /* The squared magnitude term gets added anyway, so we might as well + add it outside the loop */ + yy = ADD32(yy, 1); + j=0; + do { + opus_val16 Rxy, Ryy; + /* Temporary sums of the new pulse(s) */ + Rxy = EXTRACT16(SHR32(ADD32(xy, EXTEND32(X[j])),rshift)); + /* We're multiplying y[j] by two so we don't have to do it here */ + Ryy = ADD16(yy, y[j]); + + /* Approximate score: we maximise Rxy/sqrt(Ryy) (we're guaranteed that + Rxy is positive because the sign is pre-computed) */ + Rxy = MULT16_16_Q15(Rxy,Rxy); + /* The idea is to check for num/den >= best_num/best_den, but that way + we can do it without any division */ + /* OPT: Make sure to use conditional moves 
here */ + if (MULT16_16(best_den, Rxy) > MULT16_16(Ryy, best_num)) + { + best_den = Ryy; + best_num = Rxy; + best_id = j; + } + } while (++j<N); + + /* Updating the sums of the new pulse(s) */ + xy = ADD32(xy, EXTEND32(X[best_id])); + /* We're multiplying y[j] by two so we don't have to do it here */ + yy = ADD16(yy, y[best_id]); + + /* Only now that we've made the final choice, update y/iy */ + /* Multiplying y[j] by 2 so we don't have to do it everywhere else */ + y[best_id] += 2*s; + iy[best_id]++; + } + + /* Put the original sign back */ + j=0; + do { + X[j] = MULT16_16(signx[j],X[j]); + if (signx[j] < 0) + iy[j] = -iy[j]; + } while (++j<N); + encode_pulses(iy, N, K, enc); + +#ifdef RESYNTH + normalise_residual(iy, X, N, yy, gain); + exp_rotation(X, N, -1, B, K, spread); +#endif + + collapse_mask = extract_collapse_mask(iy, N, B); + RESTORE_STACK; + return collapse_mask; +} + +/** Decode pulse vector and combine the result with the pitch vector to produce + the final normalised signal in the current band. 
*/ +unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B, + ec_dec *dec, opus_val16 gain) +{ + int i; + opus_val32 Ryy; + unsigned collapse_mask; + VARDECL(int, iy); + SAVE_STACK; + + celt_assert2(K>0, "alg_unquant() needs at least one pulse"); + celt_assert2(N>1, "alg_unquant() needs at least two dimensions"); + ALLOC(iy, N, int); + decode_pulses(iy, N, K, dec); + Ryy = 0; + i=0; + do { + Ryy = MAC16_16(Ryy, iy[i], iy[i]); + } while (++i < N); + normalise_residual(iy, X, N, Ryy, gain); + exp_rotation(X, N, -1, B, K, spread); + collapse_mask = extract_collapse_mask(iy, N, B); + RESTORE_STACK; + return collapse_mask; +} + +void renormalise_vector(celt_norm *X, int N, opus_val16 gain) +{ + int i; +#ifdef OPUS_FIXED_POINT + int k; +#endif + opus_val32 E = EPSILON; + opus_val16 g; + opus_val32 t; + celt_norm *xptr = X; + for (i=0;i<N;i++) + { + E = MAC16_16(E, *xptr, *xptr); + xptr++; + } +#ifdef OPUS_FIXED_POINT + k = celt_ilog2(E)>>1; +#endif + t = VSHR32(E, 2*(k-7)); + g = MULT16_16_P15(celt_rsqrt_norm(t),gain); + + xptr = X; + for (i=0;i<N;i++) + { + *xptr = EXTRACT16(PSHR32(MULT16_16(g, *xptr), k+1)); + xptr++; + } + /*return celt_sqrt(E);*/ +} + +int stereo_itheta(celt_norm *X, celt_norm *Y, int stereo, int N) +{ + int i; + int itheta; + opus_val16 mid, side; + opus_val32 Emid, Eside; + + Emid = Eside = EPSILON; + if (stereo) + { + for (i=0;i<N;i++) + { + celt_norm m, s; + m = ADD16(SHR16(X[i],1),SHR16(Y[i],1)); + s = SUB16(SHR16(X[i],1),SHR16(Y[i],1)); + Emid = MAC16_16(Emid, m, m); + Eside = MAC16_16(Eside, s, s); + } + } else { + for (i=0;i<N;i++) + { + celt_norm m, s; + m = X[i]; + s = Y[i]; + Emid = MAC16_16(Emid, m, m); + Eside = MAC16_16(Eside, s, s); + } + } + mid = celt_sqrt(Emid); + side = celt_sqrt(Eside); +#ifdef OPUS_FIXED_POINT + /* 0.63662 = 2/pi */ + itheta = MULT16_16_Q15(QCONST16(0.63662f,15),celt_atan2p(side, mid)); +#else + itheta = (int)floor(.5f+16384*0.63662f*atan2(side,mid)); +#endif + + return itheta; +} diff --git 
a/drivers/opus/celt/vq.h b/drivers/opus/celt/vq.h new file mode 100644 index 0000000000..8bab59c5e0 --- /dev/null +++ b/drivers/opus/celt/vq.h @@ -0,0 +1,70 @@ +/* Copyright (c) 2007-2008 CSIRO + Copyright (c) 2007-2009 Xiph.Org Foundation + Written by Jean-Marc Valin */ +/** + @file vq.h + @brief Vector quantisation of the residual + */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef VQ_H +#define VQ_H + +#include "entenc.h" +#include "entdec.h" +#include "opus_modes.h" + +/** Algebraic pulse-vector quantiser. The signal x is replaced by the sum of + * the pitch and a combination of pulses such that its norm is still equal + * to 1. This is the function that will typically require the most CPU. 
+ * @param X Residual signal to quantise/encode (returns quantised version) + * @param N Number of samples to encode + * @param K Number of pulses to use + * @param enc Entropy encoder state + * @ret A mask indicating which blocks in the band received pulses +*/ +unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, + ec_enc *enc +#ifdef RESYNTH + , opus_val16 gain +#endif + ); + +/** Algebraic pulse decoder + * @param X Decoded normalised spectrum (returned) + * @param N Number of samples to decode + * @param K Number of pulses to use + * @param dec Entropy decoder state + * @ret A mask indicating which blocks in the band received pulses + */ +unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B, + ec_dec *dec, opus_val16 gain); + +void renormalise_vector(celt_norm *X, int N, opus_val16 gain); + +int stereo_itheta(celt_norm *X, celt_norm *Y, int stereo, int N); + +#endif /* VQ_H */ diff --git a/drivers/opus/celt/x86/pitch_sse.h b/drivers/opus/celt/x86/pitch_sse.h new file mode 100644 index 0000000000..695122a5ad --- /dev/null +++ b/drivers/opus/celt/x86/pitch_sse.h @@ -0,0 +1,156 @@ +/* Copyright (c) 2013 Jean-Marc Valin and John Ridges */ +/** + @file pitch_sse.h + @brief Pitch analysis + */ + +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef PITCH_SSE_H +#define PITCH_SSE_H + +#include <xmmintrin.h> +#include "arch.h" + +#define OVERRIDE_XCORR_KERNEL +static OPUS_INLINE void xcorr_kernel(const opus_val16 *x, const opus_val16 *y, opus_val32 sum[4], int len) +{ + int j; + __m128 xsum1, xsum2; + xsum1 = _mm_loadu_ps(sum); + xsum2 = _mm_setzero_ps(); + + for (j = 0; j < len-3; j += 4) + { + __m128 x0 = _mm_loadu_ps(x+j); + __m128 yj = _mm_loadu_ps(y+j); + __m128 y3 = _mm_loadu_ps(y+j+3); + + xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_shuffle_ps(x0,x0,0x00),yj)); + xsum2 = _mm_add_ps(xsum2,_mm_mul_ps(_mm_shuffle_ps(x0,x0,0x55), + _mm_shuffle_ps(yj,y3,0x49))); + xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_shuffle_ps(x0,x0,0xaa), + _mm_shuffle_ps(yj,y3,0x9e))); + xsum2 = _mm_add_ps(xsum2,_mm_mul_ps(_mm_shuffle_ps(x0,x0,0xff),y3)); + } + if (j < len) + { + xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_load1_ps(x+j),_mm_loadu_ps(y+j))); + if (++j < len) + { + xsum2 = _mm_add_ps(xsum2,_mm_mul_ps(_mm_load1_ps(x+j),_mm_loadu_ps(y+j))); + if (++j < len) + { + xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_load1_ps(x+j),_mm_loadu_ps(y+j))); + } + } + } + _mm_storeu_ps(sum,_mm_add_ps(xsum1,xsum2)); +} + +#define OVERRIDE_DUAL_INNER_PROD +static OPUS_INLINE void dual_inner_prod(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02, + int N, opus_val32 *xy1, opus_val32 *xy2) +{ + int i; + __m128 xsum1, xsum2; + xsum1 = _mm_setzero_ps(); + xsum2 = _mm_setzero_ps(); + for (i=0;i<N-3;i+=4) + { + 
__m128 xi = _mm_loadu_ps(x+i); + __m128 y1i = _mm_loadu_ps(y01+i); + __m128 y2i = _mm_loadu_ps(y02+i); + xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(xi, y1i)); + xsum2 = _mm_add_ps(xsum2,_mm_mul_ps(xi, y2i)); + } + /* Horizontal sum */ + xsum1 = _mm_add_ps(xsum1, _mm_movehl_ps(xsum1, xsum1)); + xsum1 = _mm_add_ss(xsum1, _mm_shuffle_ps(xsum1, xsum1, 0x55)); + _mm_store_ss(xy1, xsum1); + xsum2 = _mm_add_ps(xsum2, _mm_movehl_ps(xsum2, xsum2)); + xsum2 = _mm_add_ss(xsum2, _mm_shuffle_ps(xsum2, xsum2, 0x55)); + _mm_store_ss(xy2, xsum2); + for (;i<N;i++) + { + *xy1 = MAC16_16(*xy1, x[i], y01[i]); + *xy2 = MAC16_16(*xy2, x[i], y02[i]); + } +} + +#define OVERRIDE_COMB_FILTER_CONST +static OPUS_INLINE void comb_filter_const(opus_val32 *y, opus_val32 *x, int T, int N, + opus_val16 g10, opus_val16 g11, opus_val16 g12) +{ + int i; + __m128 x0v; + __m128 g10v, g11v, g12v; + g10v = _mm_load1_ps(&g10); + g11v = _mm_load1_ps(&g11); + g12v = _mm_load1_ps(&g12); + x0v = _mm_loadu_ps(&x[-T-2]); + for (i=0;i<N-3;i+=4) + { + __m128 yi, yi2, x1v, x2v, x3v, x4v; + const opus_val32 *xp = &x[i-T-2]; + yi = _mm_loadu_ps(x+i); + x4v = _mm_loadu_ps(xp+4); +#if 0 + /* Slower version with all loads */ + x1v = _mm_loadu_ps(xp+1); + x2v = _mm_loadu_ps(xp+2); + x3v = _mm_loadu_ps(xp+3); +#else + x2v = _mm_shuffle_ps(x0v, x4v, 0x4e); + x1v = _mm_shuffle_ps(x0v, x2v, 0x99); + x3v = _mm_shuffle_ps(x2v, x4v, 0x99); +#endif + + yi = _mm_add_ps(yi, _mm_mul_ps(g10v,x2v)); +#if 0 /* Set to 1 to make it bit-exact with the non-SSE version */ + yi = _mm_add_ps(yi, _mm_mul_ps(g11v,_mm_add_ps(x3v,x1v))); + yi = _mm_add_ps(yi, _mm_mul_ps(g12v,_mm_add_ps(x4v,x0v))); +#else + /* Use partial sums */ + yi2 = _mm_add_ps(_mm_mul_ps(g11v,_mm_add_ps(x3v,x1v)), + _mm_mul_ps(g12v,_mm_add_ps(x4v,x0v))); + yi = _mm_add_ps(yi, yi2); +#endif + x0v=x4v; + _mm_storeu_ps(y+i, yi); + } +#ifdef CUSTOM_MODES + for (;i<N;i++) + { + y[i] = x[i] + + MULT16_32_Q15(g10,x[i-T]) + + MULT16_32_Q15(g11,ADD32(x[i-T+1],x[i-T-1])) + + 
MULT16_32_Q15(g12,ADD32(x[i-T+2],x[i-T-2])); + } +#endif +} + +#endif diff --git a/drivers/opus/http.c b/drivers/opus/http.c new file mode 100644 index 0000000000..803db044af --- /dev/null +++ b/drivers/opus/http.c @@ -0,0 +1,3391 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE libopusfile SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * + * * + * THE libopusfile SOURCE CODE IS (C) COPYRIGHT 2012 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ********************************************************************/ +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "internal.h" +#include <ctype.h> +#include <errno.h> +#include <limits.h> +#include <string.h> + +/*RFCs referenced in this file: + RFC 761: DOD Standard Transmission Control Protocol + RFC 1535: A Security Problem and Proposed Correction With Widely Deployed DNS + Software + RFC 1738: Uniform Resource Locators (URL) + RFC 1945: Hypertext Transfer Protocol -- HTTP/1.0 + RFC 2068: Hypertext Transfer Protocol -- HTTP/1.1 + RFC 2145: Use and Interpretation of HTTP Version Numbers + RFC 2246: The TLS Protocol Version 1.0 + RFC 2459: Internet X.509 Public Key Infrastructure Certificate and + Certificate Revocation List (CRL) Profile + RFC 2616: Hypertext Transfer Protocol -- HTTP/1.1 + RFC 2617: HTTP Authentication: Basic and Digest Access Authentication + RFC 2817: Upgrading to TLS Within HTTP/1.1 + RFC 2818: HTTP Over TLS + RFC 3492: Punycode: A Bootstring encoding of Unicode for Internationalized + Domain Names in Applications (IDNA) + RFC 3986: Uniform Resource Identifier (URI): Generic Syntax + RFC 3987: Internationalized Resource Identifiers (IRIs) + RFC 4343: Domain Name System (DNS) Case Insensitivity 
/*Convert the ASCII letters 'A'...'Z' of a NUL-terminated string to lower case
   in place.
  All other bytes are left untouched (no locale is consulted).
  Return: _s.*/
static char *op_string_tolower(char *_s){
  char *p;
  for(p=_s;*p!='\0';p++){
    if(*p>='A'&&*p<='Z')*p=(char)(*p+('a'-'A'));
  }
  return _s;
}
/*Convert a hex digit to its actual value.
  _c: The hex digit to convert.
      Presumed to be valid ('0'...'9', 'A'...'F', or 'a'...'f').
  Return: The value of the digit, in the range [0,15].*/
static int op_hex_value(int _c){
  return _c>='a'?_c-'a'+10:_c>='A'?_c-'A'+10:_c-'0';
}

/*Unescape all the <% HEXDIG HEXDIG> sequences in a string in-place.
  Each escape is replaced by the single byte it encodes, the rest of the
   string is shifted down to close the gap, and the result is re-terminated.
  A '%' that is not followed by two hex digits is copied through unchanged,
   so a truncated escape can never cause a read past the terminator.
  No other validity checking is done; in particular, "%00" decodes to an
   embedded NUL.
  Return: _s.*/
static char *op_unescape_url_component(char *_s){
  int i;
  int j;
  /*i is the read position, j the (never larger) write position.*/
  for(i=j=0;_s[i];i++,j++){
    if(_s[i]=='%'&&isxdigit((unsigned char)_s[i+1])
     &&isxdigit((unsigned char)_s[i+2])){
      _s[j]=(char)(op_hex_value(_s[i+1])<<4|op_hex_value(_s[i+2]));
      i+=2;
    }
    else _s[j]=_s[i];
  }
  _s[j]='\0';
  return _s;
}
+ This also rejects IP-Literals.*/ + if(*host_end!='/')return NULL; + /*An escaped "localhost" can take at most 27 characters.*/ + if(OP_UNLIKELY(host_end-host>27))return NULL; + memcpy(host_buf,host,sizeof(*host_buf)*(host_end-host)); + host_buf[host_end-host]='\0'; + op_unescape_url_component(host_buf); + op_string_tolower(host_buf); + /*Some other host: give up.*/ + if(OP_UNLIKELY(strcmp(host_buf,"localhost")!=0))return NULL; + path=host_end; + } + } + else path=scheme_end+1; + path_end=path+strspn(path,OP_URL_PATH); + /*This will reject a <query> or <fragment> component, too. + I don't know what to do with queries, but a temporal fragment would at + least make sense. + RFC 1738 pretty clearly defines a <searchpart> that's equivalent to the + RFC 3986 <query> component for other schemes, but not the file: scheme, + so I'm going to just reject it.*/ + if(*path_end!='\0')return NULL; + return path; +} + +#if defined(OP_ENABLE_HTTP) +# if defined(_WIN32) +# include <winsock2.h> +# include <ws2tcpip.h> +# include <openssl/ssl.h> +# include "winerrno.h" + +typedef SOCKET op_sock; + +# define OP_INVALID_SOCKET (INVALID_SOCKET) + +/*Vista and later support WSAPoll(), but we don't want to rely on that. + Instead we re-implement it badly using select(). 
+ Unfortunately, they define a conflicting struct pollfd, so we only define our + own if it looks like that one has not already been defined.*/ +# if !defined(POLLIN) +/*Equivalent to POLLIN.*/ +# define POLLRDNORM (0x0100) +/*Priority band data can be read.*/ +# define POLLRDBAND (0x0200) +/*There is data to read.*/ +# define POLLIN (POLLRDNORM|POLLRDBAND) +/* There is urgent data to read.*/ +# define POLLPRI (0x0400) +/*Equivalent to POLLOUT.*/ +# define POLLWRNORM (0x0010) +/*Writing now will not block.*/ +# define POLLOUT (POLLWRNORM) +/*Priority data may be written.*/ +# define POLLWRBAND (0x0020) +/*Error condition (output only).*/ +# define POLLERR (0x0001) +/*Hang up (output only).*/ +# define POLLHUP (0x0002) +/*Invalid request: fd not open (output only).*/ +# define POLLNVAL (0x0004) + +struct pollfd{ + /*File descriptor.*/ + op_sock fd; + /*Requested events.*/ + short events; + /*Returned events.*/ + short revents; +}; +# endif + +/*But Winsock never defines nfds_t (it's simply hard-coded to ULONG).*/ +typedef unsigned long nfds_t; + +/*The usage of FD_SET() below is O(N^2). + This is okay because select() is limited to 64 sockets in Winsock, anyway. 
+ In practice, we only ever call it with one or two sockets.*/ +static int op_poll_win32(struct pollfd *_fds,nfds_t _nfds,int _timeout){ + struct timeval tv; + fd_set ifds; + fd_set ofds; + fd_set efds; + nfds_t i; + int ret; + FD_ZERO(&ifds); + FD_ZERO(&ofds); + FD_ZERO(&efds); + for(i=0;i<_nfds;i++){ + _fds[i].revents=0; + if(_fds[i].events&POLLIN)FD_SET(_fds[i].fd,&ifds); + if(_fds[i].events&POLLOUT)FD_SET(_fds[i].fd,&ofds); + FD_SET(_fds[i].fd,&efds); + } + if(_timeout>=0){ + tv.tv_sec=_timeout/1000; + tv.tv_usec=(_timeout%1000)*1000; + } + ret=select(-1,&ifds,&ofds,&efds,_timeout<0?NULL:&tv); + if(ret>0){ + for(i=0;i<_nfds;i++){ + if(FD_ISSET(_fds[i].fd,&ifds))_fds[i].revents|=POLLIN; + if(FD_ISSET(_fds[i].fd,&ofds))_fds[i].revents|=POLLOUT; + /*This isn't correct: there are several different things that might have + happened to a fd in efds, but I don't know a good way to distinguish + them without more context from the caller. + It's okay, because we don't actually check any of these bits, we just + need _some_ bit set.*/ + if(FD_ISSET(_fds[i].fd,&efds))_fds[i].revents|=POLLHUP; + } + } + return ret; +} + +/*We define op_errno() to make it clear that it's not an l-value like normal + errno is.*/ +# define op_errno() (WSAGetLastError()?WSAGetLastError()-WSABASEERR:0) +# define op_reset_errno() (WSASetLastError(0)) + +/*The remaining functions don't get an op_ prefix even though they only + operate on sockets, because we don't use non-socket I/O here, and this + minimizes the changes needed to deal with Winsock.*/ +# define close(_fd) closesocket(_fd) +/*This relies on sizeof(u_long)==sizeof(int), which is always true on both + Win32 and Win64.*/ +# define ioctl(_fd,_req,_arg) ioctlsocket(_fd,_req,(u_long *)(_arg)) +# define getsockopt(_fd,_level,_name,_val,_len) \ + getsockopt(_fd,_level,_name,(char *)(_val),_len) +# define setsockopt(_fd,_level,_name,_val,_len) \ + setsockopt(_fd,_level,_name,(const char *)(_val),_len) +# define poll(_fds,_nfds,_timeout) 
op_poll_win32(_fds,_nfds,_timeout) + +# if defined(_MSC_VER) +typedef ptrdiff_t ssize_t; +# endif + +/*Load certificates from the built-in certificate store.*/ +int SSL_CTX_set_default_verify_paths_win32(SSL_CTX *_ssl_ctx); +# define SSL_CTX_set_default_verify_paths \ + SSL_CTX_set_default_verify_paths_win32 + +# else +/*Normal Berkeley sockets.*/ +# include <sys/ioctl.h> +# include <sys/types.h> +# include <sys/socket.h> +# include <arpa/inet.h> +# include <netinet/in.h> +# include <netinet/tcp.h> +# include <fcntl.h> +# include <netdb.h> +# include <poll.h> +# include <unistd.h> +# include <openssl/ssl.h> + +typedef int op_sock; + +# define OP_INVALID_SOCKET (-1) + +# define op_errno() (errno) +# define op_reset_errno() (errno=0) + +# endif +# include <sys/timeb.h> +# include <openssl/x509v3.h> + +/*The maximum number of simultaneous connections. + RFC 2616 says this SHOULD NOT be more than 2, but everyone on the modern web + ignores that (e.g., IE 8 bumped theirs up from 2 to 6, Firefox uses 15). + If it makes you feel better, we'll only ever actively read from one of these + at a time. + The others are kept around mainly to avoid slow-starting a new connection + when seeking, and time out rapidly.*/ +# define OP_NCONNS_MAX (4) + +/*The amount of time before we attempt to re-resolve the host. + This is 10 minutes, as recommended in RFC 6555 for expiring cached connection + results for dual-stack hosts.*/ +# define OP_RESOLVE_CACHE_TIMEOUT_MS (10*60*(opus_int32)1000) + +/*The number of redirections at which we give up. + The value here is the current default in Firefox. + RFC 2068 mandated a maximum of 5, but RFC 2616 relaxed that to "a client + SHOULD detect infinite redirection loops." + Fortunately, 20 is less than infinity.*/ +# define OP_REDIRECT_LIMIT (20) + +/*The initial size of the buffer used to read a response message (before the + body).*/ +# define OP_RESPONSE_SIZE_MIN (510) +/*The maximum size of a response message (before the body). 
+ Responses larger than this will be discarded. + I've seen a real server return 20 kB of data for a 302 Found response. + Increasing this beyond 32kB will cause problems on platforms with a 16-bit + int.*/ +# define OP_RESPONSE_SIZE_MAX (32766) + +/*The number of milliseconds we will allow a connection to sit idle before we + refuse to resurrect it. + Apache as of 2.2 has reduced its default timeout to 5 seconds (from 15), so + that's what we'll use here.*/ +# define OP_CONNECTION_IDLE_TIMEOUT_MS (5*1000) + +/*The number of milliseconds we will wait to send or receive data before giving + up.*/ +# define OP_POLL_TIMEOUT_MS (30*1000) + +/*We will always attempt to read ahead at least this much in preference to + opening a new connection.*/ +# define OP_READAHEAD_THRESH_MIN (32*(opus_int32)1024) + +/*The amount of data to request after a seek. + This is a trade-off between read throughput after a seek vs. the the ability + to quickly perform another seek with the same connection.*/ +# define OP_PIPELINE_CHUNK_SIZE (32*(opus_int32)1024) +/*Subsequent chunks are requested with larger and larger sizes until they pass + this threshold, after which we just ask for the rest of the resource.*/ +# define OP_PIPELINE_CHUNK_SIZE_MAX (1024*(opus_int32)1024) +/*This is the maximum number of requests we'll make with a single connection. + Many servers will simply disconnect after we attempt some number of requests, + possibly without sending a Connection: close header, meaning we won't + discover it until we try to read beyond the end of the current chunk. + We can reconnect when that happens, but this is slow. 
+ Instead, we impose a limit ourselves (set to the default for Apache + installations and thus likely the most common value in use).*/ +# define OP_PIPELINE_MAX_REQUESTS (100) +/*This should be the number of requests, starting from a chunk size of + OP_PIPELINE_CHUNK_SIZE and doubling each time, until we exceed + OP_PIPELINE_CHUNK_SIZE_MAX and just request the rest of the file. + We won't reuse a connection when seeking unless it has at least this many + requests left, to reduce the chances we'll have to open a new connection + while reading forward afterwards.*/ +# define OP_PIPELINE_MIN_REQUESTS (7) + +/*Is this an https URL? + For now we can simply check the last letter of the scheme.*/ +# define OP_URL_IS_SSL(_url) ((_url)->scheme[4]=='s') + +/*Does this URL use the default port for its scheme?*/ +# define OP_URL_IS_DEFAULT_PORT(_url) \ + (!OP_URL_IS_SSL(_url)&&(_url)->port==80 \ + ||OP_URL_IS_SSL(_url)&&(_url)->port==443) + +struct OpusParsedURL{ + /*Either "http" or "https".*/ + char *scheme; + /*The user name from the <userinfo> component, or NULL.*/ + char *user; + /*The password from the <userinfo> component, or NULL.*/ + char *pass; + /*The <host> component. + This may not be NULL.*/ + char *host; + /*The <path> and <query> components. + This may not be NULL.*/ + char *path; + /*The <port> component. + This is set to the default port if the URL did not contain one.*/ + unsigned port; +}; + +/*Parse a URL. + This code is not meant to be fast: strspn() with large sets is likely to be + slow, but it is very convenient. + It is meant to be RFC 3986-compliant. + We currently do not support IRIs (Internationalized Resource Identifiers, + RFC 3987). 
Callers should translate them to URIs first.*/
/*_dst: Receives the parsed components.
        On failure some members may already have been allocated; the caller is
         responsible for releasing them (op_parse_url() does this).
  _src: The NUL-terminated URL to parse.
  Return: 0 on success, or a negative value on error:
          OP_EIMPL:  The scheme is neither http nor https.
          OP_EINVAL: The URL is malformed.
          OP_EFAULT: A string could not be duplicated.*/
static int op_parse_url_impl(OpusParsedURL *_dst,const char *_src){
  const char  *scheme_end;
  const char  *authority;
  const char  *userinfo_end;
  const char  *user;
  const char  *user_end;
  const char  *pass;
  const char  *hostport;
  const char  *hostport_end;
  const char  *host_end;
  const char  *port;
  opus_int32   port_num;
  const char  *port_end;
  const char  *path;
  const char  *path_end;
  const char  *uri_end;
  scheme_end=_src+strspn(_src,OP_URL_SCHEME);
  /*The scheme must be 4 or 5 characters long and match "http" or "https",
     ignoring case.*/
  if(OP_UNLIKELY(*scheme_end!=':')
   ||OP_UNLIKELY(scheme_end-_src<4)||OP_UNLIKELY(scheme_end-_src>5)
   ||OP_UNLIKELY(op_strncasecmp(_src,"https",scheme_end-_src)!=0)){
    /*Unsupported protocol.*/
    return OP_EIMPL;
  }
  if(OP_UNLIKELY(scheme_end[1]!='/')||OP_UNLIKELY(scheme_end[2]!='/')){
    /*We require an <authority> component.*/
    return OP_EINVAL;
  }
  authority=scheme_end+3;
  /*Make sure all escape sequences are valid to simplify unescaping later.*/
  if(OP_UNLIKELY(op_validate_url_escapes(authority)<0))return OP_EINVAL;
  /*Look for a <userinfo> component.*/
  userinfo_end=authority+strspn(authority,OP_URL_PCHAR_NA);
  if(*userinfo_end=='@'){
    /*Found one.*/
    user=authority;
    /*Look for a password (yes, clear-text passwords are deprecated, I know,
       but what else are people supposed to use? use SSL if you care).*/
    user_end=authority+strspn(authority,OP_URL_PCHAR_BASE);
    if(*user_end==':')pass=user_end+1;
    else pass=NULL;
    hostport=userinfo_end+1;
  }
  else{
    /*We shouldn't have to initialize user_end, but gcc is too dumb to figure
       out that user!=NULL below means we didn't take this else branch.*/
    user=user_end=NULL;
    pass=NULL;
    hostport=authority;
  }
  /*Try to figure out where the <host> component ends.*/
  if(hostport[0]=='['){
    hostport++;
    /*We have an <IP-literal>, which can contain colons.*/
    hostport_end=host_end=hostport+strspn(hostport,OP_URL_PCHAR_NA);
    if(OP_UNLIKELY(*hostport_end++!=']'))return OP_EINVAL;
  }
  /*Currently we don't support IDNA (RFC 5894), because I don't want to deal
     with the policy about which domains should not be internationalized to
     avoid confusing similarities.
    Give this API Punycode (RFC 3492) domain names instead.*/
  else hostport_end=host_end=hostport+strspn(hostport,OP_URL_PCHAR_BASE);
  /*TODO: Validate host.*/
  /*Is there a port number?*/
  port_num=-1;
  if(*hostport_end==':'){
    int i;
    port=hostport_end+1;
    port_end=port+strspn(port,OP_URL_DIGIT);
    path=port_end;
    /*Not part of RFC 3986, but require port numbers in the range 0...65535.*/
    if(OP_LIKELY(port_end-port>0)){
      /*Skip leading zeros so the digit-count check below is meaningful.*/
      while(*port=='0')port++;
      if(OP_UNLIKELY(port_end-port>5))return OP_EINVAL;
      port_num=0;
      for(i=0;i<port_end-port;i++)port_num=port_num*10+port[i]-'0';
      if(OP_UNLIKELY(port_num>65535))return OP_EINVAL;
    }
  }
  else path=hostport_end;
  path_end=path+strspn(path,OP_URL_PATH);
  /*If the path is not empty, it must begin with a '/'.*/
  if(OP_LIKELY(path_end>path)&&OP_UNLIKELY(path[0]!='/'))return OP_EINVAL;
  /*Consume the <query> component, if any (right now we don't split this out
     from the <path> component).*/
  if(*path_end=='?')path_end=path_end+strspn(path_end,OP_URL_QUERY_FRAG);
  /*Discard the <fragment> component, if any.
    This doesn't get sent to the server.
    Some day we should add support for Media Fragment URIs
     <http://www.w3.org/TR/media-frags/>.*/
  if(*path_end=='#')uri_end=path_end+1+strspn(path_end+1,OP_URL_QUERY_FRAG);
  else uri_end=path_end;
  /*If there's anything left, this was not a valid URL.*/
  if(OP_UNLIKELY(*uri_end!='\0'))return OP_EINVAL;
  _dst->scheme=op_string_range_dup(_src,scheme_end);
  if(OP_UNLIKELY(_dst->scheme==NULL))return OP_EFAULT;
  op_string_tolower(_dst->scheme);
  if(user!=NULL){
    _dst->user=op_string_range_dup(user,user_end);
    if(OP_UNLIKELY(_dst->user==NULL))return OP_EFAULT;
    op_unescape_url_component(_dst->user);
    /*Unescaping might have created a ':' in the username.
      That's not allowed by RFC 2617's Basic Authentication Scheme.*/
    if(OP_UNLIKELY(strchr(_dst->user,':')!=NULL))return OP_EINVAL;
  }
  else _dst->user=NULL;
  if(pass!=NULL){
    _dst->pass=op_string_range_dup(pass,userinfo_end);
    if(OP_UNLIKELY(_dst->pass==NULL))return OP_EFAULT;
    op_unescape_url_component(_dst->pass);
  }
  else _dst->pass=NULL;
  _dst->host=op_string_range_dup(hostport,host_end);
  if(OP_UNLIKELY(_dst->host==NULL))return OP_EFAULT;
  if(port_num<0){
    /*No explicit port: use the default for the scheme.
      The scheme was matched case-insensitively above, so test the lower-cased
       copy rather than _src; otherwise "HTTPS://host/" would be given port 80
       while still being treated as an SSL URL.
      For "http" the character at index 4 is the terminating NUL.*/
    if(_dst->scheme[4]=='s')port_num=443;
    else port_num=80;
  }
  _dst->port=(unsigned)port_num;
  /*RFC 2616 says an empty <abs-path> component is equivalent to "/", and we
     MUST use the latter in the Request-URI.
    Reserve space for the slash here.*/
  if(path==path_end||path[0]=='?')path--;
  _dst->path=op_string_range_dup(path,path_end);
  if(OP_UNLIKELY(_dst->path==NULL))return OP_EFAULT;
  /*And force-set it here.*/
  _dst->path[0]='/';
  return 0;
}

/*Set every member of _url to zero/NULL, so that it is always safe to pass to
   op_parsed_url_clear().*/
static void op_parsed_url_init(OpusParsedURL *_url){
  memset(_url,0,sizeof(*_url));
}

/*Release all of the strings owned by _url.
  The structure itself is neither freed nor reset.*/
static void op_parsed_url_clear(OpusParsedURL *_url){
  _ogg_free(_url->scheme);
  _ogg_free(_url->user);
  _ogg_free(_url->pass);
  _ogg_free(_url->host);
  _ogg_free(_url->path);
}

/*Parse _src into _dst.
  Parsing is done into a temporary, so _dst is only written on success and any
   partially-built result is released on failure.
  Return: 0 on success, or the negative error code from
           op_parse_url_impl().*/
static int op_parse_url(OpusParsedURL *_dst,const char *_src){
  OpusParsedURL url;
  int           ret;
  op_parsed_url_init(&url);
  ret=op_parse_url_impl(&url,_src);
  if(OP_UNLIKELY(ret<0))op_parsed_url_clear(&url);
  else *_dst=*&url;
  return ret;
}

/*A buffer to hold growing strings.
  The main purpose of this is to consolidate allocation checks and simplify
   cleanup on a failed allocation.*/
struct OpusStringBuf{
  /*The character storage, or NULL if nothing has been allocated yet.*/
  char *buf;
  /*The number of characters currently stored (not counting the NUL).*/
  int   nbuf;
  /*The number of characters allocated for buf.*/
  int   cbuf;
};

/*Start with an empty buffer that owns no storage.*/
static void op_sb_init(OpusStringBuf *_sb){
  _sb->buf=NULL;
  _sb->nbuf=0;
  _sb->cbuf=0;
}

/*Release the storage owned by the buffer.
  The members are not reset.*/
static void op_sb_clear(OpusStringBuf *_sb){
  _ogg_free(_sb->buf);
}

/*Make sure we have room for at least _capacity characters (plus 1 more for the
   terminating NUL).
  Return: 0 on success, or OP_EFAULT if the required size would overflow an
           int or the reallocation failed (the buffer is left untouched).*/
static int op_sb_ensure_capacity(OpusStringBuf *_sb,int _capacity){
  char *buf;
  int   cbuf;
  buf=_sb->buf;
  cbuf=_sb->cbuf;
  if(_capacity>=cbuf-1){
    /*Guard both candidate sizes (2*cbuf+1 and _capacity+1) against
       overflow.*/
    if(OP_UNLIKELY(cbuf>(INT_MAX-1>>1)))return OP_EFAULT;
    if(OP_UNLIKELY(_capacity>=INT_MAX-1))return OP_EFAULT;
    cbuf=OP_MAX(2*cbuf+1,_capacity+1);
    buf=_ogg_realloc(buf,sizeof(*buf)*cbuf);
    if(OP_UNLIKELY(buf==NULL))return OP_EFAULT;
    _sb->buf=buf;
    _sb->cbuf=cbuf;
  }
  return 0;
}

/*Increase the capacity of the buffer, but not to more than _max_size
   characters (plus 1 more for the terminating NUL).
  Return: 0 on success, or OP_EFAULT if the reallocation failed.*/
static int op_sb_grow(OpusStringBuf *_sb,int _max_size){
  char *buf;
  int   cbuf;
  buf=_sb->buf;
  cbuf=_sb->cbuf;
  OP_ASSERT(_max_size<=INT_MAX-1);
  /*Double (plus one) while that stays within the limit; otherwise clamp.*/
  cbuf=cbuf<=(_max_size-1>>1)?2*cbuf+1:_max_size+1;
  buf=_ogg_realloc(buf,sizeof(*buf)*cbuf);
  if(OP_UNLIKELY(buf==NULL))return OP_EFAULT;
  _sb->buf=buf;
  _sb->cbuf=cbuf;
  return 0;
}

/*Append _len characters from _s and NUL-terminate the result.
  Return: 0 on success, or a negative value if the buffer could not be
           enlarged (nothing is appended in that case).*/
static int op_sb_append(OpusStringBuf *_sb,const char *_s,int _len){
  char *buf;
  int   nbuf;
  int   ret;
  nbuf=_sb->nbuf;
  if(OP_UNLIKELY(nbuf>INT_MAX-_len))return OP_EFAULT;
  ret=op_sb_ensure_capacity(_sb,nbuf+_len);
  if(OP_UNLIKELY(ret<0))return ret;
  /*Re-read the pointer: ensuring capacity may have moved the storage.*/
  buf=_sb->buf;
  memcpy(buf+nbuf,_s,sizeof(*buf)*_len);
  nbuf+=_len;
  buf[nbuf]='\0';
  _sb->nbuf=nbuf;
  return 0;
}

/*Append a NUL-terminated string.*/
static int op_sb_append_string(OpusStringBuf *_sb,const char *_s){
  return op_sb_append(_sb,_s,strlen(_s));
}

/*Append ":<port>".
  _port must not exceed 65535: the local buffer has room for exactly a colon,
   five digits, and the terminating NUL.*/
static int op_sb_append_port(OpusStringBuf *_sb,unsigned _port){
  char port_buf[7];
  OP_ASSERT(_port<=65535U);
  sprintf(port_buf,":%u",_port);
  return op_sb_append_string(_sb,port_buf);
}

/*Append the decimal representation of a non-negative 64-bit integer.
  Return: 0 on success, or a negative value if any append failed.*/
static int op_sb_append_nonnegative_int64(OpusStringBuf *_sb,opus_int64 _i){
  char digit;
  int  nbuf_start;
  int  ret;
  OP_ASSERT(_i>=0);
  nbuf_start=_sb->nbuf;
  ret=0;
  /*Emit the digits least-significant first, accumulating any error.*/
  do{
    digit='0'+_i%10;
    ret|=op_sb_append(_sb,&digit,1);
    _i/=10;
  }
  while(_i>0);
  if(OP_LIKELY(ret>=0)){
    char *buf;
    int   nbuf_end;
    buf=_sb->buf;
    nbuf_end=_sb->nbuf-1;
    /*We've added the digits backwards.
      Reverse them.*/
    while(nbuf_start<nbuf_end){
      digit=buf[nbuf_start];
      buf[nbuf_start]=buf[nbuf_end];
      buf[nbuf_end]=digit;
      nbuf_start++;
      nbuf_end--;
    }
  }
  return ret;
}

/*Look up stream-socket addresses for the given host and port.
  _port must not exceed 65535: the local buffer has room for exactly five
   digits and the terminating NUL.
  Return: The list filled in by getaddrinfo(), or NULL if the lookup
           failed.*/
static struct addrinfo *op_resolve(const char *_host,unsigned _port){
  struct addrinfo *addrs;
  struct addrinfo  hints;
  char             service[6];
  memset(&hints,0,sizeof(hints));
  hints.ai_socktype=SOCK_STREAM;
#if !defined(_WIN32)
  hints.ai_flags=AI_NUMERICSERV;
#endif
  OP_ASSERT(_port<=65535U);
  sprintf(service,"%u",_port);
  if(OP_LIKELY(!getaddrinfo(_host,service,&hints,&addrs)))return addrs;
  return NULL;
}

/*Switch a socket between blocking and non-blocking mode.
  Return: The result of the underlying fcntl()/ioctl() call, which is negative
           if fcntl() failed.*/
static int op_sock_set_nonblocking(op_sock _fd,int _nonblocking){
#if !defined(_WIN32)
  int flags;
  flags=fcntl(_fd,F_GETFL);
  if(OP_UNLIKELY(flags<0))return flags;
  if(_nonblocking)flags|=O_NONBLOCK;
  else flags&=~O_NONBLOCK;
  return fcntl(_fd,F_SETFL,flags);
#else
  return ioctl(_fd,FIONBIO,&_nonblocking);
#endif
}

/*Disable/enable write coalescing if we can.
  We always send whole requests at once and always parse the response headers
   before sending another one, so normally write coalescing just causes added
   delay.
  This is a no-op if the platform does not define the necessary constants.*/
static void op_sock_set_tcp_nodelay(op_sock _fd,int _nodelay){
# if defined(TCP_NODELAY)&&(defined(IPPROTO_TCP)||defined(SOL_TCP))
#  if defined(IPPROTO_TCP)
#   define OP_SO_LEVEL IPPROTO_TCP
#  else
#   define OP_SO_LEVEL SOL_TCP
#  endif
  /*It doesn't really matter if this call fails, but it would be interesting
     to hit a case where it does.*/
  OP_ALWAYS_TRUE(!setsockopt(_fd,OP_SO_LEVEL,TCP_NODELAY,
   &_nodelay,sizeof(_nodelay)));
# endif
}

#if defined(_WIN32)
/*Start Winsock (requesting version 2.2) the first time this is called.
  Later calls only bump the counter.*/
static void op_init_winsock(void){
  static LONG    count;
  static WSADATA wsadata;
  if(InterlockedIncrement(&count)==1)WSAStartup(0x0202,&wsadata);
}
#endif

/*A single physical connection to an HTTP server.
+ We may have several of these open at once.*/ +struct OpusHTTPConn{ + /*The current position indicator for this connection.*/ + opus_int64 pos; + /*The position where the current request will end, or -1 if we're reading + until EOF (an unseekable stream or the initial HTTP/1.0 request).*/ + opus_int64 end_pos; + /*The position where next request we've sent will start, or -1 if we haven't + sent the next request yet.*/ + opus_int64 next_pos; + /*The end of the next request or -1 if we requested the rest of the resource. + This is only set to a meaningful value if next_pos is not -1.*/ + opus_int64 next_end; + /*The SSL connection, if this is https.*/ + SSL *ssl_conn; + /*The next connection in either the LRU or free list.*/ + OpusHTTPConn *next; + /*The last time we blocked for reading from this connection.*/ + struct timeb read_time; + /*The number of bytes we've read since the last time we blocked.*/ + opus_int64 read_bytes; + /*The estimated throughput of this connection, in bytes/s.*/ + opus_int64 read_rate; + /*The socket we're reading from.*/ + op_sock fd; + /*The number of remaining requests we are allowed on this connection.*/ + int nrequests_left; + /*The chunk size to use for pipelining requests.*/ + opus_int32 chunk_size; +}; + +static void op_http_conn_init(OpusHTTPConn *_conn){ + _conn->next_pos=-1; + _conn->ssl_conn=NULL; + _conn->next=NULL; + _conn->fd=OP_INVALID_SOCKET; +} + +static void op_http_conn_clear(OpusHTTPConn *_conn){ + if(_conn->ssl_conn!=NULL)SSL_free(_conn->ssl_conn); + /*SSL frees the BIO for us.*/ + if(_conn->fd!=OP_INVALID_SOCKET)close(_conn->fd); +} + +/*The global stream state.*/ +struct OpusHTTPStream{ + /*The list of connections.*/ + OpusHTTPConn conns[OP_NCONNS_MAX]; + /*The context object used as a framework for TLS/SSL functions.*/ + SSL_CTX *ssl_ctx; + /*The cached session to reuse for future connections.*/ + SSL_SESSION *ssl_session; + /*The LRU list (ordered from MRU to LRU) of currently connected + connections.*/ + 
OpusHTTPConn *lru_head; + /*The free list.*/ + OpusHTTPConn *free_head; + /*The URL to connect to.*/ + OpusParsedURL url; + /*Information about the address we connected to.*/ + struct addrinfo addr_info; + /*The address we connected to.*/ + union{ + struct sockaddr s; + struct sockaddr_in v4; + struct sockaddr_in6 v6; + } addr; + /*The last time we re-resolved the host.*/ + struct timeb resolve_time; + /*A buffer used to build HTTP requests.*/ + OpusStringBuf request; + /*A buffer used to build proxy CONNECT requests.*/ + OpusStringBuf proxy_connect; + /*A buffer used to receive the response headers.*/ + OpusStringBuf response; + /*The Content-Length, if specified, or -1 otherwise. + This will always be specified for seekable streams.*/ + opus_int64 content_length; + /*The position indicator used when no connection is active.*/ + opus_int64 pos; + /*The host we actually connected to.*/ + char *connect_host; + /*The port we actually connected to.*/ + unsigned connect_port; + /*The connection we're currently reading from. + This can be -1 if no connection is active.*/ + int cur_conni; + /*Whether or not the server supports range requests.*/ + int seekable; + /*Whether or not the server supports HTTP/1.1 with persistent connections.*/ + int pipeline; + /*Whether or not we should skip certificate checks.*/ + int skip_certificate_check; + /*The offset of the tail of the request. 
+ Only the offset in the Range: header appears after this, allowing us to + quickly edit the request to ask for a new range.*/ + int request_tail; + /*The estimated time required to open a new connection, in milliseconds.*/ + opus_int32 connect_rate; +}; + +static void op_http_stream_init(OpusHTTPStream *_stream){ + OpusHTTPConn **pnext; + int ci; + pnext=&_stream->free_head; + for(ci=0;ci<OP_NCONNS_MAX;ci++){ + op_http_conn_init(_stream->conns+ci); + *pnext=_stream->conns+ci; + pnext=&_stream->conns[ci].next; + } + _stream->ssl_ctx=NULL; + _stream->ssl_session=NULL; + _stream->lru_head=NULL; + op_parsed_url_init(&_stream->url); + op_sb_init(&_stream->request); + op_sb_init(&_stream->proxy_connect); + op_sb_init(&_stream->response); + _stream->connect_host=NULL; + _stream->seekable=0; +} + +/*Close the connection and move it to the free list. + _stream: The stream containing the free list. + _conn: The connection to close. + _penxt: The linked-list pointer currently pointing to this connection. + _gracefully: Whether or not to shut down cleanly.*/ +static void op_http_conn_close(OpusHTTPStream *_stream,OpusHTTPConn *_conn, + OpusHTTPConn **_pnext,int _gracefully){ + /*If we don't shut down gracefully, the server MUST NOT re-use our session + according to RFC 2246, because it can't tell the difference between an + abrupt close and a truncation attack. + So we shut down gracefully if we can. + However, we will not wait if this would block (it's not worth the savings + from session resumption to do so). 
+ Clients (that's us) MAY resume a TLS session that ended with an incomplete + close, according to RFC 2818, so there's no reason to make sure the server + shut things down gracefully.*/ + if(_gracefully&&_conn->ssl_conn!=NULL)SSL_shutdown(_conn->ssl_conn); + op_http_conn_clear(_conn); + _conn->next_pos=-1; + _conn->ssl_conn=NULL; + _conn->fd=OP_INVALID_SOCKET; + OP_ASSERT(*_pnext==_conn); + *_pnext=_conn->next; + _conn->next=_stream->free_head; + _stream->free_head=_conn; +} + +static void op_http_stream_clear(OpusHTTPStream *_stream){ + while(_stream->lru_head!=NULL){ + op_http_conn_close(_stream,_stream->lru_head,&_stream->lru_head,0); + } + if(_stream->ssl_session!=NULL)SSL_SESSION_free(_stream->ssl_session); + if(_stream->ssl_ctx!=NULL)SSL_CTX_free(_stream->ssl_ctx); + op_sb_clear(&_stream->response); + op_sb_clear(&_stream->proxy_connect); + op_sb_clear(&_stream->request); + if(_stream->connect_host!=_stream->url.host)_ogg_free(_stream->connect_host); + op_parsed_url_clear(&_stream->url); +} + +static int op_http_conn_write_fully(OpusHTTPConn *_conn, + const char *_buf,int _buf_size){ + struct pollfd fd; + SSL *ssl_conn; + fd.fd=_conn->fd; + ssl_conn=_conn->ssl_conn; + while(_buf_size>0){ + int err; + if(ssl_conn!=NULL){ + int ret; + ret=SSL_write(ssl_conn,_buf,_buf_size); + if(ret>0){ + /*Wrote some data.*/ + _buf+=ret; + _buf_size-=ret; + continue; + } + /*Connection closed.*/ + else if(ret==0)return OP_FALSE; + err=SSL_get_error(ssl_conn,ret); + /*Yes, renegotiations can cause SSL_write() to block for reading.*/ + if(err==SSL_ERROR_WANT_READ)fd.events=POLLIN; + else if(err==SSL_ERROR_WANT_WRITE)fd.events=POLLOUT; + else return OP_FALSE; + } + else{ + ssize_t ret; + op_reset_errno(); + ret=send(fd.fd,_buf,_buf_size,0); + if(ret>0){ + _buf+=ret; + _buf_size-=ret; + continue; + } + err=op_errno(); + if(err!=EAGAIN&&err!=EWOULDBLOCK)return OP_FALSE; + fd.events=POLLOUT; + } + if(poll(&fd,1,OP_POLL_TIMEOUT_MS)<=0)return OP_FALSE; + } + return 0; +} + +static 
int op_http_conn_estimate_available(OpusHTTPConn *_conn){ + int available; + int ret; + ret=ioctl(_conn->fd,FIONREAD,&available); + if(ret<0)available=0; + /*This requires the SSL read_ahead flag to be unset to work. + We ignore partial records as well as the protocol overhead for any pending + bytes. + This means we might return somewhat less than can truly be read without + blocking (if there's a partial record). + This is okay, because we're using this value to estimate network transfer + time, and we _have_ already received those bytes. + We also might return slightly more (due to protocol overhead), but that's + small enough that it probably doesn't matter.*/ + if(_conn->ssl_conn!=NULL)available+=SSL_pending(_conn->ssl_conn); + return available; +} + +static opus_int32 op_time_diff_ms(const struct timeb *_end, + const struct timeb *_start){ + opus_int64 dtime; + dtime=_end->time-(opus_int64)_start->time; + OP_ASSERT(_end->millitm<1000); + OP_ASSERT(_start->millitm<1000); + if(OP_UNLIKELY(dtime>(OP_INT32_MAX-1000)/1000))return OP_INT32_MAX; + if(OP_UNLIKELY(dtime<(OP_INT32_MIN+1000)/1000))return OP_INT32_MIN; + return (opus_int32)dtime*1000+_end->millitm-_start->millitm; +} + +/*Update the read rate estimate for this connection.*/ +static void op_http_conn_read_rate_update(OpusHTTPConn *_conn){ + struct timeb read_time; + opus_int32 read_delta_ms; + opus_int64 read_delta_bytes; + opus_int64 read_rate; + read_delta_bytes=_conn->read_bytes; + if(read_delta_bytes<=0)return; + ftime(&read_time); + read_delta_ms=op_time_diff_ms(&read_time,&_conn->read_time); + read_rate=_conn->read_rate; + read_delta_ms=OP_MAX(read_delta_ms,1); + read_rate+=read_delta_bytes*1000/read_delta_ms-read_rate+4>>3; + *&_conn->read_time=*&read_time; + _conn->read_bytes=0; + _conn->read_rate=read_rate; +} + +/*Tries to read from the given connection. + [out] _buf: Returns the data read. + _buf_size: The size of the buffer. + _blocking: Whether or not to block until some data is retrieved. 
+ Return: A positive number of bytes read on success. + 0: The read would block, or the connection was closed. + OP_EREAD: There was a fatal read error.*/ +static int op_http_conn_read(OpusHTTPConn *_conn, + char *_buf,int _buf_size,int _blocking){ + struct pollfd fd; + SSL *ssl_conn; + int nread; + int nread_unblocked; + fd.fd=_conn->fd; + ssl_conn=_conn->ssl_conn; + nread=nread_unblocked=0; + /*RFC 2818 says "client implementations MUST treat any premature closes as + errors and the data received as potentially truncated," so we make very + sure to report read errors upwards.*/ + do{ + int err; + if(ssl_conn!=NULL){ + int ret; + ret=SSL_read(ssl_conn,_buf+nread,_buf_size-nread); + OP_ASSERT(ret<=_buf_size-nread); + if(ret>0){ + /*Read some data. + Keep going to see if there's more.*/ + nread+=ret; + nread_unblocked+=ret; + continue; + } + /*If we already read some data, return it right now.*/ + if(nread>0)break; + err=SSL_get_error(ssl_conn,ret); + if(ret==0){ + /*Connection close. + Check for a clean shutdown to prevent truncation attacks. + This check always succeeds for SSLv2, as it has no "close notify" + message and thus can't verify an orderly shutdown.*/ + return err==SSL_ERROR_ZERO_RETURN?0:OP_EREAD; + } + if(err==SSL_ERROR_WANT_READ)fd.events=POLLIN; + /*Yes, renegotiations can cause SSL_read() to block for writing.*/ + else if(err==SSL_ERROR_WANT_WRITE)fd.events=POLLOUT; + /*Some other error.*/ + else return OP_EREAD; + } + else{ + ssize_t ret; + op_reset_errno(); + ret=recv(fd.fd,_buf+nread,_buf_size-nread,0); + OP_ASSERT(ret<=_buf_size-nread); + if(ret>0){ + /*Read some data. 
+ Keep going to see if there's more.*/ + nread+=ret; + nread_unblocked+=ret; + continue; + } + /*If we already read some data or the connection was closed, return + right now.*/ + if(ret==0||nread>0)break; + err=op_errno(); + if(err!=EAGAIN&&err!=EWOULDBLOCK)return OP_EREAD; + fd.events=POLLIN; + } + _conn->read_bytes+=nread_unblocked; + op_http_conn_read_rate_update(_conn); + nread_unblocked=0; + if(!_blocking)break; + /*Need to wait to get any data at all.*/ + if(poll(&fd,1,OP_POLL_TIMEOUT_MS)<=0)return OP_EREAD; + } + while(nread<_buf_size); + _conn->read_bytes+=nread_unblocked; + return nread; +} + +/*Tries to look at the pending data for a connection without consuming it. + [out] _buf: Returns the data at which we're peeking. + _buf_size: The size of the buffer.*/ +static int op_http_conn_peek(OpusHTTPConn *_conn,char *_buf,int _buf_size){ + struct pollfd fd; + SSL *ssl_conn; + int ret; + fd.fd=_conn->fd; + ssl_conn=_conn->ssl_conn; + for(;;){ + int err; + if(ssl_conn!=NULL){ + ret=SSL_peek(ssl_conn,_buf,_buf_size); + /*Either saw some data or the connection was closed.*/ + if(ret>=0)return ret; + err=SSL_get_error(ssl_conn,ret); + if(err==SSL_ERROR_WANT_READ)fd.events=POLLIN; + /*Yes, renegotiations can cause SSL_peek() to block for writing.*/ + else if(err==SSL_ERROR_WANT_WRITE)fd.events=POLLOUT; + else return 0; + } + else{ + op_reset_errno(); + ret=(int)recv(fd.fd,_buf,_buf_size,MSG_PEEK); + /*Either saw some data or the connection was closed.*/ + if(ret>=0)return ret; + err=op_errno(); + if(err!=EAGAIN&&err!=EWOULDBLOCK)return 0; + fd.events=POLLIN; + } + /*Need to wait to get any data at all.*/ + if(poll(&fd,1,OP_POLL_TIMEOUT_MS)<=0)return 0; + } +} + +/*When parsing response headers, RFC 2616 mandates that all lines end in CR LF. + However, even in the year 2012, I have seen broken servers use just a LF. + This is the evil that Postel's advice from RFC 761 breeds.*/ + +/*Reads the entirety of a response to an HTTP request into the response buffer. 
+ Actual parsing and validation is done later. + Return: The number of bytes in the response on success, OP_EREAD if the + connection was closed before reading any data, or another negative + value on any other error.*/ +static int op_http_conn_read_response(OpusHTTPConn *_conn, + OpusStringBuf *_response){ + int ret; + _response->nbuf=0; + ret=op_sb_ensure_capacity(_response,OP_RESPONSE_SIZE_MIN); + if(OP_UNLIKELY(ret<0))return ret; + for(;;){ + char *buf; + int size; + int capacity; + int read_limit; + int terminated; + size=_response->nbuf; + capacity=_response->cbuf-1; + if(OP_UNLIKELY(size>=capacity)){ + ret=op_sb_grow(_response,OP_RESPONSE_SIZE_MAX); + if(OP_UNLIKELY(ret<0))return ret; + capacity=_response->cbuf-1; + /*The response was too large. + This prevents a bad server from running us out of memory.*/ + if(OP_UNLIKELY(size>=capacity))return OP_EIMPL; + } + buf=_response->buf; + ret=op_http_conn_peek(_conn,buf+size,capacity-size); + if(OP_UNLIKELY(ret<=0))return size<=0?OP_EREAD:OP_FALSE; + /*We read some data.*/ + /*Make sure the starting characters are "HTTP". 
+ Otherwise we could wind up waiting forever for a response from + something that is not an HTTP server.*/ + if(size<4&&op_strncasecmp(buf,"HTTP",OP_MIN(size+ret,4))!=0){ + return OP_FALSE; + } + /*How far can we read without passing the "\r\n\r\n" terminator?*/ + buf[size+ret]='\0'; + terminated=0; + for(read_limit=OP_MAX(size-3,0);read_limit<size+ret;read_limit++){ + /*We don't look for the leading '\r' thanks to broken servers.*/ + if(buf[read_limit]=='\n'){ + if(buf[read_limit+1]=='\r'&&OP_LIKELY(buf[read_limit+2]=='\n')){ + terminated=3; + break; + } + /*This case is for broken servers.*/ + else if(OP_UNLIKELY(buf[read_limit+1]=='\n')){ + terminated=2; + break; + } + } + } + read_limit+=terminated; + OP_ASSERT(size<=read_limit); + OP_ASSERT(read_limit<=size+ret); + /*Actually consume that data.*/ + ret=op_http_conn_read(_conn,buf+size,read_limit-size,1); + if(OP_UNLIKELY(ret<=0))return OP_FALSE; + size+=ret; + buf[size]='\0'; + _response->nbuf=size; + /*We found the terminator and read all the data up to and including it.*/ + if(terminated&&OP_LIKELY(size>=read_limit))return size; + } + return OP_EIMPL; +} + +# define OP_HTTP_DIGIT "0123456789" + +/*The Reason-Phrase is not allowed to contain control characters, except + horizontal tab (HT: \011).*/ +# define OP_HTTP_CREASON_PHRASE \ + "\001\002\003\004\005\006\007\010\012\013\014\015\016\017\020\021" \ + "\022\023\024\025\026\027\030\031\032\033\034\035\036\037\177" + +# define OP_HTTP_CTLS \ + "\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020" \ + "\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037\177" + +/*This also includes '\t', but we get that from OP_HTTP_CTLS.*/ +# define OP_HTTP_SEPARATORS " \"(),/:;<=>?@[\\]{}" + +/*TEXT can also include LWS, but that has structure, so we parse it + separately.*/ +# define OP_HTTP_CTOKEN OP_HTTP_CTLS OP_HTTP_SEPARATORS + +/*Return: The amount of linear white space (LWS) at the start of _s.*/ +static int op_http_lwsspn(const char *_s){ + int 
i; + for(i=0;;){ + if(_s[0]=='\r'&&_s[1]=='\n'&&(_s[2]=='\t'||_s[2]==' '))i+=3; + /*This case is for broken servers.*/ + else if(_s[0]=='\n'&&(_s[1]=='\t'||_s[1]==' '))i+=2; + else if(_s[i]=='\t'||_s[i]==' ')i++; + else return i; + } +} + +static char *op_http_parse_status_line(int *_v1_1_compat, + char **_status_code,char *_response){ + char *next; + char *status_code; + int v1_1_compat; + size_t d; + /*RFC 2616 Section 6.1 does not say that the tokens in the Status-Line cannot + be separated by optional LWS, but since it specifically calls out where + spaces are to be placed and that CR and LF are not allowed except at the + end, I am assuming this to be true.*/ + /*We already validated that this starts with "HTTP"*/ + OP_ASSERT(op_strncasecmp(_response,"HTTP",4)==0); + next=_response+4; + if(OP_UNLIKELY(*next++!='/'))return NULL; + d=strspn(next,OP_HTTP_DIGIT); + /*"Leading zeros MUST be ignored by recipients."*/ + while(*next=='0'){ + next++; + OP_ASSERT(d>0); + d--; + } + /*We only support version 1.x*/ + if(OP_UNLIKELY(d!=1)||OP_UNLIKELY(*next++!='1'))return NULL; + if(OP_UNLIKELY(*next++!='.'))return NULL; + d=strspn(next,OP_HTTP_DIGIT); + if(OP_UNLIKELY(d<=0))return NULL; + /*"Leading zeros MUST be ignored by recipients."*/ + while(*next=='0'){ + next++; + OP_ASSERT(d>0); + d--; + } + /*We don't need to parse the version number. 
+ Any non-zero digit means it's greater than 1.*/ + v1_1_compat=d>0; + next+=d; + if(OP_UNLIKELY(*next++!=' '))return NULL; + status_code=next; + d=strspn(next,OP_HTTP_DIGIT); + if(OP_UNLIKELY(d!=3))return NULL; + next+=d; + /*The Reason-Phrase can be empty, but the space must be here.*/ + if(OP_UNLIKELY(*next++!=' '))return NULL; + next+=strcspn(next,OP_HTTP_CREASON_PHRASE); + /*We are not mandating this be present thanks to broken servers.*/ + if(OP_LIKELY(*next=='\r'))next++; + if(OP_UNLIKELY(*next++!='\n'))return NULL; + if(_v1_1_compat!=NULL)*_v1_1_compat=v1_1_compat; + *_status_code=status_code; + return next; +} + +/*Get the next response header. + [out] _header: The header token, NUL-terminated, with leading and trailing + whitespace stripped, and converted to lower case (to simplify + case-insensitive comparisons), or NULL if there are no more + response headers. + [out] _cdr: The remaining contents of the header, excluding the initial + colon (':') and the terminating CRLF ("\r\n"), + NUL-terminated, and with leading and trailing whitespace + stripped, or NULL if there are no more response headers. + [inout] _s: On input, this points to the start of the current line of the + response headers. + On output, it points to the start of the first line following + this header, or NULL if there are no more response headers. 
+ Return: 0 on success, or a negative value on failure.*/ +static int op_http_get_next_header(char **_header,char **_cdr,char **_s){ + char *header; + char *header_end; + char *cdr; + char *cdr_end; + char *next; + size_t d; + next=*_s; + /*The second case is for broken servers.*/ + if(next[0]=='\r'&&next[1]=='\n'||OP_UNLIKELY(next[0]=='\n')){ + /*No more headers.*/ + *_header=NULL; + *_cdr=NULL; + *_s=NULL; + return 0; + } + header=next+op_http_lwsspn(next); + d=strcspn(header,OP_HTTP_CTOKEN); + if(OP_UNLIKELY(d<=0))return OP_FALSE; + header_end=header+d; + next=header_end+op_http_lwsspn(header_end); + if(OP_UNLIKELY(*next++!=':'))return OP_FALSE; + next+=op_http_lwsspn(next); + cdr=next; + do{ + cdr_end=next+strcspn(next,OP_HTTP_CTLS); + next=cdr_end+op_http_lwsspn(cdr_end); + } + while(next>cdr_end); + /*We are not mandating this be present thanks to broken servers.*/ + if(OP_LIKELY(*next=='\r'))next++; + if(OP_UNLIKELY(*next++!='\n'))return OP_FALSE; + *header_end='\0'; + *cdr_end='\0'; + /*Field names are case-insensitive.*/ + op_string_tolower(header); + *_header=header; + *_cdr=cdr; + *_s=next; + return 0; +} + +static opus_int64 op_http_parse_nonnegative_int64(const char **_next, + const char *_cdr){ + const char *next; + opus_int64 ret; + int i; + next=_cdr+strspn(_cdr,OP_HTTP_DIGIT); + *_next=next; + if(OP_UNLIKELY(next<=_cdr))return OP_FALSE; + while(*_cdr=='0')_cdr++; + if(OP_UNLIKELY(next-_cdr>19))return OP_EIMPL; + ret=0; + for(i=0;i<next-_cdr;i++){ + int digit; + digit=_cdr[i]-'0'; + /*Check for overflow.*/ + if(OP_UNLIKELY(ret>(OP_INT64_MAX-9)/10+(digit<=7)))return OP_EIMPL; + ret=ret*10+digit; + } + return ret; +} + +static opus_int64 op_http_parse_content_length(const char *_cdr){ + const char *next; + opus_int64 content_length; + content_length=op_http_parse_nonnegative_int64(&next,_cdr); + if(OP_UNLIKELY(*next!='\0'))return OP_FALSE; + return content_length; +} + +static int op_http_parse_content_range(opus_int64 *_first,opus_int64 *_last, + 
opus_int64 *_length,const char *_cdr){ + opus_int64 first; + opus_int64 last; + opus_int64 length; + size_t d; + if(OP_UNLIKELY(op_strncasecmp(_cdr,"bytes",5)!=0))return OP_FALSE; + _cdr+=5; + d=op_http_lwsspn(_cdr); + if(OP_UNLIKELY(d<=0))return OP_FALSE; + _cdr+=d; + if(*_cdr!='*'){ + first=op_http_parse_nonnegative_int64(&_cdr,_cdr); + if(OP_UNLIKELY(first<0))return (int)first; + _cdr+=op_http_lwsspn(_cdr); + if(*_cdr++!='-')return OP_FALSE; + _cdr+=op_http_lwsspn(_cdr); + last=op_http_parse_nonnegative_int64(&_cdr,_cdr); + if(OP_UNLIKELY(last<0))return (int)last; + _cdr+=op_http_lwsspn(_cdr); + } + else{ + /*This is for a 416 response (Requested range not satisfiable).*/ + first=last=-1; + _cdr++; + } + if(OP_UNLIKELY(*_cdr++!='/'))return OP_FALSE; + if(*_cdr!='*'){ + length=op_http_parse_nonnegative_int64(&_cdr,_cdr); + if(OP_UNLIKELY(length<0))return (int)length; + } + else{ + /*The total length is unspecified.*/ + _cdr++; + length=-1; + } + if(OP_UNLIKELY(*_cdr!='\0'))return OP_FALSE; + if(OP_UNLIKELY(last<first))return OP_FALSE; + if(length>=0&&OP_UNLIKELY(last>=length))return OP_FALSE; + *_first=first; + *_last=last; + *_length=length; + return 0; +} + +/*Parse the Connection response header and look for a "close" token. + Return: 1 if a "close" token is found, 0 if it's not found, and a negative + value on error.*/ +static int op_http_parse_connection(char *_cdr){ + size_t d; + int ret; + ret=0; + for(;;){ + d=strcspn(_cdr,OP_HTTP_CTOKEN); + if(OP_UNLIKELY(d<=0))return OP_FALSE; + if(op_strncasecmp(_cdr,"close",(int)d)==0)ret=1; + /*We're supposed to strip and ignore any headers mentioned in the + Connection header if this response is from an HTTP/1.0 server (to + work around forwarding of hop-by-hop headers by old proxies), but the + only hop-by-hop header we look at is Connection itself. 
+ Everything else is a well-defined end-to-end header, and going back and + undoing the things we did based on already-examined headers would be + hard (since we only scan them once, in a destructive manner). + Therefore we just ignore all the other tokens.*/ + _cdr+=d; + d=op_http_lwsspn(_cdr); + if(d<=0)break; + _cdr+=d; + } + return OP_UNLIKELY(*_cdr!='\0')?OP_FALSE:ret; +} + +typedef int (*op_ssl_step_func)(SSL *_ssl_conn); + +/*Try to run an SSL function to completion (blocking if necessary).*/ +static int op_do_ssl_step(SSL *_ssl_conn,op_sock _fd,op_ssl_step_func _step){ + struct pollfd fd; + fd.fd=_fd; + for(;;){ + int ret; + int err; + ret=(*_step)(_ssl_conn); + if(ret>=0)return ret; + err=SSL_get_error(_ssl_conn,ret); + if(err==SSL_ERROR_WANT_READ)fd.events=POLLIN; + else if(err==SSL_ERROR_WANT_WRITE)fd.events=POLLOUT; + else return OP_FALSE; + if(poll(&fd,1,OP_POLL_TIMEOUT_MS)<=0)return OP_FALSE; + } +} + +/*Implement a BIO type that just indicates every operation should be retried. + We use this when initializing an SSL connection via a proxy to allow the + initial handshake to proceed all the way up to the first read attempt, and + then return. 
This allows the TLS client hello message to be pipelined with the HTTP
   CONNECT request.*/

/*Write callback: transfers nothing and asks the caller to retry the write.*/
static int op_bio_retry_write(BIO *_b,const char *_buf,int _num){
  (void)_buf;
  (void)_num;
  BIO_clear_retry_flags(_b);
  BIO_set_retry_write(_b);
  return -1;
}

/*Read callback: transfers nothing and asks the caller to retry the read.*/
static int op_bio_retry_read(BIO *_b,char *_buf,int _num){
  (void)_buf;
  (void)_num;
  BIO_clear_retry_flags(_b);
  BIO_set_retry_read(_b);
  return -1;
}

/*Puts callback: behaves exactly like a write.*/
static int op_bio_retry_puts(BIO *_b,const char *_str){
  return op_bio_retry_write(_b,_str,0);
}

/*Control callback.
  Return: 1 for the handful of commands listed below (the two reset commands
           also clear the retry flags), and 0 for everything else.*/
static long op_bio_retry_ctrl(BIO *_b,int _cmd,long _num,void *_ptr){
  long ret;
  (void)_num;
  (void)_ptr;
  ret=0;
  switch(_cmd){
    case BIO_CTRL_RESET:
    case BIO_C_RESET_READ_REQUEST:{
      BIO_clear_retry_flags(_b);
      /*Fall through.*/
    }
    case BIO_CTRL_EOF:
    case BIO_CTRL_SET:
    case BIO_CTRL_SET_CLOSE:
    case BIO_CTRL_FLUSH:
    case BIO_CTRL_DUP:{
      ret=1;
    }break;
  }
  return ret;
}

/*Creation callback: marks the BIO as initialized, with no associated data.*/
static int op_bio_retry_new(BIO *_b){
  _b->init=1;
  _b->num=0;
  _b->ptr=NULL;
  return 1;
}

/*Destruction callback: there is nothing to release.*/
static int op_bio_retry_free(BIO *_b){
  return _b!=NULL;
}

/*This is not const because OpenSSL doesn't allow it, even though it won't
   write to it.*/
static BIO_METHOD op_bio_retry_method={
  BIO_TYPE_NULL,
  "retry",
  op_bio_retry_write,
  op_bio_retry_read,
  op_bio_retry_puts,
  NULL,
  op_bio_retry_ctrl,
  op_bio_retry_new,
  op_bio_retry_free,
  NULL
};

/*Establish a CONNECT tunnel and pipeline the start of the TLS handshake for
   proxying https URL requests.
  _stream:   The stream, whose proxy_connect buffer holds the CONNECT request
              and whose response buffer receives the proxy's reply.
  _conn:     The connection to use; its fd is set to _fd and its ssl_conn is
              cleared, so that the CONNECT exchange happens in the clear.
  _fd:       The socket connected to the proxy.
  _ssl_conn: The SSL connection that will use the tunnel.
  _ssl_bio:  The socket BIO for _fd.
             It is attached to _ssl_conn here; if we fail before that
              happens, it is freed instead, since the caller has no other
              use for it.
  Return: 0 on success, or a negative value on failure.*/
static int op_http_conn_establish_tunnel(OpusHTTPStream *_stream,
 OpusHTTPConn *_conn,op_sock _fd,SSL *_ssl_conn,BIO *_ssl_bio){
  BIO  *retry_bio;
  char *status_code;
  char *next;
  int   ret;
  _conn->ssl_conn=NULL;
  _conn->fd=_fd;
  OP_ASSERT(_stream->proxy_connect.nbuf>0);
  ret=op_http_conn_write_fully(_conn,
   _stream->proxy_connect.buf,_stream->proxy_connect.nbuf);
  if(OP_UNLIKELY(ret<0)){
    /*_ssl_bio is not attached to _ssl_conn yet, so nothing else will ever
       free it.*/
    BIO_free(_ssl_bio);
    return ret;
  }
  retry_bio=BIO_new(&op_bio_retry_method);
  if(OP_UNLIKELY(retry_bio==NULL)){
    /*As above: release the socket BIO that nobody owns yet.*/
    BIO_free(_ssl_bio);
    return OP_EFAULT;
  }
  /*Writes go to the real socket, but reads hit the retry BIO, so the
     handshake stops as soon as it needs data from the server.
    From here on, _ssl_conn owns both BIOs.*/
  SSL_set_bio(_ssl_conn,retry_bio,_ssl_bio);
  SSL_set_connect_state(_ssl_conn);
  /*This shouldn't succeed, since we can't read yet.*/
  OP_ALWAYS_TRUE(SSL_connect(_ssl_conn)<0);
  /*Switch reads over to the real socket as well.*/
  SSL_set_bio(_ssl_conn,_ssl_bio,_ssl_bio);
  /*Only now do we disable write coalescing, to allow the CONNECT
     request and the start of the TLS handshake to be combined.*/
  op_sock_set_tcp_nodelay(_fd,1);
  ret=op_http_conn_read_response(_conn,&_stream->response);
  if(OP_UNLIKELY(ret<0))return ret;
  next=op_http_parse_status_line(NULL,&status_code,_stream->response.buf);
  /*According to RFC 2817, "Any successful (2xx) response to a
     CONNECT request indicates that the proxy has established a
     connection to the requested host and port."*/
  if(OP_UNLIKELY(next==NULL)||OP_UNLIKELY(status_code[0]!='2'))return OP_FALSE;
  return 0;
}

/*Match a host name against a host with a possible wildcard pattern according
   to the rules of RFC 6125 Section 6.4.3.
  _host:     The host name to check.
  _host_len: The length of _host.
  _pattern:  The name presented by the certificate.
  Return: 0 if the pattern doesn't match, and a non-zero value if it does.*/
static int op_http_hostname_match(const char *_host,size_t _host_len,
 ASN1_STRING *_pattern){
  const char *pattern;
  size_t      host_label_len;
  size_t      host_suffix_len;
  size_t      pattern_len;
  size_t      pattern_label_len;
  size_t      pattern_prefix_len;
  size_t      pattern_suffix_len;
  pattern=(const char *)ASN1_STRING_data(_pattern);
  pattern_len=strlen(pattern);
  /*Check the pattern for embedded NULs.*/
  if(OP_UNLIKELY(pattern_len!=(size_t)ASN1_STRING_length(_pattern)))return 0;
  /*The length of the left-most label, and of the text before the first
     wildcard (the whole pattern, if there is none).*/
  pattern_label_len=strcspn(pattern,".");
  OP_ASSERT(pattern_label_len<=pattern_len);
  pattern_prefix_len=strcspn(pattern,"*");
  if(pattern_prefix_len>=pattern_label_len){
    /*"The client SHOULD NOT attempt to match a presented identifier in which
       the wildcard character comprises a label other than the left-most label
       (e.g., do not match bar.*.example.net)." [RFC 6125 Section 6.4.3]*/
    if(pattern_prefix_len<pattern_len)return 0;
    /*If the pattern does not contain a wildcard in the first element, do an
       exact match.
      Don't use the system strcasecmp here, as that uses the locale and
       RFC 4343 makes clear that DNS's case-insensitivity only applies to
       the ASCII range.*/
    return _host_len==pattern_len&&op_strncasecmp(_host,pattern,_host_len)==0;
  }
  /*"However, the client SHOULD NOT attempt to match a presented identifier
     where the wildcard character is embedded within an A-label or U-label of
     an internationalized domain name." [RFC 6125 Section 6.4.3]*/
  if(op_strncasecmp(pattern,"xn--",4)==0)return 0;
  host_label_len=strcspn(_host,".");
  /*Make sure the host has at least two dots, to prevent the wildcard match
     from being ridiculously wide.
    We should have already checked to ensure it had at least one.*/
  if(OP_UNLIKELY(_host[host_label_len]!='.')
   ||strchr(_host+host_label_len+1,'.')==NULL){
    return 0;
  }
  OP_ASSERT(host_label_len<_host_len);
  /*"If the wildcard character is the only character of the left-most label in
     the presented identifier, the client SHOULD NOT compare against anything
     but the left-most label of the reference identifier (e.g., *.example.com
     would match foo.example.com but not bar.foo.example.com)." [RFC 6125
     Section 6.4.3]
    This is really confusingly worded, as we check this by actually comparing
     the rest of the pattern for an exact match.
    We also use the fact that the wildcard must match at least one character,
     so the left-most label of the hostname must be at least as large as the
     left-most label of the pattern.*/
  if(host_label_len<pattern_label_len)return 0;
  OP_ASSERT(pattern[pattern_prefix_len]=='*');
  /*"The client MAY match a presented identifier in which the wildcard
     character is not the only character of the label (e.g., baz*.example.net
     and *baz.example.net and b*z.example.net would be taken to match
     baz1.example.net and foobaz.example.net and buzz.example.net,
     respectively)." [RFC 6125 Section 6.4.3]*/
  /*Everything after the wildcard must match the end of the host exactly,
     and everything before it must match the start.*/
  pattern_suffix_len=pattern_len-pattern_prefix_len-1;
  host_suffix_len=_host_len-host_label_len
   +pattern_label_len-pattern_prefix_len-1;
  return pattern_suffix_len==host_suffix_len
   &&op_strncasecmp(_host,pattern,pattern_prefix_len)==0
   &&op_strncasecmp(_host+_host_len-host_suffix_len,
   pattern+pattern_prefix_len+1,host_suffix_len)==0;
}

/*Convert a host to a numeric address, if possible.
  Return: A struct addrinfo containing the address, if it was numeric, and NULL
           otherwise.
          A non-NULL result comes from getaddrinfo(), so it must be released
           with freeaddrinfo().*/
static struct addrinfo *op_inet_pton(const char *_host){
  struct addrinfo *addrs;
  struct addrinfo  hints;
  memset(&hints,0,sizeof(hints));
  hints.ai_socktype=SOCK_STREAM;
  /*AI_NUMERICHOST makes the lookup fail for anything that is not a numeric
     address.*/
  hints.ai_flags=AI_NUMERICHOST;
  if(!getaddrinfo(_host,NULL,&hints,&addrs))return addrs;
  return NULL;
}

/*Verify the server's hostname matches the certificate they presented using
   the procedure from Section 6 of RFC 6125.
+ Return: 0 if the certificate doesn't match, and a non-zero value if it does.*/ +static int op_http_verify_hostname(OpusHTTPStream *_stream,SSL *_ssl_conn){ + X509 *peer_cert; + STACK_OF(GENERAL_NAME) *san_names; + char *host; + size_t host_len; + int ret; + host=_stream->url.host; + host_len=strlen(host); + peer_cert=SSL_get_peer_certificate(_ssl_conn); + /*We set VERIFY_PEER, so we shouldn't get here without a certificate.*/ + if(OP_UNLIKELY(peer_cert==NULL))return 0; + ret=0; + OP_ASSERT(host_len<INT_MAX); + /*RFC 2818 says (after correcting for Eratta 1077): "If a subjectAltName + extension of type dNSName is present, that MUST be used as the identity. + Otherwise, the (most specific) Common Name field in the Subject field of + the certificate MUST be used. + Although the use of the Common Name is existing practice, it is deprecated + and Certification Authorities are encouraged to use the dNSName + instead." + "Matching is performed using the matching rules specified by RFC 2459. + If more than one identity of a given type is present in the certificate + (e.g., more than one dNSName name), a match in any one of the set is + considered acceptable. + Names may contain the wildcard character * which is condered to match any + single domain name component or component fragment. + E.g., *.a.com matches foo.a.com but not bar.foo.a.com. + f*.com matches foo.com but not bar.com." + "In some cases, the URI is specified as an IP address rather than a + hostname. 
+ In this case, the iPAddress subjectAltName must be present in the + certificate and must exactly match the IP in the URI."*/ + san_names=X509_get_ext_d2i(peer_cert,NID_subject_alt_name,NULL,NULL); + if(san_names!=NULL){ + struct addrinfo *addr; + unsigned char *ip; + int ip_len; + int nsan_names; + int sni; + /*Check to see if the host was specified as a simple IP address.*/ + addr=op_inet_pton(host); + ip=NULL; + ip_len=0; + if(addr!=NULL){ + switch(addr->ai_family){ + case AF_INET:{ + struct sockaddr_in *s; + s=(struct sockaddr_in *)addr->ai_addr; + OP_ASSERT(addr->ai_addrlen>=sizeof(*s)); + ip=(unsigned char *)&s->sin_addr; + ip_len=sizeof(s->sin_addr); + }break; + case AF_INET6:{ + struct sockaddr_in6 *s; + s=(struct sockaddr_in6 *)addr->ai_addr; + OP_ASSERT(addr->ai_addrlen>=sizeof(*s)); + ip=(unsigned char *)&s->sin6_addr; + ip_len=sizeof(s->sin6_addr); + }break; + } + } + /*We can only verify fully-qualified domain names. + To quote RFC 6125: "The extracted data MUST include only information that + can be securely parsed out of the inputs (e.g., parsing the fully + qualified DNS domain name out of the "host" component (or its + equivalent) of a URI or deriving the application service type from the + scheme of a URI) ..." + We don't have a way to check (without relying on DNS records, which might + be subverted) if this address is fully-qualified. + This is particularly problematic when using a CONNECT tunnel, as it is + the server that does DNS lookup, not us. + However, we are certain that if the hostname has no '.', it is definitely + not a fully-qualified domain name (with the exception of crazy TLDs that + actually resolve, like "uz", but I am willing to ignore those). + RFC 1535 says "...in any event where a '.' exists in a specified name it + should be assumed to be a fully qualified domain name (FQDN) and SHOULD + be tried as a rooted name first." 
+ That doesn't give us any security guarantees, of course (a subverted DNS + could fail the original query and our resolver might still retry with a + local domain appended). + If we don't have a FQDN, just set the number of names to 0, so we'll fail + and clean up any resources we allocated.*/ + if(ip==NULL&&strchr(host,'.')==NULL)nsan_names=0; + /*RFC 2459 says there MUST be at least one, but we don't depend on it.*/ + else nsan_names=sk_GENERAL_NAME_num(san_names); + for(sni=0;sni<nsan_names;sni++){ + const GENERAL_NAME *name; + name=sk_GENERAL_NAME_value(san_names,sni); + if(ip==NULL){ + if(name->type==GEN_DNS + &&op_http_hostname_match(host,host_len,name->d.dNSName)){ + ret=1; + break; + } + } + else if(name->type==GEN_IPADD){ + unsigned char *cert_ip; + /*If we do have an IP address, compare it directly. + RFC 6125: "When the reference identity is an IP address, the identity + MUST be converted to the 'network byte order' octet string + representation. + For IP Version 4, as specified in RFC 791, the octet string will + contain exactly four octets. + For IP Version 6, as specified in RFC 2460, the octet string will + contain exactly sixteen octets. + This octet string is then compared against subjectAltName values of + type iPAddress. + A match occurs if the reference identity octet string and the value + octet strings are identical."*/ + cert_ip=ASN1_STRING_data(name->d.iPAddress); + if(ip_len==ASN1_STRING_length(name->d.iPAddress) + &&memcmp(ip,cert_ip,ip_len)==0){ + ret=1; + break; + } + } + } + sk_GENERAL_NAME_pop_free(san_names,GENERAL_NAME_free); + if(addr!=NULL)freeaddrinfo(addr); + } + /*Do the same FQDN check we did above. + We don't do this once in advance for both cases, because in the + subjectAltName case we might have an IPv6 address without a dot.*/ + else if(strchr(host,'.')!=NULL){ + int last_cn_loc; + int cn_loc; + /*If there is no subjectAltName, match against commonName. 
+ RFC 6125 says that at least one significant CA is known to issue certs + with multiple CNs, although it SHOULD NOT. + It also says: "The server's identity may also be verified by comparing + the reference identity to the Common Name (CN) value in the last + Relative Distinguished Name (RDN) of the subject field of the server's + certificate (where "last" refers to the DER-encoded order...)." + So find the last one and check it.*/ + cn_loc=-1; + do{ + last_cn_loc=cn_loc; + cn_loc=X509_NAME_get_index_by_NID(X509_get_subject_name(peer_cert), + NID_commonName,last_cn_loc); + } + while(cn_loc>=0); + ret=last_cn_loc>=0 + &&op_http_hostname_match(host,host_len, + X509_NAME_ENTRY_get_data( + X509_NAME_get_entry(X509_get_subject_name(peer_cert),last_cn_loc))); + } + X509_free(peer_cert); + return ret; +} + +/*Perform the TLS handshake on a new connection.*/ +static int op_http_conn_start_tls(OpusHTTPStream *_stream,OpusHTTPConn *_conn, + op_sock _fd,SSL *_ssl_conn){ + SSL_SESSION *ssl_session; + BIO *ssl_bio; + int skip_certificate_check; + int ret; + ssl_bio=BIO_new_socket(_fd,BIO_NOCLOSE); + if(OP_LIKELY(ssl_bio==NULL))return OP_FALSE; +# if !defined(OPENSSL_NO_TLSEXT) + /*Support for RFC 6066 Server Name Indication.*/ + SSL_set_tlsext_host_name(_ssl_conn,_stream->url.host); +# endif + /*Resume a previous session if available.*/ + if(_stream->ssl_session!=NULL){ + SSL_set_session(_ssl_conn,_stream->ssl_session); + } + /*If we're proxying, establish the CONNECT tunnel.*/ + if(_stream->proxy_connect.nbuf>0){ + ret=op_http_conn_establish_tunnel(_stream,_conn, + _fd,_ssl_conn,ssl_bio); + if(OP_UNLIKELY(ret<0))return ret; + } + else{ + /*Otherwise, just use this socket directly.*/ + op_sock_set_tcp_nodelay(_fd,1); + SSL_set_bio(_ssl_conn,ssl_bio,ssl_bio); + SSL_set_connect_state(_ssl_conn); + } + ret=op_do_ssl_step(_ssl_conn,_fd,SSL_connect); + if(OP_UNLIKELY(ret<=0))return OP_FALSE; + ssl_session=_stream->ssl_session; + 
skip_certificate_check=_stream->skip_certificate_check; + if(ssl_session==NULL||!skip_certificate_check){ + ret=op_do_ssl_step(_ssl_conn,_fd,SSL_do_handshake); + if(OP_UNLIKELY(ret<=0))return OP_FALSE; + /*OpenSSL does not do hostname verification, despite the fact that we just + passed it the hostname above in the call to SSL_set_tlsext_host_name(), + because they are morons. + Do it for them.*/ + if(!skip_certificate_check&&!op_http_verify_hostname(_stream,_ssl_conn)){ + return OP_FALSE; + } + if(ssl_session==NULL){ + /*Save the session for later resumption.*/ + _stream->ssl_session=SSL_get1_session(_ssl_conn); + } + } + _conn->ssl_conn=_ssl_conn; + _conn->fd=_fd; + _conn->nrequests_left=OP_PIPELINE_MAX_REQUESTS; + return 0; +} + +/*Try to start a connection to the next address in the given list of a given + type. + _fd: The socket to connect with. + [inout] _addr: A pointer to the list of addresses. + This will be advanced to the first one that matches the given + address family (possibly the current one). + _ai_family: The address family to connect to. + Return: 1 If the connection was successful. + 0 If the connection is in progress. + OP_FALSE If the connection failed and there were no more addresses + left to try. 
+ *_addr will be set to NULL in this case.*/ +static int op_sock_connect_next(op_sock _fd, + const struct addrinfo **_addr,int _ai_family){ + const struct addrinfo *addr; + int err; + addr=*_addr; + for(;;){ + /*Move to the next address of the requested type.*/ + for(;addr!=NULL&&addr->ai_family!=_ai_family;addr=addr->ai_next); + *_addr=addr; + /*No more: failure.*/ + if(addr==NULL)return OP_FALSE; + if(connect(_fd,addr->ai_addr,addr->ai_addrlen)>=0)return 1; + err=op_errno(); + /*Winsock will set WSAEWOULDBLOCK.*/ + if(OP_LIKELY(err==EINPROGRESS||err==EWOULDBLOCK))return 0; + addr=addr->ai_next; + } +} + +/*The number of address families to try connecting to simultaneously.*/ +# define OP_NPROTOS (2) + +static int op_http_connect_impl(OpusHTTPStream *_stream,OpusHTTPConn *_conn, + const struct addrinfo *_addrs,struct timeb *_start_time){ + const struct addrinfo *addr; + const struct addrinfo *addrs[OP_NPROTOS]; + struct pollfd fds[OP_NPROTOS]; + int ai_family; + int nprotos; + int ret; + int pi; + int pj; + for(pi=0;pi<OP_NPROTOS;pi++)addrs[pi]=NULL; + /*Try connecting via both IPv4 and IPv6 simultaneously, and keep the first + one that succeeds. + Start by finding the first address from each family. 
+ We order the first connection attempts in the same order the address + families were returned in the DNS records in accordance with RFC 6555.*/ + for(addr=_addrs,nprotos=0;addr!=NULL&&nprotos<OP_NPROTOS;addr=addr->ai_next){ + if(addr->ai_family==AF_INET6||addr->ai_family==AF_INET){ + OP_ASSERT(addr->ai_addrlen<=sizeof(struct sockaddr_in6)); + OP_ASSERT(addr->ai_addrlen<=sizeof(struct sockaddr_in)); + /*If we've seen this address family before, skip this address for now.*/ + for(pi=0;pi<nprotos;pi++)if(addrs[pi]->ai_family==addr->ai_family)break; + if(pi<nprotos)continue; + addrs[nprotos++]=addr; + } + } + /*Pop the connection off the free list and put it on the LRU list.*/ + OP_ASSERT(_stream->free_head==_conn); + _stream->free_head=_conn->next; + _conn->next=_stream->lru_head; + _stream->lru_head=_conn; + ftime(_start_time); + *&_conn->read_time=*_start_time; + _conn->read_bytes=0; + _conn->read_rate=0; + /*Try to start a connection to each protocol. + RFC 6555 says it is RECOMMENDED that connection attempts be paced + 150...250 ms apart "to balance human factors against network load", but + that "stateful algorithms" (that's us) "are expected to be more + aggressive". 
+ We are definitely more aggressive: we don't pace at all.*/ + for(pi=0;pi<nprotos;pi++){ + ai_family=addrs[pi]->ai_family; + fds[pi].fd=socket(ai_family,SOCK_STREAM,addrs[pi]->ai_protocol); + fds[pi].events=POLLOUT; + if(OP_LIKELY(fds[pi].fd!=OP_INVALID_SOCKET)){ + if(OP_LIKELY(op_sock_set_nonblocking(fds[pi].fd,1)>=0)){ + ret=op_sock_connect_next(fds[pi].fd,addrs+pi,ai_family); + if(OP_UNLIKELY(ret>0)){ + /*It succeeded right away (technically possible), so stop.*/ + nprotos=pi+1; + break; + } + /*Otherwise go on to the next protocol, and skip the clean-up below.*/ + else if(ret==0)continue; + /*Tried all the addresses for this protocol.*/ + } + /*Clean up the socket.*/ + close(fds[pi].fd); + } + /*Remove this protocol from the list.*/ + memmove(addrs+pi,addrs+pi+1,sizeof(*addrs)*(nprotos-pi-1)); + nprotos--; + pi--; + } + /*Wait for one of the connections to finish.*/ + while(pi>=nprotos&&nprotos>0&&poll(fds,nprotos,OP_POLL_TIMEOUT_MS)>0){ + for(pi=0;pi<nprotos;pi++){ + socklen_t errlen; + int err; + /*Still waiting...*/ + if(!fds[pi].revents)continue; + errlen=sizeof(err); + /*Some platforms will return the pending error in &err and return 0. + Others will put it in errno and return -1.*/ + ret=getsockopt(fds[pi].fd,SOL_SOCKET,SO_ERROR,&err,&errlen); + if(ret<0)err=op_errno(); + /*Success!*/ + if(err==0||err==EISCONN)break; + /*Move on to the next address for this protocol.*/ + ai_family=addrs[pi]->ai_family; + addrs[pi]=addrs[pi]->ai_next; + ret=op_sock_connect_next(fds[pi].fd,addrs+pi,ai_family); + /*It succeeded right away, so stop.*/ + if(ret>0)break; + /*Otherwise go on to the next protocol, and skip the clean-up below.*/ + else if(ret==0)continue; + /*Tried all the addresses for this protocol. 
+ Remove it from the list.*/ + close(fds[pi].fd); + memmove(fds+pi,fds+pi+1,sizeof(*fds)*(nprotos-pi-1)); + memmove(addrs+pi,addrs+pi+1,sizeof(*addrs)*(nprotos-pi-1)); + nprotos--; + pi--; + } + } + /*Close all the other sockets.*/ + for(pj=0;pj<nprotos;pj++)if(pi!=pj)close(fds[pj].fd); + /*If none of them succeeded, we're done.*/ + if(pi>=nprotos)return OP_FALSE; + /*Save this address for future connection attempts.*/ + if(addrs[pi]!=&_stream->addr_info){ + memcpy(&_stream->addr_info,addrs[pi],sizeof(_stream->addr_info)); + _stream->addr_info.ai_addr=&_stream->addr.s; + _stream->addr_info.ai_next=NULL; + memcpy(&_stream->addr,addrs[pi]->ai_addr,addrs[pi]->ai_addrlen); + } + if(OP_URL_IS_SSL(&_stream->url)){ + SSL *ssl_conn; + /*Start the SSL connection.*/ + OP_ASSERT(_stream->ssl_ctx!=NULL); + ssl_conn=SSL_new(_stream->ssl_ctx); + if(OP_LIKELY(ssl_conn!=NULL)){ + ret=op_http_conn_start_tls(_stream,_conn,fds[pi].fd,ssl_conn); + if(OP_LIKELY(ret>=0))return ret; + SSL_free(ssl_conn); + } + close(fds[pi].fd); + _conn->fd=OP_INVALID_SOCKET; + return OP_FALSE; + } + /*Just a normal non-SSL connection.*/ + _conn->ssl_conn=NULL; + _conn->fd=fds[pi].fd; + _conn->nrequests_left=OP_PIPELINE_MAX_REQUESTS; + /*Disable write coalescing. 
+ We always send whole requests at once and always parse the response headers + before sending another one.*/ + op_sock_set_tcp_nodelay(fds[pi].fd,1); + return 0; +} + +static int op_http_connect(OpusHTTPStream *_stream,OpusHTTPConn *_conn, + const struct addrinfo *_addrs,struct timeb *_start_time){ + struct timeb resolve_time; + struct addrinfo *new_addrs; + int ret; + /*Re-resolve the host if we need to (RFC 6555 says we MUST do so + occasionally).*/ + new_addrs=NULL; + ftime(&resolve_time); + if(_addrs!=&_stream->addr_info||op_time_diff_ms(&resolve_time, + &_stream->resolve_time)>=OP_RESOLVE_CACHE_TIMEOUT_MS){ + new_addrs=op_resolve(_stream->connect_host,_stream->connect_port); + if(OP_LIKELY(new_addrs!=NULL)){ + _addrs=new_addrs; + *&_stream->resolve_time=*&resolve_time; + } + else if(OP_LIKELY(_addrs==NULL))return OP_FALSE; + } + ret=op_http_connect_impl(_stream,_conn,_addrs,_start_time); + if(new_addrs!=NULL)freeaddrinfo(new_addrs); + return ret; +} + +# define OP_BASE64_LENGTH(_len) (((_len)+2)/3*4) + +static const char BASE64_TABLE[64]={ + 'A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P', + 'Q','R','S','T','U','V','W','X','Y','Z','a','b','c','d','e','f', + 'g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v', + 'w','x','y','z','0','1','2','3','4','5','6','7','8','9','+','/' +}; + +static char *op_base64_encode(char *_dst,const char *_src,int _len){ + unsigned s0; + unsigned s1; + unsigned s2; + int ngroups; + int i; + ngroups=_len/3; + for(i=0;i<ngroups;i++){ + s0=_src[3*i+0]; + s1=_src[3*i+1]; + s2=_src[3*i+2]; + _dst[4*i+0]=BASE64_TABLE[s0>>2]; + _dst[4*i+1]=BASE64_TABLE[(s0&3)<<4|s1>>4]; + _dst[4*i+2]=BASE64_TABLE[(s1&15)<<2|s2>>6]; + _dst[4*i+3]=BASE64_TABLE[s2&63]; + } + _len-=3*i; + if(_len==1){ + s0=_src[3*i+0]; + _dst[4*i+0]=BASE64_TABLE[s0>>2]; + _dst[4*i+1]=BASE64_TABLE[(s0&3)<<4]; + _dst[4*i+2]='='; + _dst[4*i+3]='='; + i++; + } + else if(_len==2){ + s0=_src[3*i+0]; + s1=_src[3*i+1]; + _dst[4*i+0]=BASE64_TABLE[s0>>2]; + 
_dst[4*i+1]=BASE64_TABLE[(s0&3)<<4|s1>>4]; + _dst[4*i+2]=BASE64_TABLE[(s1&15)<<2]; + _dst[4*i+3]='='; + i++; + } + _dst[4*i]='\0'; + return _dst+4*i; +} + +/*Construct an HTTP authorization header using RFC 2617's Basic Authentication + Scheme and append it to the given string buffer.*/ +static int op_sb_append_basic_auth_header(OpusStringBuf *_sb, + const char *_header,const char *_user,const char *_pass){ + int user_len; + int pass_len; + int user_pass_len; + int base64_len; + int nbuf_total; + int ret; + ret=op_sb_append_string(_sb,_header); + ret|=op_sb_append(_sb,": Basic ",8); + user_len=strlen(_user); + pass_len=strlen(_pass); + if(OP_UNLIKELY(pass_len>INT_MAX-user_len))return OP_EFAULT; + if(OP_UNLIKELY(user_len+pass_len>(INT_MAX>>2)*3-3))return OP_EFAULT; + user_pass_len=user_len+1+pass_len; + base64_len=OP_BASE64_LENGTH(user_pass_len); + /*Stick "user:pass" at the end of the buffer so we can Base64 encode it + in-place.*/ + nbuf_total=_sb->nbuf; + if(OP_UNLIKELY(base64_len>INT_MAX-nbuf_total))return OP_EFAULT; + nbuf_total+=base64_len; + ret|=op_sb_ensure_capacity(_sb,nbuf_total); + if(OP_UNLIKELY(ret<0))return ret; + _sb->nbuf=nbuf_total-user_pass_len; + OP_ALWAYS_TRUE(!op_sb_append(_sb,_user,user_len)); + OP_ALWAYS_TRUE(!op_sb_append(_sb,":",1)); + OP_ALWAYS_TRUE(!op_sb_append(_sb,_pass,pass_len)); + op_base64_encode(_sb->buf+nbuf_total-base64_len, + _sb->buf+nbuf_total-user_pass_len,user_pass_len); + return op_sb_append(_sb,"\r\n",2); +} + +static int op_http_allow_pipelining(const char *_server){ + /*Servers known to do bad things with pipelined requests. 
+ This list is taken from Gecko's nsHttpConnection::SupportsPipelining() (in + netwerk/protocol/http/nsHttpConnection.cpp).*/ + static const char *BAD_SERVERS[]={ + "EFAServer/", + "Microsoft-IIS/4.", + "Microsoft-IIS/5.", + "Netscape-Enterprise/3.", + "Netscape-Enterprise/4.", + "Netscape-Enterprise/5.", + "Netscape-Enterprise/6.", + "WebLogic 3.", + "WebLogic 4.", + "WebLogic 5.", + "WebLogic 6.", + "Winstone Servlet Engine v0." + }; +# define NBAD_SERVERS ((int)(sizeof(BAD_SERVERS)/sizeof(*BAD_SERVERS))) + if(*_server>='E'&&*_server<='W'){ + int si; + for(si=0;si<NBAD_SERVERS;si++){ + if(strncmp(_server,BAD_SERVERS[si],strlen(BAD_SERVERS[si]))==0){ + return 0; + } + } + } + return 1; +# undef NBAD_SERVERS +} + +static int op_http_stream_open(OpusHTTPStream *_stream,const char *_url, + int _skip_certificate_check,const char *_proxy_host,unsigned _proxy_port, + const char *_proxy_user,const char *_proxy_pass,OpusServerInfo *_info){ + struct addrinfo *addrs; + int nredirs; + int ret; +#if defined(_WIN32) + op_init_winsock(); +#endif + ret=op_parse_url(&_stream->url,_url); + if(OP_UNLIKELY(ret<0))return ret; + if(_proxy_host!=NULL){ + if(OP_UNLIKELY(_proxy_port>65535U))return OP_EINVAL; + _stream->connect_host=op_string_dup(_proxy_host); + _stream->connect_port=_proxy_port; + } + else{ + _stream->connect_host=_stream->url.host; + _stream->connect_port=_stream->url.port; + } + addrs=NULL; + for(nredirs=0;nredirs<OP_REDIRECT_LIMIT;nredirs++){ + OpusParsedURL next_url; + struct timeb start_time; + struct timeb end_time; + char *next; + char *status_code; + int minor_version_pos; + int v1_1_compat; + /*Initialize the SSL library if necessary.*/ + if(OP_URL_IS_SSL(&_stream->url)&&_stream->ssl_ctx==NULL){ + SSL_CTX *ssl_ctx; +# if !defined(OPENSSL_NO_LOCKING) + /*The documentation says SSL_library_init() is not reentrant. 
+ We don't want to add our own dependencies on a threading library, and it + appears that it's safe to call OpenSSL's locking functions before the + library is initialized, so that's what we'll do (really OpenSSL should + do this for us). + This doesn't guarantee that _other_ threads in the application aren't + calling SSL_library_init() at the same time, but there's not much we + can do about that.*/ + CRYPTO_w_lock(CRYPTO_LOCK_SSL); +# endif + SSL_library_init(); + /*Needed to get SHA2 algorithms with old OpenSSL versions.*/ + OpenSSL_add_ssl_algorithms(); +# if !defined(OPENSSL_NO_LOCKING) + CRYPTO_w_unlock(CRYPTO_LOCK_SSL); +# endif + ssl_ctx=SSL_CTX_new(SSLv23_client_method()); + if(ssl_ctx==NULL)return OP_EFAULT; + if(!_skip_certificate_check){ + /*We don't do anything if this fails, since it just means we won't load + any certificates (and thus all checks will fail). + However, as that is probably the result of a system + mis-configuration, assert here to make it easier to identify.*/ + OP_ALWAYS_TRUE(SSL_CTX_set_default_verify_paths(ssl_ctx)); + SSL_CTX_set_verify(ssl_ctx,SSL_VERIFY_PEER,NULL); + } + _stream->ssl_ctx=ssl_ctx; + _stream->skip_certificate_check=_skip_certificate_check; + if(_proxy_host!=NULL){ + /*We need to establish a CONNECT tunnel to handle https proxying. + Build the request we'll send to do so.*/ + _stream->proxy_connect.nbuf=0; + ret=op_sb_append(&_stream->proxy_connect,"CONNECT ",8); + ret|=op_sb_append_string(&_stream->proxy_connect,_stream->url.host); + ret|=op_sb_append_port(&_stream->proxy_connect,_stream->url.port); + /*CONNECT requires at least HTTP 1.1.*/ + ret|=op_sb_append(&_stream->proxy_connect," HTTP/1.1\r\n",11); + ret|=op_sb_append(&_stream->proxy_connect,"Host: ",6); + ret|=op_sb_append_string(&_stream->proxy_connect,_stream->url.host); + /*The example in RFC 2817 Section 5.2 specifies an explicit port even + when connecting to the default port. 
+ Given that the proxy doesn't know whether we're trying to connect to + an http or an https URL except by the port number, this seems like a + good idea.*/ + ret|=op_sb_append_port(&_stream->proxy_connect,_stream->url.port); + ret|=op_sb_append(&_stream->proxy_connect,"\r\n",2); + ret|=op_sb_append(&_stream->proxy_connect,"User-Agent: .\r\n",15); + if(_proxy_user!=NULL&&_proxy_pass!=NULL){ + ret|=op_sb_append_basic_auth_header(&_stream->proxy_connect, + "Proxy-Authorization",_proxy_user,_proxy_pass); + } + /*For backwards compatibility.*/ + ret|=op_sb_append(&_stream->proxy_connect, + "Proxy-Connection: keep-alive\r\n",30); + ret|=op_sb_append(&_stream->proxy_connect,"\r\n",2); + if(OP_UNLIKELY(ret<0))return ret; + } + } + /*Actually make the connection.*/ + ret=op_http_connect(_stream,_stream->conns+0,addrs,&start_time); + if(OP_UNLIKELY(ret<0))return ret; + /*Build the request to send.*/ + _stream->request.nbuf=0; + ret=op_sb_append(&_stream->request,"GET ",4); + ret|=op_sb_append_string(&_stream->request, + _proxy_host!=NULL?_url:_stream->url.path); + /*Send HTTP/1.0 by default for maximum compatibility (so we don't have to + re-try if HTTP/1.1 fails, though it shouldn't, even for a 1.0 server). + This means we aren't conditionally compliant with RFC 2145, because we + violate the requirement that "An HTTP client SHOULD send a request + version equal to the highest version for which the client is at least + conditionally compliant...". 
+ According to RFC 2145, that means we can't claim any compliance with any + IETF HTTP specification.*/ + ret|=op_sb_append(&_stream->request," HTTP/1.0\r\n",11); + /*Remember where this is so we can upgrade to HTTP/1.1 if the server + supports it.*/ + minor_version_pos=_stream->request.nbuf-3; + ret|=op_sb_append(&_stream->request,"Host: ",6); + ret|=op_sb_append_string(&_stream->request,_stream->url.host); + if(!OP_URL_IS_DEFAULT_PORT(&_stream->url)){ + ret|=op_sb_append_port(&_stream->request,_stream->url.port); + } + ret|=op_sb_append(&_stream->request,"\r\n",2); + /*User-Agents have been a bad idea, so send as little as possible. + RFC 2616 requires at least one token in the User-Agent, which must have + at least one character.*/ + ret|=op_sb_append(&_stream->request,"User-Agent: .\r\n",15); + if(_proxy_host!=NULL&&!OP_URL_IS_SSL(&_stream->url) + &&_proxy_user!=NULL&&_proxy_pass!=NULL){ + ret|=op_sb_append_basic_auth_header(&_stream->request, + "Proxy-Authorization",_proxy_user,_proxy_pass); + } + if(_stream->url.user!=NULL&&_stream->url.pass!=NULL){ + ret|=op_sb_append_basic_auth_header(&_stream->request, + "Authorization",_stream->url.user,_stream->url.pass); + } + /*Always send a Referer [sic] header. + It's common to refuse to serve a resource unless one is present. + We just use the relative "/" URI to suggest we came from the same domain, + as this is the most common check. + This might violate RFC 2616's mandate that the field "MUST NOT be sent if + the Request-URI was obtained from a source that does not have its own + URI, such as input from the user keyboard," but we don't really have any + way to know.*/ + /*TODO: Should we update this on redirects?*/ + ret|=op_sb_append(&_stream->request,"Referer: /\r\n",12); + /*Always send a Range request header to find out if we're seekable. 
+ This requires an HTTP/1.1 server to succeed, but we'll still get what we + want with an HTTP/1.0 server that ignores this request header.*/ + ret|=op_sb_append(&_stream->request,"Range: bytes=0-\r\n",17); + /*Remember where this is so we can append offsets to it later.*/ + _stream->request_tail=_stream->request.nbuf-4; + ret|=op_sb_append(&_stream->request,"\r\n",2); + if(OP_UNLIKELY(ret<0))return ret; + ret=op_http_conn_write_fully(_stream->conns+0, + _stream->request.buf,_stream->request.nbuf); + if(OP_UNLIKELY(ret<0))return ret; + ret=op_http_conn_read_response(_stream->conns+0,&_stream->response); + if(OP_UNLIKELY(ret<0))return ret; + ftime(&end_time); + next=op_http_parse_status_line(&v1_1_compat,&status_code, + _stream->response.buf); + if(OP_UNLIKELY(next==NULL))return OP_FALSE; + if(status_code[0]=='2'){ + opus_int64 content_length; + opus_int64 range_length; + int pipeline_supported; + int pipeline_disabled; + /*We only understand 20x codes.*/ + if(status_code[1]!='0')return OP_FALSE; + content_length=-1; + range_length=-1; + /*Pipelining must be explicitly enabled.*/ + pipeline_supported=0; + pipeline_disabled=0; + for(;;){ + char *header; + char *cdr; + ret=op_http_get_next_header(&header,&cdr,&next); + if(OP_UNLIKELY(ret<0))return ret; + if(header==NULL)break; + if(strcmp(header,"content-length")==0){ + /*Two Content-Length headers?*/ + if(OP_UNLIKELY(content_length>=0))return OP_FALSE; + content_length=op_http_parse_content_length(cdr); + if(OP_UNLIKELY(content_length<0))return (int)content_length; + /*Make sure the Content-Length and Content-Range headers match.*/ + if(range_length>=0&&OP_UNLIKELY(content_length!=range_length)){ + return OP_FALSE; + } + } + else if(strcmp(header,"content-range")==0){ + opus_int64 range_first; + opus_int64 range_last; + /*Two Content-Range headers?*/ + if(OP_UNLIKELY(range_length>=0))return OP_FALSE; + ret=op_http_parse_content_range(&range_first,&range_last, + &range_length,cdr); + if(OP_UNLIKELY(ret<0))return ret; 
+ /*"A response with status code 206 (Partial Content) MUST NOT + include a Content-Range field with a byte-range-resp-spec of + '*'."*/ + if(status_code[2]=='6' + &&(OP_UNLIKELY(range_first<0)||OP_UNLIKELY(range_last<0))){ + return OP_FALSE; + } + /*We asked for the entire resource.*/ + if(range_length>=0){ + /*Quit if we didn't get it.*/ + if(range_last>=0&&OP_UNLIKELY(range_last!=range_length-1)){ + return OP_FALSE; + } + } + /*If there was no length, use the end of the range.*/ + else if(range_last>=0)range_length=range_last+1; + /*Make sure the Content-Length and Content-Range headers match.*/ + if(content_length>=0&&OP_UNLIKELY(content_length!=range_length)){ + return OP_FALSE; + } + } + else if(strcmp(header,"connection")==0){ + /*According to RFC 2616, if an HTTP/1.1 application does not support + pipelining, it "MUST include the 'close' connection option in + every message." + Therefore, if we receive one in the initial response, disable + pipelining entirely. + The server still might support it (e.g., we might just have hit the + request limit for a temporary child process), but if it doesn't + and we assume it does, every time we cross a chunk boundary we'll + error out and reconnect, adding lots of latency.*/ + ret=op_http_parse_connection(cdr); + if(OP_UNLIKELY(ret<0))return ret; + pipeline_disabled|=ret; + } + else if(strcmp(header,"server")==0){ + /*If we got a Server response header, and it wasn't from a known-bad + server, enable pipelining, as long as it's at least HTTP/1.1. + According to RFC 2145, the server is supposed to respond with the + highest minor version number it supports unless it is known or + suspected that we incorrectly implement the HTTP specification. 
+ So it should send back at least HTTP/1.1, despite our HTTP/1.0 + request.*/ + pipeline_supported=v1_1_compat; + if(v1_1_compat)pipeline_disabled|=!op_http_allow_pipelining(cdr); + if(_info!=NULL&&_info->server==NULL)_info->server=op_string_dup(cdr); + } + /*Collect station information headers if the caller requested it. + If there's more than one copy of a header, the first one wins.*/ + else if(_info!=NULL){ + if(strcmp(header,"content-type")==0){ + if(_info->content_type==NULL){ + _info->content_type=op_string_dup(cdr); + } + } + else if(header[0]=='i'&&header[1]=='c' + &&(header[2]=='e'||header[2]=='y')&&header[3]=='-'){ + if(strcmp(header+4,"name")==0){ + if(_info->name==NULL)_info->name=op_string_dup(cdr); + } + else if(strcmp(header+4,"description")==0){ + if(_info->description==NULL)_info->description=op_string_dup(cdr); + } + else if(strcmp(header+4,"genre")==0){ + if(_info->genre==NULL)_info->genre=op_string_dup(cdr); + } + else if(strcmp(header+4,"url")==0){ + if(_info->url==NULL)_info->url=op_string_dup(cdr); + } + else if(strcmp(header,"icy-br")==0 + ||strcmp(header,"ice-bitrate")==0){ + if(_info->bitrate_kbps<0){ + opus_int64 bitrate_kbps; + /*Just re-using this function to parse a random unsigned + integer field.*/ + bitrate_kbps=op_http_parse_content_length(cdr); + if(bitrate_kbps>=0&&bitrate_kbps<=OP_INT32_MAX){ + _info->bitrate_kbps=(opus_int32)bitrate_kbps; + } + } + } + else if(strcmp(header,"icy-pub")==0 + ||strcmp(header,"ice-public")==0){ + if(_info->is_public<0&&(cdr[0]=='0'||cdr[0]=='1')&&cdr[1]=='\0'){ + _info->is_public=cdr[0]-'0'; + } + } + } + } + } + switch(status_code[2]){ + /*200 OK*/ + case '0':break; + /*203 Non-Authoritative Information*/ + case '3':break; + /*204 No Content*/ + case '4':{ + if(content_length>=0&&OP_UNLIKELY(content_length!=0)){ + return OP_FALSE; + } + }break; + /*206 Partial Content*/ + case '6':{ + /*No Content-Range header.*/ + if(OP_UNLIKELY(range_length<0))return OP_FALSE; + content_length=range_length; + 
/*The server supports range requests for this resource. + We can seek.*/ + _stream->seekable=1; + }break; + /*201 Created: the response "SHOULD include an entity containing a list + of resource characteristics and location(s)," but not an Opus file. + 202 Accepted: the response "SHOULD include an indication of request's + current status and either a pointer to a status monitor or some + estimate of when the user can expect the request to be fulfilled," + but not an Opus file. + 205 Reset Content: this "MUST NOT include an entity," meaning no Opus + file. + 207...209 are not yet defined, so we don't know how to handle them.*/ + default:return OP_FALSE; + } + _stream->content_length=content_length; + _stream->pipeline=pipeline_supported&&!pipeline_disabled; + /*Pipelining requires HTTP/1.1 persistent connections.*/ + if(_stream->pipeline)_stream->request.buf[minor_version_pos]='1'; + _stream->conns[0].pos=0; + _stream->conns[0].end_pos=_stream->seekable?content_length:-1; + _stream->conns[0].chunk_size=-1; + _stream->cur_conni=0; + _stream->connect_rate=op_time_diff_ms(&end_time,&start_time); + _stream->connect_rate=OP_MAX(_stream->connect_rate,1); + if(_info!=NULL)_info->is_ssl=OP_URL_IS_SSL(&_stream->url); + /*The URL has been successfully opened.*/ + return 0; + } + /*Shouldn't get 1xx; 4xx and 5xx are both failures (and we don't retry). 
+ Everything else is undefined.*/ + else if(status_code[0]!='3')return OP_FALSE; + /*We have some form of redirect request.*/ + /*We only understand 30x codes.*/ + if(status_code[1]!='0')return OP_FALSE; + switch(status_code[2]){ + /*300 Multiple Choices: "If the server has a preferred choice of + representation, it SHOULD include the specific URI for that + representation in the Location field," otherwise we'll fail.*/ + case '0': + /*301 Moved Permanently*/ + case '1': + /*302 Found*/ + case '2': + /*307 Temporary Redirect*/ + case '7': + /*308 Permanent Redirect (defined by draft-reschke-http-status-308-07).*/ + case '8':break; + /*305 Use Proxy: "The Location field gives the URI of the proxy." + TODO: This shouldn't actually be that hard to do.*/ + case '5':return OP_EIMPL; + /*303 See Other: "The new URI is not a substitute reference for the + originally requested resource." + 304 Not Modified: "The 304 response MUST NOT contain a message-body." + 306 (Unused) + 309 is not yet defined, so we don't know how to handle it.*/ + default:return OP_FALSE; + } + _url=NULL; + for(;;){ + char *header; + char *cdr; + ret=op_http_get_next_header(&header,&cdr,&next); + if(OP_UNLIKELY(ret<0))return ret; + if(header==NULL)break; + if(strcmp(header,"location")==0&&OP_LIKELY(_url==NULL))_url=cdr; + } + if(OP_UNLIKELY(_url==NULL))return OP_FALSE; + ret=op_parse_url(&next_url,_url); + if(OP_UNLIKELY(ret<0))return ret; + if(_proxy_host==NULL||_stream->ssl_session!=NULL){ + if(strcmp(_stream->url.host,next_url.host)==0 + &&_stream->url.port==next_url.port){ + /*Try to skip re-resolve when connecting to the same host.*/ + addrs=&_stream->addr_info; + } + else{ + if(_stream->ssl_session!=NULL){ + /*Forget any cached SSL session from the last host.*/ + SSL_SESSION_free(_stream->ssl_session); + _stream->ssl_session=NULL; + } + } + } + if(_proxy_host==NULL){ + OP_ASSERT(_stream->connect_host==_stream->url.host); + _stream->connect_host=next_url.host; + 
_stream->connect_port=next_url.port; + } + /*Always try to skip re-resolve for proxy connections.*/ + else addrs=&_stream->addr_info; + op_parsed_url_clear(&_stream->url); + *&_stream->url=*&next_url; + /*TODO: On servers/proxies that support pipelining, we might be able to + re-use this connection.*/ + op_http_conn_close(_stream,_stream->conns+0,&_stream->lru_head,1); + } + /*Redirection limit reached.*/ + return OP_FALSE; +} + +static int op_http_conn_send_request(OpusHTTPStream *_stream, + OpusHTTPConn *_conn,opus_int64 _pos,opus_int32 _chunk_size, + int _try_not_to_block){ + opus_int64 next_end; + int ret; + /*We shouldn't have another request outstanding.*/ + OP_ASSERT(_conn->next_pos<0); + /*Build the request to send.*/ + OP_ASSERT(_stream->request.nbuf>=_stream->request_tail); + _stream->request.nbuf=_stream->request_tail; + ret=op_sb_append_nonnegative_int64(&_stream->request,_pos); + ret|=op_sb_append(&_stream->request,"-",1); + if(_chunk_size>0&&OP_ADV_OFFSET(_pos,2*_chunk_size)<_stream->content_length){ + /*We shouldn't be pipelining requests with non-HTTP/1.1 servers.*/ + OP_ASSERT(_stream->pipeline); + next_end=_pos+_chunk_size; + ret|=op_sb_append_nonnegative_int64(&_stream->request,next_end-1); + /*Use a larger chunk size for our next request.*/ + _chunk_size<<=1; + /*But after a while, just request the rest of the resource.*/ + if(_chunk_size>OP_PIPELINE_CHUNK_SIZE_MAX)_chunk_size=-1; + } + else{ + /*Either this was a non-pipelined request or we were close enough to the + end to just ask for the rest.*/ + next_end=-1; + _chunk_size=-1; + } + ret|=op_sb_append(&_stream->request,"\r\n\r\n",4); + if(OP_UNLIKELY(ret<0))return ret; + /*If we don't want to block, check to see if there's enough space in the send + queue. + There's still a chance we might block, even if there is enough space, but + it's a much slimmer one. 
+ Blocking at all is pretty unlikely, as we won't have any requests queued + when _try_not_to_block is set, so if FIONSPACE isn't available (e.g., on + Linux), just skip the test.*/ + if(_try_not_to_block){ +# if defined(FIONSPACE) + int available; + ret=ioctl(_conn->fd,FIONSPACE,&available); + if(ret<0||available<_stream->request.nbuf)return 1; +# endif + } + ret=op_http_conn_write_fully(_conn, + _stream->request.buf,_stream->request.nbuf); + if(OP_UNLIKELY(ret<0))return ret; + _conn->next_pos=_pos; + _conn->next_end=next_end; + /*Save the chunk size to use for the next request.*/ + _conn->chunk_size=_chunk_size; + _conn->nrequests_left--; + return ret; +} + +/*Handles the response to all requests after the first one. + Return: 1 if the connection was closed or timed out, 0 on success, or a + negative value on any other error.*/ +static int op_http_conn_handle_response(OpusHTTPStream *_stream, + OpusHTTPConn *_conn){ + char *next; + char *status_code; + opus_int64 range_length; + opus_int64 next_pos; + opus_int64 next_end; + int ret; + ret=op_http_conn_read_response(_conn,&_stream->response); + /*If the server just closed the connection on us, we may have just hit a + connection re-use limit, so we might want to retry.*/ + if(OP_UNLIKELY(ret<0))return ret==OP_EREAD?1:ret; + next=op_http_parse_status_line(NULL,&status_code,_stream->response.buf); + if(OP_UNLIKELY(next==NULL))return OP_FALSE; + /*We _need_ a 206 Partial Content response. 
+ Nothing else will do.*/ + if(strncmp(status_code,"206",3)!=0){ + /*But on a 408 Request Timeout, we might want to re-try.*/ + return strncmp(status_code,"408",3)==0?1:OP_FALSE; + } + next_pos=_conn->next_pos; + next_end=_conn->next_end; + range_length=-1; + for(;;){ + char *header; + char *cdr; + ret=op_http_get_next_header(&header,&cdr,&next); + if(OP_UNLIKELY(ret<0))return ret; + if(header==NULL)break; + if(strcmp(header,"content-range")==0){ + opus_int64 range_first; + opus_int64 range_last; + /*Two Content-Range headers?*/ + if(OP_UNLIKELY(range_length>=0))return OP_FALSE; + ret=op_http_parse_content_range(&range_first,&range_last, + &range_length,cdr); + if(OP_UNLIKELY(ret<0))return ret; + /*"A response with status code 206 (Partial Content) MUST NOT + include a Content-Range field with a byte-range-resp-spec of + '*'."*/ + if(OP_UNLIKELY(range_first<0)||OP_UNLIKELY(range_last<0))return OP_FALSE; + /*We also don't want range_last to overflow.*/ + if(OP_UNLIKELY(range_last>=OP_INT64_MAX))return OP_FALSE; + range_last++; + /*Quit if we didn't get the offset we asked for.*/ + if(range_first!=next_pos)return OP_FALSE; + if(next_end<0){ + /*We asked for the rest of the resource.*/ + if(range_length>=0){ + /*Quit if we didn't get it.*/ + if(OP_UNLIKELY(range_last!=range_length))return OP_FALSE; + } + /*If there was no length, use the end of the range.*/ + else range_length=range_last; + next_end=range_last; + } + else{ + if(range_last!=next_end)return OP_FALSE; + /*If there was no length, use the larger of the content length or the + end of this chunk.*/ + if(range_length<0){ + range_length=OP_MAX(range_last,_stream->content_length); + } + } + } + else if(strcmp(header,"content-length")==0){ + opus_int64 content_length; + /*Validate the Content-Length header, if present, against the request we + made.*/ + content_length=op_http_parse_content_length(cdr); + if(OP_UNLIKELY(content_length<0))return (int)content_length; + if(next_end<0){ + /*If we haven't seen the 
Content-Range header yet and we asked for the + rest of the resource, set next_end, so we can make sure they match + when we do find the Content-Range header.*/ + if(OP_UNLIKELY(next_pos>OP_INT64_MAX-content_length))return OP_FALSE; + next_end=next_pos+content_length; + } + /*Otherwise, make sure they match now.*/ + else if(OP_UNLIKELY(next_end-next_pos!=content_length))return OP_FALSE; + } + else if(strcmp(header,"connection")==0){ + ret=op_http_parse_connection(cdr); + if(OP_UNLIKELY(ret<0))return ret; + /*If the server told us it was going to close the connection, don't make + any more requests.*/ + if(OP_UNLIKELY(ret>0))_conn->nrequests_left=0; + } + } + /*No Content-Range header.*/ + if(OP_UNLIKELY(range_length<0))return OP_FALSE; + /*Update the content_length if necessary.*/ + _stream->content_length=range_length; + _conn->pos=next_pos; + _conn->end_pos=next_end; + _conn->next_pos=-1; + return 0; +} + +/*Open a new connection that will start reading at byte offset _pos. + _pos: The byte offset to start reading from. + _chunk_size: The number of bytes to ask for in the initial request, or -1 to + request the rest of the resource. + This may be more bytes than remain, in which case it will be + converted into a request for the rest.*/ +static int op_http_conn_open_pos(OpusHTTPStream *_stream, + OpusHTTPConn *_conn,opus_int64 _pos,opus_int32 _chunk_size){ + struct timeb start_time; + struct timeb end_time; + opus_int32 connect_rate; + opus_int32 connect_time; + int ret; + ret=op_http_connect(_stream,_conn,&_stream->addr_info,&start_time); + if(OP_UNLIKELY(ret<0))return ret; + ret=op_http_conn_send_request(_stream,_conn,_pos,_chunk_size,0); + if(OP_UNLIKELY(ret<0))return ret; + ret=op_http_conn_handle_response(_stream,_conn); + if(OP_UNLIKELY(ret!=0))return OP_FALSE; + ftime(&end_time); + _stream->cur_conni=_conn-_stream->conns; + OP_ASSERT(_stream->cur_conni>=0&&_stream->cur_conni<OP_NCONNS_MAX); + /*The connection has been successfully opened. 
+ Update the connection time estimate.*/ + connect_time=op_time_diff_ms(&end_time,&start_time); + connect_rate=_stream->connect_rate; + connect_rate+=OP_MAX(connect_time,1)-connect_rate+8>>4; + _stream->connect_rate=connect_rate; + return 0; +} + +/*Read data from the current response body. + If we're pipelining and we get close to the end of this response, queue + another request. + If we've reached the end of this response body, parse the next response and + keep going. + [out] _buf: Returns the data read. + _buf_size: The size of the buffer. + Return: A positive number of bytes read on success. + 0: The connection was closed. + OP_EREAD: There was a fatal read error.*/ +static int op_http_conn_read_body(OpusHTTPStream *_stream, + OpusHTTPConn *_conn,unsigned char *_buf,int _buf_size){ + opus_int64 pos; + opus_int64 end_pos; + opus_int64 next_pos; + opus_int64 content_length; + int nread; + int pipeline; + int ret; + /*Currently this function can only be called on the LRU head. + Otherwise, we'd need a _pnext pointer if we needed to close the connection, + and re-opening it would re-organize the lists.*/ + OP_ASSERT(_stream->lru_head==_conn); + /*We should have filtered out empty reads by this point.*/ + OP_ASSERT(_buf_size>0); + pos=_conn->pos; + end_pos=_conn->end_pos; + next_pos=_conn->next_pos; + pipeline=_stream->pipeline; + content_length=_stream->content_length; + if(end_pos>=0){ + /*Have we reached the end of the current response body?*/ + if(pos>=end_pos){ + OP_ASSERT(content_length>=0); + /*If this was the end of the stream, we're done. 
+ Also return early if a non-blocking read was requested (regardless of + whether we might be able to parse the next response without + blocking).*/ + if(content_length<=end_pos)return 0; + /*Otherwise, start on the next response.*/ + if(next_pos<0){ + /*We haven't issued another request yet.*/ + if(!pipeline||_conn->nrequests_left<=0){ + /*There are two ways to get here: either the server told us it was + going to close the connection after the last request, or we + thought we were reading the whole resource, but it grew while we + were reading it. + The only way the latter could have happened is if content_length + changed while seeking. + Open a new request to read the rest.*/ + OP_ASSERT(_stream->seekable); + /*Try to open a new connection to read another chunk.*/ + op_http_conn_close(_stream,_conn,&_stream->lru_head,1); + /*If we're not pipelining, we should be requesting the rest.*/ + OP_ASSERT(pipeline||_conn->chunk_size==-1); + ret=op_http_conn_open_pos(_stream,_conn,end_pos,_conn->chunk_size); + if(OP_UNLIKELY(ret<0))return OP_EREAD; + } + else{ + /*Issue the request now (better late than never).*/ + ret=op_http_conn_send_request(_stream,_conn,pos,_conn->chunk_size,0); + if(OP_UNLIKELY(ret<0))return OP_EREAD; + next_pos=_conn->next_pos; + OP_ASSERT(next_pos>=0); + } + } + if(next_pos>=0){ + /*We shouldn't be trying to read past the current request body if we're + seeking somewhere else.*/ + OP_ASSERT(next_pos==end_pos); + ret=op_http_conn_handle_response(_stream,_conn); + if(OP_UNLIKELY(ret<0))return OP_EREAD; + if(OP_UNLIKELY(ret>0)&&pipeline){ + opus_int64 next_end; + next_end=_conn->next_end; + /*Our request timed out or the server closed the connection. 
+ Try re-connecting.*/ + op_http_conn_close(_stream,_conn,&_stream->lru_head,1); + /*Unless there's a bug, we should be able to convert + (next_pos,next_end) into valid (_pos,_chunk_size) parameters.*/ + OP_ASSERT(next_end<0 + ||next_end-next_pos>=0&&next_end-next_pos<=OP_INT32_MAX); + ret=op_http_conn_open_pos(_stream,_conn,next_pos, + next_end<0?-1:(opus_int32)(next_end-next_pos)); + if(OP_UNLIKELY(ret<0))return OP_EREAD; + } + else if(OP_UNLIKELY(ret!=0))return OP_EREAD; + } + pos=_conn->pos; + end_pos=_conn->end_pos; + content_length=_stream->content_length; + } + OP_ASSERT(end_pos>pos); + _buf_size=OP_MIN(_buf_size,end_pos-pos); + } + nread=op_http_conn_read(_conn,(char *)_buf,_buf_size,1); + if(OP_UNLIKELY(nread<0))return nread; + pos+=nread; + _conn->pos=pos; + OP_ASSERT(end_pos<0||content_length>=0); + /*TODO: If nrequests_left<=0, we can't make a new request, and there will be + a big pause after we hit the end of the chunk while we open a new + connection. + It would be nice to be able to start that process now, but we have no way + to do it in the background without blocking (even if we could start it, we + have no guarantee the application will return control to us in a + sufficiently timely manner to allow us to complete it, and this is + uncommon enough that it's not worth using threads just for this).*/ + if(end_pos>=0&&end_pos<content_length&&next_pos<0 + &&pipeline&&OP_LIKELY(_conn->nrequests_left>0)){ + opus_int64 request_thresh; + opus_int32 chunk_size; + /*Are we getting close to the end of the current response body? 
+ If so, we should request more data.*/ + request_thresh=_stream->connect_rate*_conn->read_rate>>12; + /*But don't commit ourselves too quickly.*/ + chunk_size=_conn->chunk_size; + if(chunk_size>=0)request_thresh=OP_MIN(chunk_size>>2,request_thresh); + if(end_pos-pos<request_thresh){ + ret=op_http_conn_send_request(_stream,_conn,end_pos,_conn->chunk_size,1); + if(OP_UNLIKELY(ret<0))return OP_EREAD; + } + } + return nread; +} + +static int op_http_stream_read(void *_stream, + unsigned char *_ptr,int _buf_size){ + OpusHTTPStream *stream; + ptrdiff_t nread; + opus_int64 size; + opus_int64 pos; + int ci; + stream=(OpusHTTPStream *)_stream; + /*Check for an empty read.*/ + if(_buf_size<=0)return 0; + ci=stream->cur_conni; + /*No current connection => EOF.*/ + if(ci<0)return 0; + pos=stream->conns[ci].pos; + size=stream->content_length; + /*Check for EOF.*/ + if(size>=0){ + if(pos>=size)return 0; + /*Check for a short read.*/ + if(_buf_size>size-pos)_buf_size=(int)(size-pos); + } + nread=op_http_conn_read_body(stream,stream->conns+ci,_ptr,_buf_size); + if(OP_UNLIKELY(nread<=0)){ + /*We hit an error or EOF. + Either way, we're done with this connection.*/ + op_http_conn_close(stream,stream->conns+ci,&stream->lru_head,1); + stream->cur_conni=-1; + stream->pos=pos; + } + return nread; +} + +/*Discard data until we reach the _target position. + This destroys the contents of _stream->response.buf, as we need somewhere to + read this data, and that is a convenient place. + _just_read_ahead: Whether or not this is a plain fast-forward. + If 0, we need to issue a new request for a chunk at _target + and discard all the data from our current request(s). + Otherwise, we should be able to reach _target without + issuing any new requests. 
+ _target: The stream position to which to read ahead.*/ +static int op_http_conn_read_ahead(OpusHTTPStream *_stream, + OpusHTTPConn *_conn,int _just_read_ahead,opus_int64 _target){ + opus_int64 pos; + opus_int64 end_pos; + opus_int64 next_pos; + opus_int64 next_end; + ptrdiff_t nread; + int ret; + pos=_conn->pos; + end_pos=_conn->end_pos; + next_pos=_conn->next_pos; + next_end=_conn->next_end; + if(!_just_read_ahead){ + /*We need to issue a new pipelined request. + This is the only case where we allow more than one outstanding request + at a time, so we need to reset next_pos (we'll restore it below if we + did have an outstanding request).*/ + OP_ASSERT(_stream->pipeline); + _conn->next_pos=-1; + ret=op_http_conn_send_request(_stream,_conn,_target, + OP_PIPELINE_CHUNK_SIZE,0); + if(OP_UNLIKELY(ret<0))return ret; + } + /*We can reach the target position by reading forward in the current chunk.*/ + if(_just_read_ahead&&(end_pos<0||_target<end_pos))end_pos=_target; + else if(next_pos>=0){ + opus_int64 next_next_pos; + opus_int64 next_next_end; + /*We already have a request outstanding. 
+ Finish off the current chunk.*/ + while(pos<end_pos){ + nread=op_http_conn_read(_conn,_stream->response.buf, + (int)OP_MIN(end_pos-pos,_stream->response.cbuf),1); + /*We failed to read ahead.*/ + if(nread<=0)return OP_FALSE; + pos+=nread; + } + OP_ASSERT(pos==end_pos); + if(_just_read_ahead){ + next_next_pos=next_next_end=-1; + end_pos=_target; + } + else{ + OP_ASSERT(_conn->next_pos==_target); + next_next_pos=_target; + next_next_end=_conn->next_end; + _conn->next_pos=next_pos; + _conn->next_end=next_end; + end_pos=next_end; + } + ret=op_http_conn_handle_response(_stream,_conn); + if(OP_UNLIKELY(ret!=0))return OP_FALSE; + _conn->next_pos=next_next_pos; + _conn->next_end=next_next_end; + } + while(pos<end_pos){ + nread=op_http_conn_read(_conn,_stream->response.buf, + (int)OP_MIN(end_pos-pos,_stream->response.cbuf),1); + /*We failed to read ahead.*/ + if(nread<=0)return OP_FALSE; + pos+=nread; + } + OP_ASSERT(pos==end_pos); + if(!_just_read_ahead){ + ret=op_http_conn_handle_response(_stream,_conn); + if(OP_UNLIKELY(ret!=0))return OP_FALSE; + } + else _conn->pos=end_pos; + OP_ASSERT(_conn->pos==_target); + return 0; +} + +static int op_http_stream_seek(void *_stream,opus_int64 _offset,int _whence){ + struct timeb seek_time; + OpusHTTPStream *stream; + OpusHTTPConn *conn; + OpusHTTPConn **pnext; + OpusHTTPConn *close_conn; + OpusHTTPConn **close_pnext; + opus_int64 content_length; + opus_int64 pos; + int pipeline; + int ci; + int ret; + stream=(OpusHTTPStream *)_stream; + if(!stream->seekable)return -1; + content_length=stream->content_length; + /*If we're seekable, we should have gotten a Content-Length.*/ + OP_ASSERT(content_length>=0); + ci=stream->cur_conni; + pos=ci<0?content_length:stream->conns[ci].pos; + switch(_whence){ + case SEEK_SET:{ + /*Check for overflow:*/ + if(_offset<0)return -1; + pos=_offset; + }break; + case SEEK_CUR:{ + /*Check for overflow:*/ + if(_offset<-pos||_offset>OP_INT64_MAX-pos)return -1; + pos+=_offset; + }break; + case SEEK_END:{ + 
/*As with fseek(), _offset is added to the end position, so negative + values seek backwards from the end of the resource. + Check for overflow (or a seek to before the start):*/ + if(_offset<-content_length||_offset>OP_INT64_MAX-content_length)return -1; + pos=content_length+_offset; + }break; + default:return -1; + } + /*Mark when we deactivated the active connection.*/ + if(ci>=0){ + op_http_conn_read_rate_update(stream->conns+ci); + *&seek_time=*&stream->conns[ci].read_time; + } + else ftime(&seek_time); + /*If we seeked past the end of the stream, just disable the active + connection.*/ + if(pos>=content_length){ + stream->cur_conni=-1; + stream->pos=pos; + return 0; + } + /*First try to find a connection we can use without waiting.*/ + pnext=&stream->lru_head; + conn=stream->lru_head; + while(conn!=NULL){ + opus_int64 conn_pos; + opus_int64 end_pos; + int available; + /*If this connection has been dormant too long or has made too many + requests, close it. + This is to prevent us from hitting server limits/firewall timeouts.*/ + if(op_time_diff_ms(&seek_time,&conn->read_time)> + OP_CONNECTION_IDLE_TIMEOUT_MS + ||conn->nrequests_left<OP_PIPELINE_MIN_REQUESTS){ + op_http_conn_close(stream,conn,pnext,1); + conn=*pnext; + continue; + } + available=op_http_conn_estimate_available(conn); + conn_pos=conn->pos; + end_pos=conn->end_pos; + if(conn->next_pos>=0){ + OP_ASSERT(end_pos>=0); + OP_ASSERT(conn->next_pos==end_pos); + end_pos=conn->next_end; + } + OP_ASSERT(end_pos<0||conn_pos<=end_pos); + /*Can we quickly read ahead without issuing a new request or waiting for + any more data? 
+ If we have an oustanding request, we'll over-estimate the amount of data + it has available (because we'll count the response headers, too), but + that probably doesn't matter.*/ + if(conn_pos<=pos&&pos-conn_pos<=available&&(end_pos<0||pos<end_pos)){ + /*Found a suitable connection to re-use.*/ + ret=op_http_conn_read_ahead(stream,conn,1,pos); + if(OP_UNLIKELY(ret<0)){ + /*The connection might have become stale, so close it and keep going.*/ + op_http_conn_close(stream,conn,pnext,1); + conn=*pnext; + continue; + } + /*Sucessfully resurrected this connection.*/ + *pnext=conn->next; + conn->next=stream->lru_head; + stream->lru_head=conn; + stream->cur_conni=conn-stream->conns; + return 0; + } + pnext=&conn->next; + conn=conn->next; + } + /*Chances are that didn't work, so now try to find one we can use by reading + ahead a reasonable amount and/or by issuing a new request.*/ + close_pnext=NULL; + close_conn=NULL; + pnext=&stream->lru_head; + conn=stream->lru_head; + pipeline=stream->pipeline; + while(conn!=NULL){ + opus_int64 conn_pos; + opus_int64 end_pos; + opus_int64 read_ahead_thresh; + int available; + int just_read_ahead; + /*Dividing by 2048 instead of 1000 scales this by nearly 1/2, biasing away + from connection re-use (and roughly compensating for the lag required to + reopen the TCP window of a connection that's been idle). 
+ There's no overflow checking here, because it's vanishingly unlikely, and + all it would do is cause us to make poor decisions.*/ + read_ahead_thresh=OP_MAX(OP_READAHEAD_THRESH_MIN, + stream->connect_rate*conn->read_rate>>11); + available=op_http_conn_estimate_available(conn); + conn_pos=conn->pos; + end_pos=conn->end_pos; + if(conn->next_pos>=0){ + OP_ASSERT(end_pos>=0); + OP_ASSERT(conn->next_pos==end_pos); + end_pos=conn->next_end; + } + OP_ASSERT(end_pos<0||conn_pos<=end_pos); + /*Can we quickly read ahead without issuing a new request?*/ + just_read_ahead=conn_pos<=pos&&pos-conn_pos-available<=read_ahead_thresh + &&(end_pos<0||pos<end_pos); + if(just_read_ahead||pipeline&&end_pos>=0 + &&end_pos-conn_pos-available<=read_ahead_thresh){ + /*Found a suitable connection to re-use.*/ + ret=op_http_conn_read_ahead(stream,conn,just_read_ahead,pos); + if(OP_UNLIKELY(ret<0)){ + /*The connection might have become stale, so close it and keep going.*/ + op_http_conn_close(stream,conn,pnext,1); + conn=*pnext; + continue; + } + /*Sucessfully resurrected this connection.*/ + *pnext=conn->next; + conn->next=stream->lru_head; + stream->lru_head=conn; + stream->cur_conni=conn-stream->conns; + return 0; + } + close_pnext=pnext; + close_conn=conn; + pnext=&conn->next; + conn=conn->next; + } + /*No suitable connections. + Open a new one.*/ + if(stream->free_head==NULL){ + /*All connections in use. + Expire one of them (we should have already picked which one when scanning + the list).*/ + OP_ASSERT(close_conn!=NULL); + OP_ASSERT(close_pnext!=NULL); + op_http_conn_close(stream,close_conn,close_pnext,1); + } + OP_ASSERT(stream->free_head!=NULL); + conn=stream->free_head; + /*If we can pipeline, only request a chunk of data. + If we're seeking now, there's a good chance we will want to seek again + soon, and this avoids committing this connection to reading the rest of + the stream. 
+ Particularly with SSL or proxies, issuing a new request on the same + connection can be substantially faster than opening a new one. + This also limits the amount of data the server will blast at us on this + connection if we later seek elsewhere and start reading from a different + connection.*/ + ret=op_http_conn_open_pos(stream,conn,pos, + pipeline?OP_PIPELINE_CHUNK_SIZE:-1); + if(OP_UNLIKELY(ret<0)){ + op_http_conn_close(stream,conn,&stream->lru_head,1); + return -1; + } + return 0; +} + +static opus_int64 op_http_stream_tell(void *_stream){ + OpusHTTPStream *stream; + int ci; + stream=(OpusHTTPStream *)_stream; + ci=stream->cur_conni; + return ci<0?stream->pos:stream->conns[ci].pos; +} + +static int op_http_stream_close(void *_stream){ + OpusHTTPStream *stream; + stream=(OpusHTTPStream *)_stream; + if(OP_LIKELY(stream!=NULL)){ + op_http_stream_clear(stream); + _ogg_free(stream); + } + return 0; +} + +static const OpusFileCallbacks OP_HTTP_CALLBACKS={ + op_http_stream_read, + op_http_stream_seek, + op_http_stream_tell, + op_http_stream_close +}; +#endif + +void opus_server_info_init(OpusServerInfo *_info){ + _info->name=NULL; + _info->description=NULL; + _info->genre=NULL; + _info->url=NULL; + _info->server=NULL; + _info->content_type=NULL; + _info->bitrate_kbps=-1; + _info->is_public=-1; + _info->is_ssl=0; +} + +void opus_server_info_clear(OpusServerInfo *_info){ + _ogg_free(_info->content_type); + _ogg_free(_info->server); + _ogg_free(_info->url); + _ogg_free(_info->genre); + _ogg_free(_info->description); + _ogg_free(_info->name); +} + +/*The actual URL stream creation function. 
+ This one isn't extensible like the application-level interface, but because + it isn't public, we're free to change it in the future.*/ +static void *op_url_stream_create_impl(OpusFileCallbacks *_cb,const char *_url, + int _skip_certificate_check,const char *_proxy_host,unsigned _proxy_port, + const char *_proxy_user,const char *_proxy_pass,OpusServerInfo *_info){ + const char *path; + /*Check to see if this is a valid file: URL.*/ + path=op_parse_file_url(_url); + if(path!=NULL){ + char *unescaped_path; + void *ret; + unescaped_path=op_string_dup(path); + if(OP_UNLIKELY(unescaped_path==NULL))return NULL; + ret=op_fopen(_cb,op_unescape_url_component(unescaped_path),"rb"); + _ogg_free(unescaped_path); + return ret; + } +#if defined(OP_ENABLE_HTTP) + /*If not, try http/https.*/ + else{ + OpusHTTPStream *stream; + int ret; + stream=(OpusHTTPStream *)_ogg_malloc(sizeof(*stream)); + if(OP_UNLIKELY(stream==NULL))return NULL; + op_http_stream_init(stream); + ret=op_http_stream_open(stream,_url,_skip_certificate_check, + _proxy_host,_proxy_port,_proxy_user,_proxy_pass,_info); + if(OP_UNLIKELY(ret<0)){ + op_http_stream_clear(stream); + _ogg_free(stream); + return NULL; + } + *_cb=*&OP_HTTP_CALLBACKS; + return stream; + } +#else + (void)_skip_certificate_check; + (void)_proxy_host; + (void)_proxy_port; + (void)_proxy_user; + (void)_proxy_pass; + (void)_info; + return NULL; +#endif +} + +void *op_url_stream_vcreate(OpusFileCallbacks *_cb, + const char *_url,va_list _ap){ + int skip_certificate_check; + const char *proxy_host; + opus_int32 proxy_port; + const char *proxy_user; + const char *proxy_pass; + OpusServerInfo *pinfo; + skip_certificate_check=0; + proxy_host=NULL; + proxy_port=8080; + proxy_user=NULL; + proxy_pass=NULL; + pinfo=NULL; + for(;;){ + ptrdiff_t request; + request=va_arg(_ap,char *)-(char *)NULL; + /*If we hit NULL, we're done processing options.*/ + if(!request)break; + switch(request){ + case OP_SSL_SKIP_CERTIFICATE_CHECK_REQUEST:{ + 
skip_certificate_check=!!va_arg(_ap,opus_int32); + }break; + case OP_HTTP_PROXY_HOST_REQUEST:{ + proxy_host=va_arg(_ap,const char *); + }break; + case OP_HTTP_PROXY_PORT_REQUEST:{ + proxy_port=va_arg(_ap,opus_int32); + if(proxy_port<0||proxy_port>(opus_int32)65535)return NULL; + }break; + case OP_HTTP_PROXY_USER_REQUEST:{ + proxy_user=va_arg(_ap,const char *); + }break; + case OP_HTTP_PROXY_PASS_REQUEST:{ + proxy_pass=va_arg(_ap,const char *); + }break; + case OP_GET_SERVER_INFO_REQUEST:{ + pinfo=va_arg(_ap,OpusServerInfo *); + }break; + /*Some unknown option.*/ + default:return NULL; + } + } + /*If the caller has requested server information, proxy it to a local copy to + simplify error handling.*/ + if(pinfo!=NULL){ + OpusServerInfo info; + void *ret; + opus_server_info_init(&info); + ret=op_url_stream_create_impl(_cb,_url,skip_certificate_check, + proxy_host,proxy_port,proxy_user,proxy_pass,&info); + if(ret!=NULL)*pinfo=*&info; + else opus_server_info_clear(&info); + return ret; + } + return op_url_stream_create_impl(_cb,_url,skip_certificate_check, + proxy_host,proxy_port,proxy_user,proxy_pass,NULL); +} + +void *op_url_stream_create(OpusFileCallbacks *_cb, + const char *_url,...){ + va_list ap; + void *ret; + va_start(ap,_url); + ret=op_url_stream_vcreate(_cb,_url,ap); + va_end(ap); + return ret; +} + +/*Convenience routines to open/test URLs in a single step.*/ + +OggOpusFile *op_vopen_url(const char *_url,int *_error,va_list _ap){ + OpusFileCallbacks cb; + OggOpusFile *of; + void *source; + source=op_url_stream_vcreate(&cb,_url,_ap); + if(OP_UNLIKELY(source==NULL)){ + if(_error!=NULL)*_error=OP_EFAULT; + return NULL; + } + of=op_open_callbacks(source,&cb,NULL,0,_error); + if(OP_UNLIKELY(of==NULL))(*cb.close)(source); + return of; +} + +OggOpusFile *op_open_url(const char *_url,int *_error,...){ + OggOpusFile *ret; + va_list ap; + va_start(ap,_error); + ret=op_vopen_url(_url,_error,ap); + va_end(ap); + return ret; +} + +OggOpusFile *op_vtest_url(const char 
*_url,int *_error,va_list _ap){ + OpusFileCallbacks cb; + OggOpusFile *of; + void *source; + source=op_url_stream_vcreate(&cb,_url,_ap); + if(OP_UNLIKELY(source==NULL)){ + if(_error!=NULL)*_error=OP_EFAULT; + return NULL; + } + of=op_test_callbacks(source,&cb,NULL,0,_error); + if(OP_UNLIKELY(of==NULL))(*cb.close)(source); + return of; +} + +OggOpusFile *op_test_url(const char *_url,int *_error,...){ + OggOpusFile *ret; + va_list ap; + va_start(ap,_error); + ret=op_vtest_url(_url,_error,ap); + va_end(ap); + return ret; +} diff --git a/drivers/opus/info.c b/drivers/opus/info.c new file mode 100644 index 0000000000..f5ad2110be --- /dev/null +++ b/drivers/opus/info.c @@ -0,0 +1,687 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE libopusfile SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* + * * + * THE libopusfile SOURCE CODE IS (C) COPYRIGHT 2012 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ********************************************************************/ +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "internal.h" +#include <limits.h> +#include <string.h> + +static unsigned op_parse_uint16le(const unsigned char *_data){ + return _data[0]|_data[1]<<8; +} + +static int op_parse_int16le(const unsigned char *_data){ + int ret; + ret=_data[0]|_data[1]<<8; + return (ret^0x8000)-0x8000; +} + +static opus_uint32 op_parse_uint32le(const unsigned char *_data){ + return _data[0]|(opus_uint32)_data[1]<<8| + (opus_uint32)_data[2]<<16|(opus_uint32)_data[3]<<24; +} + +static opus_uint32 op_parse_uint32be(const unsigned char *_data){ + return _data[3]|(opus_uint32)_data[2]<<8| + (opus_uint32)_data[1]<<16|(opus_uint32)_data[0]<<24; +} + +int opus_head_parse(OpusHead *_head,const unsigned char *_data,size_t _len){ + OpusHead head; + if(_len<8)return OP_ENOTFORMAT; + if(memcmp(_data,"OpusHead",8)!=0)return OP_ENOTFORMAT; + if(_len<9)return OP_EBADHEADER; + head.version=_data[8]; + if(head.version>15)return OP_EVERSION; + if(_len<19)return OP_EBADHEADER; + head.channel_count=_data[9]; + head.pre_skip=op_parse_uint16le(_data+10); + head.input_sample_rate=op_parse_uint32le(_data+12); + head.output_gain=op_parse_int16le(_data+16); + head.mapping_family=_data[18]; + if(head.mapping_family==0){ + if(head.channel_count<1||head.channel_count>2)return OP_EBADHEADER; + if(head.version<=1&&_len>19)return OP_EBADHEADER; + head.stream_count=1; + head.coupled_count=head.channel_count-1; + if(_head!=NULL){ + _head->mapping[0]=0; + _head->mapping[1]=1; + } + } + else if(head.mapping_family==1){ + size_t size; + int ci; + if(head.channel_count<1||head.channel_count>8)return OP_EBADHEADER; + size=21+head.channel_count; + if(_len<size||head.version<=1&&_len>size)return OP_EBADHEADER; + head.stream_count=_data[19]; + 
if(head.stream_count<1)return OP_EBADHEADER; + head.coupled_count=_data[20]; + if(head.coupled_count>head.stream_count)return OP_EBADHEADER; + for(ci=0;ci<head.channel_count;ci++){ + if(_data[21+ci]>=head.stream_count+head.coupled_count + &&_data[21+ci]!=255){ + return OP_EBADHEADER; + } + } + if(_head!=NULL)memcpy(_head->mapping,_data+21,head.channel_count); + } + /*General purpose players should not attempt to play back content with + channel mapping family 255.*/ + else if(head.mapping_family==255)return OP_EIMPL; + /*No other channel mapping families are currently defined.*/ + else return OP_EBADHEADER; + if(_head!=NULL)memcpy(_head,&head,head.mapping-(unsigned char *)&head); + return 0; +} + +void opus_tags_init(OpusTags *_tags){ + memset(_tags,0,sizeof(*_tags)); +} + +void opus_tags_clear(OpusTags *_tags){ + int ci; + for(ci=_tags->comments;ci-->0;)_ogg_free(_tags->user_comments[ci]); + _ogg_free(_tags->user_comments); + _ogg_free(_tags->comment_lengths); + _ogg_free(_tags->vendor); +} + +/*Ensure there's room for up to _ncomments comments.*/ +static int op_tags_ensure_capacity(OpusTags *_tags,size_t _ncomments){ + char **user_comments; + int *comment_lengths; + size_t size; + if(OP_UNLIKELY(_ncomments>=(size_t)INT_MAX))return OP_EFAULT; + size=sizeof(*_tags->comment_lengths)*(_ncomments+1); + if(size/sizeof(*_tags->comment_lengths)!=_ncomments+1)return OP_EFAULT; + comment_lengths=(int *)_ogg_realloc(_tags->comment_lengths,size); + if(OP_UNLIKELY(comment_lengths==NULL))return OP_EFAULT; + comment_lengths[_ncomments]=0; + _tags->comment_lengths=comment_lengths; + size=sizeof(*_tags->user_comments)*(_ncomments+1); + if(size/sizeof(*_tags->user_comments)!=_ncomments+1)return OP_EFAULT; + user_comments=(char **)_ogg_realloc(_tags->user_comments,size); + if(OP_UNLIKELY(user_comments==NULL))return OP_EFAULT; + user_comments[_ncomments]=NULL; + _tags->user_comments=user_comments; + return 0; +} + +/*Duplicate a (possibly non-NUL terminated) string with a known 
length.*/ +static char *op_strdup_with_len(const char *_s,size_t _len){ + size_t size; + char *ret; + size=sizeof(*ret)*(_len+1); + if(OP_UNLIKELY(size<_len))return NULL; + ret=(char *)_ogg_malloc(size); + if(OP_LIKELY(ret!=NULL)){ + ret=(char *)memcpy(ret,_s,sizeof(*ret)*_len); + ret[_len]='\0'; + } + return ret; +} + +/*The actual implementation of opus_tags_parse(). + Unlike the public API, this function requires _tags to already be + initialized, modifies its contents before success is guaranteed, and assumes + the caller will clear it on error.*/ +static int opus_tags_parse_impl(OpusTags *_tags, + const unsigned char *_data,size_t _len){ + opus_uint32 count; + size_t len; + int ncomments; + int ci; + len=_len; + if(len<8)return OP_ENOTFORMAT; + if(memcmp(_data,"OpusTags",8)!=0)return OP_ENOTFORMAT; + if(len<16)return OP_EBADHEADER; + _data+=8; + len-=8; + count=op_parse_uint32le(_data); + _data+=4; + len-=4; + if(count>len)return OP_EBADHEADER; + if(_tags!=NULL){ + _tags->vendor=op_strdup_with_len((char *)_data,count); + if(_tags->vendor==NULL)return OP_EFAULT; + } + _data+=count; + len-=count; + if(len<4)return OP_EBADHEADER; + count=op_parse_uint32le(_data); + _data+=4; + len-=4; + /*Check to make sure there's minimally sufficient data left in the packet.*/ + if(count>len>>2)return OP_EBADHEADER; + /*Check for overflow (the API limits this to an int).*/ + if(count>(opus_uint32)INT_MAX-1)return OP_EFAULT; + if(_tags!=NULL){ + int ret; + ret=op_tags_ensure_capacity(_tags,count); + if(ret<0)return ret; + } + ncomments=(int)count; + for(ci=0;ci<ncomments;ci++){ + /*Check to make sure there's minimally sufficient data left in the packet.*/ + if((size_t)(ncomments-ci)>len>>2)return OP_EBADHEADER; + count=op_parse_uint32le(_data); + _data+=4; + len-=4; + if(count>len)return OP_EBADHEADER; + /*Check for overflow (the API limits this to an int).*/ + if(count>(opus_uint32)INT_MAX)return OP_EFAULT; + if(_tags!=NULL){ + _tags->user_comments[ci]=op_strdup_with_len((char 
*)_data,count); + if(_tags->user_comments[ci]==NULL)return OP_EFAULT; + _tags->comment_lengths[ci]=(int)count; + _tags->comments=ci+1; + } + _data+=count; + len-=count; + } + return 0; +} + +int opus_tags_parse(OpusTags *_tags,const unsigned char *_data,size_t _len){ + if(_tags!=NULL){ + OpusTags tags; + int ret; + opus_tags_init(&tags); + ret=opus_tags_parse_impl(&tags,_data,_len); + if(ret<0)opus_tags_clear(&tags); + else *_tags=*&tags; + return ret; + } + else return opus_tags_parse_impl(NULL,_data,_len); +} + +/*The actual implementation of opus_tags_copy(). + Unlike the public API, this function requires _dst to already be + initialized, modifies its contents before success is guaranteed, and assumes + the caller will clear it on error.*/ +static int opus_tags_copy_impl(OpusTags *_dst,const OpusTags *_src){ + char *vendor; + int ncomments; + int ret; + int ci; + vendor=_src->vendor; + _dst->vendor=op_strdup_with_len(vendor,strlen(vendor)); + if(OP_UNLIKELY(_dst->vendor==NULL))return OP_EFAULT; + ncomments=_src->comments; + ret=op_tags_ensure_capacity(_dst,ncomments); + if(OP_UNLIKELY(ret<0))return ret; + for(ci=0;ci<ncomments;ci++){ + int len; + len=_src->comment_lengths[ci]; + OP_ASSERT(len>=0); + _dst->user_comments[ci]=op_strdup_with_len(_src->user_comments[ci],len); + if(OP_UNLIKELY(_dst->user_comments[ci]==NULL))return OP_EFAULT; + _dst->comment_lengths[ci]=len; + _dst->comments=ci+1; + } + return 0; +} + +int opus_tags_copy(OpusTags *_dst,const OpusTags *_src){ + OpusTags dst; + int ret; + opus_tags_init(&dst); + ret=opus_tags_copy_impl(&dst,_src); + if(OP_UNLIKELY(ret<0))opus_tags_clear(&dst); + else *_dst=*&dst; + return 0; +} + +int opus_tags_add(OpusTags *_tags,const char *_tag,const char *_value){ + char *comment; + int tag_len; + int value_len; + int ncomments; + int ret; + ncomments=_tags->comments; + ret=op_tags_ensure_capacity(_tags,ncomments+1); + if(OP_UNLIKELY(ret<0))return ret; + tag_len=strlen(_tag); + value_len=strlen(_value); + /*+2 for 
'=' and '\0'.*/ + _tags->comment_lengths[ncomments]=0; + _tags->user_comments[ncomments]=comment= + (char *)_ogg_malloc(sizeof(*comment)*(tag_len+value_len+2)); + if(OP_UNLIKELY(comment==NULL))return OP_EFAULT; + memcpy(comment,_tag,sizeof(*comment)*tag_len); + comment[tag_len]='='; + memcpy(comment+tag_len+1,_value,sizeof(*comment)*(value_len+1)); + _tags->comment_lengths[ncomments]=tag_len+value_len+1; + _tags->comments=ncomments+1; + return 0; +} + +int opus_tags_add_comment(OpusTags *_tags,const char *_comment){ + int comment_len; + int ncomments; + int ret; + ncomments=_tags->comments; + ret=op_tags_ensure_capacity(_tags,ncomments+1); + if(OP_UNLIKELY(ret<0))return ret; + comment_len=(int)strlen(_comment); + _tags->comment_lengths[ncomments]=0; + _tags->user_comments[ncomments]=op_strdup_with_len(_comment,comment_len); + if(OP_UNLIKELY(_tags->user_comments[ncomments]==NULL))return OP_EFAULT; + _tags->comment_lengths[ncomments]=comment_len; + _tags->comments=ncomments+1; + return 0; +} + +int opus_tagcompare(const char *_tag_name,const char *_comment){ + return opus_tagncompare(_tag_name,strlen(_tag_name),_comment); +} + +int opus_tagncompare(const char *_tag_name,int _tag_len,const char *_comment){ + int ret; + OP_ASSERT(_tag_len>=0); + ret=op_strncasecmp(_tag_name,_comment,_tag_len); + return ret?ret:'='-_comment[_tag_len]; +} + +const char *opus_tags_query(const OpusTags *_tags,const char *_tag,int _count){ + char **user_comments; + int tag_len; + int found; + int ncomments; + int ci; + tag_len=strlen(_tag); + ncomments=_tags->comments; + user_comments=_tags->user_comments; + found=0; + for(ci=0;ci<ncomments;ci++){ + if(!opus_tagncompare(_tag,tag_len,user_comments[ci])){ + /*We return a pointer to the data, not a copy.*/ + if(_count==found++)return user_comments[ci]+tag_len+1; + } + } + /*Didn't find anything.*/ + return NULL; +} + +int opus_tags_query_count(const OpusTags *_tags,const char *_tag){ + char **user_comments; + int tag_len; + int found; + int 
ncomments; + int ci; + tag_len=strlen(_tag); + ncomments=_tags->comments; + user_comments=_tags->user_comments; + found=0; + for(ci=0;ci<ncomments;ci++){ + if(!opus_tagncompare(_tag,tag_len,user_comments[ci]))found++; + } + return found; +} + +int opus_tags_get_track_gain(const OpusTags *_tags,int *_gain_q8){ + char **comments; + int ncomments; + int ci; + comments=_tags->user_comments; + ncomments=_tags->comments; + /*Look for the first valid R128_TRACK_GAIN tag and use that.*/ + for(ci=0;ci<ncomments;ci++){ + if(opus_tagncompare("R128_TRACK_GAIN",15,comments[ci])==0){ + char *p; + opus_int32 gain_q8; + int negative; + p=comments[ci]+16; + negative=0; + if(*p=='-'){ + negative=-1; + p++; + } + else if(*p=='+')p++; + gain_q8=0; + while(*p>='0'&&*p<='9'){ + gain_q8=10*gain_q8+*p-'0'; + if(gain_q8>32767-negative)break; + p++; + } + /*This didn't look like a signed 16-bit decimal integer. + Not a valid R128_TRACK_GAIN tag.*/ + if(*p!='\0')continue; + *_gain_q8=(int)(gain_q8+negative^negative); + return 0; + } + } + return OP_FALSE; +} + +static int op_is_jpeg(const unsigned char *_buf,size_t _buf_sz){ + return _buf_sz>=11&&memcmp(_buf,"\xFF\xD8\xFF\xE0",4)==0 + &&(_buf[4]<<8|_buf[5])>=16&&memcmp(_buf+6,"JFIF",5)==0; +} + +/*Tries to extract the width, height, bits per pixel, and palette size of a + JPEG. 
+ On failure, simply leaves its outputs unmodified.*/ +static void op_extract_jpeg_params(const unsigned char *_buf,size_t _buf_sz, + opus_uint32 *_width,opus_uint32 *_height, + opus_uint32 *_depth,opus_uint32 *_colors,int *_has_palette){ + if(op_is_jpeg(_buf,_buf_sz)){ + size_t offs; + offs=2; + for(;;){ + size_t segment_len; + int marker; + /*Skip ahead to the next marker: any fill bytes, then the run of 0xFF.*/ + while(offs<_buf_sz&&_buf[offs]!=0xFF)offs++; + while(offs<_buf_sz&&_buf[offs]==0xFF)offs++; + /*We ran off the end of the data while looking for a marker. + Stop here instead of reading the marker byte from past the end of the + buffer.*/ + if(offs>=_buf_sz)break; + marker=_buf[offs]; + offs++; + /*If we hit EOI* (end of image), or another SOI* (start of image), + or SOS (start of scan), then stop now.*/ + if(offs>=_buf_sz||(marker>=0xD8&&marker<=0xDA))break; + /*RST* (restart markers): skip (no segment length).*/ + else if(marker>=0xD0&&marker<=0xD7)continue; + /*Read the length of the marker segment.*/ + if(_buf_sz-offs<2)break; + segment_len=_buf[offs]<<8|_buf[offs+1]; + if(segment_len<2||_buf_sz-offs<segment_len)break; + if(marker==0xC0||(marker>0xC0&&marker<0xD0&&(marker&3)!=0)){ + /*Found a SOFn (start of frame) marker segment:*/ + if(segment_len>=8){ + *_height=_buf[offs+3]<<8|_buf[offs+4]; + *_width=_buf[offs+5]<<8|_buf[offs+6]; + *_depth=_buf[offs+2]*_buf[offs+7]; + *_colors=0; + *_has_palette=0; + } + break; + } + /*Other markers: skip the whole marker segment.*/ + offs+=segment_len; + } + } +} + +static int op_is_png(const unsigned char *_buf,size_t _buf_sz){ + return _buf_sz>=8&&memcmp(_buf,"\x89PNG\x0D\x0A\x1A\x0A",8)==0; +} + +/*Tries to extract the width, height, bits per pixel, and palette size of a + PNG. 
+ On failure, simply leaves its outputs unmodified.*/ +static void op_extract_png_params(const unsigned char *_buf,size_t _buf_sz, + opus_uint32 *_width,opus_uint32 *_height, + opus_uint32 *_depth,opus_uint32 *_colors,int *_has_palette){ + if(op_is_png(_buf,_buf_sz)){ + size_t offs; + offs=8; + while(_buf_sz-offs>=12){ + ogg_uint32_t chunk_len; + chunk_len=op_parse_uint32be(_buf+offs); + if(chunk_len>_buf_sz-(offs+12))break; + else if(chunk_len==13&&memcmp(_buf+offs+4,"IHDR",4)==0){ + int color_type; + *_width=op_parse_uint32be(_buf+offs+8); + *_height=op_parse_uint32be(_buf+offs+12); + color_type=_buf[offs+17]; + if(color_type==3){ + *_depth=24; + *_has_palette=1; + } + else{ + int sample_depth; + sample_depth=_buf[offs+16]; + if(color_type==0)*_depth=sample_depth; + else if(color_type==2)*_depth=sample_depth*3; + else if(color_type==4)*_depth=sample_depth*2; + else if(color_type==6)*_depth=sample_depth*4; + *_colors=0; + *_has_palette=0; + break; + } + } + else if(*_has_palette>0&&memcmp(_buf+offs+4,"PLTE",4)==0){ + *_colors=chunk_len/3; + break; + } + offs+=12+chunk_len; + } + } +} + +static int op_is_gif(const unsigned char *_buf,size_t _buf_sz){ + return _buf_sz>=6&&(memcmp(_buf,"GIF87a",6)==0||memcmp(_buf,"GIF89a",6)==0); +} + +/*Tries to extract the width, height, bits per pixel, and palette size of a + GIF. + On failure, simply leaves its outputs unmodified.*/ +static void op_extract_gif_params(const unsigned char *_buf,size_t _buf_sz, + opus_uint32 *_width,opus_uint32 *_height, + opus_uint32 *_depth,opus_uint32 *_colors,int *_has_palette){ + if(op_is_gif(_buf,_buf_sz)&&_buf_sz>=14){ + *_width=_buf[6]|_buf[7]<<8; + *_height=_buf[8]|_buf[9]<<8; + /*libFLAC hard-codes the depth to 24.*/ + *_depth=24; + *_colors=1<<((_buf[10]&7)+1); + *_has_palette=1; + } +} + +/*The actual implementation of opus_picture_tag_parse(). 
+ Unlike the public API, this function requires _pic to already be + initialized, modifies its contents before success is guaranteed, and assumes + the caller will clear it on error.*/ +static int opus_picture_tag_parse_impl(OpusPictureTag *_pic,const char *_tag, + unsigned char *_buf,size_t _buf_sz,size_t _base64_sz){ + opus_int32 picture_type; + opus_uint32 mime_type_length; + char *mime_type; + opus_uint32 description_length; + char *description; + opus_uint32 width; + opus_uint32 height; + opus_uint32 depth; + opus_uint32 colors; + opus_uint32 data_length; + opus_uint32 file_width; + opus_uint32 file_height; + opus_uint32 file_depth; + opus_uint32 file_colors; + int format; + int has_palette; + int colors_set; + size_t i; + /*Decode the BASE64 data.*/ + for(i=0;i<_base64_sz;i++){ + opus_uint32 value; + int j; + value=0; + for(j=0;j<4;j++){ + unsigned c; + unsigned d; + c=(unsigned char)_tag[4*i+j]; + if(c=='+')d=62; + else if(c=='/')d=63; + else if(c>='0'&&c<='9')d=52+c-'0'; + else if(c>='a'&&c<='z')d=26+c-'a'; + else if(c>='A'&&c<='Z')d=c-'A'; + else if(c=='='&&3*i+j>_buf_sz)d=0; + else return OP_ENOTFORMAT; + value=value<<6|d; + } + _buf[3*i]=(unsigned char)(value>>16); + if(3*i+1<_buf_sz){ + _buf[3*i+1]=(unsigned char)(value>>8); + if(3*i+2<_buf_sz)_buf[3*i+2]=(unsigned char)value; + } + } + i=0; + picture_type=op_parse_uint32be(_buf+i); + i+=4; + /*Extract the MIME type.*/ + mime_type_length=op_parse_uint32be(_buf+i); + i+=4; + if(mime_type_length>_buf_sz-32)return OP_ENOTFORMAT; + mime_type=(char *)_ogg_malloc(sizeof(*_pic->mime_type)*(mime_type_length+1)); + if(mime_type==NULL)return OP_EFAULT; + memcpy(mime_type,_buf+i,sizeof(*mime_type)*mime_type_length); + mime_type[mime_type_length]='\0'; + _pic->mime_type=mime_type; + i+=mime_type_length; + /*Extract the description string.*/ + description_length=op_parse_uint32be(_buf+i); + i+=4; + if(description_length>_buf_sz-mime_type_length-32)return OP_ENOTFORMAT; + description= + (char 
*)_ogg_malloc(sizeof(*_pic->mime_type)*(description_length+1)); + if(description==NULL)return OP_EFAULT; + memcpy(description,_buf+i,sizeof(*description)*description_length); + description[description_length]='\0'; + _pic->description=description; + i+=description_length; + /*Extract the remaining fields.*/ + width=op_parse_uint32be(_buf+i); + i+=4; + height=op_parse_uint32be(_buf+i); + i+=4; + depth=op_parse_uint32be(_buf+i); + i+=4; + colors=op_parse_uint32be(_buf+i); + i+=4; + /*If one of these is set, they all must be, but colors==0 is a valid value.*/ + colors_set=width!=0||height!=0||depth!=0||colors!=0; + if((width==0||height==0||depth==0)&&colors_set)return OP_ENOTFORMAT; + data_length=op_parse_uint32be(_buf+i); + i+=4; + if(data_length>_buf_sz-i)return OP_ENOTFORMAT; + /*Trim extraneous data so we don't copy it below.*/ + _buf_sz=i+data_length; + /*Attempt to determine the image format.*/ + format=OP_PIC_FORMAT_UNKNOWN; + if(mime_type_length==3&&strcmp(mime_type,"-->")==0){ + format=OP_PIC_FORMAT_URL; + /*Picture type 1 must be a 32x32 PNG.*/ + if(picture_type==1&&(width!=0||height!=0)&&(width!=32||height!=32)){ + return OP_ENOTFORMAT; + } + /*Append a terminating NUL for the convenience of our callers.*/ + _buf[_buf_sz++]='\0'; + } + else{ + if(mime_type_length==10 + &&op_strncasecmp(mime_type,"image/jpeg",mime_type_length)==0){ + if(op_is_jpeg(_buf+i,data_length))format=OP_PIC_FORMAT_JPEG; + } + else if(mime_type_length==9 + &&op_strncasecmp(mime_type,"image/png",mime_type_length)==0){ + if(op_is_png(_buf+i,data_length))format=OP_PIC_FORMAT_PNG; + } + else if(mime_type_length==9 + &&op_strncasecmp(mime_type,"image/gif",mime_type_length)==0){ + if(op_is_gif(_buf+i,data_length))format=OP_PIC_FORMAT_GIF; + } + else if(mime_type_length==0||(mime_type_length==6 + &&op_strncasecmp(mime_type,"image/",mime_type_length)==0)){ + if(op_is_jpeg(_buf+i,data_length))format=OP_PIC_FORMAT_JPEG; + else if(op_is_png(_buf+i,data_length))format=OP_PIC_FORMAT_PNG; + else 
if(op_is_gif(_buf+i,data_length))format=OP_PIC_FORMAT_GIF; + } + file_width=file_height=file_depth=file_colors=0; + has_palette=-1; + switch(format){ + case OP_PIC_FORMAT_JPEG:{ + op_extract_jpeg_params(_buf+i,data_length, + &file_width,&file_height,&file_depth,&file_colors,&has_palette); + }break; + case OP_PIC_FORMAT_PNG:{ + op_extract_png_params(_buf+i,data_length, + &file_width,&file_height,&file_depth,&file_colors,&has_palette); + }break; + case OP_PIC_FORMAT_GIF:{ + op_extract_gif_params(_buf+i,data_length, + &file_width,&file_height,&file_depth,&file_colors,&has_palette); + }break; + } + if(has_palette>=0){ + /*If we successfully extracted these parameters from the image, override + any declared values.*/ + width=file_width; + height=file_height; + depth=file_depth; + colors=file_colors; + } + /*Picture type 1 must be a 32x32 PNG.*/ + if(picture_type==1&&(format!=OP_PIC_FORMAT_PNG||width!=32||height!=32)){ + return OP_ENOTFORMAT; + } + } + /*Adjust _buf_sz instead of using data_length to capture the terminating NUL + for URLs.*/ + _buf_sz-=i; + memmove(_buf,_buf+i,sizeof(*_buf)*_buf_sz); + _buf=(unsigned char *)_ogg_realloc(_buf,_buf_sz); + if(_buf_sz>0&&_buf==NULL)return OP_EFAULT; + _pic->type=picture_type; + _pic->width=width; + _pic->height=height; + _pic->depth=depth; + _pic->colors=colors; + _pic->data_length=data_length; + _pic->data=_buf; + _pic->format=format; + return 0; +} + +int opus_picture_tag_parse(OpusPictureTag *_pic,const char *_tag){ + OpusPictureTag pic; + unsigned char *buf; + size_t base64_sz; + size_t buf_sz; + size_t tag_length; + int ret; + if(opus_tagncompare("METADATA_BLOCK_PICTURE",22,_tag)==0)_tag+=23; + /*Figure out how much BASE64-encoded data we have.*/ + tag_length=strlen(_tag); + if(tag_length&3)return OP_ENOTFORMAT; + base64_sz=tag_length>>2; + buf_sz=3*base64_sz; + if(buf_sz<32)return OP_ENOTFORMAT; + if(_tag[tag_length-1]=='=')buf_sz--; + if(_tag[tag_length-2]=='=')buf_sz--; + if(buf_sz<32)return OP_ENOTFORMAT; + 
/*Allocate an extra byte to allow appending a terminating NUL to URL data.*/ + buf=(unsigned char *)_ogg_malloc(sizeof(*buf)*(buf_sz+1)); + if(buf==NULL)return OP_EFAULT; + opus_picture_tag_init(&pic); + ret=opus_picture_tag_parse_impl(&pic,_tag,buf,buf_sz,base64_sz); + if(ret<0){ + opus_picture_tag_clear(&pic); + _ogg_free(buf); + } + else *_pic=*&pic; + return ret; +} + +void opus_picture_tag_init(OpusPictureTag *_pic){ + memset(_pic,0,sizeof(*_pic)); +} + +void opus_picture_tag_clear(OpusPictureTag *_pic){ + _ogg_free(_pic->description); + _ogg_free(_pic->mime_type); + _ogg_free(_pic->data); +} diff --git a/drivers/opus/internal.c b/drivers/opus/internal.c new file mode 100644 index 0000000000..a9c3671179 --- /dev/null +++ b/drivers/opus/internal.c @@ -0,0 +1,42 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE libopusfile SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* + * * + * THE libopusfile SOURCE CODE IS (C) COPYRIGHT 2012 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ********************************************************************/ +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "internal.h" + +#if defined(OP_ENABLE_ASSERTIONS) +void op_fatal_impl(const char *_str,const char *_file,int _line){ + fprintf(stderr,"Fatal (internal) error in %s, line %i: %s\n", + _file,_line,_str); + abort(); +} +#endif + +/*A version of strncasecmp() that is guaranteed to only ignore the case of + ASCII characters.*/ +int op_strncasecmp(const char *_a,const char *_b,int _n){ + int i; + for(i=0;i<_n;i++){ + int a; + int b; + int d; + a=_a[i]; + b=_b[i]; + if(a>='a'&&a<='z')a-='a'-'A'; + if(b>='a'&&b<='z')b-='a'-'A'; + d=a-b; + if(d)return d; + } + return 0; +} diff --git a/drivers/opus/internal.h b/drivers/opus/internal.h new file mode 100644 index 0000000000..cb4089fd4d --- /dev/null +++ b/drivers/opus/internal.h @@ -0,0 +1,249 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE libopusfile SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* + * * + * THE libopusfile SOURCE CODE IS (C) COPYRIGHT 2012 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ********************************************************************/ +#if !defined(_opusfile_internal_h) +# define _opusfile_internal_h (1) + +# if !defined(_REENTRANT) +# define _REENTRANT +# endif +# if !defined(_GNU_SOURCE) +# define _GNU_SOURCE +# endif +# if !defined(_LARGEFILE_SOURCE) +# define _LARGEFILE_SOURCE +# endif +# if !defined(_LARGEFILE64_SOURCE) +# define _LARGEFILE64_SOURCE +# endif +# if !defined(_FILE_OFFSET_BITS) +# define _FILE_OFFSET_BITS 64 +# endif + +# include <stdlib.h> +# include <opus/opusfile.h> + +typedef struct OggOpusLink OggOpusLink; + +# if defined(OPUS_FIXED_POINT) + +typedef opus_int16 op_sample; + +# else + +typedef float op_sample; + +/*We're using this define to test for libopus 1.1 or later until libopus + provides a better mechanism.*/ +# if defined(OPUS_GET_EXPERT_FRAME_DURATION_REQUEST) +/*Enable soft clipping prevention in 16-bit decodes.*/ +# define OP_SOFT_CLIP (1) +# endif + +# endif + +# if OP_GNUC_PREREQ(4,2) +/*Disable excessive warnings about the order of operations.*/ +# pragma GCC diagnostic ignored "-Wparentheses" +# elif defined(_MSC_VER) +/*Disable excessive warnings about the order of operations.*/ +# pragma warning(disable:4554) +/*Disable warnings about "deprecated" POSIX functions.*/ +# pragma warning(disable:4996) +# endif + +# if OP_GNUC_PREREQ(3,0) +/*Another alternative is + (__builtin_constant_p(_x)?!!(_x):__builtin_expect(!!(_x),1)) + but that evaluates _x multiple times, which may be bad.*/ +# define OP_LIKELY(_x) (__builtin_expect(!!(_x),1)) +# define OP_UNLIKELY(_x) (__builtin_expect(!!(_x),0)) +# else +# define OP_LIKELY(_x) (!!(_x)) +# define OP_UNLIKELY(_x) (!!(_x)) +# endif + +# if defined(OP_ENABLE_ASSERTIONS) +# if OP_GNUC_PREREQ(2,5)||__SUNPRO_C>=0x590 +__attribute__((noreturn)) +# endif +void op_fatal_impl(const char *_str,const char *_file,int 
_line); + +# define OP_FATAL(_str) (op_fatal_impl(_str,__FILE__,__LINE__)) + +# define OP_ASSERT(_cond) \ + do{ \ + if(OP_UNLIKELY(!(_cond)))OP_FATAL("assertion failed: " #_cond); \ + } \ + while(0) +# define OP_ALWAYS_TRUE(_cond) OP_ASSERT(_cond) + +# else +# define OP_FATAL(_str) abort() +# define OP_ASSERT(_cond) +# define OP_ALWAYS_TRUE(_cond) ((void)(_cond)) +# endif + +# define OP_INT64_MAX (2*(((ogg_int64_t)1<<62)-1)|1) +# define OP_INT64_MIN (-OP_INT64_MAX-1) +# define OP_INT32_MAX (2*(((ogg_int32_t)1<<30)-1)|1) +# define OP_INT32_MIN (-OP_INT32_MAX-1) + +# define OP_MIN(_a,_b) ((_a)<(_b)?(_a):(_b)) +# define OP_MAX(_a,_b) ((_a)>(_b)?(_a):(_b)) +# define OP_CLAMP(_lo,_x,_hi) (OP_MAX(_lo,OP_MIN(_x,_hi))) + +/*Advance a file offset by the given amount, clamping against OP_INT64_MAX. + This is used to advance a known offset by things like OP_CHUNK_SIZE or + OP_PAGE_SIZE_MAX, while making sure to avoid signed overflow. + It assumes that both _offset and _amount are non-negative.*/ +#define OP_ADV_OFFSET(_offset,_amount) \ + (OP_MIN(_offset,OP_INT64_MAX-(_amount))+(_amount)) + +/*The maximum channel count for any mapping we'll actually decode.*/ +# define OP_NCHANNELS_MAX (8) + +/*Initial state.*/ +# define OP_NOTOPEN (0) +/*We've found the first Opus stream in the first link.*/ +# define OP_PARTOPEN (1) +# define OP_OPENED (2) +/*We've found the first Opus stream in the current link.*/ +# define OP_STREAMSET (3) +/*We've initialized the decoder for the chosen Opus stream in the current + link.*/ +# define OP_INITSET (4) + +/*Information cached for a single link in a chained Ogg Opus file. 
+ We choose the first Opus stream encountered in each link to play back (and + require at least one).*/ +struct OggOpusLink{ + /*The byte offset of the first header page in this link.*/ + opus_int64 offset; + /*The byte offset of the first data page from the chosen Opus stream in this + link (after the headers).*/ + opus_int64 data_offset; + /*The byte offset of the last page from the chosen Opus stream in this link. + This is used when seeking to ensure we find a page before the last one, so + that end-trimming calculations work properly. + This is only valid for seekable sources.*/ + opus_int64 end_offset; + /*The granule position of the last sample. + This is only valid for seekable sources.*/ + ogg_int64_t pcm_end; + /*The granule position before the first sample.*/ + ogg_int64_t pcm_start; + /*The serial number.*/ + ogg_uint32_t serialno; + /*The contents of the info header.*/ + OpusHead head; + /*The contents of the comment header.*/ + OpusTags tags; +}; + +struct OggOpusFile{ + /*The callbacks used to access the data source.*/ + OpusFileCallbacks callbacks; + /*A FILE *, memory bufer, etc.*/ + void *source; + /*Whether or not we can seek with this data source.*/ + int seekable; + /*The number of links in this chained Ogg Opus file.*/ + int nlinks; + /*The cached information from each link in a chained Ogg Opus file. + If source isn't seekable (e.g., it's a pipe), only the current link + appears.*/ + OggOpusLink *links; + /*The number of serial numbers from a single link.*/ + int nserialnos; + /*The capacity of the list of serial numbers from a single link.*/ + int cserialnos; + /*Storage for the list of serial numbers from a single link.*/ + ogg_uint32_t *serialnos; + /*This is the current offset of the data processed by the ogg_sync_state. + After a seek, this should be set to the target offset so that we can track + the byte offsets of subsequent pages. 
+ After a call to op_get_next_page(), this will point to the first byte after + that page.*/ + opus_int64 offset; + /*The total size of this data source, or -1 if it's unseekable.*/ + opus_int64 end; + /*Used to locate pages in the data source.*/ + ogg_sync_state oy; + /*One of OP_NOTOPEN, OP_PARTOPEN, OP_OPENED, OP_STREAMSET, OP_INITSET.*/ + int ready_state; + /*The current link being played back.*/ + int cur_link; + /*The number of decoded samples to discard from the start of decoding.*/ + opus_int32 cur_discard_count; + /*The granule position of the previous packet (current packet start time).*/ + ogg_int64_t prev_packet_gp; + /*The number of bytes read since the last bitrate query, including framing.*/ + opus_int64 bytes_tracked; + /*The number of samples decoded since the last bitrate query.*/ + ogg_int64_t samples_tracked; + /*Takes physical pages and welds them into a logical stream of packets.*/ + ogg_stream_state os; + /*Re-timestamped packets from a single page. + Buffering these relies on the undocumented libogg behavior that ogg_packet + pointers remain valid until the next page is submitted to the + ogg_stream_state they came from.*/ + ogg_packet op[255]; + /*The index of the next packet to return.*/ + int op_pos; + /*The total number of packets available.*/ + int op_count; + /*Central working state for the packet-to-PCM decoder.*/ + OpusMSDecoder *od; + /*The application-provided packet decode callback.*/ + op_decode_cb_func decode_cb; + /*The application-provided packet decode callback context.*/ + void *decode_cb_ctx; + /*The stream count used to initialize the decoder.*/ + int od_stream_count; + /*The coupled stream count used to initialize the decoder.*/ + int od_coupled_count; + /*The channel count used to initialize the decoder.*/ + int od_channel_count; + /*The channel mapping used to initialize the decoder.*/ + unsigned char od_mapping[OP_NCHANNELS_MAX]; + /*The buffered data for one decoded packet.*/ + op_sample *od_buffer; + /*The current 
position in the decoded buffer.*/ + int od_buffer_pos; + /*The number of valid samples in the decoded buffer.*/ + int od_buffer_size; + /*The type of gain offset to apply. + One of OP_HEADER_GAIN, OP_TRACK_GAIN, or OP_ABSOLUTE_GAIN.*/ + int gain_type; + /*The offset to apply to the gain.*/ + opus_int32 gain_offset_q8; + /*Internal state for soft clipping and dithering float->short output.*/ +#if !defined(OPUS_FIXED_POINT) +# if defined(OP_SOFT_CLIP) + float clip_state[OP_NCHANNELS_MAX]; +# endif + float dither_a[OP_NCHANNELS_MAX*4]; + float dither_b[OP_NCHANNELS_MAX*4]; + opus_uint32 dither_seed; + int dither_mute; + int dither_disabled; + /*The number of channels represented by the internal state. + This gets set to 0 whenever anything that would prevent state propagation + occurs (switching between the float/short APIs, or between the + stereo/multistream APIs).*/ + int state_channel_count; +#endif +}; + +int op_strncasecmp(const char *_a,const char *_b,int _n); + +#endif diff --git a/drivers/opus/mlp.c b/drivers/opus/mlp.c new file mode 100644 index 0000000000..7220a23d42 --- /dev/null +++ b/drivers/opus/mlp.c @@ -0,0 +1,140 @@ +/* Copyright (c) 2008-2011 Octasic Inc. + Written by Jean-Marc Valin */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "opus_types.h" +#include "opus_defines.h" + +#include <math.h> +#include "mlp.h" +#include "arch.h" +#include "tansig_table.h" +#define MAX_NEURONS 100 + +#if 0 +static OPUS_INLINE opus_val16 tansig_approx(opus_val32 _x) /* Q19 */ +{ + int i; + opus_val16 xx; /* Q11 */ + /*double x, y;*/ + opus_val16 dy, yy; /* Q14 */ + /*x = 1.9073e-06*_x;*/ + if (_x>=QCONST32(8,19)) + return QCONST32(1.,14); + if (_x<=-QCONST32(8,19)) + return -QCONST32(1.,14); + xx = EXTRACT16(SHR32(_x, 8)); + /*i = lrint(25*x);*/ + i = SHR32(ADD32(1024,MULT16_16(25, xx)),11); + /*x -= .04*i;*/ + xx -= EXTRACT16(SHR32(MULT16_16(20972,i),8)); + /*x = xx*(1./2048);*/ + /*y = tansig_table[250+i];*/ + yy = tansig_table[250+i]; + /*y = yy*(1./16384);*/ + dy = 16384-MULT16_16_Q14(yy,yy); + yy = yy + MULT16_16_Q14(MULT16_16_Q11(xx,dy),(16384 - MULT16_16_Q11(yy,xx))); + return yy; +} +#else +/*extern const float tansig_table[501];*/ +static OPUS_INLINE float tansig_approx(float x) +{ + int i; + float y, dy; + float sign=1; + /* Tests are reversed to catch NaNs */ + if (!(x<8)) + return 1; + if (!(x>-8)) + return -1; + if (x<0) + { + x=-x; + sign=-1; + } + i = (int)floor(.5f+25*x); + x -= .04f*i; + y = tansig_table[i]; + dy = 1-y*y; + y = y + x*dy*(1 - y*x); + return sign*y; +} +#endif + +#if 0 +void mlp_process(const MLP *m, const opus_val16 *in, opus_val16 *out) +{ + int j; + opus_val16 hidden[MAX_NEURONS]; + 
const opus_val16 *W = m->weights; + /* Copy to tmp_in */ + for (j=0;j<m->topo[1];j++) + { + int k; + opus_val32 sum = SHL32(EXTEND32(*W++),8); + for (k=0;k<m->topo[0];k++) + sum = MAC16_16(sum, in[k],*W++); + hidden[j] = tansig_approx(sum); + } + for (j=0;j<m->topo[2];j++) + { + int k; + opus_val32 sum = SHL32(EXTEND32(*W++),14); + for (k=0;k<m->topo[1];k++) + sum = MAC16_16(sum, hidden[k], *W++); + out[j] = tansig_approx(EXTRACT16(PSHR32(sum,17))); + } +} +#else +void mlp_process(const MLP *m, const float *in, float *out) +{ + int j; + float hidden[MAX_NEURONS]; + const float *W = m->weights; + /* Copy to tmp_in */ + for (j=0;j<m->topo[1];j++) + { + int k; + float sum = *W++; + for (k=0;k<m->topo[0];k++) + sum = sum + in[k]**W++; + hidden[j] = tansig_approx(sum); + } + for (j=0;j<m->topo[2];j++) + { + int k; + float sum = *W++; + for (k=0;k<m->topo[1];k++) + sum = sum + hidden[k]**W++; + out[j] = tansig_approx(sum); + } +} +#endif diff --git a/drivers/opus/mlp.h b/drivers/opus/mlp.h new file mode 100644 index 0000000000..86c8e0617d --- /dev/null +++ b/drivers/opus/mlp.h @@ -0,0 +1,41 @@ +/* Copyright (c) 2008-2011 Octasic Inc. + Written by Jean-Marc Valin */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef _MLP_H_ +#define _MLP_H_ + +#include "arch.h" + +typedef struct { /* Description of a multi-layer perceptron as consumed by mlp_process(). */ + int layers; /* Presumably the number of entries in topo (3 for the net in mlp_data.c); mlp_process() does not read it. */ + const int *topo; /* Layer sizes: mlp_process() reads topo[0] inputs, topo[1] hidden units and topo[2] outputs. */ + const float *weights; /* Consumed in order: hidden layer first, then output layer; each neuron is one bias followed by one weight per input of its layer. */ +} MLP; + +void mlp_process(const MLP *m, const float *in, float *out); /* Evaluates the network: reads topo[0] floats from in and writes topo[2] floats to out. */ + +#endif /* _MLP_H_ */ diff --git a/drivers/opus/mlp_data.c b/drivers/opus/mlp_data.c new file mode 100644 index 0000000000..401c4c0250 --- /dev/null +++ b/drivers/opus/mlp_data.c @@ -0,0 +1,105 @@ +/* The contents of this file was automatically generated by mlp_train.c It contains multi-layer perceptron (MLP) weights. 
*/ + +#include "mlp.h" + +/* RMS error was 0.138320, seed was 1361535663 */ + +static const float weights[422] = { + +/* hidden layer */ +-0.0941125f, -0.302976f, -0.603555f, -0.19393f, -0.185983f, +-0.601617f, -0.0465317f, -0.114563f, -0.103599f, -0.618938f, +-0.317859f, -0.169949f, -0.0702885f, 0.148065f, 0.409524f, +0.548432f, 0.367649f, -0.494393f, 0.764306f, -1.83957f, +0.170849f, 12.786f, -1.08848f, -1.27284f, -16.2606f, +24.1773f, -5.57454f, -0.17276f, -0.163388f, -0.224421f, +-0.0948944f, -0.0728695f, -0.26557f, -0.100283f, -0.0515459f, +-0.146142f, -0.120674f, -0.180655f, 0.12857f, 0.442138f, +-0.493735f, 0.167767f, 0.206699f, -0.197567f, 0.417999f, +1.50364f, -0.773341f, -10.0401f, 0.401872f, 2.97966f, +15.2165f, -1.88905f, -1.19254f, 0.0285397f, -0.00405139f, +0.0707565f, 0.00825699f, -0.0927269f, -0.010393f, -0.00428882f, +-0.00489743f, -0.0709731f, -0.00255992f, 0.0395619f, 0.226424f, +0.0325231f, 0.162175f, -0.100118f, 0.485789f, 0.12697f, +0.285937f, 0.0155637f, 0.10546f, 3.05558f, 1.15059f, +-1.00904f, -1.83088f, 3.31766f, -3.42516f, -0.119135f, +-0.0405654f, 0.00690068f, 0.0179877f, -0.0382487f, 0.00597941f, +-0.0183611f, 0.00190395f, -0.144322f, -0.0435671f, 0.000990594f, +0.221087f, 0.142405f, 0.484066f, 0.404395f, 0.511955f, +-0.237255f, 0.241742f, 0.35045f, -0.699428f, 10.3993f, +2.6507f, -2.43459f, -4.18838f, 1.05928f, 1.71067f, +0.00667811f, -0.0721335f, -0.0397346f, 0.0362704f, -0.11496f, +-0.0235776f, 0.0082161f, -0.0141741f, -0.0329699f, -0.0354253f, +0.00277404f, -0.290654f, -1.14767f, -0.319157f, -0.686544f, +0.36897f, 0.478899f, 0.182579f, -0.411069f, 0.881104f, +-4.60683f, 1.4697f, 0.335845f, -1.81905f, -30.1699f, +5.55225f, 0.0019508f, -0.123576f, -0.0727332f, -0.0641597f, +-0.0534458f, -0.108166f, -0.0937368f, -0.0697883f, -0.0275475f, +-0.192309f, -0.110074f, 0.285375f, -0.405597f, 0.0926724f, +-0.287881f, -0.851193f, -0.099493f, -0.233764f, -1.2852f, +1.13611f, 3.12168f, -0.0699f, -1.86216f, 2.65292f, +-7.31036f, 2.44776f, 
-0.00111802f, -0.0632786f, -0.0376296f, +-0.149851f, 0.142963f, 0.184368f, 0.123433f, 0.0756158f, +0.117312f, 0.0933395f, 0.0692163f, 0.0842592f, 0.0704683f, +0.0589963f, 0.0942205f, -0.448862f, 0.0262677f, 0.270352f, +-0.262317f, 0.172586f, 2.00227f, -0.159216f, 0.038422f, +10.2073f, 4.15536f, -2.3407f, -0.0550265f, 0.00964792f, +-0.141336f, 0.0274501f, 0.0343921f, -0.0487428f, 0.0950172f, +-0.00775017f, -0.0372492f, -0.00548121f, -0.0663695f, 0.0960506f, +-0.200008f, -0.0412827f, 0.58728f, 0.0515787f, 0.337254f, +0.855024f, 0.668371f, -0.114904f, -3.62962f, -0.467477f, +-0.215472f, 2.61537f, 0.406117f, -1.36373f, 0.0425394f, +0.12208f, 0.0934502f, 0.123055f, 0.0340935f, -0.142466f, +0.035037f, -0.0490666f, 0.0733208f, 0.0576672f, 0.123984f, +-0.0517194f, -0.253018f, 0.590565f, 0.145849f, 0.315185f, +0.221534f, -0.149081f, 0.216161f, -0.349575f, 24.5664f, +-0.994196f, 0.614289f, -18.7905f, -2.83277f, -0.716801f, +-0.347201f, 0.479515f, -0.246027f, 0.0758683f, 0.137293f, +-0.17781f, 0.118751f, -0.00108329f, -0.237334f, 0.355732f, +-0.12991f, -0.0547627f, -0.318576f, -0.325524f, 0.180494f, +-0.0625604f, 0.141219f, 0.344064f, 0.37658f, -0.591772f, +5.8427f, -0.38075f, 0.221894f, -1.41934f, -1.87943e+06f, +1.34114f, 0.0283355f, -0.0447856f, -0.0211466f, -0.0256927f, +0.0139618f, 0.0207934f, -0.0107666f, 0.0110969f, 0.0586069f, +-0.0253545f, -0.0328433f, 0.11872f, -0.216943f, 0.145748f, +0.119808f, -0.0915211f, -0.120647f, -0.0787719f, -0.143644f, +-0.595116f, -1.152f, -1.25335f, -1.17092f, 4.34023f, +-975268.f, -1.37033f, -0.0401123f, 0.210602f, -0.136656f, +0.135962f, -0.0523293f, 0.0444604f, 0.0143928f, 0.00412666f, +-0.0193003f, 0.218452f, -0.110204f, -2.02563f, 0.918238f, +-2.45362f, 1.19542f, -0.061362f, -1.92243f, 0.308111f, +0.49764f, 0.912356f, 0.209272f, -2.34525f, 2.19326f, +-6.47121f, 1.69771f, -0.725123f, 0.0118929f, 0.0377944f, +0.0554003f, 0.0226452f, -0.0704421f, -0.0300309f, 0.0122978f, +-0.0041782f, -0.0686612f, 0.0313115f, 0.039111f, 0.364111f, 
+-0.0945548f, 0.0229876f, -0.17414f, 0.329795f, 0.114714f, +0.30022f, 0.106997f, 0.132355f, 5.79932f, 0.908058f, +-0.905324f, -3.3561f, 0.190647f, 0.184211f, -0.673648f, +0.231807f, -0.0586222f, 0.230752f, -0.438277f, 0.245857f, +-0.17215f, 0.0876383f, -0.720512f, 0.162515f, 0.0170571f, +0.101781f, 0.388477f, 1.32931f, 1.08548f, -0.936301f, +-2.36958f, -6.71988f, -3.44376f, 2.13818f, 14.2318f, +4.91459f, -3.09052f, -9.69191f, -0.768234f, 1.79604f, +0.0549653f, 0.163399f, 0.0797025f, 0.0343933f, -0.0555876f, +-0.00505673f, 0.0187258f, 0.0326628f, 0.0231486f, 0.15573f, +0.0476223f, -0.254824f, 1.60155f, -0.801221f, 2.55496f, +0.737629f, -1.36249f, -0.695463f, -2.44301f, -1.73188f, +3.95279f, 1.89068f, 0.486087f, -11.3343f, 3.9416e+06f, + +/* output layer */ +-0.381439f, 0.12115f, -0.906927f, 2.93878f, 1.6388f, +0.882811f, 0.874344f, 1.21726f, -0.874545f, 0.321706f, +0.785055f, 0.946558f, -0.575066f, -3.46553f, 0.884905f, +0.0924047f, -9.90712f, 0.391338f, 0.160103f, -2.04954f, +4.1455f, 0.0684029f, -0.144761f, -0.285282f, 0.379244f, +-1.1584f, -0.0277241f, -9.85f, -4.82386f, 3.71333f, +3.87308f, 3.52558f}; + +static const int topo[3] = {25, 15, 2}; + +const MLP net = { + 3, + topo, + weights +}; diff --git a/drivers/opus/opus.c b/drivers/opus/opus.c new file mode 100644 index 0000000000..8978e3b06b --- /dev/null +++ b/drivers/opus/opus.c @@ -0,0 +1,329 @@ +/* Copyright (c) 2011 Xiph.Org Foundation, Skype Limited + Written by Jean-Marc Valin and Koen Vos */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "opus.h" +#include "opus_private.h" + +#ifndef DISABLE_FLOAT_API +OPUS_EXPORT void opus_pcm_soft_clip(float *_x, int N, int C, float *declip_mem) +{ + int c; + int i; + float *x; + + if (C<1 || N<1 || !_x || !declip_mem) return; + + /* First thing: saturate everything to +/- 2 which is the highest level our + non-linearity can handle. At the point where the signal reaches +/-2, + the derivative will be zero anyway, so this doesn't introduce any + discontinuity in the derivative. */ + for (i=0;i<N*C;i++) + _x[i] = MAX16(-2.f, MIN16(2.f, _x[i])); + for (c=0;c<C;c++) + { + float a; + float x0; + int curr; + + x = _x+c; + a = declip_mem[c]; + /* Continue applying the non-linearity from the previous frame to avoid + any discontinuity. 
*/ + for (i=0;i<N;i++) + { + if (x[i*C]*a>=0) + break; + x[i*C] = x[i*C]+a*x[i*C]*x[i*C]; + } + + curr=0; + x0 = x[0]; + while(1) + { + int start, end; + float maxval; + int special=0; + int peak_pos; + for (i=curr;i<N;i++) + { + if (x[i*C]>1 || x[i*C]<-1) + break; + } + if (i==N) + { + a=0; + break; + } + peak_pos = i; + start=end=i; + maxval=ABS16(x[i*C]); + /* Look for first zero crossing before clipping */ + while (start>0 && x[i*C]*x[(start-1)*C]>=0) + start--; + /* Look for first zero crossing after clipping */ + while (end<N && x[i*C]*x[end*C]>=0) + { + /* Look for other peaks until the next zero-crossing. */ + if (ABS16(x[end*C])>maxval) + { + maxval = ABS16(x[end*C]); + peak_pos = end; + } + end++; + } + /* Detect the special case where we clip before the first zero crossing */ + special = (start==0 && x[i*C]*x[0]>=0); + + /* Compute a such that maxval + a*maxval^2 = 1 */ + a=(maxval-1)/(maxval*maxval); + if (x[i*C]>0) + a = -a; + /* Apply soft clipping */ + for (i=start;i<end;i++) + x[i*C] = x[i*C]+a*x[i*C]*x[i*C]; + + if (special && peak_pos>=2) + { + /* Add a linear ramp from the first sample to the signal peak. + This avoids a discontinuity at the beginning of the frame. 
*/ + float delta; + float offset = x0-x[0]; + delta = offset / peak_pos; + for (i=curr;i<peak_pos;i++) + { + offset -= delta; + x[i*C] += offset; + x[i*C] = MAX16(-1.f, MIN16(1.f, x[i*C])); + } + } + curr = end; + if (curr==N) + break; + } + declip_mem[c] = a; + } +} +#endif + +int encode_size(int size, unsigned char *data) +{ + if (size < 252) + { + data[0] = size; + return 1; + } else { + data[0] = 252+(size&0x3); + data[1] = (size-(int)data[0])>>2; + return 2; + } +} + +static int parse_size(const unsigned char *data, opus_int32 len, opus_int16 *size) +{ + if (len<1) + { + *size = -1; + return -1; + } else if (data[0]<252) + { + *size = data[0]; + return 1; + } else if (len<2) + { + *size = -1; + return -1; + } else { + *size = 4*data[1] + data[0]; + return 2; + } +} + +int opus_packet_parse_impl(const unsigned char *data, opus_int32 len, + int self_delimited, unsigned char *out_toc, + const unsigned char *frames[48], opus_int16 size[48], + int *payload_offset, opus_int32 *packet_offset) +{ + int i, bytes; + int count; + int cbr; + unsigned char ch, toc; + int framesize; + opus_int32 last_size; + opus_int32 pad = 0; + const unsigned char *data0 = data; + + if (size==NULL) + return OPUS_BAD_ARG; + + framesize = opus_packet_get_samples_per_frame(data, 48000); + + cbr = 0; + toc = *data++; + len--; + last_size = len; + switch (toc&0x3) + { + /* One frame */ + case 0: + count=1; + break; + /* Two CBR frames */ + case 1: + count=2; + cbr = 1; + if (!self_delimited) + { + if (len&0x1) + return OPUS_INVALID_PACKET; + last_size = len/2; + /* If last_size doesn't fit in size[0], we'll catch it later */ + size[0] = (opus_int16)last_size; + } + break; + /* Two VBR frames */ + case 2: + count = 2; + bytes = parse_size(data, len, size); + len -= bytes; + if (size[0]<0 || size[0] > len) + return OPUS_INVALID_PACKET; + data += bytes; + last_size = len-size[0]; + break; + /* Multiple CBR/VBR frames (from 0 to 120 ms) */ + default: /*case 3:*/ + if (len<1) + return 
OPUS_INVALID_PACKET; + /* Number of frames encoded in bits 0 to 5 */ + ch = *data++; + count = ch&0x3F; + if (count <= 0 || framesize*count > 5760) + return OPUS_INVALID_PACKET; + len--; + /* Padding flag is bit 6 */ + if (ch&0x40) + { + int p; + do { + int tmp; + if (len<=0) + return OPUS_INVALID_PACKET; + p = *data++; + len--; + tmp = p==255 ? 254: p; + len -= tmp; + pad += tmp; + } while (p==255); + } + if (len<0) + return OPUS_INVALID_PACKET; + /* VBR flag is bit 7 */ + cbr = !(ch&0x80); + if (!cbr) + { + /* VBR case */ + last_size = len; + for (i=0;i<count-1;i++) + { + bytes = parse_size(data, len, size+i); + len -= bytes; + if (size[i]<0 || size[i] > len) + return OPUS_INVALID_PACKET; + data += bytes; + last_size -= bytes+size[i]; + } + if (last_size<0) + return OPUS_INVALID_PACKET; + } else if (!self_delimited) + { + /* CBR case */ + last_size = len/count; + if (last_size*count!=len) + return OPUS_INVALID_PACKET; + for (i=0;i<count-1;i++) + size[i] = (opus_int16)last_size; + } + break; + } + /* Self-delimited framing has an extra size for the last frame. */ + if (self_delimited) + { + bytes = parse_size(data, len, size+count-1); + len -= bytes; + if (size[count-1]<0 || size[count-1] > len) + return OPUS_INVALID_PACKET; + data += bytes; + /* For CBR packets, apply the size to all the frames. */ + if (cbr) + { + if (size[count-1]*count > len) + return OPUS_INVALID_PACKET; + for (i=0;i<count-1;i++) + size[i] = size[count-1]; + } else if (bytes+size[count-1] > last_size) + return OPUS_INVALID_PACKET; + } else + { + /* Because it's not encoded explicitly, it's possible the size of the + last packet (or all the packets, for the CBR case) is larger than + 1275. 
Reject them here.*/ + if (last_size > 1275) + return OPUS_INVALID_PACKET; + size[count-1] = (opus_int16)last_size; + } + + if (payload_offset) + *payload_offset = (int)(data-data0); + + for (i=0;i<count;i++) + { + if (frames) + frames[i] = data; + data += size[i]; + } + + if (packet_offset) + *packet_offset = pad+(opus_int32)(data-data0); + + if (out_toc) + *out_toc = toc; + + return count; +} + +int opus_packet_parse(const unsigned char *data, opus_int32 len, + unsigned char *out_toc, const unsigned char *frames[48], + opus_int16 size[48], int *payload_offset) +{ + return opus_packet_parse_impl(data, len, 0, out_toc, + frames, size, payload_offset, NULL); +} + diff --git a/drivers/opus/opus.h b/drivers/opus/opus.h new file mode 100644 index 0000000000..93a53a2ffc --- /dev/null +++ b/drivers/opus/opus.h @@ -0,0 +1,978 @@ +/* Copyright (c) 2010-2011 Xiph.Org Foundation, Skype Limited + Written by Jean-Marc Valin and Koen Vos */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/** + * @file opus.h + * @brief Opus reference implementation API + */ + +#ifndef OPUS_H +#define OPUS_H + +#include "opus_types.h" +#include "opus_defines.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @mainpage Opus + * + * The Opus codec is designed for interactive speech and audio transmission over the Internet. + * It is designed by the IETF Codec Working Group and incorporates technology from + * Skype's SILK codec and Xiph.Org's CELT codec. + * + * The Opus codec is designed to handle a wide range of interactive audio applications, + * including Voice over IP, videoconferencing, in-game chat, and even remote live music + * performances. It can scale from low bit-rate narrowband speech to very high quality + * stereo music. 
Its main features are: + + * @li Sampling rates from 8 to 48 kHz + * @li Bit-rates from 6 kb/s to 510 kb/s + * @li Support for both constant bit-rate (CBR) and variable bit-rate (VBR) + * @li Audio bandwidth from narrowband to full-band + * @li Support for speech and music + * @li Support for mono and stereo + * @li Support for multichannel (up to 255 channels) + * @li Frame sizes from 2.5 ms to 60 ms + * @li Good loss robustness and packet loss concealment (PLC) + * @li Floating point and fixed-point implementation + * + * Documentation sections: + * @li @ref opus_encoder + * @li @ref opus_decoder + * @li @ref opus_repacketizer + * @li @ref opus_multistream + * @li @ref opus_libinfo + * @li @ref opus_custom + */ + +/** @defgroup opus_encoder Opus Encoder + * @{ + * + * @brief This page describes the process and functions used to encode Opus. + * + * Since Opus is a stateful codec, the encoding process starts with creating an encoder + * state. This can be done with: + * + * @code + * int error; + * OpusEncoder *enc; + * enc = opus_encoder_create(Fs, channels, application, &error); + * @endcode + * + * From this point, @c enc can be used for encoding an audio stream. An encoder state + * @b must @b not be used for more than one stream at the same time. Similarly, the encoder + * state @b must @b not be re-initialized for each frame. + * + * While opus_encoder_create() allocates memory for the state, it's also possible + * to initialize pre-allocated memory: + * + * @code + * int size; + * int error; + * OpusEncoder *enc; + * size = opus_encoder_get_size(channels); + * enc = malloc(size); + * error = opus_encoder_init(enc, Fs, channels, application); + * @endcode + * + * where opus_encoder_get_size() returns the required size for the encoder state. Note that + * future versions of this code may change the size, so no assumptions should be made about it. + * + * The encoder state is always contiguous in memory and only a shallow copy is sufficient + * to copy it (e.g. 
memcpy()) + * + * It is possible to change some of the encoder's settings using the opus_encoder_ctl() + * interface. All these settings already default to the recommended value, so they should + * only be changed when necessary. The most common settings one may want to change are: + * + * @code + * opus_encoder_ctl(enc, OPUS_SET_BITRATE(bitrate)); + * opus_encoder_ctl(enc, OPUS_SET_COMPLEXITY(complexity)); + * opus_encoder_ctl(enc, OPUS_SET_SIGNAL(signal_type)); + * @endcode + * + * where + * + * @arg bitrate is in bits per second (b/s) + * @arg complexity is a value from 1 to 10, where 1 is the lowest complexity and 10 is the highest + * @arg signal_type is either OPUS_AUTO (default), OPUS_SIGNAL_VOICE, or OPUS_SIGNAL_MUSIC + * + * See @ref opus_encoderctls and @ref opus_genericctls for a complete list of parameters that can be set or queried. Most parameters can be set or changed at any time during a stream. + * + * To encode a frame, opus_encode() or opus_encode_float() must be called with exactly one frame (2.5, 5, 10, 20, 40 or 60 ms) of audio data: + * @code + * len = opus_encode(enc, audio_frame, frame_size, packet, max_packet); + * @endcode + * + * where + * <ul> + * <li>audio_frame is the audio data in opus_int16 (or float for opus_encode_float())</li> + * <li>frame_size is the duration of the frame in samples (per channel)</li> + * <li>packet is the byte array to which the compressed data is written</li> + * <li>max_packet is the maximum number of bytes that can be written in the packet (4000 bytes is recommended). + * Do not use max_packet to control VBR target bitrate, instead use the #OPUS_SET_BITRATE CTL.</li> + * </ul> + * + * opus_encode() and opus_encode_float() return the number of bytes actually written to the packet. + * The return value <b>can be negative</b>, which indicates that an error has occurred. If the return value + * is 1 byte, then the packet does not need to be transmitted (DTX). 
+ * + * Once the encoder state is no longer needed, it can be destroyed with + * + * @code + * opus_encoder_destroy(enc); + * @endcode + * + * If the encoder was created with opus_encoder_init() rather than opus_encoder_create(), + * then no action is required aside from potentially freeing the memory that was manually + * allocated for it (calling free(enc) for the example above) + * + */ + +/** Opus encoder state. + * This contains the complete state of an Opus encoder. + * It is position independent and can be freely copied. + * @see opus_encoder_create,opus_encoder_init + */ +typedef struct OpusEncoder OpusEncoder; + +/** Gets the size of an <code>OpusEncoder</code> structure. + * @param[in] channels <tt>int</tt>: Number of channels. + * This must be 1 or 2. + * @returns The size in bytes. + */ +OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_encoder_get_size(int channels); + +/** + */ + +/** Allocates and initializes an encoder state. + * There are three coding modes: + * + * @ref OPUS_APPLICATION_VOIP gives best quality at a given bitrate for voice + * signals. It enhances the input signal by high-pass filtering and + * emphasizing formants and harmonics. Optionally it includes in-band + * forward error correction to protect against packet loss. Use this + * mode for typical VoIP applications. Because of the enhancement, + * even at high bitrates the output may sound different from the input. + * + * @ref OPUS_APPLICATION_AUDIO gives best quality at a given bitrate for most + * non-voice signals like music. Use this mode for music and mixed + * (music/voice) content, broadcast, and applications requiring less + * than 15 ms of coding delay. + * + * @ref OPUS_APPLICATION_RESTRICTED_LOWDELAY configures low-delay mode that + * disables the speech-optimized mode in exchange for slightly reduced delay. + * This mode can only be set on a newly initialized or freshly reset encoder + * because it changes the codec delay. 
+ * + * This is useful when the caller knows that the speech-optimized modes will not be needed (use with caution). + * @param [in] Fs <tt>opus_int32</tt>: Sampling rate of input signal (Hz) + * This must be one of 8000, 12000, 16000, + * 24000, or 48000. + * @param [in] channels <tt>int</tt>: Number of channels (1 or 2) in input signal + * @param [in] application <tt>int</tt>: Coding mode (@ref OPUS_APPLICATION_VOIP/@ref OPUS_APPLICATION_AUDIO/@ref OPUS_APPLICATION_RESTRICTED_LOWDELAY) + * @param [out] error <tt>int*</tt>: @ref opus_errorcodes + * @note Regardless of the sampling rate and number channels selected, the Opus encoder + * can switch to a lower audio bandwidth or number of channels if the bitrate + * selected is too low. This also means that it is safe to always use 48 kHz stereo input + * and let the encoder optimize the encoding. + */ +OPUS_EXPORT OPUS_WARN_UNUSED_RESULT OpusEncoder *opus_encoder_create( + opus_int32 Fs, + int channels, + int application, + int *error +); + +/** Initializes a previously allocated encoder state + * The memory pointed to by st must be at least the size returned by opus_encoder_get_size(). + * This is intended for applications which use their own allocator instead of malloc. + * @see opus_encoder_create(),opus_encoder_get_size() + * To reset a previously initialized state, use the #OPUS_RESET_STATE CTL. + * @param [in] st <tt>OpusEncoder*</tt>: Encoder state + * @param [in] Fs <tt>opus_int32</tt>: Sampling rate of input signal (Hz) + * This must be one of 8000, 12000, 16000, + * 24000, or 48000. 
+ * @param [in] channels <tt>int</tt>: Number of channels (1 or 2) in input signal + * @param [in] application <tt>int</tt>: Coding mode (OPUS_APPLICATION_VOIP/OPUS_APPLICATION_AUDIO/OPUS_APPLICATION_RESTRICTED_LOWDELAY) + * @retval #OPUS_OK Success or @ref opus_errorcodes + */ +OPUS_EXPORT int opus_encoder_init( + OpusEncoder *st, + opus_int32 Fs, + int channels, + int application +) OPUS_ARG_NONNULL(1); + +/** Encodes an Opus frame. + * @param [in] st <tt>OpusEncoder*</tt>: Encoder state + * @param [in] pcm <tt>opus_int16*</tt>: Input signal (interleaved if 2 channels). length is frame_size*channels*sizeof(opus_int16) + * @param [in] frame_size <tt>int</tt>: Number of samples per channel in the + * input signal. + * This must be an Opus frame size for + * the encoder's sampling rate. + * For example, at 48 kHz the permitted + * values are 120, 240, 480, 960, 1920, + * and 2880. + * Passing in a duration of less than + * 10 ms (480 samples at 48 kHz) will + * prevent the encoder from using the LPC + * or hybrid modes. + * @param [out] data <tt>unsigned char*</tt>: Output payload. + * This must contain storage for at + * least \a max_data_bytes. + * @param [in] max_data_bytes <tt>opus_int32</tt>: Size of the allocated + * memory for the output + * payload. This may be + * used to impose an upper limit on + * the instant bitrate, but should + * not be used as the only bitrate + * control. Use #OPUS_SET_BITRATE to + * control the bitrate. + * @returns The length of the encoded packet (in bytes) on success or a + * negative error code (see @ref opus_errorcodes) on failure. + */ +OPUS_EXPORT OPUS_WARN_UNUSED_RESULT opus_int32 opus_encode( + OpusEncoder *st, + const opus_int16 *pcm, + int frame_size, + unsigned char *data, + opus_int32 max_data_bytes +) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(2) OPUS_ARG_NONNULL(4); + +/** Encodes an Opus frame from floating point input. 
+ * @param [in] st <tt>OpusEncoder*</tt>: Encoder state + * @param [in] pcm <tt>float*</tt>: Input in float format (interleaved if 2 channels), with a normal range of +/-1.0. + * Samples with a range beyond +/-1.0 are supported but will + * be clipped by decoders using the integer API and should + * only be used if it is known that the far end supports + * extended dynamic range. + * length is frame_size*channels*sizeof(float) + * @param [in] frame_size <tt>int</tt>: Number of samples per channel in the + * input signal. + * This must be an Opus frame size for + * the encoder's sampling rate. + * For example, at 48 kHz the permitted + * values are 120, 240, 480, 960, 1920, + * and 2880. + * Passing in a duration of less than + * 10 ms (480 samples at 48 kHz) will + * prevent the encoder from using the LPC + * or hybrid modes. + * @param [out] data <tt>unsigned char*</tt>: Output payload. + * This must contain storage for at + * least \a max_data_bytes. + * @param [in] max_data_bytes <tt>opus_int32</tt>: Size of the allocated + * memory for the output + * payload. This may be + * used to impose an upper limit on + * the instant bitrate, but should + * not be used as the only bitrate + * control. Use #OPUS_SET_BITRATE to + * control the bitrate. + * @returns The length of the encoded packet (in bytes) on success or a + * negative error code (see @ref opus_errorcodes) on failure. + */ +OPUS_EXPORT OPUS_WARN_UNUSED_RESULT opus_int32 opus_encode_float( + OpusEncoder *st, + const float *pcm, + int frame_size, + unsigned char *data, + opus_int32 max_data_bytes +) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(2) OPUS_ARG_NONNULL(4); + +/** Frees an <code>OpusEncoder</code> allocated by opus_encoder_create(). + * @param[in] st <tt>OpusEncoder*</tt>: State to be freed. + */ +OPUS_EXPORT void opus_encoder_destroy(OpusEncoder *st); + +/** Perform a CTL function on an Opus encoder. + * + * Generally the request and subsequent arguments are generated + * by a convenience macro. 
+ * @param st <tt>OpusEncoder*</tt>: Encoder state. + * @param request This and all remaining parameters should be replaced by one + * of the convenience macros in @ref opus_genericctls or + * @ref opus_encoderctls. + * @see opus_genericctls + * @see opus_encoderctls + */ +OPUS_EXPORT int opus_encoder_ctl(OpusEncoder *st, int request, ...) OPUS_ARG_NONNULL(1); +/**@}*/ + +/** @defgroup opus_decoder Opus Decoder + * @{ + * + * @brief This page describes the process and functions used to decode Opus. + * + * The decoding process also starts with creating a decoder + * state. This can be done with: + * @code + * int error; + * OpusDecoder *dec; + * dec = opus_decoder_create(Fs, channels, &error); + * @endcode + * where + * @li Fs is the sampling rate and must be 8000, 12000, 16000, 24000, or 48000 + * @li channels is the number of channels (1 or 2) + * @li error will hold the error code in case of failure (or #OPUS_OK on success) + * @li the return value is a newly created decoder state to be used for decoding + * + * While opus_decoder_create() allocates memory for the state, it's also possible + * to initialize pre-allocated memory: + * @code + * int size; + * int error; + * OpusDecoder *dec; + * size = opus_decoder_get_size(channels); + * dec = malloc(size); + * error = opus_decoder_init(dec, Fs, channels); + * @endcode + * where opus_decoder_get_size() returns the required size for the decoder state. Note that + * future versions of this code may change the size, so no assumptions should be made about it. + * + * The decoder state is always contiguous in memory and only a shallow copy is sufficient + * to copy it (e.g. 
memcpy()) + * + * To decode a frame, opus_decode() or opus_decode_float() must be called with a packet of compressed audio data: + * @code + * frame_size = opus_decode(dec, packet, len, decoded, max_size, 0); + * @endcode + * where + * + * @li packet is the byte array containing the compressed data + * @li len is the exact number of bytes contained in the packet + * @li decoded is the decoded audio data in opus_int16 (or float for opus_decode_float()) + * @li max_size is the max duration of the frame in samples (per channel) that can fit into the decoded_frame array + * + * opus_decode() and opus_decode_float() return the number of samples (per channel) decoded from the packet. + * If that value is negative, then an error has occurred. This can occur if the packet is corrupted or if the audio + * buffer is too small to hold the decoded audio. + * + * Opus is a stateful codec with overlapping blocks and as a result Opus + * packets are not coded independently of each other. Packets must be + * passed into the decoder serially and in the correct order for a correct + * decode. Lost packets can be replaced with loss concealment by calling + * the decoder with a null pointer and zero length for the missing packet. + * + * A single codec state may only be accessed from a single thread at + * a time and any required locking must be performed by the caller. Separate + * streams must be decoded with separate decoder states and can be decoded + * in parallel unless the library was compiled with NONTHREADSAFE_PSEUDOSTACK + * defined. + * + */ + +/** Opus decoder state. + * This contains the complete state of an Opus decoder. + * It is position independent and can be freely copied. + * @see opus_decoder_create,opus_decoder_init + */ +typedef struct OpusDecoder OpusDecoder; + +/** Gets the size of an <code>OpusDecoder</code> structure. + * @param [in] channels <tt>int</tt>: Number of channels. + * This must be 1 or 2. + * @returns The size in bytes. 
+ */ +OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_decoder_get_size(int channels); + +/** Allocates and initializes a decoder state. + * @param [in] Fs <tt>opus_int32</tt>: Sample rate to decode at (Hz). + * This must be one of 8000, 12000, 16000, + * 24000, or 48000. + * @param [in] channels <tt>int</tt>: Number of channels (1 or 2) to decode + * @param [out] error <tt>int*</tt>: #OPUS_OK Success or @ref opus_errorcodes + * + * Internally Opus stores data at 48000 Hz, so that should be the default + * value for Fs. However, the decoder can efficiently decode to buffers + * at 8, 12, 16, and 24 kHz so if for some reason the caller cannot use + * data at the full sample rate, or knows the compressed data doesn't + * use the full frequency range, it can request decoding at a reduced + * rate. Likewise, the decoder is capable of filling in either mono or + * interleaved stereo pcm buffers, at the caller's request. + */ +OPUS_EXPORT OPUS_WARN_UNUSED_RESULT OpusDecoder *opus_decoder_create( + opus_int32 Fs, + int channels, + int *error +); + +/** Initializes a previously allocated decoder state. + * The state must be at least the size returned by opus_decoder_get_size(). + * This is intended for applications which use their own allocator instead of malloc. @see opus_decoder_create,opus_decoder_get_size + * To reset a previously initialized state, use the #OPUS_RESET_STATE CTL. + * @param [in] st <tt>OpusDecoder*</tt>: Decoder state. + * @param [in] Fs <tt>opus_int32</tt>: Sampling rate to decode to (Hz). + * This must be one of 8000, 12000, 16000, + * 24000, or 48000. + * @param [in] channels <tt>int</tt>: Number of channels (1 or 2) to decode + * @retval #OPUS_OK Success or @ref opus_errorcodes + */ +OPUS_EXPORT int opus_decoder_init( + OpusDecoder *st, + opus_int32 Fs, + int channels +) OPUS_ARG_NONNULL(1); + +/** Decode an Opus packet. + * @param [in] st <tt>OpusDecoder*</tt>: Decoder state + * @param [in] data <tt>char*</tt>: Input payload. 
Use a NULL pointer to indicate packet loss + * @param [in] len <tt>opus_int32</tt>: Number of bytes in payload* + * @param [out] pcm <tt>opus_int16*</tt>: Output signal (interleaved if 2 channels). length + * is frame_size*channels*sizeof(opus_int16) + * @param [in] frame_size Number of samples per channel of available space in \a pcm. + * If this is less than the maximum packet duration (120ms; 5760 for 48kHz), this function will + * not be capable of decoding some packets. In the case of PLC (data==NULL) or FEC (decode_fec=1), + * then frame_size needs to be exactly the duration of audio that is missing, otherwise the + * decoder will not be in the optimal state to decode the next incoming packet. For the PLC and + * FEC cases, frame_size <b>must</b> be a multiple of 2.5 ms. + * @param [in] decode_fec <tt>int</tt>: Flag (0 or 1) to request that any in-band forward error correction data be + * decoded. If no such data is available, the frame is decoded as if it were lost. + * @returns Number of decoded samples or @ref opus_errorcodes + */ +OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_decode( + OpusDecoder *st, + const unsigned char *data, + opus_int32 len, + opus_int16 *pcm, + int frame_size, + int decode_fec +) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(4); + +/** Decode an Opus packet with floating point output. + * @param [in] st <tt>OpusDecoder*</tt>: Decoder state + * @param [in] data <tt>char*</tt>: Input payload. Use a NULL pointer to indicate packet loss + * @param [in] len <tt>opus_int32</tt>: Number of bytes in payload + * @param [out] pcm <tt>float*</tt>: Output signal (interleaved if 2 channels). length + * is frame_size*channels*sizeof(float) + * @param [in] frame_size Number of samples per channel of available space in \a pcm. + * If this is less than the maximum packet duration (120ms; 5760 for 48kHz), this function will + * not be capable of decoding some packets. 
In the case of PLC (data==NULL) or FEC (decode_fec=1), + * then frame_size needs to be exactly the duration of audio that is missing, otherwise the + * decoder will not be in the optimal state to decode the next incoming packet. For the PLC and + * FEC cases, frame_size <b>must</b> be a multiple of 2.5 ms. + * @param [in] decode_fec <tt>int</tt>: Flag (0 or 1) to request that any in-band forward error correction data be + * decoded. If no such data is available the frame is decoded as if it were lost. + * @returns Number of decoded samples or @ref opus_errorcodes + */ +OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_decode_float( + OpusDecoder *st, + const unsigned char *data, + opus_int32 len, + float *pcm, + int frame_size, + int decode_fec +) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(4); + +/** Perform a CTL function on an Opus decoder. + * + * Generally the request and subsequent arguments are generated + * by a convenience macro. + * @param st <tt>OpusDecoder*</tt>: Decoder state. + * @param request This and all remaining parameters should be replaced by one + * of the convenience macros in @ref opus_genericctls or + * @ref opus_decoderctls. + * @see opus_genericctls + * @see opus_decoderctls + */ +OPUS_EXPORT int opus_decoder_ctl(OpusDecoder *st, int request, ...) OPUS_ARG_NONNULL(1); + +/** Frees an <code>OpusDecoder</code> allocated by opus_decoder_create(). + * @param[in] st <tt>OpusDecoder*</tt>: State to be freed. + */ +OPUS_EXPORT void opus_decoder_destroy(OpusDecoder *st); + +/** Parse an opus packet into one or more frames. + * Opus_decode will perform this operation internally so most applications do + * not need to use this function. + * This function does not copy the frames, the returned pointers are pointers into + * the input packet. 
+ * @param [in] data <tt>char*</tt>: Opus packet to be parsed + * @param [in] len <tt>opus_int32</tt>: size of data + * @param [out] out_toc <tt>char*</tt>: TOC pointer + * @param [out] frames <tt>char*[48]</tt> encapsulated frames + * @param [out] size <tt>opus_int16[48]</tt> sizes of the encapsulated frames + * @param [out] payload_offset <tt>int*</tt>: returns the position of the payload within the packet (in bytes) + * @returns number of frames + */ +OPUS_EXPORT int opus_packet_parse( + const unsigned char *data, + opus_int32 len, + unsigned char *out_toc, + const unsigned char *frames[48], + opus_int16 size[48], + int *payload_offset +) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(4); + +/** Gets the bandwidth of an Opus packet. + * @param [in] data <tt>char*</tt>: Opus packet + * @retval OPUS_BANDWIDTH_NARROWBAND Narrowband (4kHz bandpass) + * @retval OPUS_BANDWIDTH_MEDIUMBAND Mediumband (6kHz bandpass) + * @retval OPUS_BANDWIDTH_WIDEBAND Wideband (8kHz bandpass) + * @retval OPUS_BANDWIDTH_SUPERWIDEBAND Superwideband (12kHz bandpass) + * @retval OPUS_BANDWIDTH_FULLBAND Fullband (20kHz bandpass) + * @retval OPUS_INVALID_PACKET The compressed data passed is corrupted or of an unsupported type + */ +OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_packet_get_bandwidth(const unsigned char *data) OPUS_ARG_NONNULL(1); + +/** Gets the number of samples per frame from an Opus packet. + * @param [in] data <tt>char*</tt>: Opus packet. + * This must contain at least one byte of + * data. + * @param [in] Fs <tt>opus_int32</tt>: Sampling rate in Hz. + * This must be a multiple of 400, or + * inaccurate results will be returned. + * @returns Number of samples per frame. + */ +OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_packet_get_samples_per_frame(const unsigned char *data, opus_int32 Fs) OPUS_ARG_NONNULL(1); + +/** Gets the number of channels from an Opus packet. 
+ * @param [in] data <tt>char*</tt>: Opus packet + * @returns Number of channels + * @retval OPUS_INVALID_PACKET The compressed data passed is corrupted or of an unsupported type + */ +OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_packet_get_nb_channels(const unsigned char *data) OPUS_ARG_NONNULL(1); + +/** Gets the number of frames in an Opus packet. + * @param [in] packet <tt>char*</tt>: Opus packet + * @param [in] len <tt>opus_int32</tt>: Length of packet + * @returns Number of frames + * @retval OPUS_BAD_ARG Insufficient data was passed to the function + * @retval OPUS_INVALID_PACKET The compressed data passed is corrupted or of an unsupported type + */ +OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_packet_get_nb_frames(const unsigned char packet[], opus_int32 len) OPUS_ARG_NONNULL(1); + +/** Gets the number of samples of an Opus packet. + * @param [in] packet <tt>char*</tt>: Opus packet + * @param [in] len <tt>opus_int32</tt>: Length of packet + * @param [in] Fs <tt>opus_int32</tt>: Sampling rate in Hz. + * This must be a multiple of 400, or + * inaccurate results will be returned. + * @returns Number of samples + * @retval OPUS_BAD_ARG Insufficient data was passed to the function + * @retval OPUS_INVALID_PACKET The compressed data passed is corrupted or of an unsupported type + */ +OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_packet_get_nb_samples(const unsigned char packet[], opus_int32 len, opus_int32 Fs) OPUS_ARG_NONNULL(1); + +/** Gets the number of samples of an Opus packet. 
+ * @param [in] dec <tt>OpusDecoder*</tt>: Decoder state + * @param [in] packet <tt>char*</tt>: Opus packet + * @param [in] len <tt>opus_int32</tt>: Length of packet + * @returns Number of samples + * @retval OPUS_BAD_ARG Insufficient data was passed to the function + * @retval OPUS_INVALID_PACKET The compressed data passed is corrupted or of an unsupported type + */ +OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_decoder_get_nb_samples(const OpusDecoder *dec, const unsigned char packet[], opus_int32 len) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(2); + +/** Applies soft-clipping to bring a float signal within the [-1,1] range. If + * the signal is already in that range, nothing is done. If there are values + * outside of [-1,1], then the signal is clipped as smoothly as possible to + * both fit in the range and avoid creating excessive distortion in the + * process. + * @param [in,out] pcm <tt>float*</tt>: Input PCM and modified PCM + * @param [in] frame_size <tt>int</tt> Number of samples per channel to process + * @param [in] channels <tt>int</tt>: Number of channels + * @param [in,out] softclip_mem <tt>float*</tt>: State memory for the soft clipping process (one float per channel, initialized to zero) + */ +OPUS_EXPORT void opus_pcm_soft_clip(float *pcm, int frame_size, int channels, float *softclip_mem); + + +/**@}*/ + +/** @defgroup opus_repacketizer Repacketizer + * @{ + * + * The repacketizer can be used to merge multiple Opus packets into a single + * packet or alternatively to split Opus packets that have previously been + * merged. Splitting valid Opus packets is always guaranteed to succeed, + * whereas merging valid packets only succeeds if all frames have the same + * mode, bandwidth, and frame size, and when the total duration of the merged + * packet is no more than 120 ms. + * The repacketizer currently only operates on elementary Opus + * streams. 
It will not manipulate multistream packets successfully, except in + * the degenerate case where they consist of data from a single stream. + * + * The repacketizing process starts with creating a repacketizer state, either + * by calling opus_repacketizer_create() or by allocating the memory yourself, + * e.g., + * @code + * OpusRepacketizer *rp; + * rp = (OpusRepacketizer*)malloc(opus_repacketizer_get_size()); + * if (rp != NULL) + * opus_repacketizer_init(rp); + * @endcode + * + * Then the application should submit packets with opus_repacketizer_cat(), + * extract new packets with opus_repacketizer_out() or + * opus_repacketizer_out_range(), and then reset the state for the next set of + * input packets via opus_repacketizer_init(). + * + * For example, to split a sequence of packets into individual frames: + * @code + * unsigned char *data; + * int len; + * while (get_next_packet(&data, &len)) + * { + * unsigned char out[1276]; + * opus_int32 out_len; + * int nb_frames; + * int err; + * int i; + * err = opus_repacketizer_cat(rp, data, len); + * if (err != OPUS_OK) + * { + * release_packet(data); + * return err; + * } + * nb_frames = opus_repacketizer_get_nb_frames(rp); + * for (i = 0; i < nb_frames; i++) + * { + * out_len = opus_repacketizer_out_range(rp, i, i+1, out, sizeof(out)); + * if (out_len < 0) + * { + * release_packet(data); + * return (int)out_len; + * } + * output_next_packet(out, out_len); + * } + * opus_repacketizer_init(rp); + * release_packet(data); + * } + * @endcode + * + * Alternatively, to combine a sequence of frames into packets that each + * contain up to <code>TARGET_DURATION_MS</code> milliseconds of data: + * @code + * // The maximum number of packets with duration TARGET_DURATION_MS occurs + * // when the frame size is 2.5 ms, for a total of (TARGET_DURATION_MS*2/5) + * // packets. 
+ * unsigned char *data[(TARGET_DURATION_MS*2/5)+1]; + * opus_int32 len[(TARGET_DURATION_MS*2/5)+1]; + * int nb_packets; + * unsigned char out[1277*(TARGET_DURATION_MS*2/2)]; + * opus_int32 out_len; + * int prev_toc; + * nb_packets = 0; + * while (get_next_packet(data+nb_packets, len+nb_packets)) + * { + * int nb_frames; + * int err; + * nb_frames = opus_packet_get_nb_frames(data[nb_packets], len[nb_packets]); + * if (nb_frames < 1) + * { + * release_packets(data, nb_packets+1); + * return nb_frames; + * } + * nb_frames += opus_repacketizer_get_nb_frames(rp); + * // If adding the next packet would exceed our target, or it has an + * // incompatible TOC sequence, output the packets we already have before + * // submitting it. + * // N.B., The nb_packets > 0 check ensures we've submitted at least one + * // packet since the last call to opus_repacketizer_init(). Otherwise a + * // single packet longer than TARGET_DURATION_MS would cause us to try to + * // output an (invalid) empty packet. It also ensures that prev_toc has + * // been set to a valid value. Additionally, len[nb_packets] > 0 is + * // guaranteed by the call to opus_packet_get_nb_frames() above, so the + * // reference to data[nb_packets][0] should be valid. 
+ * if (nb_packets > 0 && ( + * ((prev_toc & 0xFC) != (data[nb_packets][0] & 0xFC)) || + * opus_packet_get_samples_per_frame(data[nb_packets], 48000)*nb_frames > + * TARGET_DURATION_MS*48)) + * { + * out_len = opus_repacketizer_out(rp, out, sizeof(out)); + * if (out_len < 0) + * { + * release_packets(data, nb_packets+1); + * return (int)out_len; + * } + * output_next_packet(out, out_len); + * opus_repacketizer_init(rp); + * release_packets(data, nb_packets); + * data[0] = data[nb_packets]; + * len[0] = len[nb_packets]; + * nb_packets = 0; + * } + * err = opus_repacketizer_cat(rp, data[nb_packets], len[nb_packets]); + * if (err != OPUS_OK) + * { + * release_packets(data, nb_packets+1); + * return err; + * } + * prev_toc = data[nb_packets][0]; + * nb_packets++; + * } + * // Output the final, partial packet. + * if (nb_packets > 0) + * { + * out_len = opus_repacketizer_out(rp, out, sizeof(out)); + * release_packets(data, nb_packets); + * if (out_len < 0) + * return (int)out_len; + * output_next_packet(out, out_len); + * } + * @endcode + * + * An alternate way of merging packets is to simply call opus_repacketizer_cat() + * unconditionally until it fails. At that point, the merged packet can be + * obtained with opus_repacketizer_out() and the input packet for which + * opus_repacketizer_cat() failed needs to be re-added to a newly reinitialized + * repacketizer state. + */ + +typedef struct OpusRepacketizer OpusRepacketizer; + +/** Gets the size of an <code>OpusRepacketizer</code> structure. + * @returns The size in bytes. + */ +OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_repacketizer_get_size(void); + +/** (Re)initializes a previously allocated repacketizer state. + * The state must be at least the size returned by opus_repacketizer_get_size(). + * This can be used for applications which use their own allocator instead of + * malloc(). 
+ * It must also be called to reset the queue of packets waiting to be + * repacketized, which is necessary if the maximum packet duration of 120 ms + * is reached or if you wish to submit packets with a different Opus + * configuration (coding mode, audio bandwidth, frame size, or channel count). + * Failure to do so will prevent a new packet from being added with + * opus_repacketizer_cat(). + * @see opus_repacketizer_create + * @see opus_repacketizer_get_size + * @see opus_repacketizer_cat + * @param rp <tt>OpusRepacketizer*</tt>: The repacketizer state to + * (re)initialize. + * @returns A pointer to the same repacketizer state that was passed in. + */ +OPUS_EXPORT OpusRepacketizer *opus_repacketizer_init(OpusRepacketizer *rp) OPUS_ARG_NONNULL(1); + +/** Allocates memory and initializes the new repacketizer with + * opus_repacketizer_init(). + */ +OPUS_EXPORT OPUS_WARN_UNUSED_RESULT OpusRepacketizer *opus_repacketizer_create(void); + +/** Frees an <code>OpusRepacketizer</code> allocated by + * opus_repacketizer_create(). + * @param[in] rp <tt>OpusRepacketizer*</tt>: State to be freed. + */ +OPUS_EXPORT void opus_repacketizer_destroy(OpusRepacketizer *rp); + +/** Add a packet to the current repacketizer state. + * This packet must match the configuration of any packets already submitted + * for repacketization since the last call to opus_repacketizer_init(). + * This means that it must have the same coding mode, audio bandwidth, frame + * size, and channel count. + * This can be checked in advance by examining the top 6 bits of the first + * byte of the packet, and ensuring they match the top 6 bits of the first + * byte of any previously submitted packet. + * The total duration of audio in the repacketizer state also must not exceed + * 120 ms, the maximum duration of a single packet, after adding this packet. 
+ * + * The contents of the current repacketizer state can be extracted into new + * packets using opus_repacketizer_out() or opus_repacketizer_out_range(). + * + * In order to add a packet with a different configuration or to add more + * audio beyond 120 ms, you must clear the repacketizer state by calling + * opus_repacketizer_init(). + * If a packet is too large to add to the current repacketizer state, no part + * of it is added, even if it contains multiple frames, some of which might + * fit. + * If you wish to be able to add parts of such packets, you should first use + * another repacketizer to split the packet into pieces and add them + * individually. + * @see opus_repacketizer_out_range + * @see opus_repacketizer_out + * @see opus_repacketizer_init + * @param rp <tt>OpusRepacketizer*</tt>: The repacketizer state to which to + * add the packet. + * @param[in] data <tt>const unsigned char*</tt>: The packet data. + * The application must ensure + * this pointer remains valid + * until the next call to + * opus_repacketizer_init() or + * opus_repacketizer_destroy(). + * @param len <tt>opus_int32</tt>: The number of bytes in the packet data. + * @returns An error code indicating whether or not the operation succeeded. + * @retval #OPUS_OK The packet's contents have been added to the repacketizer + * state. + * @retval #OPUS_INVALID_PACKET The packet did not have a valid TOC sequence, + * the packet's TOC sequence was not compatible + * with previously submitted packets (because + * the coding mode, audio bandwidth, frame size, + * or channel count did not match), or adding + * this packet would increase the total amount of + * audio stored in the repacketizer state to more + * than 120 ms. + */ +OPUS_EXPORT int opus_repacketizer_cat(OpusRepacketizer *rp, const unsigned char *data, opus_int32 len) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(2); + + +/** Construct a new packet from data previously submitted to the repacketizer + * state via opus_repacketizer_cat(). 
+ * @param rp <tt>OpusRepacketizer*</tt>: The repacketizer state from which to + * construct the new packet. + * @param begin <tt>int</tt>: The index of the first frame in the current + * repacketizer state to include in the output. + * @param end <tt>int</tt>: One past the index of the last frame in the + * current repacketizer state to include in the + * output. + * @param[out] data <tt>const unsigned char*</tt>: The buffer in which to + * store the output packet. + * @param maxlen <tt>opus_int32</tt>: The maximum number of bytes to store in + * the output buffer. In order to guarantee + * success, this should be at least + * <code>1276</code> for a single frame, + * or for multiple frames, + * <code>1277*(end-begin)</code>. + * However, <code>1*(end-begin)</code> plus + * the size of all packet data submitted to + * the repacketizer since the last call to + * opus_repacketizer_init() or + * opus_repacketizer_create() is also + * sufficient, and possibly much smaller. + * @returns The total size of the output packet on success, or an error code + * on failure. + * @retval #OPUS_BAD_ARG <code>[begin,end)</code> was an invalid range of + * frames (begin < 0, begin >= end, or end > + * opus_repacketizer_get_nb_frames()). + * @retval #OPUS_BUFFER_TOO_SMALL \a maxlen was insufficient to contain the + * complete output packet. + */ +OPUS_EXPORT OPUS_WARN_UNUSED_RESULT opus_int32 opus_repacketizer_out_range(OpusRepacketizer *rp, int begin, int end, unsigned char *data, opus_int32 maxlen) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(4); + +/** Return the total number of frames contained in packet data submitted to + * the repacketizer state so far via opus_repacketizer_cat() since the last + * call to opus_repacketizer_init() or opus_repacketizer_create(). + * This defines the valid range of packets that can be extracted with + * opus_repacketizer_out_range() or opus_repacketizer_out(). + * @param rp <tt>OpusRepacketizer*</tt>: The repacketizer state containing the + * frames. 
+ * @returns The total number of frames contained in the packet data submitted + * to the repacketizer state. + */ +OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_repacketizer_get_nb_frames(OpusRepacketizer *rp) OPUS_ARG_NONNULL(1); + +/** Construct a new packet from data previously submitted to the repacketizer + * state via opus_repacketizer_cat(). + * This is a convenience routine that returns all the data submitted so far + * in a single packet. + * It is equivalent to calling + * @code + * opus_repacketizer_out_range(rp, 0, opus_repacketizer_get_nb_frames(rp), + * data, maxlen) + * @endcode + * @param rp <tt>OpusRepacketizer*</tt>: The repacketizer state from which to + * construct the new packet. + * @param[out] data <tt>const unsigned char*</tt>: The buffer in which to + * store the output packet. + * @param maxlen <tt>opus_int32</tt>: The maximum number of bytes to store in + * the output buffer. In order to guarantee + * success, this should be at least + * <code>1277*opus_repacketizer_get_nb_frames(rp)</code>. + * However, + * <code>1*opus_repacketizer_get_nb_frames(rp)</code> + * plus the size of all packet data + * submitted to the repacketizer since the + * last call to opus_repacketizer_init() or + * opus_repacketizer_create() is also + * sufficient, and possibly much smaller. + * @returns The total size of the output packet on success, or an error code + * on failure. + * @retval #OPUS_BUFFER_TOO_SMALL \a maxlen was insufficient to contain the + * complete output packet. + */ +OPUS_EXPORT OPUS_WARN_UNUSED_RESULT opus_int32 opus_repacketizer_out(OpusRepacketizer *rp, unsigned char *data, opus_int32 maxlen) OPUS_ARG_NONNULL(1); + +/** Pads a given Opus packet to a larger size (possibly changing the TOC sequence). + * @param[in,out] data <tt>const unsigned char*</tt>: The buffer containing the + * packet to pad. + * @param len <tt>opus_int32</tt>: The size of the packet. + * This must be at least 1. 
+ * @param new_len <tt>opus_int32</tt>: The desired size of the packet after padding. + * This must be at least as large as len. + * @returns an error code + * @retval #OPUS_OK \a on success. + * @retval #OPUS_BAD_ARG \a len was less than 1 or new_len was less than len. + * @retval #OPUS_INVALID_PACKET \a data did not contain a valid Opus packet. + */ +OPUS_EXPORT int opus_packet_pad(unsigned char *data, opus_int32 len, opus_int32 new_len); + +/** Remove all padding from a given Opus packet and rewrite the TOC sequence to + * minimize space usage. + * @param[in,out] data <tt>const unsigned char*</tt>: The buffer containing the + * packet to strip. + * @param len <tt>opus_int32</tt>: The size of the packet. + * This must be at least 1. + * @returns The new size of the output packet on success, or an error code + * on failure. + * @retval #OPUS_BAD_ARG \a len was less than 1. + * @retval #OPUS_INVALID_PACKET \a data did not contain a valid Opus packet. + */ +OPUS_EXPORT OPUS_WARN_UNUSED_RESULT opus_int32 opus_packet_unpad(unsigned char *data, opus_int32 len); + +/** Pads a given Opus multi-stream packet to a larger size (possibly changing the TOC sequence). + * @param[in,out] data <tt>const unsigned char*</tt>: The buffer containing the + * packet to pad. + * @param len <tt>opus_int32</tt>: The size of the packet. + * This must be at least 1. + * @param new_len <tt>opus_int32</tt>: The desired size of the packet after padding. + * This must be at least as large as len. + * @param nb_streams <tt>int</tt>: The number of streams (not channels) in the packet. + * This must be at least 1. + * @returns an error code + * @retval #OPUS_OK \a on success. + * @retval #OPUS_BAD_ARG \a len was less than 1 or new_len was less than len. + * @retval #OPUS_INVALID_PACKET \a data did not contain a valid Opus packet. 
+ */ +OPUS_EXPORT int opus_multistream_packet_pad(unsigned char *data, opus_int32 len, opus_int32 new_len, int nb_streams); + +/** Remove all padding from a given Opus multi-stream packet and rewrite the TOC sequence to + * minimize space usage. + * @param[in,out] data <tt>const unsigned char*</tt>: The buffer containing the + * packet to strip. + * @param len <tt>opus_int32</tt>: The size of the packet. + * This must be at least 1. + * @param nb_streams <tt>int</tt>: The number of streams (not channels) in the packet. + * This must be at least 1. + * @returns The new size of the output packet on success, or an error code + * on failure. + * @retval #OPUS_BAD_ARG \a len was less than 1. + * @retval #OPUS_INVALID_PACKET \a data did not contain a valid Opus packet. + */ +OPUS_EXPORT OPUS_WARN_UNUSED_RESULT opus_int32 opus_multistream_packet_unpad(unsigned char *data, opus_int32 len, int nb_streams); + +/**@}*/ + +#ifdef __cplusplus +} +#endif + +#endif /* OPUS_H */ diff --git a/drivers/opus/opus_compare.c b/drivers/opus/opus_compare.c new file mode 100644 index 0000000000..06c67d752f --- /dev/null +++ b/drivers/opus/opus_compare.c @@ -0,0 +1,379 @@ +/* Copyright (c) 2011-2012 Xiph.Org Foundation, Mozilla Corporation + Written by Jean-Marc Valin and Timothy B. Terriberry */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#include <stdio.h> +#include <stdlib.h> +#include <math.h> +#include <string.h> + +#define OPUS_PI (3.14159265F) + +#define OPUS_COSF(_x) ((float)cos(_x)) +#define OPUS_SINF(_x) ((float)sin(_x)) + +static void *check_alloc(void *_ptr){ + if(_ptr==NULL){ + fprintf(stderr,"Out of memory.\n"); + exit(EXIT_FAILURE); + } + return _ptr; +} + +static void *opus_malloc(size_t _size){ + return check_alloc(malloc(_size)); +} + +static void *opus_realloc(void *_ptr,size_t _size){ + return check_alloc(realloc(_ptr,_size)); +} + +static size_t read_pcm16(float **_samples,FILE *_fin,int _nchannels){ + unsigned char buf[1024]; + float *samples; + size_t nsamples; + size_t csamples; + size_t xi; + size_t nread; + samples=NULL; + nsamples=csamples=0; + for(;;){ + nread=fread(buf,2*_nchannels,1024/(2*_nchannels),_fin); + if(nread<=0)break; + if(nsamples+nread>csamples){ + do csamples=csamples<<1|1; + while(nsamples+nread>csamples); + samples=(float *)opus_realloc(samples, + _nchannels*csamples*sizeof(*samples)); + } + for(xi=0;xi<nread;xi++){ + int ci; + for(ci=0;ci<_nchannels;ci++){ + int s; + s=buf[2*(xi*_nchannels+ci)+1]<<8|buf[2*(xi*_nchannels+ci)]; + s=((s&0xFFFF)^0x8000)-0x8000; + 
samples[(nsamples+xi)*_nchannels+ci]=s; + } + } + nsamples+=nread; + } + *_samples=(float *)opus_realloc(samples, + _nchannels*nsamples*sizeof(*samples)); + return nsamples; +} + +static void band_energy(float *_out,float *_ps,const int *_bands,int _nbands, + const float *_in,int _nchannels,size_t _nframes,int _window_sz, + int _step,int _downsample){ + float *window; + float *x; + float *c; + float *s; + size_t xi; + int xj; + int ps_sz; + window=(float *)opus_malloc((3+_nchannels)*_window_sz*sizeof(*window)); + c=window+_window_sz; + s=c+_window_sz; + x=s+_window_sz; + ps_sz=_window_sz/2; + for(xj=0;xj<_window_sz;xj++){ + window[xj]=0.5F-0.5F*OPUS_COSF((2*OPUS_PI/(_window_sz-1))*xj); + } + for(xj=0;xj<_window_sz;xj++){ + c[xj]=OPUS_COSF((2*OPUS_PI/_window_sz)*xj); + } + for(xj=0;xj<_window_sz;xj++){ + s[xj]=OPUS_SINF((2*OPUS_PI/_window_sz)*xj); + } + for(xi=0;xi<_nframes;xi++){ + int ci; + int xk; + int bi; + for(ci=0;ci<_nchannels;ci++){ + for(xk=0;xk<_window_sz;xk++){ + x[ci*_window_sz+xk]=window[xk]*_in[(xi*_step+xk)*_nchannels+ci]; + } + } + for(bi=xj=0;bi<_nbands;bi++){ + float p[2]={0}; + for(;xj<_bands[bi+1];xj++){ + for(ci=0;ci<_nchannels;ci++){ + float re; + float im; + int ti; + ti=0; + re=im=0; + for(xk=0;xk<_window_sz;xk++){ + re+=c[ti]*x[ci*_window_sz+xk]; + im-=s[ti]*x[ci*_window_sz+xk]; + ti+=xj; + if(ti>=_window_sz)ti-=_window_sz; + } + re*=_downsample; + im*=_downsample; + _ps[(xi*ps_sz+xj)*_nchannels+ci]=re*re+im*im+100000; + p[ci]+=_ps[(xi*ps_sz+xj)*_nchannels+ci]; + } + } + if(_out){ + _out[(xi*_nbands+bi)*_nchannels]=p[0]/(_bands[bi+1]-_bands[bi]); + if(_nchannels==2){ + _out[(xi*_nbands+bi)*_nchannels+1]=p[1]/(_bands[bi+1]-_bands[bi]); + } + } + } + } + free(window); +} + +#define NBANDS (21) +#define NFREQS (240) + +/*Bands on which we compute the pseudo-NMR (Bark-derived + CELT bands).*/ +static const int BANDS[NBANDS+1]={ + 0,2,4,6,8,10,12,14,16,20,24,28,32,40,48,56,68,80,96,120,156,200 +}; + +#define TEST_WIN_SIZE (480) +#define 
TEST_WIN_STEP (120) + +int main(int _argc,const char **_argv){ + FILE *fin1; + FILE *fin2; + float *x; + float *y; + float *xb; + float *X; + float *Y; + double err; + float Q; + size_t xlength; + size_t ylength; + size_t nframes; + size_t xi; + int ci; + int xj; + int bi; + int nchannels; + unsigned rate; + int downsample; + int ybands; + int yfreqs; + int max_compare; + if(_argc<3||_argc>6){ + fprintf(stderr,"Usage: %s [-s] [-r rate2] <file1.sw> <file2.sw>\n", + _argv[0]); + return EXIT_FAILURE; + } + nchannels=1; + if(strcmp(_argv[1],"-s")==0){ + nchannels=2; + _argv++; + } + rate=48000; + ybands=NBANDS; + yfreqs=NFREQS; + downsample=1; + if(strcmp(_argv[1],"-r")==0){ + rate=atoi(_argv[2]); + if(rate!=8000&&rate!=12000&&rate!=16000&&rate!=24000&&rate!=48000){ + fprintf(stderr, + "Sampling rate must be 8000, 12000, 16000, 24000, or 48000\n"); + return EXIT_FAILURE; + } + downsample=48000/rate; + switch(rate){ + case 8000:ybands=13;break; + case 12000:ybands=15;break; + case 16000:ybands=17;break; + case 24000:ybands=19;break; + } + yfreqs=NFREQS/downsample; + _argv+=2; + } + fin1=fopen(_argv[1],"rb"); + if(fin1==NULL){ + fprintf(stderr,"Error opening '%s'.\n",_argv[1]); + return EXIT_FAILURE; + } + fin2=fopen(_argv[2],"rb"); + if(fin2==NULL){ + fprintf(stderr,"Error opening '%s'.\n",_argv[2]); + fclose(fin1); + return EXIT_FAILURE; + } + /*Read in the data and allocate scratch space.*/ + xlength=read_pcm16(&x,fin1,2); + if(nchannels==1){ + for(xi=0;xi<xlength;xi++)x[xi]=.5*(x[2*xi]+x[2*xi+1]); + } + fclose(fin1); + ylength=read_pcm16(&y,fin2,nchannels); + fclose(fin2); + if(xlength!=ylength*downsample){ + fprintf(stderr,"Sample counts do not match (%lu!=%lu).\n", + (unsigned long)xlength,(unsigned long)ylength*downsample); + return EXIT_FAILURE; + } + if(xlength<TEST_WIN_SIZE){ + fprintf(stderr,"Insufficient sample data (%lu<%i).\n", + (unsigned long)xlength,TEST_WIN_SIZE); + return EXIT_FAILURE; + } + nframes=(xlength-TEST_WIN_SIZE+TEST_WIN_STEP)/TEST_WIN_STEP; 
+ xb=(float *)opus_malloc(nframes*NBANDS*nchannels*sizeof(*xb)); + X=(float *)opus_malloc(nframes*NFREQS*nchannels*sizeof(*X)); + Y=(float *)opus_malloc(nframes*yfreqs*nchannels*sizeof(*Y)); + /*Compute the per-band spectral energy of the original signal + and the error.*/ + band_energy(xb,X,BANDS,NBANDS,x,nchannels,nframes, + TEST_WIN_SIZE,TEST_WIN_STEP,1); + free(x); + band_energy(NULL,Y,BANDS,ybands,y,nchannels,nframes, + TEST_WIN_SIZE/downsample,TEST_WIN_STEP/downsample,downsample); + free(y); + for(xi=0;xi<nframes;xi++){ + /*Frequency masking (low to high): 10 dB/Bark slope.*/ + for(bi=1;bi<NBANDS;bi++){ + for(ci=0;ci<nchannels;ci++){ + xb[(xi*NBANDS+bi)*nchannels+ci]+= + 0.1F*xb[(xi*NBANDS+bi-1)*nchannels+ci]; + } + } + /*Frequency masking (high to low): 15 dB/Bark slope.*/ + for(bi=NBANDS-1;bi-->0;){ + for(ci=0;ci<nchannels;ci++){ + xb[(xi*NBANDS+bi)*nchannels+ci]+= + 0.03F*xb[(xi*NBANDS+bi+1)*nchannels+ci]; + } + } + if(xi>0){ + /*Temporal masking: -3 dB/2.5ms slope.*/ + for(bi=0;bi<NBANDS;bi++){ + for(ci=0;ci<nchannels;ci++){ + xb[(xi*NBANDS+bi)*nchannels+ci]+= + 0.5F*xb[((xi-1)*NBANDS+bi)*nchannels+ci]; + } + } + } + /* Allowing some cross-talk */ + if(nchannels==2){ + for(bi=0;bi<NBANDS;bi++){ + float l,r; + l=xb[(xi*NBANDS+bi)*nchannels+0]; + r=xb[(xi*NBANDS+bi)*nchannels+1]; + xb[(xi*NBANDS+bi)*nchannels+0]+=0.01F*r; + xb[(xi*NBANDS+bi)*nchannels+1]+=0.01F*l; + } + } + + /* Apply masking */ + for(bi=0;bi<ybands;bi++){ + for(xj=BANDS[bi];xj<BANDS[bi+1];xj++){ + for(ci=0;ci<nchannels;ci++){ + X[(xi*NFREQS+xj)*nchannels+ci]+= + 0.1F*xb[(xi*NBANDS+bi)*nchannels+ci]; + Y[(xi*yfreqs+xj)*nchannels+ci]+= + 0.1F*xb[(xi*NBANDS+bi)*nchannels+ci]; + } + } + } + } + + /* Average of consecutive frames to make comparison slightly less sensitive */ + for(bi=0;bi<ybands;bi++){ + for(xj=BANDS[bi];xj<BANDS[bi+1];xj++){ + for(ci=0;ci<nchannels;ci++){ + float xtmp; + float ytmp; + xtmp = X[xj*nchannels+ci]; + ytmp = Y[xj*nchannels+ci]; + for(xi=1;xi<nframes;xi++){ + float 
xtmp2; + float ytmp2; + xtmp2 = X[(xi*NFREQS+xj)*nchannels+ci]; + ytmp2 = Y[(xi*yfreqs+xj)*nchannels+ci]; + X[(xi*NFREQS+xj)*nchannels+ci] += xtmp; + Y[(xi*yfreqs+xj)*nchannels+ci] += ytmp; + xtmp = xtmp2; + ytmp = ytmp2; + } + } + } + } + + /*If working at a lower sampling rate, don't take into account the last + 300 Hz to allow for different transition bands. + For 12 kHz, we don't skip anything, because the last band already skips + 400 Hz.*/ + if(rate==48000)max_compare=BANDS[NBANDS]; + else if(rate==12000)max_compare=BANDS[ybands]; + else max_compare=BANDS[ybands]-3; + err=0; + for(xi=0;xi<nframes;xi++){ + double Ef; + Ef=0; + for(bi=0;bi<ybands;bi++){ + double Eb; + Eb=0; + for(xj=BANDS[bi];xj<BANDS[bi+1]&&xj<max_compare;xj++){ + for(ci=0;ci<nchannels;ci++){ + float re; + float im; + re=Y[(xi*yfreqs+xj)*nchannels+ci]/X[(xi*NFREQS+xj)*nchannels+ci]; + im=re-log(re)-1; + /*Make comparison less sensitive around the SILK/CELT cross-over to + allow for mode freedom in the filters.*/ + if(xj>=79&&xj<=81)im*=0.1F; + if(xj==80)im*=0.1F; + Eb+=im; + } + } + Eb /= (BANDS[bi+1]-BANDS[bi])*nchannels; + Ef += Eb*Eb; + } + /*Using a fixed normalization value means we're willing to accept slightly + lower quality for lower sampling rates.*/ + Ef/=NBANDS; + Ef*=Ef; + err+=Ef*Ef; + } + err=pow(err/nframes,1.0/16); + Q=100*(1-0.5*log(1+err)/log(1.13)); + if(Q<0){ + fprintf(stderr,"Test vector FAILS\n"); + fprintf(stderr,"Internal weighted error is %f\n",err); + return EXIT_FAILURE; + } + else{ + fprintf(stderr,"Test vector PASSES\n"); + fprintf(stderr, + "Opus quality metric: %.1f %% (internal weighted error is %f)\n",Q,err); + return EXIT_SUCCESS; + } +} diff --git a/drivers/opus/opus_config.h b/drivers/opus/opus_config.h new file mode 100644 index 0000000000..c6470e92c3 --- /dev/null +++ b/drivers/opus/opus_config.h @@ -0,0 +1,121 @@ +/* Opus configuration header */ +/* Based on the output of libopus configure script */ + +/* Define to 1 if you have the <dlfcn.h> header 
file. */ +#define HAVE_DLFCN_H 1 + +/* Define to 1 if you have the <inttypes.h> header file. */ +#define HAVE_INTTYPES_H 1 + +/* Define to 1 if you have the `lrint' function. */ +#define HAVE_LRINT 1 + +/* Define to 1 if you have the `lrintf' function. */ +#define HAVE_LRINTF 1 + +/* Define to 1 if you have the <memory.h> header file. */ +#define HAVE_MEMORY_H 1 + +/* Define to 1 if you have the <stdint.h> header file. */ +#define HAVE_STDINT_H 1 + +/* Define to 1 if you have the <stdlib.h> header file. */ +#define HAVE_STDLIB_H 1 + +/* Define to 1 if you have the <strings.h> header file. */ +#define HAVE_STRINGS_H 1 + +/* Define to 1 if you have the <string.h> header file. */ +#define HAVE_STRING_H 1 + +/* Define to 1 if you have the <sys/stat.h> header file. */ +#define HAVE_SYS_STAT_H 1 + +/* Define to 1 if you have the <sys/types.h> header file. */ +#define HAVE_SYS_TYPES_H 1 + +/* Define to 1 if you have the <unistd.h> header file. */ +#define HAVE_UNISTD_H 1 + +/* Define to the sub-directory in which libtool stores uninstalled libraries. 
+ */ +#define LT_OBJDIR ".libs/" + +#ifdef OPUS_ARM_OPT +/* Make use of ARM asm optimization */ +#define OPUS_ARM_ASM 1 + +/* Use generic ARMv4 inline asm optimizations */ +#define OPUS_ARM_INLINE_ASM 1 + +/* Use ARMv5E inline asm optimizations */ +#define OPUS_ARM_INLINE_EDSP 1 + +/* Use ARMv6 inline asm optimizations */ +#define OPUS_ARM_INLINE_MEDIA 1 + +/* Use ARM NEON inline asm optimizations */ +#define OPUS_ARM_INLINE_NEON 1 + +/* Define if assembler supports EDSP instructions */ +#define OPUS_ARM_MAY_HAVE_EDSP 1 + +/* Define if assembler supports ARMv6 media instructions */ +#define OPUS_ARM_MAY_HAVE_MEDIA 1 + +/* Define if compiler supports NEON instructions */ +#define OPUS_ARM_MAY_HAVE_NEON 1 +#endif // OPUS_ARM_OPT + +#ifdef OPUS_ARM64_OPT +/* Make use of ARM asm optimization */ +#define OPUS_ARM_ASM 1 + +/* Use ARMv6 inline asm optimizations */ +#define OPUS_ARM_INLINE_MEDIA 1 // work + +/* Use ARM NEON inline asm optimizations */ +#define OPUS_ARM_INLINE_NEON 1 // work + +/* Define if assembler supports EDSP instructions */ +#define OPUS_ARM_MAY_HAVE_EDSP 1 // work + +/* Define if assembler supports ARMv6 media instructions */ +#define OPUS_ARM_MAY_HAVE_MEDIA 1 // work + +/* Define if compiler supports NEON instructions */ +#define OPUS_ARM_MAY_HAVE_NEON 1 + +#endif // OPUS_ARM64_OPT + +/* This is a build of OPUS */ +#define OPUS_BUILD /**/ + +#ifndef WIN32 + /* Use C99 variable-size arrays */ + #define VAR_ARRAYS 1 +#else + /* Fixes VS 2013 compile error */ + #define USE_ALLOCA 1 +#endif + + +/* Define to `__inline__' or `__inline' if that's what the C compiler + calls it, or to nothing if 'inline' is not supported under any name. */ +#ifndef __cplusplus +/* #undef inline */ +#endif + +/* Define to the equivalent of the C99 'restrict' keyword, or to + nothing if this is not supported. Do not define if restrict is + supported directly. 
*/ +#define restrict __restrict +/* Work around a bug in Sun C++: it does not support _Restrict or + __restrict__, even though the corresponding Sun C compiler ends up with + "#define restrict _Restrict" or "#define restrict __restrict__" in the + previous line. Perhaps some future version of Sun C++ will work with + restrict; if so, hopefully it defines __RESTRICT like Sun C does. */ +#if defined __SUNPRO_CC && !defined __RESTRICT +# define _Restrict +# define __restrict__ +#endif diff --git a/drivers/opus/opus_custom.h b/drivers/opus/opus_custom.h new file mode 100644 index 0000000000..41f36bf2fb --- /dev/null +++ b/drivers/opus/opus_custom.h @@ -0,0 +1,342 @@ +/* Copyright (c) 2007-2008 CSIRO + Copyright (c) 2007-2009 Xiph.Org Foundation + Copyright (c) 2008-2012 Gregory Maxwell + Written by Jean-Marc Valin and Gregory Maxwell */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/** + @file opus_custom.h + @brief Opus-Custom reference implementation API + */ + +#ifndef OPUS_CUSTOM_H +#define OPUS_CUSTOM_H + +#include "opus_defines.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef CUSTOM_MODES +# define OPUS_CUSTOM_EXPORT OPUS_EXPORT +# define OPUS_CUSTOM_EXPORT_STATIC OPUS_EXPORT +#else +# define OPUS_CUSTOM_EXPORT +# ifdef OPUS_BUILD +# define OPUS_CUSTOM_EXPORT_STATIC static OPUS_INLINE +# else +# define OPUS_CUSTOM_EXPORT_STATIC +# endif +#endif + +/** @defgroup opus_custom Opus Custom + * @{ + * Opus Custom is an optional part of the Opus specification and + * reference implementation which uses a distinct API from the regular + * API and supports frame sizes that are not normally supported.\ Use + * of Opus Custom is discouraged for all but very special applications + * for which a frame size different from 2.5, 5, 10, or 20 ms is needed + * (for either complexity or latency reasons) and where interoperability + * is less important. + * + * In addition to the interoperability limitations the use of Opus custom + * disables a substantial chunk of the codec and generally lowers the + * quality available at a given bitrate. Normally when an application needs + * a different frame size from the codec it should buffer to match the + * sizes but this adds a small amount of delay which may be important + * in some very low latency applications. 
Some transports (especially + * constant rate RF transports) may also work best with frames of + * particular durations. + * + * Libopus only supports custom modes if they are enabled at compile time. + * + * The Opus Custom API is similar to the regular API but the + * @ref opus_encoder_create and @ref opus_decoder_create calls take + * an additional mode parameter which is a structure produced by + * a call to @ref opus_custom_mode_create. Both the encoder and decoder + * must create a mode using the same sample rate (fs) and frame size + * (frame size) so these parameters must either be signaled out of band + * or fixed in a particular implementation. + * + * Similar to regular Opus the custom modes support on the fly frame size + * switching, but the sizes available depend on the particular frame size in + * use. For some initial frame sizes only a single on the fly size is available. + */ + +/** Contains the state of an encoder. One encoder state is needed + for each stream. It is initialized once at the beginning of the + stream. Do *not* re-initialize the state for every frame. + @brief Encoder state + */ +typedef struct OpusCustomEncoder OpusCustomEncoder; + +/** State of the decoder. One decoder state is needed for each stream. + It is initialized once at the beginning of the stream. Do *not* + re-initialize the state for every frame. + @brief Decoder state + */ +typedef struct OpusCustomDecoder OpusCustomDecoder; + +/** The mode contains all the information necessary to create an + encoder. Both the encoder and decoder need to be initialized + with exactly the same mode, otherwise the output will be + corrupted. + @brief Mode configuration + */ +typedef struct OpusCustomMode OpusCustomMode; + +/** Creates a new mode struct. This will be passed to an encoder or + * decoder. The mode MUST NOT BE DESTROYED until the encoders and + * decoders that use it are destroyed as well. 
+ * @param [in] Fs <tt>int</tt>: Sampling rate (8000 to 96000 Hz) + * @param [in] frame_size <tt>int</tt>: Number of samples (per channel) to encode in each + * packet (64 - 1024, prime factorization must contain zero or more 2s, 3s, or 5s and no other primes) + * @param [out] error <tt>int*</tt>: Returned error code (if NULL, no error will be returned) + * @return A newly created mode + */ +OPUS_CUSTOM_EXPORT OPUS_WARN_UNUSED_RESULT OpusCustomMode *opus_custom_mode_create(opus_int32 Fs, int frame_size, int *error); + +/** Destroys a mode struct. Only call this after all encoders and + * decoders using this mode are destroyed as well. + * @param [in] mode <tt>OpusCustomMode*</tt>: Mode to be freed. + */ +OPUS_CUSTOM_EXPORT void opus_custom_mode_destroy(OpusCustomMode *mode); + + +#if !defined(OPUS_BUILD) || defined(CELT_ENCODER_C) + +/* Encoder */ +/** Gets the size of an OpusCustomEncoder structure. + * @param [in] mode <tt>OpusCustomMode *</tt>: Mode configuration + * @param [in] channels <tt>int</tt>: Number of channels + * @returns size + */ +OPUS_CUSTOM_EXPORT_STATIC OPUS_WARN_UNUSED_RESULT int opus_custom_encoder_get_size( + const OpusCustomMode *mode, + int channels +) OPUS_ARG_NONNULL(1); + +# ifdef CUSTOM_MODES +/** Initializes a previously allocated encoder state + * The memory pointed to by st must be the size returned by opus_custom_encoder_get_size. + * This is intended for applications which use their own allocator instead of malloc. + * @see opus_custom_encoder_create(),opus_custom_encoder_get_size() + * To reset a previously initialized state use the OPUS_RESET_STATE CTL. 
+ * @param [in] st <tt>OpusCustomEncoder*</tt>: Encoder state + * @param [in] mode <tt>OpusCustomMode *</tt>: Contains all the information about the characteristics of + * the stream (must be the same characteristics as used for the + * decoder) + * @param [in] channels <tt>int</tt>: Number of channels + * @return OPUS_OK Success or @ref opus_errorcodes + */ +OPUS_CUSTOM_EXPORT int opus_custom_encoder_init( + OpusCustomEncoder *st, + const OpusCustomMode *mode, + int channels +) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(2); +# endif +#endif + + +/** Creates a new encoder state. Each stream needs its own encoder + * state (can't be shared across simultaneous streams). + * @param [in] mode <tt>OpusCustomMode*</tt>: Contains all the information about the characteristics of + * the stream (must be the same characteristics as used for the + * decoder) + * @param [in] channels <tt>int</tt>: Number of channels + * @param [out] error <tt>int*</tt>: Returns an error code + * @return Newly created encoder state. +*/ +OPUS_CUSTOM_EXPORT OPUS_WARN_UNUSED_RESULT OpusCustomEncoder *opus_custom_encoder_create( + const OpusCustomMode *mode, + int channels, + int *error +) OPUS_ARG_NONNULL(1); + + +/** Destroys an encoder state. + * @param[in] st <tt>OpusCustomEncoder*</tt>: State to be freed. + */ +OPUS_CUSTOM_EXPORT void opus_custom_encoder_destroy(OpusCustomEncoder *st); + +/** Encodes a frame of audio. + * @param [in] st <tt>OpusCustomEncoder*</tt>: Encoder state + * @param [in] pcm <tt>float*</tt>: PCM audio in float format, with a normal range of +/-1.0. + * Samples with a range beyond +/-1.0 are supported but will + * be clipped by decoders using the integer API and should + * only be used if it is known that the far end supports + * extended dynamic range. There must be exactly + * frame_size samples per channel. + * @param [in] frame_size <tt>int</tt>: Number of samples per frame of input signal + * @param [out] compressed <tt>char *</tt>: The compressed data is written here. 
This may not alias pcm and must be at least maxCompressedBytes long. + * @param [in] maxCompressedBytes <tt>int</tt>: Maximum number of bytes to use for compressing the frame + * (can change from one frame to another) + * @return Number of bytes written to "compressed". + * If negative, an error has occurred (see error codes). It is IMPORTANT that + * the length returned be somehow transmitted to the decoder. Otherwise, no + * decoding is possible. + */ +OPUS_CUSTOM_EXPORT OPUS_WARN_UNUSED_RESULT int opus_custom_encode_float( + OpusCustomEncoder *st, + const float *pcm, + int frame_size, + unsigned char *compressed, + int maxCompressedBytes +) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(2) OPUS_ARG_NONNULL(4); + +/** Encodes a frame of audio. + * @param [in] st <tt>OpusCustomEncoder*</tt>: Encoder state + * @param [in] pcm <tt>opus_int16*</tt>: PCM audio in signed 16-bit format (native endian). + * There must be exactly frame_size samples per channel. + * @param [in] frame_size <tt>int</tt>: Number of samples per frame of input signal + * @param [out] compressed <tt>char *</tt>: The compressed data is written here. This may not alias pcm and must be at least maxCompressedBytes long. + * @param [in] maxCompressedBytes <tt>int</tt>: Maximum number of bytes to use for compressing the frame + * (can change from one frame to another) + * @return Number of bytes written to "compressed". + * If negative, an error has occurred (see error codes). It is IMPORTANT that + * the length returned be somehow transmitted to the decoder. Otherwise, no + * decoding is possible. + */ +OPUS_CUSTOM_EXPORT OPUS_WARN_UNUSED_RESULT int opus_custom_encode( + OpusCustomEncoder *st, + const opus_int16 *pcm, + int frame_size, + unsigned char *compressed, + int maxCompressedBytes +) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(2) OPUS_ARG_NONNULL(4); + +/** Perform a CTL function on an Opus custom encoder. + * + * Generally the request and subsequent arguments are generated + * by a convenience macro. 
+ * @see opus_encoderctls + */ +OPUS_CUSTOM_EXPORT int opus_custom_encoder_ctl(OpusCustomEncoder * OPUS_RESTRICT st, int request, ...) OPUS_ARG_NONNULL(1); + + +#if !defined(OPUS_BUILD) || defined(CELT_DECODER_C) +/* Decoder */ + +/** Gets the size of an OpusCustomDecoder structure. + * @param [in] mode <tt>OpusCustomMode *</tt>: Mode configuration + * @param [in] channels <tt>int</tt>: Number of channels + * @returns size + */ +OPUS_CUSTOM_EXPORT_STATIC OPUS_WARN_UNUSED_RESULT int opus_custom_decoder_get_size( + const OpusCustomMode *mode, + int channels +) OPUS_ARG_NONNULL(1); + +/** Initializes a previously allocated decoder state + * The memory pointed to by st must be the size returned by opus_custom_decoder_get_size. + * This is intended for applications which use their own allocator instead of malloc. + * @see opus_custom_decoder_create(),opus_custom_decoder_get_size() + * To reset a previously initialized state use the OPUS_RESET_STATE CTL. + * @param [in] st <tt>OpusCustomDecoder*</tt>: Decoder state + * @param [in] mode <tt>OpusCustomMode *</tt>: Contains all the information about the characteristics of + * the stream (must be the same characteristics as used for the + * encoder) + * @param [in] channels <tt>int</tt>: Number of channels + * @return OPUS_OK Success or @ref opus_errorcodes + */ +OPUS_CUSTOM_EXPORT_STATIC int opus_custom_decoder_init( + OpusCustomDecoder *st, + const OpusCustomMode *mode, + int channels +) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(2); + +#endif + + +/** Creates a new decoder state. Each stream needs its own decoder state (can't + * be shared across simultaneous streams). + * @param [in] mode <tt>OpusCustomMode</tt>: Contains all the information about the characteristics of the + * stream (must be the same characteristics as used for the encoder) + * @param [in] channels <tt>int</tt>: Number of channels + * @param [out] error <tt>int*</tt>: Returns an error code + * @return Newly created decoder state. 
+ */ +OPUS_CUSTOM_EXPORT OPUS_WARN_UNUSED_RESULT OpusCustomDecoder *opus_custom_decoder_create( + const OpusCustomMode *mode, + int channels, + int *error +) OPUS_ARG_NONNULL(1); + +/** Destroys a decoder state. + * @param[in] st <tt>OpusCustomDecoder*</tt>: State to be freed. + */ +OPUS_CUSTOM_EXPORT void opus_custom_decoder_destroy(OpusCustomDecoder *st); + +/** Decode an opus custom frame with floating point output + * @param [in] st <tt>OpusCustomDecoder*</tt>: Decoder state + * @param [in] data <tt>char*</tt>: Input payload. Use a NULL pointer to indicate packet loss + * @param [in] len <tt>int</tt>: Number of bytes in payload + * @param [out] pcm <tt>float*</tt>: Output signal (interleaved if 2 channels). length + * is frame_size*channels*sizeof(float) + * @param [in] frame_size Number of samples per channel of available space in *pcm. + * @returns Number of decoded samples or @ref opus_errorcodes + */ +OPUS_CUSTOM_EXPORT OPUS_WARN_UNUSED_RESULT int opus_custom_decode_float( + OpusCustomDecoder *st, + const unsigned char *data, + int len, + float *pcm, + int frame_size +) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(4); + +/** Decode an opus custom frame + * @param [in] st <tt>OpusCustomDecoder*</tt>: Decoder state + * @param [in] data <tt>char*</tt>: Input payload. Use a NULL pointer to indicate packet loss + * @param [in] len <tt>int</tt>: Number of bytes in payload + * @param [out] pcm <tt>opus_int16*</tt>: Output signal (interleaved if 2 channels). length + * is frame_size*channels*sizeof(opus_int16) + * @param [in] frame_size Number of samples per channel of available space in *pcm. + * @returns Number of decoded samples or @ref opus_errorcodes + */ +OPUS_CUSTOM_EXPORT OPUS_WARN_UNUSED_RESULT int opus_custom_decode( + OpusCustomDecoder *st, + const unsigned char *data, + int len, + opus_int16 *pcm, + int frame_size +) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(4); + +/** Perform a CTL function on an Opus custom decoder. 
+ * + * Generally the request and subsequent arguments are generated + * by a convenience macro. + * @see opus_genericctls + */ +OPUS_CUSTOM_EXPORT int opus_custom_decoder_ctl(OpusCustomDecoder * OPUS_RESTRICT st, int request, ...) OPUS_ARG_NONNULL(1); + +/**@}*/ + +#ifdef __cplusplus +} +#endif + +#endif /* OPUS_CUSTOM_H */ diff --git a/drivers/opus/opus_decoder.c b/drivers/opus/opus_decoder.c new file mode 100644 index 0000000000..c5d4cc6aaa --- /dev/null +++ b/drivers/opus/opus_decoder.c @@ -0,0 +1,970 @@ +/* Copyright (c) 2010 Xiph.Org Foundation, Skype Limited + Written by Jean-Marc Valin and Koen Vos */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#ifdef OPUS_HAVE_CONFIG_H +# include "opus_config.h" +#endif + +#ifndef OPUS_BUILD +# error "OPUS_BUILD _MUST_ be defined to build Opus. This probably means you need other defines as well, as in a config.h. See the included build files for details." +#endif + +#if defined(__GNUC__) && (__GNUC__ >= 2) && !defined(__OPTIMIZE__) +# pragma message "You appear to be compiling without optimization, if so opus will be very slow." +#endif + +#include <stdarg.h> +#include "celt.h" +#include "opus.h" +#include "entdec.h" +#include "opus_modes.h" +#include "API.h" +#include "stack_alloc.h" +#include "float_cast.h" +#include "opus_private.h" +#include "os_support.h" +#include "structs.h" +#include "define.h" +#include "mathops.h" +#include "cpu_support.h" + +struct OpusDecoder { + int celt_dec_offset; + int silk_dec_offset; + int channels; + opus_int32 Fs; /** Sampling rate (at the API level) */ + silk_DecControlStruct DecControl; + int decode_gain; + + /* Everything beyond this point gets cleared on a reset */ +#define OPUS_DECODER_RESET_START stream_channels + int stream_channels; + + int bandwidth; + int mode; + int prev_mode; + int frame_size; + int prev_redundancy; + int last_packet_duration; +#ifndef OPUS_FIXED_POINT + opus_val16 softclip_mem[2]; +#endif + + opus_uint32 rangeFinal; +}; + +#ifdef OPUS_FIXED_POINT +static OPUS_INLINE opus_int16 SAT16(opus_int32 x) { + return x > 32767 ? 32767 : x < -32768 ? 
-32768 : (opus_int16)x; +} +#endif + + +int opus_decoder_get_size(int channels) +{ + int silkDecSizeBytes, celtDecSizeBytes; + int ret; + if (channels<1 || channels > 2) + return 0; + ret = silk_Get_Decoder_Size( &silkDecSizeBytes ); + if(ret) + return 0; + silkDecSizeBytes = align(silkDecSizeBytes); + celtDecSizeBytes = celt_decoder_get_size(channels); + return align(sizeof(OpusDecoder))+silkDecSizeBytes+celtDecSizeBytes; +} + +int opus_decoder_init(OpusDecoder *st, opus_int32 Fs, int channels) +{ + void *silk_dec; + CELTDecoder *celt_dec; + int ret, silkDecSizeBytes; + + if ((Fs!=48000&&Fs!=24000&&Fs!=16000&&Fs!=12000&&Fs!=8000) + || (channels!=1&&channels!=2)) + return OPUS_BAD_ARG; + + OPUS_CLEAR((char*)st, opus_decoder_get_size(channels)); + /* Initialize SILK encoder */ + ret = silk_Get_Decoder_Size(&silkDecSizeBytes); + if (ret) + return OPUS_INTERNAL_ERROR; + + silkDecSizeBytes = align(silkDecSizeBytes); + st->silk_dec_offset = align(sizeof(OpusDecoder)); + st->celt_dec_offset = st->silk_dec_offset+silkDecSizeBytes; + silk_dec = (char*)st+st->silk_dec_offset; + celt_dec = (CELTDecoder*)((char*)st+st->celt_dec_offset); + st->stream_channels = st->channels = channels; + + st->Fs = Fs; + st->DecControl.API_sampleRate = st->Fs; + st->DecControl.nChannelsAPI = st->channels; + + /* Reset decoder */ + ret = silk_InitDecoder( silk_dec ); + if(ret)return OPUS_INTERNAL_ERROR; + + /* Initialize CELT decoder */ + ret = celt_decoder_init(celt_dec, Fs, channels); + if(ret!=OPUS_OK)return OPUS_INTERNAL_ERROR; + + celt_decoder_ctl(celt_dec, CELT_SET_SIGNALLING(0)); + + st->prev_mode = 0; + st->frame_size = Fs/400; + return OPUS_OK; +} + +OpusDecoder *opus_decoder_create(opus_int32 Fs, int channels, int *error) +{ + int ret; + OpusDecoder *st; + if ((Fs!=48000&&Fs!=24000&&Fs!=16000&&Fs!=12000&&Fs!=8000) + || (channels!=1&&channels!=2)) + { + if (error) + *error = OPUS_BAD_ARG; + return NULL; + } + st = (OpusDecoder *)opus_alloc(opus_decoder_get_size(channels)); + if (st == 
NULL) + { + if (error) + *error = OPUS_ALLOC_FAIL; + return NULL; + } + ret = opus_decoder_init(st, Fs, channels); + if (error) + *error = ret; + if (ret != OPUS_OK) + { + opus_free(st); + st = NULL; + } + return st; +} + +static void smooth_fade(const opus_val16 *in1, const opus_val16 *in2, + opus_val16 *out, int overlap, int channels, + const opus_val16 *window, opus_int32 Fs) +{ + int i, c; + int inc = 48000/Fs; + for (c=0;c<channels;c++) + { + for (i=0;i<overlap;i++) + { + opus_val16 w = MULT16_16_Q15(window[i*inc], window[i*inc]); + out[i*channels+c] = SHR32(MAC16_16(MULT16_16(w,in2[i*channels+c]), + Q15ONE-w, in1[i*channels+c]), 15); + } + } +} + +static int opus_packet_get_mode(const unsigned char *data) +{ + int mode; + if (data[0]&0x80) + { + mode = MODE_CELT_ONLY; + } else if ((data[0]&0x60) == 0x60) + { + mode = MODE_HYBRID; + } else { + mode = MODE_SILK_ONLY; + } + return mode; +} + +static int opus_decode_frame(OpusDecoder *st, const unsigned char *data, + opus_int32 len, opus_val16 *pcm, int frame_size, int decode_fec) +{ + void *silk_dec; + CELTDecoder *celt_dec; + int i, silk_ret=0, celt_ret=0; + ec_dec dec; + opus_int32 silk_frame_size; + int pcm_silk_size; + VARDECL(opus_int16, pcm_silk); + int pcm_transition_silk_size; + VARDECL(opus_val16, pcm_transition_silk); + int pcm_transition_celt_size; + VARDECL(opus_val16, pcm_transition_celt); + opus_val16 *pcm_transition; + int redundant_audio_size; + VARDECL(opus_val16, redundant_audio); + + int audiosize; + int mode; + int transition=0; + int start_band; + int redundancy=0; + int redundancy_bytes = 0; + int celt_to_silk=0; + int c; + int F2_5, F5, F10, F20; + const opus_val16 *window; + opus_uint32 redundant_rng = 0; + ALLOC_STACK; + + silk_dec = (char*)st+st->silk_dec_offset; + celt_dec = (CELTDecoder*)((char*)st+st->celt_dec_offset); + F20 = st->Fs/50; + F10 = F20>>1; + F5 = F10>>1; + F2_5 = F5>>1; + if (frame_size < F2_5) + { + RESTORE_STACK; + return OPUS_BUFFER_TOO_SMALL; + } + /* Limit 
frame_size to avoid excessive stack allocations. */ + frame_size = IMIN(frame_size, st->Fs/25*3); + /* Payloads of 1 (2 including ToC) or 0 trigger the PLC/DTX */ + if (len<=1) + { + data = NULL; + /* In that case, don't conceal more than what the ToC says */ + frame_size = IMIN(frame_size, st->frame_size); + } + if (data != NULL) + { + audiosize = st->frame_size; + mode = st->mode; + ec_dec_init(&dec,(unsigned char*)data,len); + } else { + audiosize = frame_size; + mode = st->prev_mode; + + if (mode == 0) + { + /* If we haven't got any packet yet, all we can do is return zeros */ + for (i=0;i<audiosize*st->channels;i++) + pcm[i] = 0; + RESTORE_STACK; + return audiosize; + } + + /* Avoids trying to run the PLC on sizes other than 2.5 (CELT), 5 (CELT), + 10, or 20 (e.g. 12.5 or 30 ms). */ + if (audiosize > F20) + { + do { + int ret = opus_decode_frame(st, NULL, 0, pcm, IMIN(audiosize, F20), 0); + if (ret<0) + { + RESTORE_STACK; + return ret; + } + pcm += ret*st->channels; + audiosize -= ret; + } while (audiosize > 0); + RESTORE_STACK; + return frame_size; + } else if (audiosize < F20) + { + if (audiosize > F10) + audiosize = F10; + else if (mode != MODE_SILK_ONLY && audiosize > F5 && audiosize < F10) + audiosize = F5; + } + } + + pcm_transition_silk_size = ALLOC_NONE; + pcm_transition_celt_size = ALLOC_NONE; + if (data!=NULL && st->prev_mode > 0 && ( + (mode == MODE_CELT_ONLY && st->prev_mode != MODE_CELT_ONLY && !st->prev_redundancy) + || (mode != MODE_CELT_ONLY && st->prev_mode == MODE_CELT_ONLY) ) + ) + { + transition = 1; + /* Decide where to allocate the stack memory for pcm_transition */ + if (mode == MODE_CELT_ONLY) + pcm_transition_celt_size = F5*st->channels; + else + pcm_transition_silk_size = F5*st->channels; + } + ALLOC(pcm_transition_celt, pcm_transition_celt_size, opus_val16); + if (transition && mode == MODE_CELT_ONLY) + { + pcm_transition = pcm_transition_celt; + opus_decode_frame(st, NULL, 0, pcm_transition, IMIN(F5, audiosize), 0); + } + if 
(audiosize > frame_size) + { + /*fprintf(stderr, "PCM buffer too small: %d vs %d (mode = %d)\n", audiosize, frame_size, mode);*/ + RESTORE_STACK; + return OPUS_BAD_ARG; + } else { + frame_size = audiosize; + } + + /* Don't allocate any memory when in CELT-only mode */ + pcm_silk_size = (mode != MODE_CELT_ONLY) ? IMAX(F10, frame_size)*st->channels : ALLOC_NONE; + ALLOC(pcm_silk, pcm_silk_size, opus_int16); + + /* SILK processing */ + if (mode != MODE_CELT_ONLY) + { + int lost_flag, decoded_samples; + opus_int16 *pcm_ptr = pcm_silk; + + if (st->prev_mode==MODE_CELT_ONLY) + silk_InitDecoder( silk_dec ); + + /* The SILK PLC cannot produce frames of less than 10 ms */ + st->DecControl.payloadSize_ms = IMAX(10, 1000 * audiosize / st->Fs); + + if (data != NULL) + { + st->DecControl.nChannelsInternal = st->stream_channels; + if( mode == MODE_SILK_ONLY ) { + if( st->bandwidth == OPUS_BANDWIDTH_NARROWBAND ) { + st->DecControl.internalSampleRate = 8000; + } else if( st->bandwidth == OPUS_BANDWIDTH_MEDIUMBAND ) { + st->DecControl.internalSampleRate = 12000; + } else if( st->bandwidth == OPUS_BANDWIDTH_WIDEBAND ) { + st->DecControl.internalSampleRate = 16000; + } else { + st->DecControl.internalSampleRate = 16000; + silk_assert( 0 ); + } + } else { + /* Hybrid mode */ + st->DecControl.internalSampleRate = 16000; + } + } + + lost_flag = data == NULL ? 
1 : 2 * decode_fec; + decoded_samples = 0; + do { + /* Call SILK decoder */ + int first_frame = decoded_samples == 0; + silk_ret = silk_Decode( silk_dec, &st->DecControl, + lost_flag, first_frame, &dec, pcm_ptr, &silk_frame_size ); + if( silk_ret ) { + if (lost_flag) { + /* PLC failure should not be fatal */ + silk_frame_size = frame_size; + for (i=0;i<frame_size*st->channels;i++) + pcm_ptr[i] = 0; + } else { + RESTORE_STACK; + return OPUS_INTERNAL_ERROR; + } + } + pcm_ptr += silk_frame_size * st->channels; + decoded_samples += silk_frame_size; + } while( decoded_samples < frame_size ); + } + + start_band = 0; + if (!decode_fec && mode != MODE_CELT_ONLY && data != NULL + && ec_tell(&dec)+17+20*(st->mode == MODE_HYBRID) <= 8*len) + { + /* Check if we have a redundant 0-8 kHz band */ + if (mode == MODE_HYBRID) + redundancy = ec_dec_bit_logp(&dec, 12); + else + redundancy = 1; + if (redundancy) + { + celt_to_silk = ec_dec_bit_logp(&dec, 1); + /* redundancy_bytes will be at least two, in the non-hybrid + case due to the ec_tell() check above */ + redundancy_bytes = mode==MODE_HYBRID ? + (opus_int32)ec_dec_uint(&dec, 256)+2 : + len-((ec_tell(&dec)+7)>>3); + len -= redundancy_bytes; + /* This is a sanity check. It should never happen for a valid + packet, so the exact behaviour is not normative. 
*/ + if (len*8 < ec_tell(&dec)) + { + len = 0; + redundancy_bytes = 0; + redundancy = 0; + } + /* Shrink decoder because of raw bits */ + dec.storage -= redundancy_bytes; + } + } + if (mode != MODE_CELT_ONLY) + start_band = 17; + + { + int endband=21; + + switch(st->bandwidth) + { + case OPUS_BANDWIDTH_NARROWBAND: + endband = 13; + break; + case OPUS_BANDWIDTH_MEDIUMBAND: + case OPUS_BANDWIDTH_WIDEBAND: + endband = 17; + break; + case OPUS_BANDWIDTH_SUPERWIDEBAND: + endband = 19; + break; + case OPUS_BANDWIDTH_FULLBAND: + endband = 21; + break; + } + celt_decoder_ctl(celt_dec, CELT_SET_END_BAND(endband)); + celt_decoder_ctl(celt_dec, CELT_SET_CHANNELS(st->stream_channels)); + } + + if (redundancy) + { + transition = 0; + pcm_transition_silk_size=ALLOC_NONE; + } + + ALLOC(pcm_transition_silk, pcm_transition_silk_size, opus_val16); + + if (transition && mode != MODE_CELT_ONLY) + { + pcm_transition = pcm_transition_silk; + opus_decode_frame(st, NULL, 0, pcm_transition, IMIN(F5, audiosize), 0); + } + + /* Only allocation memory for redundancy if/when needed */ + redundant_audio_size = redundancy ? F5*st->channels : ALLOC_NONE; + ALLOC(redundant_audio, redundant_audio_size, opus_val16); + + /* 5 ms redundant frame for CELT->SILK*/ + if (redundancy && celt_to_silk) + { + celt_decoder_ctl(celt_dec, CELT_SET_START_BAND(0)); + celt_decode_with_ec(celt_dec, data+len, redundancy_bytes, + redundant_audio, F5, NULL); + celt_decoder_ctl(celt_dec, OPUS_GET_FINAL_RANGE(&redundant_rng)); + } + + /* MUST be after PLC */ + celt_decoder_ctl(celt_dec, CELT_SET_START_BAND(start_band)); + + if (mode != MODE_SILK_ONLY) + { + int celt_frame_size = IMIN(F20, frame_size); + /* Make sure to discard any previous CELT state */ + if (mode != st->prev_mode && st->prev_mode > 0 && !st->prev_redundancy) + celt_decoder_ctl(celt_dec, OPUS_RESET_STATE); + /* Decode CELT */ + celt_ret = celt_decode_with_ec(celt_dec, decode_fec ? 
NULL : data, + len, pcm, celt_frame_size, &dec); + } else { + unsigned char silence[2] = {0xFF, 0xFF}; + for (i=0;i<frame_size*st->channels;i++) + pcm[i] = 0; + /* For hybrid -> SILK transitions, we let the CELT MDCT + do a fade-out by decoding a silence frame */ + if (st->prev_mode == MODE_HYBRID && !(redundancy && celt_to_silk && st->prev_redundancy) ) + { + celt_decoder_ctl(celt_dec, CELT_SET_START_BAND(0)); + celt_decode_with_ec(celt_dec, silence, 2, pcm, F2_5, NULL); + } + } + + if (mode != MODE_CELT_ONLY) + { +#ifdef OPUS_FIXED_POINT + for (i=0;i<frame_size*st->channels;i++) + pcm[i] = SAT16(pcm[i] + pcm_silk[i]); +#else + for (i=0;i<frame_size*st->channels;i++) + pcm[i] = pcm[i] + (opus_val16)((1.f/32768.f)*pcm_silk[i]); +#endif + } + + { + const CELTMode *celt_mode; + celt_decoder_ctl(celt_dec, CELT_GET_MODE(&celt_mode)); + window = celt_mode->window; + } + + /* 5 ms redundant frame for SILK->CELT */ + if (redundancy && !celt_to_silk) + { + celt_decoder_ctl(celt_dec, OPUS_RESET_STATE); + celt_decoder_ctl(celt_dec, CELT_SET_START_BAND(0)); + + celt_decode_with_ec(celt_dec, data+len, redundancy_bytes, redundant_audio, F5, NULL); + celt_decoder_ctl(celt_dec, OPUS_GET_FINAL_RANGE(&redundant_rng)); + smooth_fade(pcm+st->channels*(frame_size-F2_5), redundant_audio+st->channels*F2_5, + pcm+st->channels*(frame_size-F2_5), F2_5, st->channels, window, st->Fs); + } + if (redundancy && celt_to_silk) + { + for (c=0;c<st->channels;c++) + { + for (i=0;i<F2_5;i++) + pcm[st->channels*i+c] = redundant_audio[st->channels*i+c]; + } + smooth_fade(redundant_audio+st->channels*F2_5, pcm+st->channels*F2_5, + pcm+st->channels*F2_5, F2_5, st->channels, window, st->Fs); + } + if (transition) + { + if (audiosize >= F5) + { + for (i=0;i<st->channels*F2_5;i++) + pcm[i] = pcm_transition[i]; + smooth_fade(pcm_transition+st->channels*F2_5, pcm+st->channels*F2_5, + pcm+st->channels*F2_5, F2_5, + st->channels, window, st->Fs); + } else { + /* Not enough time to do a clean transition, but we 
do it anyway + This will not preserve amplitude perfectly and may introduce + a bit of temporal aliasing, but it shouldn't be too bad and + that's pretty much the best we can do. In any case, generating this + transition it pretty silly in the first place */ + smooth_fade(pcm_transition, pcm, + pcm, F2_5, + st->channels, window, st->Fs); + } + } + + if(st->decode_gain) + { + opus_val32 gain; + gain = celt_exp2(MULT16_16_P15(QCONST16(6.48814081e-4f, 25), st->decode_gain)); + for (i=0;i<frame_size*st->channels;i++) + { + opus_val32 x; + x = MULT16_32_P16(pcm[i],gain); + pcm[i] = SATURATE(x, 32767); + } + } + + if (len <= 1) + st->rangeFinal = 0; + else + st->rangeFinal = dec.rng ^ redundant_rng; + + st->prev_mode = mode; + st->prev_redundancy = redundancy && !celt_to_silk; + + if (celt_ret>=0) + { + if (OPUS_CHECK_ARRAY(pcm, audiosize*st->channels)) + OPUS_PRINT_INT(audiosize); + } + + RESTORE_STACK; + return celt_ret < 0 ? celt_ret : audiosize; + +} + +/* Decodes one packet into pcm, or runs the PLC when no payload is given. decode_fec must be 0 or 1. If data is NULL or len is 0, opus_decode_frame() is called without a payload until frame_size samples per channel have been produced. With decode_fec set, the first frame_size-packet_frame_size samples come from a nested no-payload call and the rest is decoded from the first frame of the packet with the FEC flag; the whole request falls back to the no-payload path when frame_size is smaller than one packet frame or when either the packet or the decoder state is MODE_CELT_ONLY. Otherwise every frame reported by opus_packet_parse_impl() is decoded in order. self_delimited and packet_offset are handed unchanged to opus_packet_parse_impl(); soft_clip is only acted on when OPUS_FIXED_POINT is not defined. Returns the number of samples written per channel, or a negative OPUS_* code (OPUS_BAD_ARG, OPUS_BUFFER_TOO_SMALL, or an error propagated from the parser or the frame decoder). */ int opus_decode_native(OpusDecoder *st, const unsigned char *data, + opus_int32 len, opus_val16 *pcm, int frame_size, int decode_fec, + int self_delimited, opus_int32 *packet_offset, int soft_clip) +{ + int i, nb_samples; + int count, offset; + unsigned char toc; + int packet_frame_size, packet_bandwidth, packet_mode, packet_stream_channels; + /* 48 x 2.5 ms = 120 ms */ + opus_int16 size[48]; + if (decode_fec<0 || decode_fec>1) + return OPUS_BAD_ARG; + /* For FEC/PLC, frame_size has to be a multiple of 2.5 ms (Fs/400 samples) */ + if ((decode_fec || len==0 || data==NULL) && frame_size%(st->Fs/400)!=0) + return OPUS_BAD_ARG; + if (len==0 || data==NULL) + { + int pcm_count=0; + do { + int ret; + ret = opus_decode_frame(st, NULL, 0, pcm+pcm_count*st->channels, frame_size-pcm_count, 0); + if (ret<0) + return ret; + pcm_count += ret; + } while (pcm_count < frame_size); + celt_assert(pcm_count == frame_size); + if (OPUS_CHECK_ARRAY(pcm, pcm_count*st->channels)) + OPUS_PRINT_INT(pcm_count); + st->last_packet_duration = pcm_count; +
return pcm_count; + } else if (len<0) + return OPUS_BAD_ARG; + + packet_mode = opus_packet_get_mode(data); + packet_bandwidth = opus_packet_get_bandwidth(data); + packet_frame_size = opus_packet_get_samples_per_frame(data, st->Fs); + packet_stream_channels = opus_packet_get_nb_channels(data); + + count = opus_packet_parse_impl(data, len, self_delimited, &toc, NULL, + size, &offset, packet_offset); + if (count<0) + return count; + + data += offset; + + if (decode_fec) + { + int duration_copy; + int ret; + /* If no FEC can be present, run the PLC (recursive call) */ + if (frame_size < packet_frame_size || packet_mode == MODE_CELT_ONLY || st->mode == MODE_CELT_ONLY) + return opus_decode_native(st, NULL, 0, pcm, frame_size, 0, 0, NULL, soft_clip); + /* Otherwise, run the PLC on everything except the size for which we might have FEC. The nested call overwrites last_packet_duration, so a copy is kept and restored if that call fails. */ + duration_copy = st->last_packet_duration; + if (frame_size-packet_frame_size!=0) + { + ret = opus_decode_native(st, NULL, 0, pcm, frame_size-packet_frame_size, 0, 0, NULL, soft_clip); + if (ret<0) + { + st->last_packet_duration = duration_copy; + return ret; + } + celt_assert(ret==frame_size-packet_frame_size); + } + /* Complete with FEC: only the first frame of the packet (size[0]) is used, written at the tail of pcm */ + st->mode = packet_mode; + st->bandwidth = packet_bandwidth; + st->frame_size = packet_frame_size; + st->stream_channels = packet_stream_channels; + ret = opus_decode_frame(st, data, size[0], pcm+st->channels*(frame_size-packet_frame_size), + packet_frame_size, 1); + if (ret<0) + return ret; + else { + if (OPUS_CHECK_ARRAY(pcm, frame_size*st->channels)) + OPUS_PRINT_INT(frame_size); + st->last_packet_duration = frame_size; + return frame_size; + } + } + + if (count*packet_frame_size > frame_size) + return OPUS_BUFFER_TOO_SMALL; + + /* Update the state as the last step to avoid updating it on an invalid packet */ + st->mode = packet_mode; + st->bandwidth = packet_bandwidth; + st->frame_size = packet_frame_size; + st->stream_channels = packet_stream_channels; + + nb_samples=0; + for (i=0;i<count;i++) 
+ { + int ret; + ret = opus_decode_frame(st, data, size[i], pcm+nb_samples*st->channels, frame_size-nb_samples, 0); + if (ret<0) + return ret; + celt_assert(ret==packet_frame_size); + data += size[i]; + nb_samples += ret; + } + st->last_packet_duration = nb_samples; + if (OPUS_CHECK_ARRAY(pcm, nb_samples*st->channels)) + OPUS_PRINT_INT(nb_samples); +#ifndef OPUS_FIXED_POINT + if (soft_clip) + opus_pcm_soft_clip(pcm, nb_samples, st->channels, st->softclip_mem); + else + st->softclip_mem[0]=st->softclip_mem[1]=0; +#endif + return nb_samples; +} + +#ifdef OPUS_FIXED_POINT + +/* With OPUS_FIXED_POINT defined: rejects a non-positive frame_size, then forwards to opus_decode_native() with self_delimited=0, no packet_offset and soft_clip=0. */ int opus_decode(OpusDecoder *st, const unsigned char *data, + opus_int32 len, opus_val16 *pcm, int frame_size, int decode_fec) +{ + if(frame_size<=0) + return OPUS_BAD_ARG; + return opus_decode_native(st, data, len, pcm, frame_size, decode_fec, 0, NULL, 0); +} + +#ifndef DISABLE_FLOAT_API +/* With OPUS_FIXED_POINT defined: decodes into a temporary opus_int16 buffer of frame_size*channels samples and writes each decoded sample, scaled by 1/32768, to the caller's float buffer. Returns whatever opus_decode_native() returned. */ int opus_decode_float(OpusDecoder *st, const unsigned char *data, + opus_int32 len, float *pcm, int frame_size, int decode_fec) +{ + VARDECL(opus_int16, out); + int ret, i; + ALLOC_STACK; + + if(frame_size<=0) + { + RESTORE_STACK; + return OPUS_BAD_ARG; + } + ALLOC(out, frame_size*st->channels, opus_int16); + + ret = opus_decode_native(st, data, len, out, frame_size, decode_fec, 0, NULL, 0); + if (ret > 0) + { + for (i=0;i<ret*st->channels;i++) + pcm[i] = (1.f/32768.f)*(out[i]); + } + RESTORE_STACK; + return ret; +} +#endif + + +#else +/* Without OPUS_FIXED_POINT: decodes into a temporary float buffer of frame_size*channels samples with soft_clip=1, then converts each decoded sample with FLOAT2INT16 into the caller's opus_int16 buffer. Returns whatever opus_decode_native() returned. */ int opus_decode(OpusDecoder *st, const unsigned char *data, + opus_int32 len, opus_int16 *pcm, int frame_size, int decode_fec) +{ + VARDECL(float, out); + int ret, i; + ALLOC_STACK; + + if(frame_size<=0) + { + RESTORE_STACK; + return OPUS_BAD_ARG; + } + + ALLOC(out, frame_size*st->channels, float); + + ret = opus_decode_native(st, data, len, out, frame_size, decode_fec, 0, NULL, 1); + if (ret > 0) + { + for (i=0;i<ret*st->channels;i++) + pcm[i] = FLOAT2INT16(out[i]); + } + RESTORE_STACK; + return ret; +} + +/* Without OPUS_FIXED_POINT: rejects a non-positive frame_size, then forwards to opus_decode_native() with soft_clip=0. */ int opus_decode_float(OpusDecoder *st, const unsigned char *data, +
opus_int32 len, opus_val16 *pcm, int frame_size, int decode_fec) +{ + if(frame_size<=0) + return OPUS_BAD_ARG; + return opus_decode_native(st, data, len, pcm, frame_size, decode_fec, 0, NULL, 0); +} + +#endif + +/* Variadic control entry point for the decoder. request selects the operation and the single variadic argument is either a pointer that receives a value (GET requests) or an opus_int32 value (OPUS_SET_GAIN_REQUEST). Returns OPUS_OK on success, OPUS_BAD_ARG for a NULL output pointer or a gain outside [-32768, 32767], and OPUS_UNIMPLEMENTED for any request not listed in the switch. Both exits call va_end(). */ int opus_decoder_ctl(OpusDecoder *st, int request, ...) +{ + int ret = OPUS_OK; + va_list ap; + void *silk_dec; + CELTDecoder *celt_dec; + + /* The SILK and CELT decoder states are addressed as byte offsets from st */ silk_dec = (char*)st+st->silk_dec_offset; + celt_dec = (CELTDecoder*)((char*)st+st->celt_dec_offset); + + + va_start(ap, request); + + switch (request) + { + case OPUS_GET_BANDWIDTH_REQUEST: + { + opus_int32 *value = va_arg(ap, opus_int32*); + if (!value) + { + goto bad_arg; + } + *value = st->bandwidth; + } + break; + case OPUS_GET_FINAL_RANGE_REQUEST: + { + opus_uint32 *value = va_arg(ap, opus_uint32*); + if (!value) + { + goto bad_arg; + } + *value = st->rangeFinal; + } + break; + case OPUS_RESET_STATE: + { + /* Zero everything from the OPUS_DECODER_RESET_START member to the end of the struct, reset both sub-decoders, then restore the two fields that must not stay zero */ OPUS_CLEAR((char*)&st->OPUS_DECODER_RESET_START, + sizeof(OpusDecoder)- + ((char*)&st->OPUS_DECODER_RESET_START - (char*)st)); + + celt_decoder_ctl(celt_dec, OPUS_RESET_STATE); + silk_InitDecoder( silk_dec ); + st->stream_channels = st->channels; + st->frame_size = st->Fs/400; + } + break; + case OPUS_GET_SAMPLE_RATE_REQUEST: + { + opus_int32 *value = va_arg(ap, opus_int32*); + if (!value) + { + goto bad_arg; + } + *value = st->Fs; + } + break; + case OPUS_GET_PITCH_REQUEST: + { + opus_int32 *value = va_arg(ap, opus_int32*); + if (!value) + { + goto bad_arg; + } + /* The pitch comes from the CELT decoder when the previous frame was MODE_CELT_ONLY, otherwise from the SILK control block */ if (st->prev_mode == MODE_CELT_ONLY) + celt_decoder_ctl(celt_dec, OPUS_GET_PITCH(value)); + else + *value = st->DecControl.prevPitchLag; + } + break; + case OPUS_GET_GAIN_REQUEST: + { + opus_int32 *value = va_arg(ap, opus_int32*); + if (!value) + { + goto bad_arg; + } + *value = st->decode_gain; + } + break; + case OPUS_SET_GAIN_REQUEST: + { + opus_int32 value = va_arg(ap, opus_int32); + if (value<-32768 || value>32767) + { + goto bad_arg; + } + st->decode_gain = value; + } + break; + case OPUS_GET_LAST_PACKET_DURATION_REQUEST: + { + opus_uint32 
*value = va_arg(ap, opus_uint32*); + if (!value) + { + goto bad_arg; + } + *value = st->last_packet_duration; + } + break; + default: + /*fprintf(stderr, "unknown opus_decoder_ctl() request: %d", request);*/ + ret = OPUS_UNIMPLEMENTED; + break; + } + + va_end(ap); + return ret; +bad_arg: + va_end(ap); + return OPUS_BAD_ARG; +} + +/* Releases the decoder by passing st to opus_free(). */ void opus_decoder_destroy(OpusDecoder *st) +{ + opus_free(st); +} + + +/* Derives an OPUS_BANDWIDTH_* value from the first byte of a packet. If bit 0x80 is set the result is OPUS_BANDWIDTH_MEDIUMBAND plus bits 5-6, with MEDIUMBAND itself remapped to NARROWBAND. Otherwise, if bits 5-6 are both set, bit 0x10 selects FULLBAND over SUPERWIDEBAND. In the remaining case the result is OPUS_BANDWIDTH_NARROWBAND plus bits 5-6. data[0] is read unconditionally, so the caller must supply at least one byte. */ int opus_packet_get_bandwidth(const unsigned char *data) +{ + int bandwidth; + if (data[0]&0x80) + { + bandwidth = OPUS_BANDWIDTH_MEDIUMBAND + ((data[0]>>5)&0x3); + if (bandwidth == OPUS_BANDWIDTH_MEDIUMBAND) + bandwidth = OPUS_BANDWIDTH_NARROWBAND; + } else if ((data[0]&0x60) == 0x60) + { + bandwidth = (data[0]&0x10) ? OPUS_BANDWIDTH_FULLBAND : + OPUS_BANDWIDTH_SUPERWIDEBAND; + } else { + bandwidth = OPUS_BANDWIDTH_NARROWBAND + ((data[0]>>5)&0x3); + } + return bandwidth; +} + +/* Computes the number of samples per frame at sampling rate Fs from the first byte of a packet. With bit 0x80 set the size is (Fs << n)/400 where n is bits 3-4 (2.5, 5, 10 or 20 ms). With bits 5-6 both set it is Fs/50 or Fs/100 depending on bit 0x08 (20 or 10 ms). Otherwise n = bits 3-4 gives (Fs << n)/100 (10, 20 or 40 ms), except n == 3 which gives Fs*60/1000 (60 ms). */ int opus_packet_get_samples_per_frame(const unsigned char *data, + opus_int32 Fs) +{ + int audiosize; + if (data[0]&0x80) + { + audiosize = ((data[0]>>3)&0x3); + audiosize = (Fs<<audiosize)/400; + } else if ((data[0]&0x60) == 0x60) + { + audiosize = (data[0]&0x08) ? Fs/50 : Fs/100; + } else { + audiosize = ((data[0]>>3)&0x3); + if (audiosize == 3) + audiosize = Fs*60/1000; + else + audiosize = (Fs<<audiosize)/100; + } + return audiosize; +} + +/* Returns 2 when bit 0x4 of the first packet byte is set, 1 otherwise. */ int opus_packet_get_nb_channels(const unsigned char *data) +{ + return (data[0]&0x4) ? 
2 : 1; +} + +/* Returns the number of frames in a packet, read from its header. The low two bits of packet[0] select the layout: 0 means one frame, 1 or 2 mean two frames, and 3 means the count is stored in the low six bits of packet[1]. Returns OPUS_BAD_ARG when len < 1 and OPUS_INVALID_PACKET when the count byte is needed but len < 2. */ int opus_packet_get_nb_frames(const unsigned char packet[], opus_int32 len) +{ + int count; + if (len<1) + return OPUS_BAD_ARG; + count = packet[0]&0x3; + if (count==0) + return 1; + else if (count!=3) + return 2; + else if (len<2) + return OPUS_INVALID_PACKET; + else + return packet[1]&0x3F; +} + +/* Returns the total number of samples in a packet at sampling rate Fs: the frame count times the per-frame size. A negative frame count is returned unchanged as the error code, and OPUS_INVALID_PACKET is returned when the total exceeds 3/25 of a second. */ int opus_packet_get_nb_samples(const unsigned char packet[], opus_int32 len, + opus_int32 Fs) +{ + int samples; + int count = opus_packet_get_nb_frames(packet, len); + + if (count<0) + return count; + + samples = count*opus_packet_get_samples_per_frame(packet, Fs); + /* Can't have more than 120 ms, i.e. samples/Fs must not exceed 3/25 */ + if (samples*25 > Fs*3) + return OPUS_INVALID_PACKET; + else + return samples; +} + +/* Same as opus_packet_get_nb_samples(), using the sampling rate stored in the decoder (dec->Fs). */ int opus_decoder_get_nb_samples(const OpusDecoder *dec, + const unsigned char packet[], opus_int32 len) +{ + return opus_packet_get_nb_samples(packet, len, dec->Fs); +} diff --git a/drivers/opus/opus_defines.h b/drivers/opus/opus_defines.h new file mode 100644 index 0000000000..265089f65e --- /dev/null +++ b/drivers/opus/opus_defines.h @@ -0,0 +1,726 @@ +/* Copyright (c) 2010-2011 Xiph.Org Foundation, Skype Limited + Written by Jean-Marc Valin and Koen Vos */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/** + * @file opus_defines.h + * @brief Opus reference implementation constants + */ + +#ifndef OPUS_DEFINES_H +#define OPUS_DEFINES_H + +#include "opus_types.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** @defgroup opus_errorcodes Error codes + * @{ + */ +/** No error @hideinitializer*/ +#define OPUS_OK 0 +/** One or more invalid/out of range arguments @hideinitializer*/ +#define OPUS_BAD_ARG -1 +/** The buffer supplied by the caller is too small @hideinitializer*/ +#define OPUS_BUFFER_TOO_SMALL -2 +/** An internal error was detected @hideinitializer*/ +#define OPUS_INTERNAL_ERROR -3 +/** The compressed data passed is corrupted @hideinitializer*/ +#define OPUS_INVALID_PACKET -4 +/** Invalid/unsupported request number @hideinitializer*/ +#define OPUS_UNIMPLEMENTED -5 +/** An encoder or decoder structure is invalid or already freed @hideinitializer*/ +#define OPUS_INVALID_STATE -6 +/** Memory allocation has failed @hideinitializer*/ +#define OPUS_ALLOC_FAIL -7 +/**@}*/ + +/** @cond OPUS_INTERNAL_DOC */ +/**Export control for opus functions */ + +#ifndef OPUS_EXPORT +# if defined(WIN32) +# ifdef OPUS_BUILD +# define OPUS_EXPORT __declspec(dllexport) +# else +# define OPUS_EXPORT +# endif +# elif defined(__GNUC__) && defined(OPUS_BUILD) +# define OPUS_EXPORT __attribute__ ((visibility ("default"))) +# else +# define OPUS_EXPORT +# endif +#endif + +# if !defined(OPUS_GNUC_PREREQ) +# if defined(__GNUC__)&&defined(__GNUC_MINOR__) +# define 
OPUS_GNUC_PREREQ(_maj,_min) \ + ((__GNUC__<<16)+__GNUC_MINOR__>=((_maj)<<16)+(_min)) +# else +# define OPUS_GNUC_PREREQ(_maj,_min) 0 +# endif +# endif + +#if (!defined(__STDC_VERSION__) || (__STDC_VERSION__ < 199901L) ) +# if OPUS_GNUC_PREREQ(3,0) +# define OPUS_RESTRICT __restrict__ +# elif (defined(_MSC_VER) && _MSC_VER >= 1400) +# define OPUS_RESTRICT __restrict +# else +# define OPUS_RESTRICT +# endif +#else +# define OPUS_RESTRICT restrict +#endif + +#if (!defined(__STDC_VERSION__) || (__STDC_VERSION__ < 199901L) ) +# if OPUS_GNUC_PREREQ(2,7) +# define OPUS_INLINE __inline__ +# elif (defined(_MSC_VER)) +# define OPUS_INLINE __inline +# else +# define OPUS_INLINE +# endif +#else +# define OPUS_INLINE inline +#endif + +/**Warning attributes for opus functions + * NONNULL is not used in OPUS_BUILD to avoid the compiler optimizing out + * some paranoid null checks. */ +#if defined(__GNUC__) && OPUS_GNUC_PREREQ(3, 4) +# define OPUS_WARN_UNUSED_RESULT __attribute__ ((__warn_unused_result__)) +#else +# define OPUS_WARN_UNUSED_RESULT +#endif +#if !defined(OPUS_BUILD) && defined(__GNUC__) && OPUS_GNUC_PREREQ(3, 4) +# define OPUS_ARG_NONNULL(_x) __attribute__ ((__nonnull__(_x))) +#else +# define OPUS_ARG_NONNULL(_x) +#endif + +/** These are the actual Encoder CTL ID numbers. + * They should not be used directly by applications. 
+ * In general, SETs should be even and GETs should be odd.*/ +#define OPUS_SET_APPLICATION_REQUEST 4000 +#define OPUS_GET_APPLICATION_REQUEST 4001 +#define OPUS_SET_BITRATE_REQUEST 4002 +#define OPUS_GET_BITRATE_REQUEST 4003 +#define OPUS_SET_MAX_BANDWIDTH_REQUEST 4004 +#define OPUS_GET_MAX_BANDWIDTH_REQUEST 4005 +#define OPUS_SET_VBR_REQUEST 4006 +#define OPUS_GET_VBR_REQUEST 4007 +#define OPUS_SET_BANDWIDTH_REQUEST 4008 +#define OPUS_GET_BANDWIDTH_REQUEST 4009 +#define OPUS_SET_COMPLEXITY_REQUEST 4010 +#define OPUS_GET_COMPLEXITY_REQUEST 4011 +#define OPUS_SET_INBAND_FEC_REQUEST 4012 +#define OPUS_GET_INBAND_FEC_REQUEST 4013 +#define OPUS_SET_PACKET_LOSS_PERC_REQUEST 4014 +#define OPUS_GET_PACKET_LOSS_PERC_REQUEST 4015 +#define OPUS_SET_DTX_REQUEST 4016 +#define OPUS_GET_DTX_REQUEST 4017 +#define OPUS_SET_VBR_CONSTRAINT_REQUEST 4020 +#define OPUS_GET_VBR_CONSTRAINT_REQUEST 4021 +#define OPUS_SET_FORCE_CHANNELS_REQUEST 4022 +#define OPUS_GET_FORCE_CHANNELS_REQUEST 4023 +#define OPUS_SET_SIGNAL_REQUEST 4024 +#define OPUS_GET_SIGNAL_REQUEST 4025 +#define OPUS_GET_LOOKAHEAD_REQUEST 4027 +/* #define OPUS_RESET_STATE 4028 */ +#define OPUS_GET_SAMPLE_RATE_REQUEST 4029 +#define OPUS_GET_FINAL_RANGE_REQUEST 4031 +#define OPUS_GET_PITCH_REQUEST 4033 +#define OPUS_SET_GAIN_REQUEST 4034 +#define OPUS_GET_GAIN_REQUEST 4045 /* Should have been 4035 */ +#define OPUS_SET_LSB_DEPTH_REQUEST 4036 +#define OPUS_GET_LSB_DEPTH_REQUEST 4037 +#define OPUS_GET_LAST_PACKET_DURATION_REQUEST 4039 +#define OPUS_SET_EXPERT_FRAME_DURATION_REQUEST 4040 +#define OPUS_GET_EXPERT_FRAME_DURATION_REQUEST 4041 +#define OPUS_SET_PREDICTION_DISABLED_REQUEST 4042 +#define OPUS_GET_PREDICTION_DISABLED_REQUEST 4043 + +/* Don't use 4045, it's already taken by OPUS_GET_GAIN_REQUEST */ + +/* Macros to trigger compilation errors when the wrong types are provided to a CTL */ +#define __opus_check_int(x) (((void)((x) == (opus_int32)0)), (opus_int32)(x)) +#define __opus_check_int_ptr(ptr) ((ptr) + ((ptr) - 
(opus_int32*)(ptr))) +#define __opus_check_uint_ptr(ptr) ((ptr) + ((ptr) - (opus_uint32*)(ptr))) +#define __opus_check_val16_ptr(ptr) ((ptr) + ((ptr) - (opus_val16*)(ptr))) +/** @endcond */ + +/** @defgroup opus_ctlvalues Pre-defined values for CTL interface + * @see opus_genericctls, opus_encoderctls + * @{ + */ +/* Values for the various encoder CTLs */ +#define OPUS_AUTO -1000 /**<Auto/default setting @hideinitializer*/ +#define OPUS_BITRATE_MAX -1 /**<Maximum bitrate @hideinitializer*/ + +/** Best for most VoIP/videoconference applications where listening quality and intelligibility matter most + * @hideinitializer */ +#define OPUS_APPLICATION_VOIP 2048 +/** Best for broadcast/high-fidelity application where the decoded audio should be as close as possible to the input + * @hideinitializer */ +#define OPUS_APPLICATION_AUDIO 2049 +/** Only use when lowest-achievable latency is what matters most. Voice-optimized modes cannot be used. + * @hideinitializer */ +#define OPUS_APPLICATION_RESTRICTED_LOWDELAY 2051 + +#define OPUS_SIGNAL_VOICE 3001 /**< Signal being encoded is voice */ +#define OPUS_SIGNAL_MUSIC 3002 /**< Signal being encoded is music */ +#define OPUS_BANDWIDTH_NARROWBAND 1101 /**< 4 kHz bandpass @hideinitializer*/ +#define OPUS_BANDWIDTH_MEDIUMBAND 1102 /**< 6 kHz bandpass @hideinitializer*/ +#define OPUS_BANDWIDTH_WIDEBAND 1103 /**< 8 kHz bandpass @hideinitializer*/ +#define OPUS_BANDWIDTH_SUPERWIDEBAND 1104 /**<12 kHz bandpass @hideinitializer*/ +#define OPUS_BANDWIDTH_FULLBAND 1105 /**<20 kHz bandpass @hideinitializer*/ + +#define OPUS_FRAMESIZE_ARG 5000 /**< Select frame size from the argument (default) */ +#define OPUS_FRAMESIZE_2_5_MS 5001 /**< Use 2.5 ms frames */ +#define OPUS_FRAMESIZE_5_MS 5002 /**< Use 5 ms frames */ +#define OPUS_FRAMESIZE_10_MS 5003 /**< Use 10 ms frames */ +#define OPUS_FRAMESIZE_20_MS 5004 /**< Use 20 ms frames */ +#define OPUS_FRAMESIZE_40_MS 5005 /**< Use 40 ms frames */ +#define OPUS_FRAMESIZE_60_MS 5006 /**< Use 60 ms 
frames */ + +/**@}*/ + + +/** @defgroup opus_encoderctls Encoder related CTLs + * + * These are convenience macros for use with the \c opus_encoder_ctl + * interface. They are used to generate the appropriate series of + * arguments for that call, passing the correct type, size and so + * on as expected for each particular request. + * + * Some usage examples: + * + * @code + * int ret; + * ret = opus_encoder_ctl(enc_ctx, OPUS_SET_BANDWIDTH(OPUS_AUTO)); + * if (ret != OPUS_OK) return ret; + * + * opus_int32 rate; + * opus_encoder_ctl(enc_ctx, OPUS_GET_BANDWIDTH(&rate)); + * + * opus_encoder_ctl(enc_ctx, OPUS_RESET_STATE); + * @endcode + * + * @see opus_genericctls, opus_encoder + * @{ + */ + +/** Configures the encoder's computational complexity. + * The supported range is 0-10 inclusive with 10 representing the highest complexity. + * @see OPUS_GET_COMPLEXITY + * @param[in] x <tt>opus_int32</tt>: Allowed values: 0-10, inclusive. + * + * @hideinitializer */ +#define OPUS_SET_COMPLEXITY(x) OPUS_SET_COMPLEXITY_REQUEST, __opus_check_int(x) +/** Gets the encoder's complexity configuration. + * @see OPUS_SET_COMPLEXITY + * @param[out] x <tt>opus_int32 *</tt>: Returns a value in the range 0-10, + * inclusive. + * @hideinitializer */ +#define OPUS_GET_COMPLEXITY(x) OPUS_GET_COMPLEXITY_REQUEST, __opus_check_int_ptr(x) + +/** Configures the bitrate in the encoder. + * Rates from 500 to 512000 bits per second are meaningful, as well as the + * special values #OPUS_AUTO and #OPUS_BITRATE_MAX. + * The value #OPUS_BITRATE_MAX can be used to cause the codec to use as much + * rate as it can, which is useful for controlling the rate by adjusting the + * output buffer size. + * @see OPUS_GET_BITRATE + * @param[in] x <tt>opus_int32</tt>: Bitrate in bits per second. The default + * is determined based on the number of + * channels and the input sampling rate. 
+ * @hideinitializer */ +#define OPUS_SET_BITRATE(x) OPUS_SET_BITRATE_REQUEST, __opus_check_int(x) +/** Gets the encoder's bitrate configuration. + * @see OPUS_SET_BITRATE + * @param[out] x <tt>opus_int32 *</tt>: Returns the bitrate in bits per second. + * The default is determined based on the + * number of channels and the input + * sampling rate. + * @hideinitializer */ +#define OPUS_GET_BITRATE(x) OPUS_GET_BITRATE_REQUEST, __opus_check_int_ptr(x) + +/** Enables or disables variable bitrate (VBR) in the encoder. + * The configured bitrate may not be met exactly because frames must + * be an integer number of bytes in length. + * @warning Only the MDCT mode of Opus can provide hard CBR behavior. + * @see OPUS_GET_VBR + * @see OPUS_SET_VBR_CONSTRAINT + * @param[in] x <tt>opus_int32</tt>: Allowed values: + * <dl> + * <dt>0</dt><dd>Hard CBR. For LPC/hybrid modes at very low bit-rate, this can + * cause noticeable quality degradation.</dd> + * <dt>1</dt><dd>VBR (default). The exact type of VBR is controlled by + * #OPUS_SET_VBR_CONSTRAINT.</dd> + * </dl> + * @hideinitializer */ +#define OPUS_SET_VBR(x) OPUS_SET_VBR_REQUEST, __opus_check_int(x) +/** Determine if variable bitrate (VBR) is enabled in the encoder. + * @see OPUS_SET_VBR + * @see OPUS_GET_VBR_CONSTRAINT + * @param[out] x <tt>opus_int32 *</tt>: Returns one of the following values: + * <dl> + * <dt>0</dt><dd>Hard CBR.</dd> + * <dt>1</dt><dd>VBR (default). The exact type of VBR may be retrieved via + * #OPUS_GET_VBR_CONSTRAINT.</dd> + * </dl> + * @hideinitializer */ +#define OPUS_GET_VBR(x) OPUS_GET_VBR_REQUEST, __opus_check_int_ptr(x) + +/** Enables or disables constrained VBR in the encoder. + * This setting is ignored when the encoder is in CBR mode. + * @warning Only the MDCT mode of Opus currently heeds the constraint. + * Speech mode ignores it completely, hybrid mode may fail to obey it + * if the LPC layer uses more bitrate than the constraint would have + * permitted. 
+ * @see OPUS_GET_VBR_CONSTRAINT + * @see OPUS_SET_VBR + * @param[in] x <tt>opus_int32</tt>: Allowed values: + * <dl> + * <dt>0</dt><dd>Unconstrained VBR.</dd> + * <dt>1</dt><dd>Constrained VBR (default). This creates a maximum of one + * frame of buffering delay assuming a transport with a + * serialization speed of the nominal bitrate.</dd> + * </dl> + * @hideinitializer */ +#define OPUS_SET_VBR_CONSTRAINT(x) OPUS_SET_VBR_CONSTRAINT_REQUEST, __opus_check_int(x) +/** Determine if constrained VBR is enabled in the encoder. + * @see OPUS_SET_VBR_CONSTRAINT + * @see OPUS_GET_VBR + * @param[out] x <tt>opus_int32 *</tt>: Returns one of the following values: + * <dl> + * <dt>0</dt><dd>Unconstrained VBR.</dd> + * <dt>1</dt><dd>Constrained VBR (default).</dd> + * </dl> + * @hideinitializer */ +#define OPUS_GET_VBR_CONSTRAINT(x) OPUS_GET_VBR_CONSTRAINT_REQUEST, __opus_check_int_ptr(x) + +/** Configures mono/stereo forcing in the encoder. + * This can force the encoder to produce packets encoded as either mono or + * stereo, regardless of the format of the input audio. This is useful when + * the caller knows that the input signal is currently a mono source embedded + * in a stereo stream. + * @see OPUS_GET_FORCE_CHANNELS + * @param[in] x <tt>opus_int32</tt>: Allowed values: + * <dl> + * <dt>#OPUS_AUTO</dt><dd>Not forced (default)</dd> + * <dt>1</dt> <dd>Forced mono</dd> + * <dt>2</dt> <dd>Forced stereo</dd> + * </dl> + * @hideinitializer */ +#define OPUS_SET_FORCE_CHANNELS(x) OPUS_SET_FORCE_CHANNELS_REQUEST, __opus_check_int(x) +/** Gets the encoder's forced channel configuration. 
+ * @see OPUS_SET_FORCE_CHANNELS + * @param[out] x <tt>opus_int32 *</tt>: Returns one of the following values: + * <dl> + * <dt>#OPUS_AUTO</dt><dd>Not forced (default)</dd> + * <dt>1</dt> <dd>Forced mono</dd> + * <dt>2</dt> <dd>Forced stereo</dd> + * </dl> + * @hideinitializer */ +#define OPUS_GET_FORCE_CHANNELS(x) OPUS_GET_FORCE_CHANNELS_REQUEST, __opus_check_int_ptr(x) + +/** Configures the maximum bandpass that the encoder will select automatically. + * Applications should normally use this instead of #OPUS_SET_BANDWIDTH + * (leaving that set to the default, #OPUS_AUTO). This allows the + * application to set an upper bound based on the type of input it is + * providing, but still gives the encoder the freedom to reduce the bandpass + * when the bitrate becomes too low, for better overall quality. + * @see OPUS_GET_MAX_BANDWIDTH + * @param[in] x <tt>opus_int32</tt>: Allowed values: + * <dl> + * <dt>#OPUS_BANDWIDTH_NARROWBAND</dt> <dd>4 kHz passband</dd> + * <dt>#OPUS_BANDWIDTH_MEDIUMBAND</dt> <dd>6 kHz passband</dd> + * <dt>#OPUS_BANDWIDTH_WIDEBAND</dt> <dd>8 kHz passband</dd> + * <dt>#OPUS_BANDWIDTH_SUPERWIDEBAND</dt><dd>12 kHz passband</dd> + * <dt>#OPUS_BANDWIDTH_FULLBAND</dt> <dd>20 kHz passband (default)</dd> + * </dl> + * @hideinitializer */ +#define OPUS_SET_MAX_BANDWIDTH(x) OPUS_SET_MAX_BANDWIDTH_REQUEST, __opus_check_int(x) + +/** Gets the encoder's configured maximum allowed bandpass. + * @see OPUS_SET_MAX_BANDWIDTH + * @param[out] x <tt>opus_int32 *</tt>: Returns one of the following values: + * <dl> + * <dt>#OPUS_BANDWIDTH_NARROWBAND</dt> <dd>4 kHz passband</dd> + * <dt>#OPUS_BANDWIDTH_MEDIUMBAND</dt> <dd>6 kHz passband</dd> + * <dt>#OPUS_BANDWIDTH_WIDEBAND</dt> <dd>8 kHz passband</dd> + * <dt>#OPUS_BANDWIDTH_SUPERWIDEBAND</dt><dd>12 kHz passband</dd> + * <dt>#OPUS_BANDWIDTH_FULLBAND</dt> <dd>20 kHz passband (default)</dd> + * </dl> + * @hideinitializer */ +#define OPUS_GET_MAX_BANDWIDTH(x) OPUS_GET_MAX_BANDWIDTH_REQUEST, __opus_check_int_ptr(x) + +/** Sets the encoder's bandpass to a specific value. 
+ * This prevents the encoder from automatically selecting the bandpass based + * on the available bitrate. If an application knows the bandpass of the input + * audio it is providing, it should normally use #OPUS_SET_MAX_BANDWIDTH + * instead, which still gives the encoder the freedom to reduce the bandpass + * when the bitrate becomes too low, for better overall quality. + * @see OPUS_GET_BANDWIDTH + * @param[in] x <tt>opus_int32</tt>: Allowed values: + * <dl> + * <dt>#OPUS_AUTO</dt> <dd>(default)</dd> + * <dt>#OPUS_BANDWIDTH_NARROWBAND</dt> <dd>4 kHz passband</dd> + * <dt>#OPUS_BANDWIDTH_MEDIUMBAND</dt> <dd>6 kHz passband</dd> + * <dt>#OPUS_BANDWIDTH_WIDEBAND</dt> <dd>8 kHz passband</dd> + * <dt>#OPUS_BANDWIDTH_SUPERWIDEBAND</dt><dd>12 kHz passband</dd> + * <dt>#OPUS_BANDWIDTH_FULLBAND</dt> <dd>20 kHz passband</dd> + * </dl> + * @hideinitializer */ +#define OPUS_SET_BANDWIDTH(x) OPUS_SET_BANDWIDTH_REQUEST, __opus_check_int(x) + +/** Configures the type of signal being encoded. + * This is a hint which helps the encoder's mode selection. + * @see OPUS_GET_SIGNAL + * @param[in] x <tt>opus_int32</tt>: Allowed values: + * <dl> + * <dt>#OPUS_AUTO</dt> <dd>(default)</dd> + * <dt>#OPUS_SIGNAL_VOICE</dt><dd>Bias thresholds towards choosing LPC or Hybrid modes.</dd> + * <dt>#OPUS_SIGNAL_MUSIC</dt><dd>Bias thresholds towards choosing MDCT modes.</dd> + * </dl> + * @hideinitializer */ +#define OPUS_SET_SIGNAL(x) OPUS_SET_SIGNAL_REQUEST, __opus_check_int(x) +/** Gets the encoder's configured signal type. 
+ * @see OPUS_SET_SIGNAL + * @param[out] x <tt>opus_int32 *</tt>: Returns one of the following values: + * <dl> + * <dt>#OPUS_AUTO</dt> <dd>(default)</dd> + * <dt>#OPUS_SIGNAL_VOICE</dt><dd>Bias thresholds towards choosing LPC or Hybrid modes.</dd> + * <dt>#OPUS_SIGNAL_MUSIC</dt><dd>Bias thresholds towards choosing MDCT modes.</dd> + * </dl> + * @hideinitializer */ +#define OPUS_GET_SIGNAL(x) OPUS_GET_SIGNAL_REQUEST, __opus_check_int_ptr(x) + + +/** Configures the encoder's intended application. + * The initial value is a mandatory argument to the encoder_create function. + * @see OPUS_GET_APPLICATION + * @param[in] x <tt>opus_int32</tt>: Allowed values: + * <dl> + * <dt>#OPUS_APPLICATION_VOIP</dt> + * <dd>Process signal for improved speech intelligibility.</dd> + * <dt>#OPUS_APPLICATION_AUDIO</dt> + * <dd>Favor faithfulness to the original input.</dd> + * <dt>#OPUS_APPLICATION_RESTRICTED_LOWDELAY</dt> + * <dd>Configure the minimum possible coding delay by disabling certain modes + * of operation.</dd> + * </dl> + * @hideinitializer */ +#define OPUS_SET_APPLICATION(x) OPUS_SET_APPLICATION_REQUEST, __opus_check_int(x) +/** Gets the encoder's configured application. + * @see OPUS_SET_APPLICATION + * @param[out] x <tt>opus_int32 *</tt>: Returns one of the following values: + * <dl> + * <dt>#OPUS_APPLICATION_VOIP</dt> + * <dd>Process signal for improved speech intelligibility.</dd> + * <dt>#OPUS_APPLICATION_AUDIO</dt> + * <dd>Favor faithfulness to the original input.</dd> + * <dt>#OPUS_APPLICATION_RESTRICTED_LOWDELAY</dt> + * <dd>Configure the minimum possible coding delay by disabling certain modes + * of operation.</dd> + * </dl> + * @hideinitializer */ +#define OPUS_GET_APPLICATION(x) OPUS_GET_APPLICATION_REQUEST, __opus_check_int_ptr(x) + +/** Gets the sampling rate the encoder or decoder was initialized with. + * This simply returns the <code>Fs</code> value passed to opus_encoder_init() + * or opus_decoder_init(). 
+ * @param[out] x <tt>opus_int32 *</tt>: Sampling rate of encoder or decoder. + * @hideinitializer + */ +#define OPUS_GET_SAMPLE_RATE(x) OPUS_GET_SAMPLE_RATE_REQUEST, __opus_check_int_ptr(x) + +/** Gets the total samples of delay added by the entire codec. + * This can be queried by the encoder and then the provided number of samples can be + * skipped on from the start of the decoder's output to provide time aligned input + * and output. From the perspective of a decoding application the real data begins this many + * samples late. + * + * The decoder contribution to this delay is identical for all decoders, but the + * encoder portion of the delay may vary from implementation to implementation, + * version to version, or even depend on the encoder's initial configuration. + * Applications needing delay compensation should call this CTL rather than + * hard-coding a value. + * @param[out] x <tt>opus_int32 *</tt>: Number of lookahead samples + * @hideinitializer */ +#define OPUS_GET_LOOKAHEAD(x) OPUS_GET_LOOKAHEAD_REQUEST, __opus_check_int_ptr(x) + +/** Configures the encoder's use of inband forward error correction (FEC). + * @note This is only applicable to the LPC layer + * @see OPUS_GET_INBAND_FEC + * @param[in] x <tt>opus_int32</tt>: Allowed values: + * <dl> + * <dt>0</dt><dd>Disable inband FEC (default).</dd> + * <dt>1</dt><dd>Enable inband FEC.</dd> + * </dl> + * @hideinitializer */ +#define OPUS_SET_INBAND_FEC(x) OPUS_SET_INBAND_FEC_REQUEST, __opus_check_int(x) +/** Gets encoder's configured use of inband forward error correction. + * @see OPUS_SET_INBAND_FEC + * @param[out] x <tt>opus_int32 *</tt>: Returns one of the following values: + * <dl> + * <dt>0</dt><dd>Inband FEC disabled (default).</dd> + * <dt>1</dt><dd>Inband FEC enabled.</dd> + * </dl> + * @hideinitializer */ +#define OPUS_GET_INBAND_FEC(x) OPUS_GET_INBAND_FEC_REQUEST, __opus_check_int_ptr(x) + +/** Configures the encoder's expected packet loss percentage. 
+ * Higher values will trigger progressively more loss resistant behavior in the encoder + * at the expense of quality at a given bitrate in the lossless case, but greater quality + * under loss. + * @see OPUS_GET_PACKET_LOSS_PERC + * @param[in] x <tt>opus_int32</tt>: Loss percentage in the range 0-100, inclusive (default: 0). + * @hideinitializer */ +#define OPUS_SET_PACKET_LOSS_PERC(x) OPUS_SET_PACKET_LOSS_PERC_REQUEST, __opus_check_int(x) +/** Gets the encoder's configured packet loss percentage. + * @see OPUS_SET_PACKET_LOSS_PERC + * @param[out] x <tt>opus_int32 *</tt>: Returns the configured loss percentage + * in the range 0-100, inclusive (default: 0). + * @hideinitializer */ +#define OPUS_GET_PACKET_LOSS_PERC(x) OPUS_GET_PACKET_LOSS_PERC_REQUEST, __opus_check_int_ptr(x) + +/** Configures the encoder's use of discontinuous transmission (DTX). + * @note This is only applicable to the LPC layer + * @see OPUS_GET_DTX + * @param[in] x <tt>opus_int32</tt>: Allowed values: + * <dl> + * <dt>0</dt><dd>Disable DTX (default).</dd> + * <dt>1</dt><dd>Enable DTX.</dd> + * </dl> + * @hideinitializer */ +#define OPUS_SET_DTX(x) OPUS_SET_DTX_REQUEST, __opus_check_int(x) +/** Gets encoder's configured use of discontinuous transmission. + * @see OPUS_SET_DTX + * @param[out] x <tt>opus_int32 *</tt>: Returns one of the following values: + * <dl> + * <dt>0</dt><dd>DTX disabled (default).</dd> + * <dt>1</dt><dd>DTX enabled.</dd> + * </dl> + * @hideinitializer */ +#define OPUS_GET_DTX(x) OPUS_GET_DTX_REQUEST, __opus_check_int_ptr(x) +/** Configures the depth of signal being encoded. + * This is a hint which helps the encoder identify silence and near-silence. + * @see OPUS_GET_LSB_DEPTH + * @param[in] x <tt>opus_int32</tt>: Input precision in bits, between 8 and 24 + * (default: 24). + * @hideinitializer */ +#define OPUS_SET_LSB_DEPTH(x) OPUS_SET_LSB_DEPTH_REQUEST, __opus_check_int(x) +/** Gets the encoder's configured signal depth. 
+ * @see OPUS_SET_LSB_DEPTH + * @param[out] x <tt>opus_int32 *</tt>: Input precision in bits, between 8 and + * 24 (default: 24). + * @hideinitializer */ +#define OPUS_GET_LSB_DEPTH(x) OPUS_GET_LSB_DEPTH_REQUEST, __opus_check_int_ptr(x) + +/** Gets the duration (in samples) of the last packet successfully decoded or concealed. + * @param[out] x <tt>opus_int32 *</tt>: Number of samples (at current sampling rate). + * @hideinitializer */ +#define OPUS_GET_LAST_PACKET_DURATION(x) OPUS_GET_LAST_PACKET_DURATION_REQUEST, __opus_check_int_ptr(x) + +/** Configures the encoder's use of variable duration frames. + * When variable duration is enabled, the encoder is free to use a shorter frame + * size than the one requested in the opus_encode*() call. + * It is then the user's responsibility + * to verify how much audio was encoded by checking the ToC byte of the encoded + * packet. The part of the audio that was not encoded needs to be resent to the + * encoder for the next call. Do not use this option unless you <b>really</b> + * know what you are doing. + * @see OPUS_GET_EXPERT_FRAME_DURATION + * @param[in] x <tt>opus_int32</tt>: Allowed values: + * <dl> + * <dt>OPUS_FRAMESIZE_ARG</dt><dd>Select frame size from the argument (default).</dd> + * <dt>OPUS_FRAMESIZE_2_5_MS</dt><dd>Use 2.5 ms frames.</dd> + * <dt>OPUS_FRAMESIZE_5_MS</dt><dd>Use 5 ms frames.</dd> + * <dt>OPUS_FRAMESIZE_10_MS</dt><dd>Use 10 ms frames.</dd> + * <dt>OPUS_FRAMESIZE_20_MS</dt><dd>Use 20 ms frames.</dd> + * <dt>OPUS_FRAMESIZE_40_MS</dt><dd>Use 40 ms frames.</dd> + * <dt>OPUS_FRAMESIZE_60_MS</dt><dd>Use 60 ms frames.</dd> + * <dt>OPUS_FRAMESIZE_VARIABLE</dt><dd>Optimize the frame size dynamically.</dd> + * </dl> + * @hideinitializer */ +#define OPUS_SET_EXPERT_FRAME_DURATION(x) OPUS_SET_EXPERT_FRAME_DURATION_REQUEST, __opus_check_int(x) +/** Gets the encoder's configured use of variable duration frames. 
+ * @see OPUS_SET_EXPERT_FRAME_DURATION + * @param[out] x <tt>opus_int32 *</tt>: Returns one of the following values: + * <dl> + * <dt>OPUS_FRAMESIZE_ARG</dt><dd>Select frame size from the argument (default).</dd> + * <dt>OPUS_FRAMESIZE_2_5_MS</dt><dd>Use 2.5 ms frames.</dd> + * <dt>OPUS_FRAMESIZE_5_MS</dt><dd>Use 5 ms frames.</dd> + * <dt>OPUS_FRAMESIZE_10_MS</dt><dd>Use 10 ms frames.</dd> + * <dt>OPUS_FRAMESIZE_20_MS</dt><dd>Use 20 ms frames.</dd> + * <dt>OPUS_FRAMESIZE_40_MS</dt><dd>Use 40 ms frames.</dd> + * <dt>OPUS_FRAMESIZE_60_MS</dt><dd>Use 60 ms frames.</dd> + * <dt>OPUS_FRAMESIZE_VARIABLE</dt><dd>Optimize the frame size dynamically.</dd> + * </dl> + * @hideinitializer */ +#define OPUS_GET_EXPERT_FRAME_DURATION(x) OPUS_GET_EXPERT_FRAME_DURATION_REQUEST, __opus_check_int_ptr(x) + +/** If set to 1, disables almost all use of prediction, making frames almost + completely independent. This reduces quality. (default : 0) + * @hideinitializer */ +#define OPUS_SET_PREDICTION_DISABLED(x) OPUS_SET_PREDICTION_DISABLED_REQUEST, __opus_check_int(x) +/** Gets the encoder's configured prediction status. + * @hideinitializer */ +#define OPUS_GET_PREDICTION_DISABLED(x) OPUS_GET_PREDICTION_DISABLED_REQUEST, __opus_check_int_ptr(x) + +/**@}*/ + +/** @defgroup opus_genericctls Generic CTLs + * + * These macros are used with the \c opus_decoder_ctl and + * \c opus_encoder_ctl calls to generate a particular + * request. + * + * When called on an \c OpusDecoder they apply to that + * particular decoder instance. When called on an + * \c OpusEncoder they apply to the corresponding setting + * on that encoder instance, if present. 
+ * + * Some usage examples: + * + * @code + * int ret; + * opus_int32 pitch; + * ret = opus_decoder_ctl(dec_ctx, OPUS_GET_PITCH(&pitch)); + * if (ret != OPUS_OK) return ret; + * + * opus_encoder_ctl(enc_ctx, OPUS_RESET_STATE); + * opus_decoder_ctl(dec_ctx, OPUS_RESET_STATE); + * + * opus_int32 enc_bw, dec_bw; + * opus_encoder_ctl(enc_ctx, OPUS_GET_BANDWIDTH(&enc_bw)); + * opus_decoder_ctl(dec_ctx, OPUS_GET_BANDWIDTH(&dec_bw)); + * if (enc_bw != dec_bw) { + * printf("packet bandwidth mismatch!\n"); + * } + * @endcode + * + * @see opus_encoder, opus_decoder_ctl, opus_encoder_ctl, opus_decoderctls, opus_encoderctls + * @{ + */ + +/** Resets the codec state to be equivalent to a freshly initialized state. + * This should be called when switching streams in order to prevent + * the back to back decoding from giving different results from + * one at a time decoding. + * @hideinitializer */ +#define OPUS_RESET_STATE 4028 + +/** Gets the final state of the codec's entropy coder. + * This is used for testing purposes. + * The encoder and decoder state should be identical after coding a payload + * (assuming no data corruption or software bugs) + * + * @param[out] x <tt>opus_uint32 *</tt>: Entropy coder state + * + * @hideinitializer */ +#define OPUS_GET_FINAL_RANGE(x) OPUS_GET_FINAL_RANGE_REQUEST, __opus_check_uint_ptr(x) + +/** Gets the pitch of the last decoded frame, if available. + * This can be used for any post-processing algorithm requiring the use of pitch, + * e.g. time stretching/shortening. If the last frame was not voiced, or if the + * pitch was not coded in the frame, then zero is returned. + * + * This CTL is only implemented for decoder instances. + * + * @param[out] x <tt>opus_int32 *</tt>: pitch period at 48 kHz (or 0 if not available) + * + * @hideinitializer */ +#define OPUS_GET_PITCH(x) OPUS_GET_PITCH_REQUEST, __opus_check_int_ptr(x) + +/** Gets the encoder's configured bandpass or the decoder's last bandpass. 
+ * @see OPUS_SET_BANDWIDTH + * @param[out] x <tt>opus_int32 *</tt>: Returns one of the following values: + * <dl> + * <dt>#OPUS_AUTO</dt> <dd>(default)</dd> + * <dt>#OPUS_BANDWIDTH_NARROWBAND</dt> <dd>4 kHz passband</dd> + * <dt>#OPUS_BANDWIDTH_MEDIUMBAND</dt> <dd>6 kHz passband</dd> + * <dt>#OPUS_BANDWIDTH_WIDEBAND</dt> <dd>8 kHz passband</dd> + * <dt>#OPUS_BANDWIDTH_SUPERWIDEBAND</dt><dd>12 kHz passband</dd> + * <dt>#OPUS_BANDWIDTH_FULLBAND</dt> <dd>20 kHz passband</dd> + * </dl> + * @hideinitializer */ +#define OPUS_GET_BANDWIDTH(x) OPUS_GET_BANDWIDTH_REQUEST, __opus_check_int_ptr(x) + +/**@}*/ + +/** @defgroup opus_decoderctls Decoder related CTLs + * @see opus_genericctls, opus_encoderctls, opus_decoder + * @{ + */ + +/** Configures decoder gain adjustment. + * Scales the decoded output by a factor specified in Q8 dB units. + * This has a maximum range of -32768 to 32767 inclusive, and returns + * OPUS_BAD_ARG otherwise. The default is zero indicating no adjustment. + * This setting survives decoder reset. + * + * gain = pow(10, x/(20.0*256)) + * + * @param[in] x <tt>opus_int32</tt>: Amount to scale PCM signal by in Q8 dB units. + * @hideinitializer */ +#define OPUS_SET_GAIN(x) OPUS_SET_GAIN_REQUEST, __opus_check_int(x) +/** Gets the decoder's configured gain adjustment. @see OPUS_SET_GAIN + * + * @param[out] x <tt>opus_int32 *</tt>: Amount to scale PCM signal by in Q8 dB units. + * @hideinitializer */ +#define OPUS_GET_GAIN(x) OPUS_GET_GAIN_REQUEST, __opus_check_int_ptr(x) + +/**@}*/ + +/** @defgroup opus_libinfo Opus library information functions + * @{ + */ + +/** Converts an opus error code into a human readable string. + * + * @param[in] error <tt>int</tt>: Error number + * @returns Error string + */ +OPUS_EXPORT const char *opus_strerror(int error); + +/** Gets the libopus version string. 
+ * + * @returns Version string + */ +OPUS_EXPORT const char *opus_get_version_string(void); +/**@}*/ + +#ifdef __cplusplus +} +#endif + +#endif /* OPUS_DEFINES_H */ diff --git a/drivers/opus/opus_demo.c b/drivers/opus/opus_demo.c new file mode 100644 index 0000000000..7fcf65fd8b --- /dev/null +++ b/drivers/opus/opus_demo.c @@ -0,0 +1,885 @@ +/* Copyright (c) 2007-2008 CSIRO + Copyright (c) 2007-2009 Xiph.Org Foundation + Written by Jean-Marc Valin */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include <stdio.h> +#include <stdlib.h> +#include <math.h> +#include <string.h> +#include "opus.h" +#include "debug.h" +#include "opus_types.h" +#include "opus_private.h" +#include "opus_multistream.h" + +#define MAX_PACKET 1500 + +void print_usage( char* argv[] ) +{ + fprintf(stderr, "Usage: %s [-e] <application> <sampling rate (Hz)> <channels (1/2)> " + "<bits per second> [options] <input> <output>\n", argv[0]); + fprintf(stderr, " %s -d <sampling rate (Hz)> <channels (1/2)> " + "[options] <input> <output>\n\n", argv[0]); + fprintf(stderr, "mode: voip | audio | restricted-lowdelay\n" ); + fprintf(stderr, "options:\n" ); + fprintf(stderr, "-e : only runs the encoder (output the bit-stream)\n" ); + fprintf(stderr, "-d : only runs the decoder (reads the bit-stream as input)\n" ); + fprintf(stderr, "-cbr : enable constant bitrate; default: variable bitrate\n" ); + fprintf(stderr, "-cvbr : enable constrained variable bitrate; default: unconstrained\n" ); + fprintf(stderr, "-variable-duration : enable frames of variable duration (experts only); default: disabled\n" ); + fprintf(stderr, "-bandwidth <NB|MB|WB|SWB|FB> : audio bandwidth (from narrowband to fullband); default: sampling rate\n" ); + fprintf(stderr, "-framesize <2.5|5|10|20|40|60> : frame size in ms; default: 20 \n" ); + fprintf(stderr, "-max_payload <bytes> : maximum payload size in bytes, default: 1024\n" ); + fprintf(stderr, "-complexity <comp> : complexity, 0 (lowest) ... 
10 (highest); default: 10\n" ); + fprintf(stderr, "-inbandfec : enable SILK inband FEC\n" ); + fprintf(stderr, "-forcemono : force mono encoding, even for stereo input\n" ); + fprintf(stderr, "-dtx : enable SILK DTX\n" ); + fprintf(stderr, "-loss <perc> : simulate packet loss, in percent (0-100); default: 0\n" ); +} + +static void int_to_char(opus_uint32 i, unsigned char ch[4]) +{ + ch[0] = i>>24; + ch[1] = (i>>16)&0xFF; + ch[2] = (i>>8)&0xFF; + ch[3] = i&0xFF; +} + +static opus_uint32 char_to_int(unsigned char ch[4]) +{ + return ((opus_uint32)ch[0]<<24) | ((opus_uint32)ch[1]<<16) + | ((opus_uint32)ch[2]<< 8) | (opus_uint32)ch[3]; +} + +static void check_encoder_option(int decode_only, const char *opt) +{ + if (decode_only) + { + fprintf(stderr, "option %s is only for encoding\n", opt); + exit(EXIT_FAILURE); + } +} + +static const int silk8_test[][4] = { + {MODE_SILK_ONLY, OPUS_BANDWIDTH_NARROWBAND, 960*3, 1}, + {MODE_SILK_ONLY, OPUS_BANDWIDTH_NARROWBAND, 960*2, 1}, + {MODE_SILK_ONLY, OPUS_BANDWIDTH_NARROWBAND, 960, 1}, + {MODE_SILK_ONLY, OPUS_BANDWIDTH_NARROWBAND, 480, 1}, + {MODE_SILK_ONLY, OPUS_BANDWIDTH_NARROWBAND, 960*3, 2}, + {MODE_SILK_ONLY, OPUS_BANDWIDTH_NARROWBAND, 960*2, 2}, + {MODE_SILK_ONLY, OPUS_BANDWIDTH_NARROWBAND, 960, 2}, + {MODE_SILK_ONLY, OPUS_BANDWIDTH_NARROWBAND, 480, 2} +}; + +static const int silk12_test[][4] = { + {MODE_SILK_ONLY, OPUS_BANDWIDTH_MEDIUMBAND, 960*3, 1}, + {MODE_SILK_ONLY, OPUS_BANDWIDTH_MEDIUMBAND, 960*2, 1}, + {MODE_SILK_ONLY, OPUS_BANDWIDTH_MEDIUMBAND, 960, 1}, + {MODE_SILK_ONLY, OPUS_BANDWIDTH_MEDIUMBAND, 480, 1}, + {MODE_SILK_ONLY, OPUS_BANDWIDTH_MEDIUMBAND, 960*3, 2}, + {MODE_SILK_ONLY, OPUS_BANDWIDTH_MEDIUMBAND, 960*2, 2}, + {MODE_SILK_ONLY, OPUS_BANDWIDTH_MEDIUMBAND, 960, 2}, + {MODE_SILK_ONLY, OPUS_BANDWIDTH_MEDIUMBAND, 480, 2} +}; + +static const int silk16_test[][4] = { + {MODE_SILK_ONLY, OPUS_BANDWIDTH_WIDEBAND, 960*3, 1}, + {MODE_SILK_ONLY, OPUS_BANDWIDTH_WIDEBAND, 960*2, 1}, + {MODE_SILK_ONLY, 
OPUS_BANDWIDTH_WIDEBAND, 960, 1}, + {MODE_SILK_ONLY, OPUS_BANDWIDTH_WIDEBAND, 480, 1}, + {MODE_SILK_ONLY, OPUS_BANDWIDTH_WIDEBAND, 960*3, 2}, + {MODE_SILK_ONLY, OPUS_BANDWIDTH_WIDEBAND, 960*2, 2}, + {MODE_SILK_ONLY, OPUS_BANDWIDTH_WIDEBAND, 960, 2}, + {MODE_SILK_ONLY, OPUS_BANDWIDTH_WIDEBAND, 480, 2} +}; + +static const int hybrid24_test[][4] = { + {MODE_SILK_ONLY, OPUS_BANDWIDTH_SUPERWIDEBAND, 960, 1}, + {MODE_SILK_ONLY, OPUS_BANDWIDTH_SUPERWIDEBAND, 480, 1}, + {MODE_SILK_ONLY, OPUS_BANDWIDTH_SUPERWIDEBAND, 960, 2}, + {MODE_SILK_ONLY, OPUS_BANDWIDTH_SUPERWIDEBAND, 480, 2} +}; + +static const int hybrid48_test[][4] = { + {MODE_SILK_ONLY, OPUS_BANDWIDTH_FULLBAND, 960, 1}, + {MODE_SILK_ONLY, OPUS_BANDWIDTH_FULLBAND, 480, 1}, + {MODE_SILK_ONLY, OPUS_BANDWIDTH_FULLBAND, 960, 2}, + {MODE_SILK_ONLY, OPUS_BANDWIDTH_FULLBAND, 480, 2} +}; + +static const int celt_test[][4] = { + {MODE_CELT_ONLY, OPUS_BANDWIDTH_FULLBAND, 960, 1}, + {MODE_CELT_ONLY, OPUS_BANDWIDTH_SUPERWIDEBAND, 960, 1}, + {MODE_CELT_ONLY, OPUS_BANDWIDTH_WIDEBAND, 960, 1}, + {MODE_CELT_ONLY, OPUS_BANDWIDTH_NARROWBAND, 960, 1}, + + {MODE_CELT_ONLY, OPUS_BANDWIDTH_FULLBAND, 480, 1}, + {MODE_CELT_ONLY, OPUS_BANDWIDTH_SUPERWIDEBAND, 480, 1}, + {MODE_CELT_ONLY, OPUS_BANDWIDTH_WIDEBAND, 480, 1}, + {MODE_CELT_ONLY, OPUS_BANDWIDTH_NARROWBAND, 480, 1}, + + {MODE_CELT_ONLY, OPUS_BANDWIDTH_FULLBAND, 240, 1}, + {MODE_CELT_ONLY, OPUS_BANDWIDTH_SUPERWIDEBAND, 240, 1}, + {MODE_CELT_ONLY, OPUS_BANDWIDTH_WIDEBAND, 240, 1}, + {MODE_CELT_ONLY, OPUS_BANDWIDTH_NARROWBAND, 240, 1}, + + {MODE_CELT_ONLY, OPUS_BANDWIDTH_FULLBAND, 120, 1}, + {MODE_CELT_ONLY, OPUS_BANDWIDTH_SUPERWIDEBAND, 120, 1}, + {MODE_CELT_ONLY, OPUS_BANDWIDTH_WIDEBAND, 120, 1}, + {MODE_CELT_ONLY, OPUS_BANDWIDTH_NARROWBAND, 120, 1}, + + {MODE_CELT_ONLY, OPUS_BANDWIDTH_FULLBAND, 960, 2}, + {MODE_CELT_ONLY, OPUS_BANDWIDTH_SUPERWIDEBAND, 960, 2}, + {MODE_CELT_ONLY, OPUS_BANDWIDTH_WIDEBAND, 960, 2}, + {MODE_CELT_ONLY, OPUS_BANDWIDTH_NARROWBAND, 960, 2}, + + 
{MODE_CELT_ONLY, OPUS_BANDWIDTH_FULLBAND, 480, 2}, + {MODE_CELT_ONLY, OPUS_BANDWIDTH_SUPERWIDEBAND, 480, 2}, + {MODE_CELT_ONLY, OPUS_BANDWIDTH_WIDEBAND, 480, 2}, + {MODE_CELT_ONLY, OPUS_BANDWIDTH_NARROWBAND, 480, 2}, + + {MODE_CELT_ONLY, OPUS_BANDWIDTH_FULLBAND, 240, 2}, + {MODE_CELT_ONLY, OPUS_BANDWIDTH_SUPERWIDEBAND, 240, 2}, + {MODE_CELT_ONLY, OPUS_BANDWIDTH_WIDEBAND, 240, 2}, + {MODE_CELT_ONLY, OPUS_BANDWIDTH_NARROWBAND, 240, 2}, + + {MODE_CELT_ONLY, OPUS_BANDWIDTH_FULLBAND, 120, 2}, + {MODE_CELT_ONLY, OPUS_BANDWIDTH_SUPERWIDEBAND, 120, 2}, + {MODE_CELT_ONLY, OPUS_BANDWIDTH_WIDEBAND, 120, 2}, + {MODE_CELT_ONLY, OPUS_BANDWIDTH_NARROWBAND, 120, 2}, + +}; + +static const int celt_hq_test[][4] = { + {MODE_CELT_ONLY, OPUS_BANDWIDTH_FULLBAND, 960, 2}, + {MODE_CELT_ONLY, OPUS_BANDWIDTH_FULLBAND, 480, 2}, + {MODE_CELT_ONLY, OPUS_BANDWIDTH_FULLBAND, 240, 2}, + {MODE_CELT_ONLY, OPUS_BANDWIDTH_FULLBAND, 120, 2}, +}; + +#if 0 /* This is a hack that replaces the normal encoder/decoder with the multistream version */ +#define OpusEncoder OpusMSEncoder +#define OpusDecoder OpusMSDecoder +#define opus_encode opus_multistream_encode +#define opus_decode opus_multistream_decode +#define opus_encoder_ctl opus_multistream_encoder_ctl +#define opus_decoder_ctl opus_multistream_decoder_ctl +#define opus_encoder_create ms_opus_encoder_create +#define opus_decoder_create ms_opus_decoder_create +#define opus_encoder_destroy opus_multistream_encoder_destroy +#define opus_decoder_destroy opus_multistream_decoder_destroy + +static OpusEncoder *ms_opus_encoder_create(opus_int32 Fs, int channels, int application, int *error) +{ + int streams, coupled_streams; + unsigned char mapping[256]; + return (OpusEncoder *)opus_multistream_surround_encoder_create(Fs, channels, 1, &streams, &coupled_streams, mapping, application, error); +} +static OpusDecoder *ms_opus_decoder_create(opus_int32 Fs, int channels, int *error) +{ + int streams; + int coupled_streams; + unsigned char mapping[256]={0,1}; + 
streams = 1; + coupled_streams = channels==2; + return (OpusDecoder *)opus_multistream_decoder_create(Fs, channels, streams, coupled_streams, mapping, error); +} +#endif + +int main(int argc, char *argv[]) +{ + int err; + char *inFile, *outFile; + FILE *fin, *fout; + OpusEncoder *enc=NULL; + OpusDecoder *dec=NULL; + int args; + int len[2]; + int frame_size, channels; + opus_int32 bitrate_bps=0; + unsigned char *data[2]; + unsigned char *fbytes; + opus_int32 sampling_rate; + int use_vbr; + int max_payload_bytes; + int complexity; + int use_inbandfec; + int use_dtx; + int forcechannels; + int cvbr = 0; + int packet_loss_perc; + opus_int32 count=0, count_act=0; + int k; + opus_int32 skip=0; + int stop=0; + short *in, *out; + int application=OPUS_APPLICATION_AUDIO; + double bits=0.0, bits_max=0.0, bits_act=0.0, bits2=0.0, nrg; + double tot_samples=0; + opus_uint64 tot_in, tot_out; + int bandwidth=-1; + const char *bandwidth_string; + int lost = 0, lost_prev = 1; + int toggle = 0; + opus_uint32 enc_final_range[2]; + opus_uint32 dec_final_range; + int encode_only=0, decode_only=0; + int max_frame_size = 960*6; + int curr_read=0; + int sweep_bps = 0; + int random_framesize=0, newsize=0, delayed_celt=0; + int sweep_max=0, sweep_min=0; + int random_fec=0; + const int (*mode_list)[4]=NULL; + int nb_modes_in_list=0; + int curr_mode=0; + int curr_mode_count=0; + int mode_switch_time = 48000; + int nb_encoded=0; + int remaining=0; + int variable_duration=OPUS_FRAMESIZE_ARG; + int delayed_decision=0; + + if (argc < 5 ) + { + print_usage( argv ); + return EXIT_FAILURE; + } + + tot_in=tot_out=0; + fprintf(stderr, "%s\n", opus_get_version_string()); + + args = 1; + if (strcmp(argv[args], "-e")==0) + { + encode_only = 1; + args++; + } else if (strcmp(argv[args], "-d")==0) + { + decode_only = 1; + args++; + } + if (!decode_only && argc < 7 ) + { + print_usage( argv ); + return EXIT_FAILURE; + } + + if (!decode_only) + { + if (strcmp(argv[args], "voip")==0) + application = 
OPUS_APPLICATION_VOIP; + else if (strcmp(argv[args], "restricted-lowdelay")==0) + application = OPUS_APPLICATION_RESTRICTED_LOWDELAY; + else if (strcmp(argv[args], "audio")!=0) { + fprintf(stderr, "unknown application: %s\n", argv[args]); + print_usage(argv); + return EXIT_FAILURE; + } + args++; + } + sampling_rate = (opus_int32)atol(argv[args]); + args++; + + if (sampling_rate != 8000 && sampling_rate != 12000 + && sampling_rate != 16000 && sampling_rate != 24000 + && sampling_rate != 48000) + { + fprintf(stderr, "Supported sampling rates are 8000, 12000, " + "16000, 24000 and 48000.\n"); + return EXIT_FAILURE; + } + frame_size = sampling_rate/50; + + channels = atoi(argv[args]); + args++; + + if (channels < 1 || channels > 2) + { + fprintf(stderr, "Opus_demo supports only 1 or 2 channels.\n"); + return EXIT_FAILURE; + } + + if (!decode_only) + { + bitrate_bps = (opus_int32)atol(argv[args]); + args++; + } + + /* defaults: */ + use_vbr = 1; + bandwidth = OPUS_AUTO; + max_payload_bytes = MAX_PACKET; + complexity = 10; + use_inbandfec = 0; + forcechannels = OPUS_AUTO; + use_dtx = 0; + packet_loss_perc = 0; + max_frame_size = 2*48000; + curr_read=0; + + while( args < argc - 2 ) { + /* process command line options */ + if( strcmp( argv[ args ], "-cbr" ) == 0 ) { + check_encoder_option(decode_only, "-cbr"); + use_vbr = 0; + args++; + } else if( strcmp( argv[ args ], "-bandwidth" ) == 0 ) { + check_encoder_option(decode_only, "-bandwidth"); + if (strcmp(argv[ args + 1 ], "NB")==0) + bandwidth = OPUS_BANDWIDTH_NARROWBAND; + else if (strcmp(argv[ args + 1 ], "MB")==0) + bandwidth = OPUS_BANDWIDTH_MEDIUMBAND; + else if (strcmp(argv[ args + 1 ], "WB")==0) + bandwidth = OPUS_BANDWIDTH_WIDEBAND; + else if (strcmp(argv[ args + 1 ], "SWB")==0) + bandwidth = OPUS_BANDWIDTH_SUPERWIDEBAND; + else if (strcmp(argv[ args + 1 ], "FB")==0) + bandwidth = OPUS_BANDWIDTH_FULLBAND; + else { + fprintf(stderr, "Unknown bandwidth %s. 
" + "Supported are NB, MB, WB, SWB, FB.\n", + argv[ args + 1 ]); + return EXIT_FAILURE; + } + args += 2; + } else if( strcmp( argv[ args ], "-framesize" ) == 0 ) { + check_encoder_option(decode_only, "-framesize"); + if (strcmp(argv[ args + 1 ], "2.5")==0) + frame_size = sampling_rate/400; + else if (strcmp(argv[ args + 1 ], "5")==0) + frame_size = sampling_rate/200; + else if (strcmp(argv[ args + 1 ], "10")==0) + frame_size = sampling_rate/100; + else if (strcmp(argv[ args + 1 ], "20")==0) + frame_size = sampling_rate/50; + else if (strcmp(argv[ args + 1 ], "40")==0) + frame_size = sampling_rate/25; + else if (strcmp(argv[ args + 1 ], "60")==0) + frame_size = 3*sampling_rate/50; + else { + fprintf(stderr, "Unsupported frame size: %s ms. " + "Supported are 2.5, 5, 10, 20, 40, 60.\n", + argv[ args + 1 ]); + return EXIT_FAILURE; + } + args += 2; + } else if( strcmp( argv[ args ], "-max_payload" ) == 0 ) { + check_encoder_option(decode_only, "-max_payload"); + max_payload_bytes = atoi( argv[ args + 1 ] ); + args += 2; + } else if( strcmp( argv[ args ], "-complexity" ) == 0 ) { + check_encoder_option(decode_only, "-complexity"); + complexity = atoi( argv[ args + 1 ] ); + args += 2; + } else if( strcmp( argv[ args ], "-inbandfec" ) == 0 ) { + use_inbandfec = 1; + args++; + } else if( strcmp( argv[ args ], "-forcemono" ) == 0 ) { + check_encoder_option(decode_only, "-forcemono"); + forcechannels = 1; + args++; + } else if( strcmp( argv[ args ], "-cvbr" ) == 0 ) { + check_encoder_option(decode_only, "-cvbr"); + cvbr = 1; + args++; + } else if( strcmp( argv[ args ], "-variable-duration" ) == 0 ) { + check_encoder_option(decode_only, "-variable-duration"); + variable_duration = OPUS_FRAMESIZE_VARIABLE; + args++; + } else if( strcmp( argv[ args ], "-delayed-decision" ) == 0 ) { + check_encoder_option(decode_only, "-delayed-decision"); + delayed_decision = 1; + args++; + } else if( strcmp( argv[ args ], "-dtx") == 0 ) { + check_encoder_option(decode_only, "-dtx"); + use_dtx = 
1; + args++; + } else if( strcmp( argv[ args ], "-loss" ) == 0 ) { + packet_loss_perc = atoi( argv[ args + 1 ] ); + args += 2; + } else if( strcmp( argv[ args ], "-sweep" ) == 0 ) { + check_encoder_option(decode_only, "-sweep"); + sweep_bps = atoi( argv[ args + 1 ] ); + args += 2; + } else if( strcmp( argv[ args ], "-random_framesize" ) == 0 ) { + check_encoder_option(decode_only, "-random_framesize"); + random_framesize = 1; + args++; + } else if( strcmp( argv[ args ], "-sweep_max" ) == 0 ) { + check_encoder_option(decode_only, "-sweep_max"); + sweep_max = atoi( argv[ args + 1 ] ); + args += 2; + } else if( strcmp( argv[ args ], "-random_fec" ) == 0 ) { + check_encoder_option(decode_only, "-random_fec"); + random_fec = 1; + args++; + } else if( strcmp( argv[ args ], "-silk8k_test" ) == 0 ) { + check_encoder_option(decode_only, "-silk8k_test"); + mode_list = silk8_test; + nb_modes_in_list = 8; + args++; + } else if( strcmp( argv[ args ], "-silk12k_test" ) == 0 ) { + check_encoder_option(decode_only, "-silk12k_test"); + mode_list = silk12_test; + nb_modes_in_list = 8; + args++; + } else if( strcmp( argv[ args ], "-silk16k_test" ) == 0 ) { + check_encoder_option(decode_only, "-silk16k_test"); + mode_list = silk16_test; + nb_modes_in_list = 8; + args++; + } else if( strcmp( argv[ args ], "-hybrid24k_test" ) == 0 ) { + check_encoder_option(decode_only, "-hybrid24k_test"); + mode_list = hybrid24_test; + nb_modes_in_list = 4; + args++; + } else if( strcmp( argv[ args ], "-hybrid48k_test" ) == 0 ) { + check_encoder_option(decode_only, "-hybrid48k_test"); + mode_list = hybrid48_test; + nb_modes_in_list = 4; + args++; + } else if( strcmp( argv[ args ], "-celt_test" ) == 0 ) { + check_encoder_option(decode_only, "-celt_test"); + mode_list = celt_test; + nb_modes_in_list = 32; + args++; + } else if( strcmp( argv[ args ], "-celt_hq_test" ) == 0 ) { + check_encoder_option(decode_only, "-celt_hq_test"); + mode_list = celt_hq_test; + nb_modes_in_list = 4; + args++; + } else { + 
printf( "Error: unrecognized setting: %s\n\n", argv[ args ] ); + print_usage( argv ); + return EXIT_FAILURE; + } + } + + if (sweep_max) + sweep_min = bitrate_bps; + + if (max_payload_bytes < 0 || max_payload_bytes > MAX_PACKET) + { + fprintf (stderr, "max_payload_bytes must be between 0 and %d\n", + MAX_PACKET); + return EXIT_FAILURE; + } + + inFile = argv[argc-2]; + fin = fopen(inFile, "rb"); + if (!fin) + { + fprintf (stderr, "Could not open input file %s\n", argv[argc-2]); + return EXIT_FAILURE; + } + if (mode_list) + { + int size; + fseek(fin, 0, SEEK_END); + size = ftell(fin); + fprintf(stderr, "File size is %d bytes\n", size); + fseek(fin, 0, SEEK_SET); + mode_switch_time = size/sizeof(short)/channels/nb_modes_in_list; + fprintf(stderr, "Switching mode every %d samples\n", mode_switch_time); + } + + outFile = argv[argc-1]; + fout = fopen(outFile, "wb+"); + if (!fout) + { + fprintf (stderr, "Could not open output file %s\n", argv[argc-1]); + fclose(fin); + return EXIT_FAILURE; + } + + if (!decode_only) + { + enc = opus_encoder_create(sampling_rate, channels, application, &err); + if (err != OPUS_OK) + { + fprintf(stderr, "Cannot create encoder: %s\n", opus_strerror(err)); + fclose(fin); + fclose(fout); + return EXIT_FAILURE; + } + opus_encoder_ctl(enc, OPUS_SET_BITRATE(bitrate_bps)); + opus_encoder_ctl(enc, OPUS_SET_BANDWIDTH(bandwidth)); + opus_encoder_ctl(enc, OPUS_SET_VBR(use_vbr)); + opus_encoder_ctl(enc, OPUS_SET_VBR_CONSTRAINT(cvbr)); + opus_encoder_ctl(enc, OPUS_SET_COMPLEXITY(complexity)); + opus_encoder_ctl(enc, OPUS_SET_INBAND_FEC(use_inbandfec)); + opus_encoder_ctl(enc, OPUS_SET_FORCE_CHANNELS(forcechannels)); + opus_encoder_ctl(enc, OPUS_SET_DTX(use_dtx)); + opus_encoder_ctl(enc, OPUS_SET_PACKET_LOSS_PERC(packet_loss_perc)); + + opus_encoder_ctl(enc, OPUS_GET_LOOKAHEAD(&skip)); + opus_encoder_ctl(enc, OPUS_SET_LSB_DEPTH(16)); + opus_encoder_ctl(enc, OPUS_SET_EXPERT_FRAME_DURATION(variable_duration)); + } + if (!encode_only) + { + dec = 
opus_decoder_create(sampling_rate, channels, &err); + if (err != OPUS_OK) + { + fprintf(stderr, "Cannot create decoder: %s\n", opus_strerror(err)); + fclose(fin); + fclose(fout); + return EXIT_FAILURE; + } + } + + + switch(bandwidth) + { + case OPUS_BANDWIDTH_NARROWBAND: + bandwidth_string = "narrowband"; + break; + case OPUS_BANDWIDTH_MEDIUMBAND: + bandwidth_string = "mediumband"; + break; + case OPUS_BANDWIDTH_WIDEBAND: + bandwidth_string = "wideband"; + break; + case OPUS_BANDWIDTH_SUPERWIDEBAND: + bandwidth_string = "superwideband"; + break; + case OPUS_BANDWIDTH_FULLBAND: + bandwidth_string = "fullband"; + break; + case OPUS_AUTO: + bandwidth_string = "auto"; + break; + default: + bandwidth_string = "unknown"; + break; + } + + if (decode_only) + fprintf(stderr, "Decoding with %ld Hz output (%d channels)\n", + (long)sampling_rate, channels); + else + fprintf(stderr, "Encoding %ld Hz input at %.3f kb/s " + "in %s mode with %d-sample frames.\n", + (long)sampling_rate, bitrate_bps*0.001, + bandwidth_string, frame_size); + + in = (short*)malloc(max_frame_size*channels*sizeof(short)); + out = (short*)malloc(max_frame_size*channels*sizeof(short)); + fbytes = (unsigned char*)malloc(max_frame_size*channels*sizeof(short)); + data[0] = (unsigned char*)calloc(max_payload_bytes,sizeof(char)); + if ( use_inbandfec ) { + data[1] = (unsigned char*)calloc(max_payload_bytes,sizeof(char)); + } + if(delayed_decision) + { + if (variable_duration!=OPUS_FRAMESIZE_VARIABLE) + { + if (frame_size==sampling_rate/400) + variable_duration = OPUS_FRAMESIZE_2_5_MS; + else if (frame_size==sampling_rate/200) + variable_duration = OPUS_FRAMESIZE_5_MS; + else if (frame_size==sampling_rate/100) + variable_duration = OPUS_FRAMESIZE_10_MS; + else if (frame_size==sampling_rate/50) + variable_duration = OPUS_FRAMESIZE_20_MS; + else if (frame_size==sampling_rate/25) + variable_duration = OPUS_FRAMESIZE_40_MS; + else + variable_duration = OPUS_FRAMESIZE_60_MS; + opus_encoder_ctl(enc, 
OPUS_SET_EXPERT_FRAME_DURATION(variable_duration)); + } + frame_size = 2*48000; + } + while (!stop) + { + if (delayed_celt) + { + frame_size = newsize; + delayed_celt = 0; + } else if (random_framesize && rand()%20==0) + { + newsize = rand()%6; + switch(newsize) + { + case 0: newsize=sampling_rate/400; break; + case 1: newsize=sampling_rate/200; break; + case 2: newsize=sampling_rate/100; break; + case 3: newsize=sampling_rate/50; break; + case 4: newsize=sampling_rate/25; break; + case 5: newsize=3*sampling_rate/50; break; + } + while (newsize < sampling_rate/25 && bitrate_bps-fabs(sweep_bps) <= 3*12*sampling_rate/newsize) + newsize*=2; + if (newsize < sampling_rate/100 && frame_size >= sampling_rate/100) + { + opus_encoder_ctl(enc, OPUS_SET_FORCE_MODE(MODE_CELT_ONLY)); + delayed_celt=1; + } else { + frame_size = newsize; + } + } + if (random_fec && rand()%30==0) + { + opus_encoder_ctl(enc, OPUS_SET_INBAND_FEC(rand()%4==0)); + } + if (decode_only) + { + unsigned char ch[4]; + err = fread(ch, 1, 4, fin); + if (feof(fin)) + break; + len[toggle] = char_to_int(ch); + if (len[toggle]>max_payload_bytes || len[toggle]<0) + { + fprintf(stderr, "Invalid payload length: %d\n",len[toggle]); + break; + } + err = fread(ch, 1, 4, fin); + enc_final_range[toggle] = char_to_int(ch); + err = fread(data[toggle], 1, len[toggle], fin); + if (err<len[toggle]) + { + fprintf(stderr, "Ran out of input, " + "expecting %d bytes got %d\n", + len[toggle],err); + break; + } + } else { + int i; + if (mode_list!=NULL) + { + opus_encoder_ctl(enc, OPUS_SET_BANDWIDTH(mode_list[curr_mode][1])); + opus_encoder_ctl(enc, OPUS_SET_FORCE_MODE(mode_list[curr_mode][0])); + opus_encoder_ctl(enc, OPUS_SET_FORCE_CHANNELS(mode_list[curr_mode][3])); + frame_size = mode_list[curr_mode][2]; + } + err = fread(fbytes, sizeof(short)*channels, frame_size-remaining, fin); + curr_read = err; + tot_in += curr_read; + for(i=0;i<curr_read*channels;i++) + { + opus_int32 s; + s=fbytes[2*i+1]<<8|fbytes[2*i]; + 
s=((s&0xFFFF)^0x8000)-0x8000; + in[i+remaining*channels]=s; + } + if (curr_read+remaining < frame_size) + { + for (i=(curr_read+remaining)*channels;i<frame_size*channels;i++) + in[i] = 0; + if (encode_only || decode_only) + stop = 1; + } + len[toggle] = opus_encode(enc, in, frame_size, data[toggle], max_payload_bytes); + nb_encoded = opus_packet_get_samples_per_frame(data[toggle], sampling_rate)*opus_packet_get_nb_frames(data[toggle], len[toggle]); + remaining = frame_size-nb_encoded; + for(i=0;i<remaining*channels;i++) + in[i] = in[nb_encoded*channels+i]; + if (sweep_bps!=0) + { + bitrate_bps += sweep_bps; + if (sweep_max) + { + if (bitrate_bps > sweep_max) + sweep_bps = -sweep_bps; + else if (bitrate_bps < sweep_min) + sweep_bps = -sweep_bps; + } + /* safety */ + if (bitrate_bps<1000) + bitrate_bps = 1000; + opus_encoder_ctl(enc, OPUS_SET_BITRATE(bitrate_bps)); + } + opus_encoder_ctl(enc, OPUS_GET_FINAL_RANGE(&enc_final_range[toggle])); + if (len[toggle] < 0) + { + fprintf (stderr, "opus_encode() returned %d\n", len[toggle]); + fclose(fin); + fclose(fout); + return EXIT_FAILURE; + } + curr_mode_count += frame_size; + if (curr_mode_count > mode_switch_time && curr_mode < nb_modes_in_list-1) + { + curr_mode++; + curr_mode_count = 0; + } + } + +#if 0 /* This is for testing the padding code, do not enable by default */ + if (len[toggle]<1275) + { + int new_len = len[toggle]+rand()%(max_payload_bytes-len[toggle]); + if ((err = opus_packet_pad(data[toggle], len[toggle], new_len)) != OPUS_OK) + { + fprintf(stderr, "padding failed: %s\n", opus_strerror(err)); + return EXIT_FAILURE; + } + len[toggle] = new_len; + } +#endif + if (encode_only) + { + unsigned char int_field[4]; + int_to_char(len[toggle], int_field); + if (fwrite(int_field, 1, 4, fout) != 4) { + fprintf(stderr, "Error writing.\n"); + return EXIT_FAILURE; + } + int_to_char(enc_final_range[toggle], int_field); + if (fwrite(int_field, 1, 4, fout) != 4) { + fprintf(stderr, "Error writing.\n"); + return 
EXIT_FAILURE; + } + if (fwrite(data[toggle], 1, len[toggle], fout) != (unsigned)len[toggle]) { + fprintf(stderr, "Error writing.\n"); + return EXIT_FAILURE; + } + tot_samples += nb_encoded; + } else { + int output_samples; + lost = len[toggle]==0 || (packet_loss_perc>0 && rand()%100 < packet_loss_perc); + if (lost) + opus_decoder_ctl(dec, OPUS_GET_LAST_PACKET_DURATION(&output_samples)); + else + output_samples = max_frame_size; + if( count >= use_inbandfec ) { + /* delay by one packet when using in-band FEC */ + if( use_inbandfec ) { + if( lost_prev ) { + /* attempt to decode with in-band FEC from next packet */ + opus_decoder_ctl(dec, OPUS_GET_LAST_PACKET_DURATION(&output_samples)); + output_samples = opus_decode(dec, lost ? NULL : data[toggle], len[toggle], out, output_samples, 1); + } else { + /* regular decode */ + output_samples = max_frame_size; + output_samples = opus_decode(dec, data[1-toggle], len[1-toggle], out, output_samples, 0); + } + } else { + output_samples = opus_decode(dec, lost ? 
NULL : data[toggle], len[toggle], out, output_samples, 0); + } + if (output_samples>0) + { + if (!decode_only && tot_out + output_samples > tot_in) + { + stop=1; + output_samples = tot_in-tot_out; + } + if (output_samples>skip) { + int i; + for(i=0;i<(output_samples-skip)*channels;i++) + { + short s; + s=out[i+(skip*channels)]; + fbytes[2*i]=s&0xFF; + fbytes[2*i+1]=(s>>8)&0xFF; + } + if (fwrite(fbytes, sizeof(short)*channels, output_samples-skip, fout) != (unsigned)(output_samples-skip)){ + fprintf(stderr, "Error writing.\n"); + return EXIT_FAILURE; + } + tot_out += output_samples-skip; + } + if (output_samples<skip) skip -= output_samples; + else skip = 0; + } else { + fprintf(stderr, "error decoding frame: %s\n", + opus_strerror(output_samples)); + } + tot_samples += output_samples; + } + } + + if (!encode_only) + opus_decoder_ctl(dec, OPUS_GET_FINAL_RANGE(&dec_final_range)); + /* compare final range encoder rng values of encoder and decoder */ + if( enc_final_range[toggle^use_inbandfec]!=0 && !encode_only + && !lost && !lost_prev + && dec_final_range != enc_final_range[toggle^use_inbandfec] ) { + fprintf (stderr, "Error: Range coder state mismatch " + "between encoder and decoder " + "in frame %ld: 0x%8lx vs 0x%8lx\n", + (long)count, + (unsigned long)enc_final_range[toggle^use_inbandfec], + (unsigned long)dec_final_range); + fclose(fin); + fclose(fout); + return EXIT_FAILURE; + } + + lost_prev = lost; + + /* count bits */ + bits += len[toggle]*8; + bits_max = ( len[toggle]*8 > bits_max ) ? 
len[toggle]*8 : bits_max; + if( count >= use_inbandfec ) { + nrg = 0.0; + if (!decode_only) + { + for ( k = 0; k < frame_size * channels; k++ ) { + nrg += in[ k ] * (double)in[ k ]; + } + } + if ( ( nrg / ( frame_size * channels ) ) > 1e5 ) { + bits_act += len[toggle]*8; + count_act++; + } + /* Variance */ + bits2 += len[toggle]*len[toggle]*64; + } + count++; + toggle = (toggle + use_inbandfec) & 1; + } + fprintf (stderr, "average bitrate: %7.3f kb/s\n", + 1e-3*bits*sampling_rate/tot_samples); + fprintf (stderr, "maximum bitrate: %7.3f kb/s\n", + 1e-3*bits_max*sampling_rate/frame_size); + if (!decode_only) + fprintf (stderr, "active bitrate: %7.3f kb/s\n", + 1e-3*bits_act*sampling_rate/(frame_size*(double)count_act)); + fprintf (stderr, "bitrate standard deviation: %7.3f kb/s\n", + 1e-3*sqrt(bits2/count - bits*bits/(count*(double)count))*sampling_rate/frame_size); + /* Close any files to which intermediate results were stored */ + SILK_DEBUG_STORE_CLOSE_FILES + silk_TimerSave("opus_timing.txt"); + opus_encoder_destroy(enc); + opus_decoder_destroy(dec); + free(data[0]); + if (use_inbandfec) + free(data[1]); + fclose(fin); + fclose(fout); + free(in); + free(out); + free(fbytes); + return EXIT_SUCCESS; +} diff --git a/drivers/opus/opus_encoder.c b/drivers/opus/opus_encoder.c new file mode 100644 index 0000000000..f739daa258 --- /dev/null +++ b/drivers/opus/opus_encoder.c @@ -0,0 +1,2488 @@ +/* Copyright (c) 2010-2011 Xiph.Org Foundation, Skype Limited + Written by Jean-Marc Valin and Koen Vos */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include <stdarg.h> +#include "celt.h" +#include "entenc.h" +#include "opus_modes.h" +#include "API.h" +#include "stack_alloc.h" +#include "float_cast.h" +#include "opus.h" +#include "arch.h" +#include "opus_private.h" +#include "os_support.h" +#include "cpu_support.h" +#include "analysis.h" +#include "mathops.h" +#include "tuning_parameters.h" +#ifdef OPUS_FIXED_POINT +#include "fixed/structs_FIX.h" +#else +#include "float/structs_FLP.h" +#endif + +#define MAX_ENCODER_BUFFER 480 /* delay_buffer in struct OpusEncoder is declared with twice this many entries */ + +typedef struct { + opus_val32 XX, XY, YY; + opus_val16 smoothed_width; + opus_val16 max_follower; +} StereoWidthState; /* embedded in struct OpusEncoder as the width_mem field */ +/* Top-level encoder state. opus_encoder_init places the SILK and CELT encoder states in the same allocation, after this struct, and records where they start in silk_enc_offset and celt_enc_offset. Field order matters: OPUS_ENCODER_RESET_START below names a field by position. */ +struct OpusEncoder { + int celt_enc_offset; /* byte offset from the start of this struct to the CELT encoder state (silk_enc_offset plus the SILK size) */ + int silk_enc_offset; /* byte offset from the start of this struct to the SILK encoder state; set to align(sizeof(OpusEncoder)) */ + silk_EncControlStruct silk_mode; /* SILK control block; handed to silk_InitEncoder and then filled with defaults in opus_encoder_init */ + int application; + int channels; /* channel count given to opus_encoder_init (1 or 2) */ + int delay_compensation; + int force_channels; + int signal_type; + int user_bandwidth; + int max_bandwidth; + int user_forced_mode; + int voice_ratio; + opus_int32 Fs; /* sampling rate given to opus_encoder_init; also copied into silk_mode.API_sampleRate */ + int use_vbr; + 
int vbr_constraint; + int variable_duration; + opus_int32 bitrate_bps; + opus_int32 user_bitrate_bps; + int lsb_depth; + int encoder_buffer; + int lfe; + +#define OPUS_ENCODER_RESET_START stream_channels /* names the first field below; presumably the start of the region cleared on an encoder reset -- the reset code is not in this chunk, confirm */ + int stream_channels; /* initialised to the same value as channels in opus_encoder_init */ + opus_int16 hybrid_stereo_width_Q14; + opus_int32 variable_HP_smth2_Q15; + opus_val16 prev_HB_gain; + opus_val32 hp_mem[4]; + int mode; + int prev_mode; + int prev_channels; + int prev_framesize; + int bandwidth; + int silk_bw_switch; + /* NOTE(review): this comment used to read: Sampling rate (at the API level). That describes Fs above rather than the field that follows, so it looks stale or misplaced -- confirm before relying on it. */ + int first; + opus_val16 * energy_masking; + StereoWidthState width_mem; + opus_val16 delay_buffer[MAX_ENCODER_BUFFER*2]; +#ifndef DISABLE_FLOAT_API /* the three analysis fields below exist only when the float API is not disabled */ + TonalityAnalysisState analysis; + int detected_bandwidth; + int analysis_offset; +#endif + opus_uint32 rangeFinal; + int arch; /* value returned by opus_select_arch() in opus_encoder_init; passed on to silk_InitEncoder */ +}; + +/* Transition tables for voice and music. The first column is the + middle (memoryless) threshold. The second column is the hysteresis + (difference from the middle). Each table has four rows of two values, one row per bandwidth transition as labelled on the row. */ +static const opus_int32 mono_voice_bandwidth_thresholds[8] = { + 11000, 1000, /* NB<->MB */ + 14000, 1000, /* MB<->WB */ + 17000, 1000, /* WB<->SWB */ + 21000, 2000, /* SWB<->FB */ +}; +static const opus_int32 mono_music_bandwidth_thresholds[8] = { + 12000, 1000, /* NB<->MB */ + 15000, 1000, /* MB<->WB */ + 18000, 2000, /* WB<->SWB */ + 22000, 2000, /* SWB<->FB */ +}; +static const opus_int32 stereo_voice_bandwidth_thresholds[8] = { + 11000, 1000, /* NB<->MB */ + 14000, 1000, /* MB<->WB */ + 21000, 2000, /* WB<->SWB */ + 28000, 2000, /* SWB<->FB */ +}; +static const opus_int32 stereo_music_bandwidth_thresholds[8] = { + 12000, 1000, /* NB<->MB */ + 18000, 2000, /* MB<->WB */ + 21000, 2000, /* WB<->SWB */ + 30000, 2000, /* SWB<->FB */ +}; +/* Threshold bit-rates for switching between mono and stereo (voice and music currently share the same value) */ +static const opus_int32 stereo_voice_threshold = 30000; +static const opus_int32 stereo_music_threshold = 30000; + +/* Threshold bit-rate for switching between SILK/hybrid and CELT-only */ +static const opus_int32 
mode_thresholds[2][2] = { + /* voice */ /* music */ + { 64000, 16000}, /* mono */ + { 36000, 16000}, /* stereo */ +}; + +int opus_encoder_get_size(int channels) +{ + int silkEncSizeBytes, celtEncSizeBytes; + int ret; + if (channels<1 || channels > 2) + return 0; + ret = silk_Get_Encoder_Size( &silkEncSizeBytes ); + if (ret) + return 0; + silkEncSizeBytes = align(silkEncSizeBytes); + celtEncSizeBytes = celt_encoder_get_size(channels); + return align(sizeof(OpusEncoder))+silkEncSizeBytes+celtEncSizeBytes; +} + +int opus_encoder_init(OpusEncoder* st, opus_int32 Fs, int channels, int application) +{ + void *silk_enc; + CELTEncoder *celt_enc; + int err; + int ret, silkEncSizeBytes; + + if((Fs!=48000&&Fs!=24000&&Fs!=16000&&Fs!=12000&&Fs!=8000)||(channels!=1&&channels!=2)|| + (application != OPUS_APPLICATION_VOIP && application != OPUS_APPLICATION_AUDIO + && application != OPUS_APPLICATION_RESTRICTED_LOWDELAY)) + return OPUS_BAD_ARG; + + OPUS_CLEAR((char*)st, opus_encoder_get_size(channels)); + /* Create SILK encoder */ + ret = silk_Get_Encoder_Size( &silkEncSizeBytes ); + if (ret) + return OPUS_BAD_ARG; + silkEncSizeBytes = align(silkEncSizeBytes); + st->silk_enc_offset = align(sizeof(OpusEncoder)); + st->celt_enc_offset = st->silk_enc_offset+silkEncSizeBytes; + silk_enc = (char*)st+st->silk_enc_offset; + celt_enc = (CELTEncoder*)((char*)st+st->celt_enc_offset); + + st->stream_channels = st->channels = channels; + + st->Fs = Fs; + + st->arch = opus_select_arch(); + + ret = silk_InitEncoder( silk_enc, st->arch, &st->silk_mode ); + if(ret)return OPUS_INTERNAL_ERROR; + + /* default SILK parameters */ + st->silk_mode.nChannelsAPI = channels; + st->silk_mode.nChannelsInternal = channels; + st->silk_mode.API_sampleRate = st->Fs; + st->silk_mode.maxInternalSampleRate = 16000; + st->silk_mode.minInternalSampleRate = 8000; + st->silk_mode.desiredInternalSampleRate = 16000; + st->silk_mode.payloadSize_ms = 20; + st->silk_mode.bitRate = 25000; + st->silk_mode.packetLossPercentage = 
0; + st->silk_mode.complexity = 9; + st->silk_mode.useInBandFEC = 0; + st->silk_mode.useDTX = 0; + st->silk_mode.useCBR = 0; + st->silk_mode.reducedDependency = 0; + + /* Create CELT encoder */ + /* Initialize CELT encoder */ + err = celt_encoder_init(celt_enc, Fs, channels, st->arch); + if(err!=OPUS_OK)return OPUS_INTERNAL_ERROR; + + celt_encoder_ctl(celt_enc, CELT_SET_SIGNALLING(0)); + celt_encoder_ctl(celt_enc, OPUS_SET_COMPLEXITY(st->silk_mode.complexity)); + + st->use_vbr = 1; + /* Makes constrained VBR the default (safer for real-time use) */ + st->vbr_constraint = 1; + st->user_bitrate_bps = OPUS_AUTO; + st->bitrate_bps = 3000+Fs*channels; + st->application = application; + st->signal_type = OPUS_AUTO; + st->user_bandwidth = OPUS_AUTO; + st->max_bandwidth = OPUS_BANDWIDTH_FULLBAND; + st->force_channels = OPUS_AUTO; + st->user_forced_mode = OPUS_AUTO; + st->voice_ratio = -1; + st->encoder_buffer = st->Fs/100; + st->lsb_depth = 24; + st->variable_duration = OPUS_FRAMESIZE_ARG; + + /* Delay compensation of 4 ms (2.5 ms for SILK's extra look-ahead + + 1.5 ms for SILK resamplers and stereo prediction) */ + st->delay_compensation = st->Fs/250; + + st->hybrid_stereo_width_Q14 = 1 << 14; + st->prev_HB_gain = Q15ONE; + st->variable_HP_smth2_Q15 = silk_LSHIFT( silk_lin2log( VARIABLE_HP_MIN_CUTOFF_HZ ), 8 ); + st->first = 1; + st->mode = MODE_HYBRID; + st->bandwidth = OPUS_BANDWIDTH_FULLBAND; + + return OPUS_OK; +} + +static unsigned char gen_toc(int mode, int framerate, int bandwidth, int channels) +{ + int period; + unsigned char toc; + period = 0; + while (framerate < 400) + { + framerate <<= 1; + period++; + } + if (mode == MODE_SILK_ONLY) + { + toc = (bandwidth-OPUS_BANDWIDTH_NARROWBAND)<<5; + toc |= (period-2)<<3; + } else if (mode == MODE_CELT_ONLY) + { + int tmp = bandwidth-OPUS_BANDWIDTH_MEDIUMBAND; + if (tmp < 0) + tmp = 0; + toc = 0x80; + toc |= tmp << 5; + toc |= period<<3; + } else /* Hybrid */ + { + toc = 0x60; + toc |= 
(bandwidth-OPUS_BANDWIDTH_SUPERWIDEBAND)<<4; + toc |= (period-2)<<3; + } + toc |= (channels==2)<<2; + return toc; +} + +#ifndef OPUS_FIXED_POINT +static void silk_biquad_float( + const opus_val16 *in, /* I: Input signal */ + const opus_int32 *B_Q28, /* I: MA coefficients [3] */ + const opus_int32 *A_Q28, /* I: AR coefficients [2] */ + opus_val32 *S, /* I/O: State vector [2] */ + opus_val16 *out, /* O: Output signal */ + const opus_int32 len, /* I: Signal length (must be even) */ + int stride +) +{ + /* DIRECT FORM II TRANSPOSED (uses 2 element state vector) */ + opus_int k; + opus_val32 vout; + opus_val32 inval; + opus_val32 A[2], B[3]; + + A[0] = (opus_val32)(A_Q28[0] * (1.f/((opus_int32)1<<28))); + A[1] = (opus_val32)(A_Q28[1] * (1.f/((opus_int32)1<<28))); + B[0] = (opus_val32)(B_Q28[0] * (1.f/((opus_int32)1<<28))); + B[1] = (opus_val32)(B_Q28[1] * (1.f/((opus_int32)1<<28))); + B[2] = (opus_val32)(B_Q28[2] * (1.f/((opus_int32)1<<28))); + + /* Negate A_Q28 values and split in two parts */ + + for( k = 0; k < len; k++ ) { + /* S[ 0 ], S[ 1 ]: Q12 */ + inval = in[ k*stride ]; + vout = S[ 0 ] + B[0]*inval; + + S[ 0 ] = S[1] - vout*A[0] + B[1]*inval; + + S[ 1 ] = - vout*A[1] + B[2]*inval + VERY_SMALL; + + /* Scale back to Q0 and saturate */ + out[ k*stride ] = vout; + } +} +#endif + +static void hp_cutoff(const opus_val16 *in, opus_int32 cutoff_Hz, opus_val16 *out, opus_val32 *hp_mem, int len, int channels, opus_int32 Fs) +{ + opus_int32 B_Q28[ 3 ], A_Q28[ 2 ]; + opus_int32 Fc_Q19, r_Q28, r_Q22; + + silk_assert( cutoff_Hz <= silk_int32_MAX / SILK_FIX_CONST( 1.5 * 3.14159 / 1000, 19 ) ); + Fc_Q19 = silk_DIV32_16( silk_SMULBB( SILK_FIX_CONST( 1.5 * 3.14159 / 1000, 19 ), cutoff_Hz ), Fs/1000 ); + silk_assert( Fc_Q19 > 0 && Fc_Q19 < 32768 ); + + r_Q28 = SILK_FIX_CONST( 1.0, 28 ) - silk_MUL( SILK_FIX_CONST( 0.92, 9 ), Fc_Q19 ); + + /* b = r * [ 1; -2; 1 ]; */ + /* a = [ 1; -2 * r * ( 1 - 0.5 * Fc^2 ); r^2 ]; */ + B_Q28[ 0 ] = r_Q28; + B_Q28[ 1 ] = silk_LSHIFT( -r_Q28, 1 ); 
+ B_Q28[ 2 ] = r_Q28; + + /* -r * ( 2 - Fc * Fc ); */ + r_Q22 = silk_RSHIFT( r_Q28, 6 ); + A_Q28[ 0 ] = silk_SMULWW( r_Q22, silk_SMULWW( Fc_Q19, Fc_Q19 ) - SILK_FIX_CONST( 2.0, 22 ) ); + A_Q28[ 1 ] = silk_SMULWW( r_Q22, r_Q22 ); + +#ifdef OPUS_FIXED_POINT + silk_biquad_alt( in, B_Q28, A_Q28, hp_mem, out, len, channels ); + if( channels == 2 ) { + silk_biquad_alt( in+1, B_Q28, A_Q28, hp_mem+2, out+1, len, channels ); + } +#else + silk_biquad_float( in, B_Q28, A_Q28, hp_mem, out, len, channels ); + if( channels == 2 ) { + silk_biquad_float( in+1, B_Q28, A_Q28, hp_mem+2, out+1, len, channels ); + } +#endif +} + +#ifdef OPUS_FIXED_POINT +static void dc_reject(const opus_val16 *in, opus_int32 cutoff_Hz, opus_val16 *out, opus_val32 *hp_mem, int len, int channels, opus_int32 Fs) +{ + int c, i; + int shift; + + /* Approximates -round(log2(4.*cutoff_Hz/Fs)) */ + shift=celt_ilog2(Fs/(cutoff_Hz*3)); + for (c=0;c<channels;c++) + { + for (i=0;i<len;i++) + { + opus_val32 x, tmp, y; + x = SHL32(EXTEND32(in[channels*i+c]), 15); + /* First stage */ + tmp = x-hp_mem[2*c]; + hp_mem[2*c] = hp_mem[2*c] + PSHR32(x - hp_mem[2*c], shift); + /* Second stage */ + y = tmp - hp_mem[2*c+1]; + hp_mem[2*c+1] = hp_mem[2*c+1] + PSHR32(tmp - hp_mem[2*c+1], shift); + out[channels*i+c] = EXTRACT16(SATURATE(PSHR32(y, 15), 32767)); + } + } +} + +#else +static void dc_reject(const opus_val16 *in, opus_int32 cutoff_Hz, opus_val16 *out, opus_val32 *hp_mem, int len, int channels, opus_int32 Fs) +{ + int c, i; + float coef; + + coef = 4.0f*cutoff_Hz/Fs; + for (c=0;c<channels;c++) + { + for (i=0;i<len;i++) + { + opus_val32 x, tmp, y; + x = in[channels*i+c]; + /* First stage */ + tmp = x-hp_mem[2*c]; + hp_mem[2*c] = hp_mem[2*c] + coef*(x - hp_mem[2*c]) + VERY_SMALL; + /* Second stage */ + y = tmp - hp_mem[2*c+1]; + hp_mem[2*c+1] = hp_mem[2*c+1] + coef*(tmp - hp_mem[2*c+1]) + VERY_SMALL; + out[channels*i+c] = y; + } + } +} +#endif + +static void stereo_fade(const opus_val16 *in, opus_val16 *out, opus_val16 g1, 
opus_val16 g2, + int overlap48, int frame_size, int channels, const opus_val16 *window, opus_int32 Fs) +{ + int i; + int overlap; + int inc; + inc = 48000/Fs; + overlap=overlap48/inc; + g1 = Q15ONE-g1; + g2 = Q15ONE-g2; + for (i=0;i<overlap;i++) + { + opus_val32 diff; + opus_val16 g, w; + w = MULT16_16_Q15(window[i*inc], window[i*inc]); + g = SHR32(MAC16_16(MULT16_16(w,g2), + Q15ONE-w, g1), 15); + diff = EXTRACT16(HALF32((opus_val32)in[i*channels] - (opus_val32)in[i*channels+1])); + diff = MULT16_16_Q15(g, diff); + out[i*channels] = out[i*channels] - diff; + out[i*channels+1] = out[i*channels+1] + diff; + } + for (;i<frame_size;i++) + { + opus_val32 diff; + diff = EXTRACT16(HALF32((opus_val32)in[i*channels] - (opus_val32)in[i*channels+1])); + diff = MULT16_16_Q15(g2, diff); + out[i*channels] = out[i*channels] - diff; + out[i*channels+1] = out[i*channels+1] + diff; + } +} + +static void gain_fade(const opus_val16 *in, opus_val16 *out, opus_val16 g1, opus_val16 g2, + int overlap48, int frame_size, int channels, const opus_val16 *window, opus_int32 Fs) +{ + int i; + int inc; + int overlap; + int c; + inc = 48000/Fs; + overlap=overlap48/inc; + if (channels==1) + { + for (i=0;i<overlap;i++) + { + opus_val16 g, w; + w = MULT16_16_Q15(window[i*inc], window[i*inc]); + g = SHR32(MAC16_16(MULT16_16(w,g2), + Q15ONE-w, g1), 15); + out[i] = MULT16_16_Q15(g, in[i]); + } + } else { + for (i=0;i<overlap;i++) + { + opus_val16 g, w; + w = MULT16_16_Q15(window[i*inc], window[i*inc]); + g = SHR32(MAC16_16(MULT16_16(w,g2), + Q15ONE-w, g1), 15); + out[i*2] = MULT16_16_Q15(g, in[i*2]); + out[i*2+1] = MULT16_16_Q15(g, in[i*2+1]); + } + } + c=0;do { + for (i=overlap;i<frame_size;i++) + { + out[i*channels+c] = MULT16_16_Q15(g2, in[i*channels+c]); + } + } + while (++c<channels); +} + +OpusEncoder *opus_encoder_create(opus_int32 Fs, int channels, int application, int *error) +{ + int ret; + OpusEncoder *st; + 
if((Fs!=48000&&Fs!=24000&&Fs!=16000&&Fs!=12000&&Fs!=8000)||(channels!=1&&channels!=2)|| + (application != OPUS_APPLICATION_VOIP && application != OPUS_APPLICATION_AUDIO + && application != OPUS_APPLICATION_RESTRICTED_LOWDELAY)) + { + if (error) + *error = OPUS_BAD_ARG; + return NULL; + } + st = (OpusEncoder *)opus_alloc(opus_encoder_get_size(channels)); + if (st == NULL) + { + if (error) + *error = OPUS_ALLOC_FAIL; + return NULL; + } + ret = opus_encoder_init(st, Fs, channels, application); + if (error) + *error = ret; + if (ret != OPUS_OK) + { + opus_free(st); + st = NULL; + } + return st; +} + +static opus_int32 user_bitrate_to_bitrate(OpusEncoder *st, int frame_size, int max_data_bytes) +{ + if(!frame_size)frame_size=st->Fs/400; + if (st->user_bitrate_bps==OPUS_AUTO) + return 60*st->Fs/frame_size + st->Fs*st->channels; + else if (st->user_bitrate_bps==OPUS_BITRATE_MAX) + return max_data_bytes*8*st->Fs/frame_size; + else + return st->user_bitrate_bps; +} + +#ifndef DISABLE_FLOAT_API +/* Don't use more than 60 ms for the frame size analysis */ +#define MAX_DYNAMIC_FRAMESIZE 24 +/* Estimates how much the bitrate will be boosted based on the sub-frame energy */ +static float transient_boost(const float *E, const float *E_1, int LM, int maxM) +{ + int i; + int M; + float sumE=0, sumE_1=0; + float metric; + + M = IMIN(maxM, (1<<LM)+1); + for (i=0;i<M;i++) + { + sumE += E[i]; + sumE_1 += E_1[i]; + } + metric = sumE*sumE_1/(M*M); + /*if (LM==3) + printf("%f\n", metric);*/ + /*return metric>10 ? 
1 : 0;*/ + /*return MAX16(0,1-exp(-.25*(metric-2.)));*/ + return MIN16(1,(float)sqrt(MAX16(0,.05f*(metric-2)))); +} + +/* Viterbi decoding trying to find the best frame size combination using look-ahead + + State numbering: + 0: unused + 1: 2.5 ms + 2: 5 ms (#1) + 3: 5 ms (#2) + 4: 10 ms (#1) + 5: 10 ms (#2) + 6: 10 ms (#3) + 7: 10 ms (#4) + 8: 20 ms (#1) + 9: 20 ms (#2) + 10: 20 ms (#3) + 11: 20 ms (#4) + 12: 20 ms (#5) + 13: 20 ms (#6) + 14: 20 ms (#7) + 15: 20 ms (#8) +*/ +static int transient_viterbi(const float *E, const float *E_1, int N, int frame_cost, int rate) +{ + int i; + float cost[MAX_DYNAMIC_FRAMESIZE][16]; + int states[MAX_DYNAMIC_FRAMESIZE][16]; + float best_cost; + int best_state; + float factor; + /* Take into account that we damp VBR in the 32 kb/s to 64 kb/s range. */ + if (rate<80) + factor=0; + else if (rate>160) + factor=1; + else + factor = (rate-80.f)/80.f; + /* Makes variable framesize less aggressive at lower bitrates, but I can't + find any valid theoretical justification for this (other than it seems + to help) */ + for (i=0;i<16;i++) + { + /* Impossible state */ + states[0][i] = -1; + cost[0][i] = 1e10; + } + for (i=0;i<4;i++) + { + cost[0][1<<i] = (frame_cost + rate*(1<<i))*(1+factor*transient_boost(E, E_1, i, N+1)); + states[0][1<<i] = i; + } + for (i=1;i<N;i++) + { + int j; + + /* Follow continuations */ + for (j=2;j<16;j++) + { + cost[i][j] = cost[i-1][j-1]; + states[i][j] = j-1; + } + + /* New frames */ + for(j=0;j<4;j++) + { + int k; + float min_cost; + float curr_cost; + states[i][1<<j] = 1; + min_cost = cost[i-1][1]; + for(k=1;k<4;k++) + { + float tmp = cost[i-1][(1<<(k+1))-1]; + if (tmp < min_cost) + { + states[i][1<<j] = (1<<(k+1))-1; + min_cost = tmp; + } + } + curr_cost = (frame_cost + rate*(1<<j))*(1+factor*transient_boost(E+i, E_1+i, j, N-i+1)); + cost[i][1<<j] = min_cost; + /* If part of the frame is outside the analysis window, only count part of the cost */ + if (N-i < (1<<j)) + cost[i][1<<j] += 
curr_cost*(float)(N-i)/(1<<j); + else + cost[i][1<<j] += curr_cost; + } + } + + best_state=1; + best_cost = cost[N-1][1]; + /* Find best end state (doesn't force a frame to end at N-1) */ + for (i=2;i<16;i++) + { + if (cost[N-1][i]<best_cost) + { + best_cost = cost[N-1][i]; + best_state = i; + } + } + + /* Follow transitions back */ + for (i=N-1;i>=0;i--) + { + /*printf("%d ", best_state);*/ + best_state = states[i][best_state]; + } + /*printf("%d\n", best_state);*/ + return best_state; +} + +int optimize_framesize(const opus_val16 *x, int len, int C, opus_int32 Fs, + int bitrate, opus_val16 tonality, float *mem, int buffering, + downmix_func downmix) +{ + int N; + int i; + float e[MAX_DYNAMIC_FRAMESIZE+4]; + float e_1[MAX_DYNAMIC_FRAMESIZE+3]; + opus_val32 memx; + int bestLM=0; + int subframe; + int pos; + VARDECL(opus_val32, sub); + + subframe = Fs/400; + ALLOC(sub, subframe, opus_val32); + e[0]=mem[0]; + e_1[0]=1.f/(EPSILON+mem[0]); + if (buffering) + { + /* Consider the CELT delay when not in restricted-lowdelay */ + /* We assume the buffering is between 2.5 and 5 ms */ + int offset = 2*subframe - buffering; + celt_assert(offset>=0 && offset <= subframe); + x += C*offset; + len -= offset; + e[1]=mem[1]; + e_1[1]=1.f/(EPSILON+mem[1]); + e[2]=mem[2]; + e_1[2]=1.f/(EPSILON+mem[2]); + pos = 3; + } else { + pos=1; + } + N=IMIN(len/subframe, MAX_DYNAMIC_FRAMESIZE); + /* Just silencing a warning, it's really initialized later */ + memx = 0; + for (i=0;i<N;i++) + { + float tmp; + opus_val32 tmpx; + int j; + tmp=EPSILON; + + downmix(x, sub, subframe, i*subframe, 0, -2, C); + if (i==0) + memx = sub[0]; + for (j=0;j<subframe;j++) + { + tmpx = sub[j]; + tmp += (tmpx-memx)*(float)(tmpx-memx); + memx = tmpx; + } + e[i+pos] = tmp; + e_1[i+pos] = 1.f/tmp; + } + /* Hack to get 20 ms working with APPLICATION_AUDIO + The real problem is that the corresponding memory needs to use 1.5 ms + from this frame and 1 ms from the next frame */ + e[i+pos] = e[i+pos-1]; + if (buffering) + 
N=IMIN(MAX_DYNAMIC_FRAMESIZE, N+2); + bestLM = transient_viterbi(e, e_1, N, (int)((1.f+.5f*tonality)*(60*C+40)), bitrate/400); + mem[0] = e[1<<bestLM]; + if (buffering) + { + mem[1] = e[(1<<bestLM)+1]; + mem[2] = e[(1<<bestLM)+2]; + } + return bestLM; +} + +#endif + +#ifndef DISABLE_FLOAT_API +#ifdef OPUS_FIXED_POINT +#define PCM2VAL(x) FLOAT2INT16(x) +#else +#define PCM2VAL(x) SCALEIN(x) +#endif +void downmix_float(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C) +{ + const float *x; + opus_val32 scale; + int j; + x = (const float *)_x; + for (j=0;j<subframe;j++) + sub[j] = PCM2VAL(x[(j+offset)*C+c1]); + if (c2>-1) + { + for (j=0;j<subframe;j++) + sub[j] += PCM2VAL(x[(j+offset)*C+c2]); + } else if (c2==-2) + { + int c; + for (c=1;c<C;c++) + { + for (j=0;j<subframe;j++) + sub[j] += PCM2VAL(x[(j+offset)*C+c]); + } + } +#ifdef OPUS_FIXED_POINT + scale = (1<<SIG_SHIFT); +#else + scale = 1.f; +#endif + if (C==-2) + scale /= C; + else + scale /= 2; + for (j=0;j<subframe;j++) + sub[j] *= scale; +} +#endif + +void downmix_int(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C) +{ + const opus_int16 *x; + opus_val32 scale; + int j; + x = (const opus_int16 *)_x; + for (j=0;j<subframe;j++) + sub[j] = x[(j+offset)*C+c1]; + if (c2>-1) + { + for (j=0;j<subframe;j++) + sub[j] += x[(j+offset)*C+c2]; + } else if (c2==-2) + { + int c; + for (c=1;c<C;c++) + { + for (j=0;j<subframe;j++) + sub[j] += x[(j+offset)*C+c]; + } + } +#ifdef OPUS_FIXED_POINT + scale = (1<<SIG_SHIFT); +#else + scale = 1.f/32768; +#endif + if (C==-2) + scale /= C; + else + scale /= 2; + for (j=0;j<subframe;j++) + sub[j] *= scale; +} + +opus_int32 frame_size_select(opus_int32 frame_size, int variable_duration, opus_int32 Fs) +{ + int new_size; + if (frame_size<Fs/400) + return -1; + if (variable_duration == OPUS_FRAMESIZE_ARG) + new_size = frame_size; + else if (variable_duration == OPUS_FRAMESIZE_VARIABLE) + new_size = Fs/50; + else if 
(variable_duration >= OPUS_FRAMESIZE_2_5_MS && variable_duration <= OPUS_FRAMESIZE_60_MS) + new_size = IMIN(3*Fs/50, (Fs/400)<<(variable_duration-OPUS_FRAMESIZE_2_5_MS)); + else + return -1; + if (new_size>frame_size) + return -1; + if (400*new_size!=Fs && 200*new_size!=Fs && 100*new_size!=Fs && + 50*new_size!=Fs && 25*new_size!=Fs && 50*new_size!=3*Fs) + return -1; + return new_size; +} + +opus_int32 compute_frame_size(const void *analysis_pcm, int frame_size, + int variable_duration, int C, opus_int32 Fs, int bitrate_bps, + int delay_compensation, downmix_func downmix +#ifndef DISABLE_FLOAT_API + , float *subframe_mem +#endif + ) +{ +#ifndef DISABLE_FLOAT_API + if (variable_duration == OPUS_FRAMESIZE_VARIABLE && frame_size >= Fs/200) + { + int LM = 3; + LM = optimize_framesize(analysis_pcm, frame_size, C, Fs, bitrate_bps, + 0, subframe_mem, delay_compensation, downmix); + while ((Fs/400<<LM)>frame_size) + LM--; + frame_size = (Fs/400<<LM); + } else +#endif + { + frame_size = frame_size_select(frame_size, variable_duration, Fs); + } + if (frame_size<0) + return -1; + return frame_size; +} + +opus_val16 compute_stereo_width(const opus_val16 *pcm, int frame_size, opus_int32 Fs, StereoWidthState *mem) +{ + opus_val16 corr; + opus_val16 ldiff; + opus_val16 width; + opus_val32 xx, xy, yy; + opus_val16 sqrt_xx, sqrt_yy; + opus_val16 qrrt_xx, qrrt_yy; + int frame_rate; + int i; + opus_val16 short_alpha; + + frame_rate = Fs/frame_size; + short_alpha = Q15ONE - 25*Q15ONE/IMAX(50,frame_rate); + xx=xy=yy=0; + for (i=0;i<frame_size;i+=4) + { + opus_val32 pxx=0; + opus_val32 pxy=0; + opus_val32 pyy=0; + opus_val16 x, y; + x = pcm[2*i]; + y = pcm[2*i+1]; + pxx = SHR32(MULT16_16(x,x),2); + pxy = SHR32(MULT16_16(x,y),2); + pyy = SHR32(MULT16_16(y,y),2); + x = pcm[2*i+2]; + y = pcm[2*i+3]; + pxx += SHR32(MULT16_16(x,x),2); + pxy += SHR32(MULT16_16(x,y),2); + pyy += SHR32(MULT16_16(y,y),2); + x = pcm[2*i+4]; + y = pcm[2*i+5]; + pxx += SHR32(MULT16_16(x,x),2); + pxy += 
SHR32(MULT16_16(x,y),2); + pyy += SHR32(MULT16_16(y,y),2); + x = pcm[2*i+6]; + y = pcm[2*i+7]; + pxx += SHR32(MULT16_16(x,x),2); + pxy += SHR32(MULT16_16(x,y),2); + pyy += SHR32(MULT16_16(y,y),2); + + xx += SHR32(pxx, 10); + xy += SHR32(pxy, 10); + yy += SHR32(pyy, 10); + } + mem->XX += MULT16_32_Q15(short_alpha, xx-mem->XX); + mem->XY += MULT16_32_Q15(short_alpha, xy-mem->XY); + mem->YY += MULT16_32_Q15(short_alpha, yy-mem->YY); + mem->XX = MAX32(0, mem->XX); + mem->XY = MAX32(0, mem->XY); + mem->YY = MAX32(0, mem->YY); + if (MAX32(mem->XX, mem->YY)>QCONST16(8e-4f, 18)) + { + sqrt_xx = celt_sqrt(mem->XX); + sqrt_yy = celt_sqrt(mem->YY); + qrrt_xx = celt_sqrt(sqrt_xx); + qrrt_yy = celt_sqrt(sqrt_yy); + /* Inter-channel correlation */ + mem->XY = MIN32(mem->XY, sqrt_xx*sqrt_yy); + corr = SHR32(frac_div32(mem->XY,EPSILON+MULT16_16(sqrt_xx,sqrt_yy)),16); + /* Approximate loudness difference */ + ldiff = Q15ONE*ABS16(qrrt_xx-qrrt_yy)/(EPSILON+qrrt_xx+qrrt_yy); + width = MULT16_16_Q15(celt_sqrt(QCONST32(1.f,30)-MULT16_16(corr,corr)), ldiff); + /* Smoothing over one second */ + mem->smoothed_width += (width-mem->smoothed_width)/frame_rate; + /* Peak follower */ + mem->max_follower = MAX16(mem->max_follower-QCONST16(.02f,15)/frame_rate, mem->smoothed_width); + } else { + width = 0; + corr=Q15ONE; + ldiff=0; + } + /*printf("%f %f %f %f %f ", corr/(float)Q15ONE, ldiff/(float)Q15ONE, width/(float)Q15ONE, mem->smoothed_width/(float)Q15ONE, mem->max_follower/(float)Q15ONE);*/ + return EXTRACT16(MIN32(Q15ONE,20*mem->max_follower)); +} + +opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_size, + unsigned char *data, opus_int32 out_data_bytes, int lsb_depth, + const void *analysis_pcm, opus_int32 analysis_size, int c1, int c2, int analysis_channels, downmix_func downmix) +{ + void *silk_enc; + CELTEncoder *celt_enc; + int i; + int ret=0; + opus_int32 nBytes; + ec_enc enc; + int bytes_target; + int prefill=0; + int start_band = 0; + int redundancy = 
0; + int redundancy_bytes = 0; /* Number of bytes to use for redundancy frame */ + int celt_to_silk = 0; + VARDECL(opus_val16, pcm_buf); + int nb_compr_bytes; + int to_celt = 0; + opus_uint32 redundant_rng = 0; + int cutoff_Hz, hp_freq_smth1; + int voice_est; /* Probability of voice in Q7 */ + opus_int32 equiv_rate; + int delay_compensation; + int frame_rate; + opus_int32 max_rate; /* Max bitrate we're allowed to use */ + int curr_bandwidth; + opus_val16 HB_gain; + opus_int32 max_data_bytes; /* Max number of bytes we're allowed to use */ + int total_buffer; + opus_val16 stereo_width; + const CELTMode *celt_mode; + AnalysisInfo analysis_info; + int analysis_read_pos_bak=-1; + int analysis_read_subframe_bak=-1; + VARDECL(opus_val16, tmp_prefill); + + ALLOC_STACK; + + max_data_bytes = IMIN(1276, out_data_bytes); + + st->rangeFinal = 0; + if ((!st->variable_duration && 400*frame_size != st->Fs && 200*frame_size != st->Fs && 100*frame_size != st->Fs && + 50*frame_size != st->Fs && 25*frame_size != st->Fs && 50*frame_size != 3*st->Fs) + || (400*frame_size < st->Fs) + || max_data_bytes<=0 + ) + { + RESTORE_STACK; + return OPUS_BAD_ARG; + } + silk_enc = (char*)st+st->silk_enc_offset; + celt_enc = (CELTEncoder*)((char*)st+st->celt_enc_offset); + if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY) + delay_compensation = 0; + else + delay_compensation = st->delay_compensation; + + lsb_depth = IMIN(lsb_depth, st->lsb_depth); + + analysis_info.valid = 0; + celt_encoder_ctl(celt_enc, CELT_GET_MODE(&celt_mode)); +#ifndef DISABLE_FLOAT_API +#ifdef OPUS_FIXED_POINT + if (st->silk_mode.complexity >= 10 && st->Fs==48000) +#else + if (st->silk_mode.complexity >= 7 && st->Fs==48000) +#endif + { + analysis_read_pos_bak = st->analysis.read_pos; + analysis_read_subframe_bak = st->analysis.read_subframe; + run_analysis(&st->analysis, celt_mode, analysis_pcm, analysis_size, frame_size, + c1, c2, analysis_channels, st->Fs, + lsb_depth, downmix, &analysis_info); + } +#endif + + 
st->voice_ratio = -1; + +#ifndef DISABLE_FLOAT_API + st->detected_bandwidth = 0; + if (analysis_info.valid) + { + int analysis_bandwidth; + if (st->signal_type == OPUS_AUTO) + st->voice_ratio = (int)floor(.5+100*(1-analysis_info.music_prob)); + + analysis_bandwidth = analysis_info.bandwidth; + if (analysis_bandwidth<=12) + st->detected_bandwidth = OPUS_BANDWIDTH_NARROWBAND; + else if (analysis_bandwidth<=14) + st->detected_bandwidth = OPUS_BANDWIDTH_MEDIUMBAND; + else if (analysis_bandwidth<=16) + st->detected_bandwidth = OPUS_BANDWIDTH_WIDEBAND; + else if (analysis_bandwidth<=18) + st->detected_bandwidth = OPUS_BANDWIDTH_SUPERWIDEBAND; + else + st->detected_bandwidth = OPUS_BANDWIDTH_FULLBAND; + } +#endif + + if (st->channels==2 && st->force_channels!=1) + stereo_width = compute_stereo_width(pcm, frame_size, st->Fs, &st->width_mem); + else + stereo_width = 0; + total_buffer = delay_compensation; + st->bitrate_bps = user_bitrate_to_bitrate(st, frame_size, max_data_bytes); + + frame_rate = st->Fs/frame_size; + if (max_data_bytes<3 || st->bitrate_bps < 3*frame_rate*8 + || (frame_rate<50 && (max_data_bytes*frame_rate<300 || st->bitrate_bps < 2400))) + { + /*If the space is too low to do something useful, emit 'PLC' frames.*/ + int tocmode = st->mode; + int bw = st->bandwidth == 0 ? 
OPUS_BANDWIDTH_NARROWBAND : st->bandwidth; + if (tocmode==0) + tocmode = MODE_SILK_ONLY; + if (frame_rate>100) + tocmode = MODE_CELT_ONLY; + if (frame_rate < 50) + tocmode = MODE_SILK_ONLY; + if(tocmode==MODE_SILK_ONLY&&bw>OPUS_BANDWIDTH_WIDEBAND) + bw=OPUS_BANDWIDTH_WIDEBAND; + else if (tocmode==MODE_CELT_ONLY&&bw==OPUS_BANDWIDTH_MEDIUMBAND) + bw=OPUS_BANDWIDTH_NARROWBAND; + else if (bw<=OPUS_BANDWIDTH_SUPERWIDEBAND) + bw=OPUS_BANDWIDTH_SUPERWIDEBAND; + data[0] = gen_toc(tocmode, frame_rate, bw, st->stream_channels); + RESTORE_STACK; + return 1; + } + if (!st->use_vbr) + { + int cbrBytes; + cbrBytes = IMIN( (st->bitrate_bps + 4*frame_rate)/(8*frame_rate) , max_data_bytes); + st->bitrate_bps = cbrBytes * (8*frame_rate); + max_data_bytes = cbrBytes; + } + max_rate = frame_rate*max_data_bytes*8; + + /* Equivalent 20-ms rate for mode/channel/bandwidth decisions */ + equiv_rate = st->bitrate_bps - (40*st->channels+20)*(st->Fs/frame_size - 50); + + if (st->signal_type == OPUS_SIGNAL_VOICE) + voice_est = 127; + else if (st->signal_type == OPUS_SIGNAL_MUSIC) + voice_est = 0; + else if (st->voice_ratio >= 0) + { + voice_est = st->voice_ratio*327>>8; + /* For AUDIO, never be more than 90% confident of having speech */ + if (st->application == OPUS_APPLICATION_AUDIO) + voice_est = IMIN(voice_est, 115); + } else if (st->application == OPUS_APPLICATION_VOIP) + voice_est = 115; + else + voice_est = 48; + + if (st->force_channels!=OPUS_AUTO && st->channels == 2) + { + st->stream_channels = st->force_channels; + } else { +#ifdef FUZZING + /* Random mono/stereo decision */ + if (st->channels == 2 && (rand()&0x1F)==0) + st->stream_channels = 3-st->stream_channels; +#else + /* Rate-dependent mono-stereo decision */ + if (st->channels == 2) + { + opus_int32 stereo_threshold; + stereo_threshold = stereo_music_threshold + ((voice_est*voice_est*(stereo_voice_threshold-stereo_music_threshold))>>14); + if (st->stream_channels == 2) + stereo_threshold -= 1000; + else + stereo_threshold += 
1000; + st->stream_channels = (equiv_rate > stereo_threshold) ? 2 : 1; + } else { + st->stream_channels = st->channels; + } +#endif + } + equiv_rate = st->bitrate_bps - (40*st->stream_channels+20)*(st->Fs/frame_size - 50); + + /* Mode selection depending on application and signal type */ + if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY) + { + st->mode = MODE_CELT_ONLY; + } else if (st->user_forced_mode == OPUS_AUTO) + { +#ifdef FUZZING + /* Random mode switching */ + if ((rand()&0xF)==0) + { + if ((rand()&0x1)==0) + st->mode = MODE_CELT_ONLY; + else + st->mode = MODE_SILK_ONLY; + } else { + if (st->prev_mode==MODE_CELT_ONLY) + st->mode = MODE_CELT_ONLY; + else + st->mode = MODE_SILK_ONLY; + } +#else + opus_int32 mode_voice, mode_music; + opus_int32 threshold; + + /* Interpolate based on stereo width */ + mode_voice = (opus_int32)(MULT16_32_Q15(Q15ONE-stereo_width,mode_thresholds[0][0]) + + MULT16_32_Q15(stereo_width,mode_thresholds[1][0])); + mode_music = (opus_int32)(MULT16_32_Q15(Q15ONE-stereo_width,mode_thresholds[1][1]) + + MULT16_32_Q15(stereo_width,mode_thresholds[1][1])); + /* Interpolate based on speech/music probability */ + threshold = mode_music + ((voice_est*voice_est*(mode_voice-mode_music))>>14); + /* Bias towards SILK for VoIP because of some useful features */ + if (st->application == OPUS_APPLICATION_VOIP) + threshold += 8000; + + /*printf("%f %d\n", stereo_width/(float)Q15ONE, threshold);*/ + /* Hysteresis */ + if (st->prev_mode == MODE_CELT_ONLY) + threshold -= 4000; + else if (st->prev_mode>0) + threshold += 4000; + + st->mode = (equiv_rate >= threshold) ? 
MODE_CELT_ONLY: MODE_SILK_ONLY; + + /* When FEC is enabled and there's enough packet loss, use SILK */ + if (st->silk_mode.useInBandFEC && st->silk_mode.packetLossPercentage > (128-voice_est)>>4) + st->mode = MODE_SILK_ONLY; + /* When encoding voice and DTX is enabled, set the encoder to SILK mode (at least for now) */ + if (st->silk_mode.useDTX && voice_est > 100) + st->mode = MODE_SILK_ONLY; +#endif + } else { + st->mode = st->user_forced_mode; + } + + /* Override the chosen mode to make sure we meet the requested frame size */ + if (st->mode != MODE_CELT_ONLY && frame_size < st->Fs/100) + st->mode = MODE_CELT_ONLY; + if (st->lfe) + st->mode = MODE_CELT_ONLY; + /* If max_data_bytes represents less than 8 kb/s, switch to CELT-only mode */ + if (max_data_bytes < (frame_rate > 50 ? 12000 : 8000)*frame_size / (st->Fs * 8)) + st->mode = MODE_CELT_ONLY; + + if (st->stream_channels == 1 && st->prev_channels ==2 && st->silk_mode.toMono==0 + && st->mode != MODE_CELT_ONLY && st->prev_mode != MODE_CELT_ONLY) + { + /* Delay stereo->mono transition by two frames so that SILK can do a smooth downmix */ + st->silk_mode.toMono = 1; + st->stream_channels = 2; + } else { + st->silk_mode.toMono = 0; + } + + if (st->prev_mode > 0 && + ((st->mode != MODE_CELT_ONLY && st->prev_mode == MODE_CELT_ONLY) || + (st->mode == MODE_CELT_ONLY && st->prev_mode != MODE_CELT_ONLY))) + { + redundancy = 1; + celt_to_silk = (st->mode != MODE_CELT_ONLY); + if (!celt_to_silk) + { + /* Switch to SILK/hybrid if frame size is 10 ms or more*/ + if (frame_size >= st->Fs/100) + { + st->mode = st->prev_mode; + to_celt = 1; + } else { + redundancy=0; + } + } + } + /* For the first frame at a new SILK bandwidth */ + if (st->silk_bw_switch) + { + redundancy = 1; + celt_to_silk = 1; + st->silk_bw_switch = 0; + prefill=1; + } + + if (redundancy) + { + /* Fair share of the max size allowed */ + redundancy_bytes = IMIN(257, max_data_bytes*(opus_int32)(st->Fs/200)/(frame_size+st->Fs/200)); + /* For VBR, target the 
actual bitrate (subject to the limit above) */ + if (st->use_vbr) + redundancy_bytes = IMIN(redundancy_bytes, st->bitrate_bps/1600); + } + + if (st->mode != MODE_CELT_ONLY && st->prev_mode == MODE_CELT_ONLY) + { + silk_EncControlStruct dummy; + silk_InitEncoder( silk_enc, st->arch, &dummy); + prefill=1; + } + + /* Automatic (rate-dependent) bandwidth selection */ + if (st->mode == MODE_CELT_ONLY || st->first || st->silk_mode.allowBandwidthSwitch) + { + const opus_int32 *voice_bandwidth_thresholds, *music_bandwidth_thresholds; + opus_int32 bandwidth_thresholds[8]; + int bandwidth = OPUS_BANDWIDTH_FULLBAND; + opus_int32 equiv_rate2; + + equiv_rate2 = equiv_rate; + if (st->mode != MODE_CELT_ONLY) + { + /* Adjust the threshold +/- 10% depending on complexity */ + equiv_rate2 = equiv_rate2 * (45+st->silk_mode.complexity)/50; + /* CBR is less efficient by ~1 kb/s */ + if (!st->use_vbr) + equiv_rate2 -= 1000; + } + if (st->channels==2 && st->force_channels!=1) + { + voice_bandwidth_thresholds = stereo_voice_bandwidth_thresholds; + music_bandwidth_thresholds = stereo_music_bandwidth_thresholds; + } else { + voice_bandwidth_thresholds = mono_voice_bandwidth_thresholds; + music_bandwidth_thresholds = mono_music_bandwidth_thresholds; + } + /* Interpolate bandwidth thresholds depending on voice estimation */ + for (i=0;i<8;i++) + { + bandwidth_thresholds[i] = music_bandwidth_thresholds[i] + + ((voice_est*voice_est*(voice_bandwidth_thresholds[i]-music_bandwidth_thresholds[i]))>>14); + } + do { + int threshold, hysteresis; + threshold = bandwidth_thresholds[2*(bandwidth-OPUS_BANDWIDTH_MEDIUMBAND)]; + hysteresis = bandwidth_thresholds[2*(bandwidth-OPUS_BANDWIDTH_MEDIUMBAND)+1]; + if (!st->first) + { + if (st->bandwidth >= bandwidth) + threshold -= hysteresis; + else + threshold += hysteresis; + } + if (equiv_rate2 >= threshold) + break; + } while (--bandwidth>OPUS_BANDWIDTH_NARROWBAND); + st->bandwidth = bandwidth; + /* Prevents any transition to SWB/FB until the SILK layer has 
fully + switched to WB mode and turned the variable LP filter off */ + if (!st->first && st->mode != MODE_CELT_ONLY && !st->silk_mode.inWBmodeWithoutVariableLP && st->bandwidth > OPUS_BANDWIDTH_WIDEBAND) + st->bandwidth = OPUS_BANDWIDTH_WIDEBAND; + } + + if (st->bandwidth>st->max_bandwidth) + st->bandwidth = st->max_bandwidth; + + if (st->user_bandwidth != OPUS_AUTO) + st->bandwidth = st->user_bandwidth; + + /* This prevents us from using hybrid at unsafe CBR/max rates */ + if (st->mode != MODE_CELT_ONLY && max_rate < 15000) + { + st->bandwidth = IMIN(st->bandwidth, OPUS_BANDWIDTH_WIDEBAND); + } + + /* Prevents Opus from wasting bits on frequencies that are above + the Nyquist rate of the input signal */ + if (st->Fs <= 24000 && st->bandwidth > OPUS_BANDWIDTH_SUPERWIDEBAND) + st->bandwidth = OPUS_BANDWIDTH_SUPERWIDEBAND; + if (st->Fs <= 16000 && st->bandwidth > OPUS_BANDWIDTH_WIDEBAND) + st->bandwidth = OPUS_BANDWIDTH_WIDEBAND; + if (st->Fs <= 12000 && st->bandwidth > OPUS_BANDWIDTH_MEDIUMBAND) + st->bandwidth = OPUS_BANDWIDTH_MEDIUMBAND; + if (st->Fs <= 8000 && st->bandwidth > OPUS_BANDWIDTH_NARROWBAND) + st->bandwidth = OPUS_BANDWIDTH_NARROWBAND; +#ifndef DISABLE_FLOAT_API + /* Use detected bandwidth to reduce the encoded bandwidth. */ + if (st->detected_bandwidth && st->user_bandwidth == OPUS_AUTO) + { + int min_detected_bandwidth; + /* Makes bandwidth detection more conservative just in case the detector + gets it wrong when we could have coded a high bandwidth transparently. + When operating in SILK/hybrid mode, we don't go below wideband to avoid + more complicated switches that require redundancy. 
*/ + if (equiv_rate <= 18000*st->stream_channels && st->mode == MODE_CELT_ONLY) + min_detected_bandwidth = OPUS_BANDWIDTH_NARROWBAND; + else if (equiv_rate <= 24000*st->stream_channels && st->mode == MODE_CELT_ONLY) + min_detected_bandwidth = OPUS_BANDWIDTH_MEDIUMBAND; + else if (equiv_rate <= 30000*st->stream_channels) + min_detected_bandwidth = OPUS_BANDWIDTH_WIDEBAND; + else if (equiv_rate <= 44000*st->stream_channels) + min_detected_bandwidth = OPUS_BANDWIDTH_SUPERWIDEBAND; + else + min_detected_bandwidth = OPUS_BANDWIDTH_FULLBAND; + + st->detected_bandwidth = IMAX(st->detected_bandwidth, min_detected_bandwidth); + st->bandwidth = IMIN(st->bandwidth, st->detected_bandwidth); + } +#endif + celt_encoder_ctl(celt_enc, OPUS_SET_LSB_DEPTH(lsb_depth)); + + /* CELT mode doesn't support mediumband, use wideband instead */ + if (st->mode == MODE_CELT_ONLY && st->bandwidth == OPUS_BANDWIDTH_MEDIUMBAND) + st->bandwidth = OPUS_BANDWIDTH_WIDEBAND; + if (st->lfe) + st->bandwidth = OPUS_BANDWIDTH_NARROWBAND; + + /* Can't support higher than wideband for >20 ms frames */ + if (frame_size > st->Fs/50 && (st->mode == MODE_CELT_ONLY || st->bandwidth > OPUS_BANDWIDTH_WIDEBAND)) + { + VARDECL(unsigned char, tmp_data); + int nb_frames; + int bak_mode, bak_bandwidth, bak_channels, bak_to_mono; + VARDECL(OpusRepacketizer, rp); + opus_int32 bytes_per_frame; + opus_int32 repacketize_len; + +#ifndef DISABLE_FLOAT_API + if (analysis_read_pos_bak!= -1) + { + st->analysis.read_pos = analysis_read_pos_bak; + st->analysis.read_subframe = analysis_read_subframe_bak; + } +#endif + + nb_frames = frame_size > st->Fs/25 ? 
3 : 2; + bytes_per_frame = IMIN(1276,(out_data_bytes-3)/nb_frames); + + ALLOC(tmp_data, nb_frames*bytes_per_frame, unsigned char); + + ALLOC(rp, 1, OpusRepacketizer); + opus_repacketizer_init(rp); + + bak_mode = st->user_forced_mode; + bak_bandwidth = st->user_bandwidth; + bak_channels = st->force_channels; + + st->user_forced_mode = st->mode; + st->user_bandwidth = st->bandwidth; + st->force_channels = st->stream_channels; + bak_to_mono = st->silk_mode.toMono; + + if (bak_to_mono) + st->force_channels = 1; + else + st->prev_channels = st->stream_channels; + for (i=0;i<nb_frames;i++) + { + int tmp_len; + st->silk_mode.toMono = 0; + /* When switching from SILK/Hybrid to CELT, only ask for a switch at the last frame */ + if (to_celt && i==nb_frames-1) + st->user_forced_mode = MODE_CELT_ONLY; + tmp_len = opus_encode_native(st, pcm+i*(st->channels*st->Fs/50), st->Fs/50, + tmp_data+i*bytes_per_frame, bytes_per_frame, lsb_depth, + NULL, 0, c1, c2, analysis_channels, downmix); + if (tmp_len<0) + { + RESTORE_STACK; + return OPUS_INTERNAL_ERROR; + } + ret = opus_repacketizer_cat(rp, tmp_data+i*bytes_per_frame, tmp_len); + if (ret<0) + { + RESTORE_STACK; + return OPUS_INTERNAL_ERROR; + } + } + if (st->use_vbr) + repacketize_len = out_data_bytes; + else + repacketize_len = IMIN(3*st->bitrate_bps/(3*8*50/nb_frames), out_data_bytes); + ret = opus_repacketizer_out_range_impl(rp, 0, nb_frames, data, repacketize_len, 0, !st->use_vbr); + if (ret<0) + { + RESTORE_STACK; + return OPUS_INTERNAL_ERROR; + } + st->user_forced_mode = bak_mode; + st->user_bandwidth = bak_bandwidth; + st->force_channels = bak_channels; + st->silk_mode.toMono = bak_to_mono; + RESTORE_STACK; + return ret; + } + curr_bandwidth = st->bandwidth; + + /* Chooses the appropriate mode for speech + *NEVER* switch to/from CELT-only mode here as this will invalidate some assumptions */ + if (st->mode == MODE_SILK_ONLY && curr_bandwidth > OPUS_BANDWIDTH_WIDEBAND) + st->mode = MODE_HYBRID; + if (st->mode == MODE_HYBRID 
&& curr_bandwidth <= OPUS_BANDWIDTH_WIDEBAND) + st->mode = MODE_SILK_ONLY; + + /* printf("%d %d %d %d\n", st->bitrate_bps, st->stream_channels, st->mode, curr_bandwidth); */ + bytes_target = IMIN(max_data_bytes-redundancy_bytes, st->bitrate_bps * frame_size / (st->Fs * 8)) - 1; + + data += 1; + + ec_enc_init(&enc, data, max_data_bytes-1); + + ALLOC(pcm_buf, (total_buffer+frame_size)*st->channels, opus_val16); + for (i=0;i<total_buffer*st->channels;i++) + pcm_buf[i] = st->delay_buffer[(st->encoder_buffer-total_buffer)*st->channels+i]; + + if (st->mode == MODE_CELT_ONLY) + hp_freq_smth1 = silk_LSHIFT( silk_lin2log( VARIABLE_HP_MIN_CUTOFF_HZ ), 8 ); + else + hp_freq_smth1 = ((silk_encoder*)silk_enc)->state_Fxx[0].sCmn.variable_HP_smth1_Q15; + + st->variable_HP_smth2_Q15 = silk_SMLAWB( st->variable_HP_smth2_Q15, + hp_freq_smth1 - st->variable_HP_smth2_Q15, SILK_FIX_CONST( VARIABLE_HP_SMTH_COEF2, 16 ) ); + + /* convert from log scale to Hertz */ + cutoff_Hz = silk_log2lin( silk_RSHIFT( st->variable_HP_smth2_Q15, 8 ) ); + + if (st->application == OPUS_APPLICATION_VOIP) + { + hp_cutoff(pcm, cutoff_Hz, &pcm_buf[total_buffer*st->channels], st->hp_mem, frame_size, st->channels, st->Fs); + } else { + dc_reject(pcm, 3, &pcm_buf[total_buffer*st->channels], st->hp_mem, frame_size, st->channels, st->Fs); + } + + + + /* SILK processing */ + HB_gain = Q15ONE; + if (st->mode != MODE_CELT_ONLY) + { + opus_int32 total_bitRate, celt_rate; +#ifdef OPUS_FIXED_POINT + const opus_int16 *pcm_silk; +#else + VARDECL(opus_int16, pcm_silk); + ALLOC(pcm_silk, st->channels*frame_size, opus_int16); +#endif + + /* Distribute bits between SILK and CELT */ + total_bitRate = 8 * bytes_target * frame_rate; + if( st->mode == MODE_HYBRID ) { + int HB_gain_ref; + /* Base rate for SILK */ + st->silk_mode.bitRate = st->stream_channels * ( 5000 + 1000 * ( st->Fs == 100 * frame_size ) ); + if( curr_bandwidth == OPUS_BANDWIDTH_SUPERWIDEBAND ) { + /* SILK gets 2/3 of the remaining bits */ + 
st->silk_mode.bitRate += ( total_bitRate - st->silk_mode.bitRate ) * 2 / 3; + } else { /* FULLBAND */ + /* SILK gets 3/5 of the remaining bits */ + st->silk_mode.bitRate += ( total_bitRate - st->silk_mode.bitRate ) * 3 / 5; + } + /* Don't let SILK use more than 80% */ + if( st->silk_mode.bitRate > total_bitRate * 4/5 ) { + st->silk_mode.bitRate = total_bitRate * 4/5; + } + if (!st->energy_masking) + { + /* Increasingly attenuate high band when it gets allocated fewer bits */ + celt_rate = total_bitRate - st->silk_mode.bitRate; + HB_gain_ref = (curr_bandwidth == OPUS_BANDWIDTH_SUPERWIDEBAND) ? 3000 : 3600; + HB_gain = SHL32((opus_val32)celt_rate, 9) / SHR32((opus_val32)celt_rate + st->stream_channels * HB_gain_ref, 6); + HB_gain = HB_gain < Q15ONE*6/7 ? HB_gain + Q15ONE/7 : Q15ONE; + } + } else { + /* SILK gets all bits */ + st->silk_mode.bitRate = total_bitRate; + } + + /* Surround masking for SILK */ + if (st->energy_masking && st->use_vbr && !st->lfe) + { + opus_val32 mask_sum=0; + opus_val16 masking_depth; + opus_int32 rate_offset; + int c; + int end = 17; + opus_int16 srate = 16000; + if (st->bandwidth == OPUS_BANDWIDTH_NARROWBAND) + { + end = 13; + srate = 8000; + } else if (st->bandwidth == OPUS_BANDWIDTH_MEDIUMBAND) + { + end = 15; + srate = 12000; + } + for (c=0;c<st->channels;c++) + { + for(i=0;i<end;i++) + { + opus_val16 mask; + mask = MAX16(MIN16(st->energy_masking[21*c+i], + QCONST16(.5f, DB_SHIFT)), -QCONST16(2.0f, DB_SHIFT)); + if (mask > 0) + mask = HALF16(mask); + mask_sum += mask; + } + } + /* Conservative rate reduction, we cut the masking in half */ + masking_depth = mask_sum / end*st->channels; + masking_depth += QCONST16(.2f, DB_SHIFT); + rate_offset = (opus_int32)PSHR32(MULT16_16(srate, masking_depth), DB_SHIFT); + rate_offset = MAX32(rate_offset, -2*st->silk_mode.bitRate/3); + /* Split the rate change between the SILK and CELT part for hybrid. 
*/ + if (st->bandwidth==OPUS_BANDWIDTH_SUPERWIDEBAND || st->bandwidth==OPUS_BANDWIDTH_FULLBAND) + st->silk_mode.bitRate += 3*rate_offset/5; + else + st->silk_mode.bitRate += rate_offset; + bytes_target += rate_offset * frame_size / (8 * st->Fs); + } + + st->silk_mode.payloadSize_ms = 1000 * frame_size / st->Fs; + st->silk_mode.nChannelsAPI = st->channels; + st->silk_mode.nChannelsInternal = st->stream_channels; + if (curr_bandwidth == OPUS_BANDWIDTH_NARROWBAND) { + st->silk_mode.desiredInternalSampleRate = 8000; + } else if (curr_bandwidth == OPUS_BANDWIDTH_MEDIUMBAND) { + st->silk_mode.desiredInternalSampleRate = 12000; + } else { + silk_assert( st->mode == MODE_HYBRID || curr_bandwidth == OPUS_BANDWIDTH_WIDEBAND ); + st->silk_mode.desiredInternalSampleRate = 16000; + } + if( st->mode == MODE_HYBRID ) { + /* Don't allow bandwidth reduction at lowest bitrates in hybrid mode */ + st->silk_mode.minInternalSampleRate = 16000; + } else { + st->silk_mode.minInternalSampleRate = 8000; + } + + if (st->mode == MODE_SILK_ONLY) + { + opus_int32 effective_max_rate = max_rate; + st->silk_mode.maxInternalSampleRate = 16000; + if (frame_rate > 50) + effective_max_rate = effective_max_rate*2/3; + if (effective_max_rate < 13000) + { + st->silk_mode.maxInternalSampleRate = 12000; + st->silk_mode.desiredInternalSampleRate = IMIN(12000, st->silk_mode.desiredInternalSampleRate); + } + if (effective_max_rate < 9600) + { + st->silk_mode.maxInternalSampleRate = 8000; + st->silk_mode.desiredInternalSampleRate = IMIN(8000, st->silk_mode.desiredInternalSampleRate); + } + } else { + st->silk_mode.maxInternalSampleRate = 16000; + } + + st->silk_mode.useCBR = !st->use_vbr; + + /* Call SILK encoder for the low band */ + nBytes = IMIN(1275, max_data_bytes-1-redundancy_bytes); + + st->silk_mode.maxBits = nBytes*8; + /* Only allow up to 90% of the bits for hybrid mode*/ + if (st->mode == MODE_HYBRID) + st->silk_mode.maxBits = (opus_int32)st->silk_mode.maxBits*9/10; + if (st->silk_mode.useCBR) + { 
+ st->silk_mode.maxBits = (st->silk_mode.bitRate * frame_size / (st->Fs * 8))*8; + /* Reduce the initial target to make it easier to reach the CBR rate */ + st->silk_mode.bitRate = IMAX(1, st->silk_mode.bitRate-2000); + } + + if (prefill) + { + opus_int32 zero=0; + int prefill_offset; + /* Use a smooth onset for the SILK prefill to avoid the encoder trying to encode + a discontinuity. The exact location is what we need to avoid leaving any "gap" + in the audio when mixing with the redundant CELT frame. Here we can afford to + overwrite st->delay_buffer because the only thing that uses it before it gets + rewritten is tmp_prefill[] and even then only the part after the ramp really + gets used (rather than sent to the encoder and discarded) */ + prefill_offset = st->channels*(st->encoder_buffer-st->delay_compensation-st->Fs/400); + gain_fade(st->delay_buffer+prefill_offset, st->delay_buffer+prefill_offset, + 0, Q15ONE, celt_mode->overlap, st->Fs/400, st->channels, celt_mode->window, st->Fs); + for(i=0;i<prefill_offset;i++) + st->delay_buffer[i]=0; +#ifdef OPUS_FIXED_POINT + pcm_silk = st->delay_buffer; +#else + for (i=0;i<st->encoder_buffer*st->channels;i++) + pcm_silk[i] = FLOAT2INT16(st->delay_buffer[i]); +#endif + silk_Encode( silk_enc, &st->silk_mode, pcm_silk, st->encoder_buffer, NULL, &zero, 1 ); + } + +#ifdef OPUS_FIXED_POINT + pcm_silk = pcm_buf+total_buffer*st->channels; +#else + for (i=0;i<frame_size*st->channels;i++) + pcm_silk[i] = FLOAT2INT16(pcm_buf[total_buffer*st->channels + i]); +#endif + ret = silk_Encode( silk_enc, &st->silk_mode, pcm_silk, frame_size, &enc, &nBytes, 0 ); + if( ret ) { + /*fprintf (stderr, "SILK encode error: %d\n", ret);*/ + /* Handle error */ + RESTORE_STACK; + return OPUS_INTERNAL_ERROR; + } + if (nBytes==0) + { + st->rangeFinal = 0; + data[-1] = gen_toc(st->mode, st->Fs/frame_size, curr_bandwidth, st->stream_channels); + RESTORE_STACK; + return 1; + } + /* Extract SILK internal bandwidth for signaling in first byte */ + if( 
st->mode == MODE_SILK_ONLY ) { + if( st->silk_mode.internalSampleRate == 8000 ) { + curr_bandwidth = OPUS_BANDWIDTH_NARROWBAND; + } else if( st->silk_mode.internalSampleRate == 12000 ) { + curr_bandwidth = OPUS_BANDWIDTH_MEDIUMBAND; + } else if( st->silk_mode.internalSampleRate == 16000 ) { + curr_bandwidth = OPUS_BANDWIDTH_WIDEBAND; + } + } else { + silk_assert( st->silk_mode.internalSampleRate == 16000 ); + } + + st->silk_mode.opusCanSwitch = st->silk_mode.switchReady; + /* FIXME: How do we allocate the redundancy for CBR? */ + if (st->silk_mode.opusCanSwitch) + { + redundancy = 1; + celt_to_silk = 0; + st->silk_bw_switch = 1; + } + } + + /* CELT processing */ + { + int endband=21; + + switch(curr_bandwidth) + { + case OPUS_BANDWIDTH_NARROWBAND: + endband = 13; + break; + case OPUS_BANDWIDTH_MEDIUMBAND: + case OPUS_BANDWIDTH_WIDEBAND: + endband = 17; + break; + case OPUS_BANDWIDTH_SUPERWIDEBAND: + endband = 19; + break; + case OPUS_BANDWIDTH_FULLBAND: + endband = 21; + break; + } + celt_encoder_ctl(celt_enc, CELT_SET_END_BAND(endband)); + celt_encoder_ctl(celt_enc, CELT_SET_CHANNELS(st->stream_channels)); + } + celt_encoder_ctl(celt_enc, OPUS_SET_BITRATE(OPUS_BITRATE_MAX)); + if (st->mode != MODE_SILK_ONLY) + { + opus_val32 celt_pred=2; + celt_encoder_ctl(celt_enc, OPUS_SET_VBR(0)); + /* We may still decide to disable prediction later */ + if (st->silk_mode.reducedDependency) + celt_pred = 0; + celt_encoder_ctl(celt_enc, CELT_SET_PREDICTION(celt_pred)); + + if (st->mode == MODE_HYBRID) + { + int len; + + len = (ec_tell(&enc)+7)>>3; + if (redundancy) + len += st->mode == MODE_HYBRID ? 3 : 1; + if( st->use_vbr ) { + nb_compr_bytes = len + bytes_target - (st->silk_mode.bitRate * frame_size) / (8 * st->Fs); + } else { + /* check if SILK used up too much */ + nb_compr_bytes = len > bytes_target ? 
len : bytes_target; + } + } else { + if (st->use_vbr) + { + opus_int32 bonus=0; +#ifndef DISABLE_FLOAT_API + if (st->variable_duration==OPUS_FRAMESIZE_VARIABLE && frame_size != st->Fs/50) + { + bonus = (60*st->stream_channels+40)*(st->Fs/frame_size-50); + if (analysis_info.valid) + bonus = (opus_int32)(bonus*(1.f+.5f*analysis_info.tonality)); + } +#endif + celt_encoder_ctl(celt_enc, OPUS_SET_VBR(1)); + celt_encoder_ctl(celt_enc, OPUS_SET_VBR_CONSTRAINT(st->vbr_constraint)); + celt_encoder_ctl(celt_enc, OPUS_SET_BITRATE(st->bitrate_bps+bonus)); + nb_compr_bytes = max_data_bytes-1-redundancy_bytes; + } else { + nb_compr_bytes = bytes_target; + } + } + + } else { + nb_compr_bytes = 0; + } + + ALLOC(tmp_prefill, st->channels*st->Fs/400, opus_val16); + if (st->mode != MODE_SILK_ONLY && st->mode != st->prev_mode && st->prev_mode > 0) + { + for (i=0;i<st->channels*st->Fs/400;i++) + tmp_prefill[i] = st->delay_buffer[(st->encoder_buffer-total_buffer-st->Fs/400)*st->channels + i]; + } + + for (i=0;i<st->channels*(st->encoder_buffer-(frame_size+total_buffer));i++) + st->delay_buffer[i] = st->delay_buffer[i+st->channels*frame_size]; + for (;i<st->encoder_buffer*st->channels;i++) + st->delay_buffer[i] = pcm_buf[(frame_size+total_buffer-st->encoder_buffer)*st->channels+i]; + + /* gain_fade() and stereo_fade() need to be after the buffer copying + because we don't want any of this to affect the SILK part */ + if( st->prev_HB_gain < Q15ONE || HB_gain < Q15ONE ) { + gain_fade(pcm_buf, pcm_buf, + st->prev_HB_gain, HB_gain, celt_mode->overlap, frame_size, st->channels, celt_mode->window, st->Fs); + } + st->prev_HB_gain = HB_gain; + if (st->mode != MODE_HYBRID || st->stream_channels==1) + st->silk_mode.stereoWidth_Q14 = IMIN((1<<14),2*IMAX(0,equiv_rate-30000)); + if( !st->energy_masking && st->channels == 2 ) { + /* Apply stereo width reduction (at low bitrates) */ + if( st->hybrid_stereo_width_Q14 < (1 << 14) || st->silk_mode.stereoWidth_Q14 < (1 << 14) ) { + opus_val16 g1, g2; + g1 
= st->hybrid_stereo_width_Q14; + g2 = (opus_val16)(st->silk_mode.stereoWidth_Q14); +#ifdef OPUS_FIXED_POINT + g1 = g1==16384 ? Q15ONE : SHL16(g1,1); + g2 = g2==16384 ? Q15ONE : SHL16(g2,1); +#else + g1 *= (1.f/16384); + g2 *= (1.f/16384); +#endif + stereo_fade(pcm_buf, pcm_buf, g1, g2, celt_mode->overlap, + frame_size, st->channels, celt_mode->window, st->Fs); + st->hybrid_stereo_width_Q14 = st->silk_mode.stereoWidth_Q14; + } + } + + if ( st->mode != MODE_CELT_ONLY && ec_tell(&enc)+17+20*(st->mode == MODE_HYBRID) <= 8*(max_data_bytes-1)) + { + /* For SILK mode, the redundancy is inferred from the length */ + if (st->mode == MODE_HYBRID && (redundancy || ec_tell(&enc)+37 <= 8*nb_compr_bytes)) + ec_enc_bit_logp(&enc, redundancy, 12); + if (redundancy) + { + int max_redundancy; + ec_enc_bit_logp(&enc, celt_to_silk, 1); + if (st->mode == MODE_HYBRID) + max_redundancy = (max_data_bytes-1)-nb_compr_bytes; + else + max_redundancy = (max_data_bytes-1)-((ec_tell(&enc)+7)>>3); + /* Target the same bit-rate for redundancy as for the rest, + up to a max of 257 bytes */ + redundancy_bytes = IMIN(max_redundancy, st->bitrate_bps/1600); + redundancy_bytes = IMIN(257, IMAX(2, redundancy_bytes)); + if (st->mode == MODE_HYBRID) + ec_enc_uint(&enc, redundancy_bytes-2, 256); + } + } else { + redundancy = 0; + } + + if (!redundancy) + { + st->silk_bw_switch = 0; + redundancy_bytes = 0; + } + if (st->mode != MODE_CELT_ONLY)start_band=17; + + if (st->mode == MODE_SILK_ONLY) + { + ret = (ec_tell(&enc)+7)>>3; + ec_enc_done(&enc); + nb_compr_bytes = ret; + } else { + nb_compr_bytes = IMIN((max_data_bytes-1)-redundancy_bytes, nb_compr_bytes); + ec_enc_shrink(&enc, nb_compr_bytes); + } + +#ifndef DISABLE_FLOAT_API + if (redundancy || st->mode != MODE_SILK_ONLY) + celt_encoder_ctl(celt_enc, CELT_SET_ANALYSIS(&analysis_info)); +#endif + + /* 5 ms redundant frame for CELT->SILK */ + if (redundancy && celt_to_silk) + { + int err; + celt_encoder_ctl(celt_enc, CELT_SET_START_BAND(0)); + 
celt_encoder_ctl(celt_enc, OPUS_SET_VBR(0)); + err = celt_encode_with_ec(celt_enc, pcm_buf, st->Fs/200, data+nb_compr_bytes, redundancy_bytes, NULL); + if (err < 0) + { + RESTORE_STACK; + return OPUS_INTERNAL_ERROR; + } + celt_encoder_ctl(celt_enc, OPUS_GET_FINAL_RANGE(&redundant_rng)); + celt_encoder_ctl(celt_enc, OPUS_RESET_STATE); + } + + celt_encoder_ctl(celt_enc, CELT_SET_START_BAND(start_band)); + + if (st->mode != MODE_SILK_ONLY) + { + if (st->mode != st->prev_mode && st->prev_mode > 0) + { + unsigned char dummy[2]; + celt_encoder_ctl(celt_enc, OPUS_RESET_STATE); + + /* Prefilling */ + celt_encode_with_ec(celt_enc, tmp_prefill, st->Fs/400, dummy, 2, NULL); + celt_encoder_ctl(celt_enc, CELT_SET_PREDICTION(0)); + } + /* If false, we already busted the budget and we'll end up with a "PLC packet" */ + if (ec_tell(&enc) <= 8*nb_compr_bytes) + { + ret = celt_encode_with_ec(celt_enc, pcm_buf, frame_size, NULL, nb_compr_bytes, &enc); + if (ret < 0) + { + RESTORE_STACK; + return OPUS_INTERNAL_ERROR; + } + } + } + + /* 5 ms redundant frame for SILK->CELT */ + if (redundancy && !celt_to_silk) + { + int err; + unsigned char dummy[2]; + int N2, N4; + N2 = st->Fs/200; + N4 = st->Fs/400; + + celt_encoder_ctl(celt_enc, OPUS_RESET_STATE); + celt_encoder_ctl(celt_enc, CELT_SET_START_BAND(0)); + celt_encoder_ctl(celt_enc, CELT_SET_PREDICTION(0)); + + /* NOTE: We could speed this up slightly (at the expense of code size) by just adding a function that prefills the buffer */ + celt_encode_with_ec(celt_enc, pcm_buf+st->channels*(frame_size-N2-N4), N4, dummy, 2, NULL); + + err = celt_encode_with_ec(celt_enc, pcm_buf+st->channels*(frame_size-N2), N2, data+nb_compr_bytes, redundancy_bytes, NULL); + if (err < 0) + { + RESTORE_STACK; + return OPUS_INTERNAL_ERROR; + } + celt_encoder_ctl(celt_enc, OPUS_GET_FINAL_RANGE(&redundant_rng)); + } + + + + /* Signalling the mode in the first byte */ + data--; + data[0] = gen_toc(st->mode, st->Fs/frame_size, curr_bandwidth, st->stream_channels); 
+ + st->rangeFinal = enc.rng ^ redundant_rng; + + if (to_celt) + st->prev_mode = MODE_CELT_ONLY; + else + st->prev_mode = st->mode; + st->prev_channels = st->stream_channels; + st->prev_framesize = frame_size; + + st->first = 0; + + /* In the unlikely case that the SILK encoder busted its target, tell + the decoder to call the PLC */ + if (ec_tell(&enc) > (max_data_bytes-1)*8) + { + if (max_data_bytes < 2) + { + RESTORE_STACK; + return OPUS_BUFFER_TOO_SMALL; + } + data[1] = 0; + ret = 1; + st->rangeFinal = 0; + } else if (st->mode==MODE_SILK_ONLY&&!redundancy) + { + /*When in LPC only mode it's perfectly + reasonable to strip off trailing zero bytes as + the required range decoder behavior is to + fill these in. This can't be done when the MDCT + modes are used because the decoder needs to know + the actual length for allocation purposes.*/ + while(ret>2&&data[ret]==0)ret--; + } + /* Count ToC and redundancy */ + ret += 1+redundancy_bytes; + if (!st->use_vbr) + { + if (opus_packet_pad(data, ret, max_data_bytes) != OPUS_OK) + + { + RESTORE_STACK; + return OPUS_INTERNAL_ERROR; + } + ret = max_data_bytes; + } + RESTORE_STACK; + return ret; +} + +#ifdef OPUS_FIXED_POINT + +#ifndef DISABLE_FLOAT_API +opus_int32 opus_encode_float(OpusEncoder *st, const float *pcm, int analysis_frame_size, + unsigned char *data, opus_int32 max_data_bytes) +{ + int i, ret; + int frame_size; + int delay_compensation; + VARDECL(opus_int16, in); + ALLOC_STACK; + + if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY) + delay_compensation = 0; + else + delay_compensation = st->delay_compensation; + frame_size = compute_frame_size(pcm, analysis_frame_size, + st->variable_duration, st->channels, st->Fs, st->bitrate_bps, + delay_compensation, downmix_float, st->analysis.subframe_mem); + + ALLOC(in, frame_size*st->channels, opus_int16); + + for (i=0;i<frame_size*st->channels;i++) + in[i] = FLOAT2INT16(pcm[i]); + ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16, pcm, 
analysis_frame_size, 0, -2, st->channels, downmix_float); + RESTORE_STACK; + return ret; +} +#endif + +opus_int32 opus_encode(OpusEncoder *st, const opus_int16 *pcm, int analysis_frame_size, + unsigned char *data, opus_int32 out_data_bytes) +{ + int frame_size; + int delay_compensation; + if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY) + delay_compensation = 0; + else + delay_compensation = st->delay_compensation; + frame_size = compute_frame_size(pcm, analysis_frame_size, + st->variable_duration, st->channels, st->Fs, st->bitrate_bps, + delay_compensation, downmix_int +#ifndef DISABLE_FLOAT_API + , st->analysis.subframe_mem +#endif + ); + return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 16, pcm, analysis_frame_size, 0, -2, st->channels, downmix_int); +} + +#else +opus_int32 opus_encode(OpusEncoder *st, const opus_int16 *pcm, int analysis_frame_size, + unsigned char *data, opus_int32 max_data_bytes) +{ + int i, ret; + int frame_size; + int delay_compensation; + VARDECL(float, in); + ALLOC_STACK; + + if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY) + delay_compensation = 0; + else + delay_compensation = st->delay_compensation; + frame_size = compute_frame_size(pcm, analysis_frame_size, + st->variable_duration, st->channels, st->Fs, st->bitrate_bps, + delay_compensation, downmix_int, st->analysis.subframe_mem); + + ALLOC(in, frame_size*st->channels, float); + + for (i=0;i<frame_size*st->channels;i++) + in[i] = (1.0f/32768)*pcm[i]; + ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16, pcm, analysis_frame_size, 0, -2, st->channels, downmix_int); + RESTORE_STACK; + return ret; +} +opus_int32 opus_encode_float(OpusEncoder *st, const float *pcm, int analysis_frame_size, + unsigned char *data, opus_int32 out_data_bytes) +{ + int frame_size; + int delay_compensation; + if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY) + delay_compensation = 0; + else + delay_compensation = st->delay_compensation; + 
frame_size = compute_frame_size(pcm, analysis_frame_size, + st->variable_duration, st->channels, st->Fs, st->bitrate_bps, + delay_compensation, downmix_float, st->analysis.subframe_mem); + return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 24, + pcm, analysis_frame_size, 0, -2, st->channels, downmix_float); +} +#endif + + +int opus_encoder_ctl(OpusEncoder *st, int request, ...) +{ + int ret; + CELTEncoder *celt_enc; + va_list ap; + + ret = OPUS_OK; + va_start(ap, request); + + celt_enc = (CELTEncoder*)((char*)st+st->celt_enc_offset); + + switch (request) + { + case OPUS_SET_APPLICATION_REQUEST: + { + opus_int32 value = va_arg(ap, opus_int32); + if ( (value != OPUS_APPLICATION_VOIP && value != OPUS_APPLICATION_AUDIO + && value != OPUS_APPLICATION_RESTRICTED_LOWDELAY) + || (!st->first && st->application != value)) + { + ret = OPUS_BAD_ARG; + break; + } + st->application = value; + } + break; + case OPUS_GET_APPLICATION_REQUEST: + { + opus_int32 *value = va_arg(ap, opus_int32*); + if (!value) + { + goto bad_arg; + } + *value = st->application; + } + break; + case OPUS_SET_BITRATE_REQUEST: + { + opus_int32 value = va_arg(ap, opus_int32); + if (value != OPUS_AUTO && value != OPUS_BITRATE_MAX) + { + if (value <= 0) + goto bad_arg; + else if (value <= 500) + value = 500; + else if (value > (opus_int32)300000*st->channels) + value = (opus_int32)300000*st->channels; + } + st->user_bitrate_bps = value; + } + break; + case OPUS_GET_BITRATE_REQUEST: + { + opus_int32 *value = va_arg(ap, opus_int32*); + if (!value) + { + goto bad_arg; + } + *value = user_bitrate_to_bitrate(st, st->prev_framesize, 1276); + } + break; + case OPUS_SET_FORCE_CHANNELS_REQUEST: + { + opus_int32 value = va_arg(ap, opus_int32); + if((value<1 || value>st->channels) && value != OPUS_AUTO) + { + goto bad_arg; + } + st->force_channels = value; + } + break; + case OPUS_GET_FORCE_CHANNELS_REQUEST: + { + opus_int32 *value = va_arg(ap, opus_int32*); + if (!value) + { + goto bad_arg; + } + 
*value = st->force_channels; + } + break; + case OPUS_SET_MAX_BANDWIDTH_REQUEST: + { + opus_int32 value = va_arg(ap, opus_int32); + if (value < OPUS_BANDWIDTH_NARROWBAND || value > OPUS_BANDWIDTH_FULLBAND) + { + goto bad_arg; + } + st->max_bandwidth = value; + if (st->max_bandwidth == OPUS_BANDWIDTH_NARROWBAND) { + st->silk_mode.maxInternalSampleRate = 8000; + } else if (st->max_bandwidth == OPUS_BANDWIDTH_MEDIUMBAND) { + st->silk_mode.maxInternalSampleRate = 12000; + } else { + st->silk_mode.maxInternalSampleRate = 16000; + } + } + break; + case OPUS_GET_MAX_BANDWIDTH_REQUEST: + { + opus_int32 *value = va_arg(ap, opus_int32*); + if (!value) + { + goto bad_arg; + } + *value = st->max_bandwidth; + } + break; + case OPUS_SET_BANDWIDTH_REQUEST: + { + opus_int32 value = va_arg(ap, opus_int32); + if ((value < OPUS_BANDWIDTH_NARROWBAND || value > OPUS_BANDWIDTH_FULLBAND) && value != OPUS_AUTO) + { + goto bad_arg; + } + st->user_bandwidth = value; + if (st->user_bandwidth == OPUS_BANDWIDTH_NARROWBAND) { + st->silk_mode.maxInternalSampleRate = 8000; + } else if (st->user_bandwidth == OPUS_BANDWIDTH_MEDIUMBAND) { + st->silk_mode.maxInternalSampleRate = 12000; + } else { + st->silk_mode.maxInternalSampleRate = 16000; + } + } + break; + case OPUS_GET_BANDWIDTH_REQUEST: + { + opus_int32 *value = va_arg(ap, opus_int32*); + if (!value) + { + goto bad_arg; + } + *value = st->bandwidth; + } + break; + case OPUS_SET_DTX_REQUEST: + { + opus_int32 value = va_arg(ap, opus_int32); + if(value<0 || value>1) + { + goto bad_arg; + } + st->silk_mode.useDTX = value; + } + break; + case OPUS_GET_DTX_REQUEST: + { + opus_int32 *value = va_arg(ap, opus_int32*); + if (!value) + { + goto bad_arg; + } + *value = st->silk_mode.useDTX; + } + break; + case OPUS_SET_COMPLEXITY_REQUEST: + { + opus_int32 value = va_arg(ap, opus_int32); + if(value<0 || value>10) + { + goto bad_arg; + } + st->silk_mode.complexity = value; + celt_encoder_ctl(celt_enc, OPUS_SET_COMPLEXITY(value)); + } + break; + case 
OPUS_GET_COMPLEXITY_REQUEST: + { + opus_int32 *value = va_arg(ap, opus_int32*); + if (!value) + { + goto bad_arg; + } + *value = st->silk_mode.complexity; + } + break; + case OPUS_SET_INBAND_FEC_REQUEST: + { + opus_int32 value = va_arg(ap, opus_int32); + if(value<0 || value>1) + { + goto bad_arg; + } + st->silk_mode.useInBandFEC = value; + } + break; + case OPUS_GET_INBAND_FEC_REQUEST: + { + opus_int32 *value = va_arg(ap, opus_int32*); + if (!value) + { + goto bad_arg; + } + *value = st->silk_mode.useInBandFEC; + } + break; + case OPUS_SET_PACKET_LOSS_PERC_REQUEST: + { + opus_int32 value = va_arg(ap, opus_int32); + if (value < 0 || value > 100) + { + goto bad_arg; + } + st->silk_mode.packetLossPercentage = value; + celt_encoder_ctl(celt_enc, OPUS_SET_PACKET_LOSS_PERC(value)); + } + break; + case OPUS_GET_PACKET_LOSS_PERC_REQUEST: + { + opus_int32 *value = va_arg(ap, opus_int32*); + if (!value) + { + goto bad_arg; + } + *value = st->silk_mode.packetLossPercentage; + } + break; + case OPUS_SET_VBR_REQUEST: + { + opus_int32 value = va_arg(ap, opus_int32); + if(value<0 || value>1) + { + goto bad_arg; + } + st->use_vbr = value; + st->silk_mode.useCBR = 1-value; + } + break; + case OPUS_GET_VBR_REQUEST: + { + opus_int32 *value = va_arg(ap, opus_int32*); + if (!value) + { + goto bad_arg; + } + *value = st->use_vbr; + } + break; + case OPUS_SET_VOICE_RATIO_REQUEST: + { + opus_int32 value = va_arg(ap, opus_int32); + if (value<-1 || value>100) + { + goto bad_arg; + } + st->voice_ratio = value; + } + break; + case OPUS_GET_VOICE_RATIO_REQUEST: + { + opus_int32 *value = va_arg(ap, opus_int32*); + if (!value) + { + goto bad_arg; + } + *value = st->voice_ratio; + } + break; + case OPUS_SET_VBR_CONSTRAINT_REQUEST: + { + opus_int32 value = va_arg(ap, opus_int32); + if(value<0 || value>1) + { + goto bad_arg; + } + st->vbr_constraint = value; + } + break; + case OPUS_GET_VBR_CONSTRAINT_REQUEST: + { + opus_int32 *value = va_arg(ap, opus_int32*); + if (!value) + { + goto bad_arg; + } 
+ *value = st->vbr_constraint; + } + break; + case OPUS_SET_SIGNAL_REQUEST: + { + opus_int32 value = va_arg(ap, opus_int32); + if(value!=OPUS_AUTO && value!=OPUS_SIGNAL_VOICE && value!=OPUS_SIGNAL_MUSIC) + { + goto bad_arg; + } + st->signal_type = value; + } + break; + case OPUS_GET_SIGNAL_REQUEST: + { + opus_int32 *value = va_arg(ap, opus_int32*); + if (!value) + { + goto bad_arg; + } + *value = st->signal_type; + } + break; + case OPUS_GET_LOOKAHEAD_REQUEST: + { + opus_int32 *value = va_arg(ap, opus_int32*); + if (!value) + { + goto bad_arg; + } + *value = st->Fs/400; + if (st->application != OPUS_APPLICATION_RESTRICTED_LOWDELAY) + *value += st->delay_compensation; + } + break; + case OPUS_GET_SAMPLE_RATE_REQUEST: + { + opus_int32 *value = va_arg(ap, opus_int32*); + if (!value) + { + goto bad_arg; + } + *value = st->Fs; + } + break; + case OPUS_GET_FINAL_RANGE_REQUEST: + { + opus_uint32 *value = va_arg(ap, opus_uint32*); + if (!value) + { + goto bad_arg; + } + *value = st->rangeFinal; + } + break; + case OPUS_SET_LSB_DEPTH_REQUEST: + { + opus_int32 value = va_arg(ap, opus_int32); + if (value<8 || value>24) + { + goto bad_arg; + } + st->lsb_depth=value; + } + break; + case OPUS_GET_LSB_DEPTH_REQUEST: + { + opus_int32 *value = va_arg(ap, opus_int32*); + if (!value) + { + goto bad_arg; + } + *value = st->lsb_depth; + } + break; + case OPUS_SET_EXPERT_FRAME_DURATION_REQUEST: + { + opus_int32 value = va_arg(ap, opus_int32); + if (value != OPUS_FRAMESIZE_ARG && value != OPUS_FRAMESIZE_2_5_MS && + value != OPUS_FRAMESIZE_5_MS && value != OPUS_FRAMESIZE_10_MS && + value != OPUS_FRAMESIZE_20_MS && value != OPUS_FRAMESIZE_40_MS && + value != OPUS_FRAMESIZE_60_MS && value != OPUS_FRAMESIZE_VARIABLE) + { + goto bad_arg; + } + st->variable_duration = value; + celt_encoder_ctl(celt_enc, OPUS_SET_EXPERT_FRAME_DURATION(value)); + } + break; + case OPUS_GET_EXPERT_FRAME_DURATION_REQUEST: + { + opus_int32 *value = va_arg(ap, opus_int32*); + if (!value) + { + goto bad_arg; + } + 
*value = st->variable_duration; + } + break; + case OPUS_SET_PREDICTION_DISABLED_REQUEST: + { + opus_int32 value = va_arg(ap, opus_int32); + if (value > 1 || value < 0) + goto bad_arg; + st->silk_mode.reducedDependency = value; + } + break; + case OPUS_GET_PREDICTION_DISABLED_REQUEST: + { + opus_int32 *value = va_arg(ap, opus_int32*); + if (!value) + goto bad_arg; + *value = st->silk_mode.reducedDependency; + } + break; + case OPUS_RESET_STATE: + { + void *silk_enc; + silk_EncControlStruct dummy; + silk_enc = (char*)st+st->silk_enc_offset; + + OPUS_CLEAR((char*)&st->OPUS_ENCODER_RESET_START, + sizeof(OpusEncoder)- + ((char*)&st->OPUS_ENCODER_RESET_START - (char*)st)); + + celt_encoder_ctl(celt_enc, OPUS_RESET_STATE); + silk_InitEncoder( silk_enc, st->arch, &dummy ); + st->stream_channels = st->channels; + st->hybrid_stereo_width_Q14 = 1 << 14; + st->prev_HB_gain = Q15ONE; + st->first = 1; + st->mode = MODE_HYBRID; + st->bandwidth = OPUS_BANDWIDTH_FULLBAND; + st->variable_HP_smth2_Q15 = silk_LSHIFT( silk_lin2log( VARIABLE_HP_MIN_CUTOFF_HZ ), 8 ); + } + break; + case OPUS_SET_FORCE_MODE_REQUEST: + { + opus_int32 value = va_arg(ap, opus_int32); + if ((value < MODE_SILK_ONLY || value > MODE_CELT_ONLY) && value != OPUS_AUTO) + { + goto bad_arg; + } + st->user_forced_mode = value; + } + break; + case OPUS_SET_LFE_REQUEST: + { + opus_int32 value = va_arg(ap, opus_int32); + st->lfe = value; + ret = celt_encoder_ctl(celt_enc, OPUS_SET_LFE(value)); + } + break; + case OPUS_SET_ENERGY_MASK_REQUEST: + { + opus_val16 *value = va_arg(ap, opus_val16*); + st->energy_masking = value; + ret = celt_encoder_ctl(celt_enc, OPUS_SET_ENERGY_MASK(value)); + } + break; + + case CELT_GET_MODE_REQUEST: + { + const CELTMode ** value = va_arg(ap, const CELTMode**); + if (!value) + { + goto bad_arg; + } + ret = celt_encoder_ctl(celt_enc, CELT_GET_MODE(value)); + } + break; + default: + /* fprintf(stderr, "unknown opus_encoder_ctl() request: %d", request);*/ + ret = OPUS_UNIMPLEMENTED; + break; + 
} + va_end(ap); + return ret; +bad_arg: + va_end(ap); + return OPUS_BAD_ARG; +} +/* Releases an encoder state by handing st to opus_free(). */ +void opus_encoder_destroy(OpusEncoder *st) +{ + opus_free(st); +} diff --git a/drivers/opus/opus_multistream.c b/drivers/opus/opus_multistream.c new file mode 100644 index 0000000000..8211c0b470 --- /dev/null +++ b/drivers/opus/opus_multistream.c @@ -0,0 +1,92 @@ +/* Copyright (c) 2011 Xiph.Org Foundation + Written by Jean-Marc Valin */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "opus_multistream.h" +#include "opus.h" +#include "opus_private.h" +#include "stack_alloc.h" +#include <stdarg.h> +#include "float_cast.h" +#include "os_support.h" + +/* Checks a channel layout. Returns 1 when nb_streams + nb_coupled_streams is at most 255 and every mapping[] entry for the nb_channels channels is either below that sum or equal to 255; returns 0 otherwise. */ +int validate_layout(const ChannelLayout *layout) +{ + int i, max_channel; + + max_channel = layout->nb_streams+layout->nb_coupled_streams; + if (max_channel>255) + return 0; + for (i=0;i<layout->nb_channels;i++) + { + if (layout->mapping[i] >= max_channel && layout->mapping[i] != 255) + return 0; + } + return 1; +} + +/* Finds the next channel whose mapping entry equals stream_id*2. The scan starts at index 0 when prev is negative, otherwise at prev+1, so passing the previous result continues the search. Returns the channel index, or -1 when no further match exists. */ +int get_left_channel(const ChannelLayout *layout, int stream_id, int prev) +{ + int i; + i = (prev<0) ? 0 : prev+1; + for (;i<layout->nb_channels;i++) + { + if (layout->mapping[i]==stream_id*2) + return i; + } + return -1; +} +/* Same scan as get_left_channel(), but matches mapping entries equal to stream_id*2+1. Returns the channel index, or -1 when no further match exists. */ +int get_right_channel(const ChannelLayout *layout, int stream_id, int prev) +{ + int i; + i = (prev<0) ? 0 : prev+1; + for (;i<layout->nb_channels;i++) + { + if (layout->mapping[i]==stream_id*2+1) + return i; + } + return -1; +} +/* Same scan as get_left_channel(), but matches mapping entries equal to stream_id + nb_coupled_streams. Returns the channel index, or -1 when no further match exists. */ +int get_mono_channel(const ChannelLayout *layout, int stream_id, int prev) +{ + int i; + i = (prev<0) ? 0 : prev+1; + for (;i<layout->nb_channels;i++) + { + if (layout->mapping[i]==stream_id+layout->nb_coupled_streams) + return i; + } + return -1; +} + diff --git a/drivers/opus/opus_multistream.h b/drivers/opus/opus_multistream.h new file mode 100644 index 0000000000..ae5997934a --- /dev/null +++ b/drivers/opus/opus_multistream.h @@ -0,0 +1,660 @@ +/* Copyright (c) 2011 Xiph.Org Foundation + Written by Jean-Marc Valin */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/** + * @file opus_multistream.h + * @brief Opus reference implementation multistream API + */ + +#ifndef OPUS_MULTISTREAM_H +#define OPUS_MULTISTREAM_H + +#include "opus.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** @cond OPUS_INTERNAL_DOC */ + +/** Macros to trigger compilation errors when the wrong types are provided to a + * CTL. */ +/**@{*/ +#define __opus_check_encstate_ptr(ptr) ((ptr) + ((ptr) - (OpusEncoder**)(ptr))) +#define __opus_check_decstate_ptr(ptr) ((ptr) + ((ptr) - (OpusDecoder**)(ptr))) +/**@}*/ + +/** These are the actual encoder and decoder CTL ID numbers. + * They should not be used directly by applications. 
+ * In general, SETs should be even and GETs should be odd.*/ +/**@{*/ +#define OPUS_MULTISTREAM_GET_ENCODER_STATE_REQUEST 5120 +#define OPUS_MULTISTREAM_GET_DECODER_STATE_REQUEST 5122 +/**@}*/ + +/** @endcond */ + +/** @defgroup opus_multistream_ctls Multistream specific encoder and decoder CTLs + * + * These are convenience macros that are specific to the + * opus_multistream_encoder_ctl() and opus_multistream_decoder_ctl() + * interface. + * The CTLs from @ref opus_genericctls, @ref opus_encoderctls, and + * @ref opus_decoderctls may be applied to a multistream encoder or decoder as + * well. + * In addition, you may retrieve the encoder or decoder state for a specific + * stream via #OPUS_MULTISTREAM_GET_ENCODER_STATE or + * #OPUS_MULTISTREAM_GET_DECODER_STATE and apply CTLs to it individually. + */ +/**@{*/ + +/** Gets the encoder state for an individual stream of a multistream encoder. + * @param[in] x <tt>opus_int32</tt>: The index of the stream whose encoder you + * wish to retrieve. + * This must be non-negative and less than + * the <code>streams</code> parameter used + * to initialize the encoder. + * @param[out] y <tt>OpusEncoder**</tt>: Returns a pointer to the given + * encoder state. + * @retval OPUS_BAD_ARG The index of the requested stream was out of range. + * @hideinitializer + */ +#define OPUS_MULTISTREAM_GET_ENCODER_STATE(x,y) OPUS_MULTISTREAM_GET_ENCODER_STATE_REQUEST, __opus_check_int(x), __opus_check_encstate_ptr(y) + +/** Gets the decoder state for an individual stream of a multistream decoder. + * @param[in] x <tt>opus_int32</tt>: The index of the stream whose decoder you + * wish to retrieve. + * This must be non-negative and less than + * the <code>streams</code> parameter used + * to initialize the decoder. + * @param[out] y <tt>OpusDecoder**</tt>: Returns a pointer to the given + * decoder state. + * @retval OPUS_BAD_ARG The index of the requested stream was out of range. 
+ * @hideinitializer + */ +#define OPUS_MULTISTREAM_GET_DECODER_STATE(x,y) OPUS_MULTISTREAM_GET_DECODER_STATE_REQUEST, __opus_check_int(x), __opus_check_decstate_ptr(y) + +/**@}*/ + +/** @defgroup opus_multistream Opus Multistream API + * @{ + * + * The multistream API allows individual Opus streams to be combined into a + * single packet, enabling support for up to 255 channels. Unlike an + * elementary Opus stream, the encoder and decoder must negotiate the channel + * configuration before the decoder can successfully interpret the data in the + * packets produced by the encoder. Some basic information, such as packet + * duration, can be computed without any special negotiation. + * + * The format for multistream Opus packets is defined in the + * <a href="http://tools.ietf.org/html/draft-terriberry-oggopus">Ogg + * encapsulation specification</a> and is based on the self-delimited Opus + * framing described in Appendix B of <a href="http://tools.ietf.org/html/rfc6716">RFC 6716</a>. + * Normal Opus packets are just a degenerate case of multistream Opus packets, + * and can be encoded or decoded with the multistream API by setting + * <code>streams</code> to <code>1</code> when initializing the encoder or + * decoder. + * + * Multistream Opus streams can contain up to 255 elementary Opus streams. + * These may be either "uncoupled" or "coupled", indicating that the decoder + * is configured to decode them to either 1 or 2 channels, respectively. + * The streams are ordered so that all coupled streams appear at the + * beginning. + * + * A <code>mapping</code> table defines which decoded channel <code>i</code> + * should be used for each input/output (I/O) channel <code>j</code>. This table is + * typically provided as an unsigned char array. + * Let <code>i = mapping[j]</code> be the index for I/O channel <code>j</code>. 
+ * If <code>i < 2*coupled_streams</code>, then I/O channel <code>j</code> is + * encoded as the left channel of stream <code>(i/2)</code> if <code>i</code> + * is even, or as the right channel of stream <code>(i/2)</code> if + * <code>i</code> is odd. Otherwise, I/O channel <code>j</code> is encoded as + * mono in stream <code>(i - coupled_streams)</code>, unless it has the special + * value 255, in which case it is omitted from the encoding entirely (the + * decoder will reproduce it as silence). Each value <code>i</code> must either + * be the special value 255 or be less than <code>streams + coupled_streams</code>. + * + * The output channels specified by the encoder + * should use the + * <a href="http://www.xiph.org/vorbis/doc/Vorbis_I_spec.html#x1-800004.3.9">Vorbis + * channel ordering</a>. A decoder may wish to apply an additional permutation + * to the mapping the encoder used to achieve a different output channel + * order (e.g. for outputting in WAV order). + * + * Each multistream packet contains an Opus packet for each stream, and all of + * the Opus packets in a single multistream packet must have the same + * duration. Therefore the duration of a multistream packet can be extracted + * from the TOC sequence of the first stream, which is located at the + * beginning of the packet, just like an elementary Opus stream: + * + * @code + * int nb_samples; + * int nb_frames; + * nb_frames = opus_packet_get_nb_frames(data, len); + * if (nb_frames < 1) + * return nb_frames; + * nb_samples = opus_packet_get_samples_per_frame(data, 48000) * nb_frames; + * @endcode + * + * The general encoding and decoding process proceeds exactly the same as in + * the normal @ref opus_encoder and @ref opus_decoder APIs. + * See their documentation for an overview of how to use the corresponding + * multistream functions. + */ + +/** Opus multistream encoder state. + * This contains the complete state of a multistream Opus encoder. 
+ * It is position independent and can be freely copied. + * @see opus_multistream_encoder_create + * @see opus_multistream_encoder_init + */ +typedef struct OpusMSEncoder OpusMSEncoder; + +/** Opus multistream decoder state. + * This contains the complete state of a multistream Opus decoder. + * It is position independent and can be freely copied. + * @see opus_multistream_decoder_create + * @see opus_multistream_decoder_init + */ +typedef struct OpusMSDecoder OpusMSDecoder; + +/**\name Multistream encoder functions */ +/**@{*/ + +/** Gets the size of an OpusMSEncoder structure. + * @param streams <tt>int</tt>: The total number of streams to encode from the + * input. + * This must be no more than 255. + * @param coupled_streams <tt>int</tt>: Number of coupled (2 channel) streams + * to encode. + * This must be no larger than the total + * number of streams. + * Additionally, The total number of + * encoded channels (<code>streams + + * coupled_streams</code>) must be no + * more than 255. + * @returns The size in bytes on success, or a negative error code + * (see @ref opus_errorcodes) on error. + */ +OPUS_EXPORT OPUS_WARN_UNUSED_RESULT opus_int32 opus_multistream_encoder_get_size( + int streams, + int coupled_streams +); + +OPUS_EXPORT OPUS_WARN_UNUSED_RESULT opus_int32 opus_multistream_surround_encoder_get_size( + int channels, + int mapping_family +); + + +/** Allocates and initializes a multistream encoder state. + * Call opus_multistream_encoder_destroy() to release + * this object when finished. + * @param Fs <tt>opus_int32</tt>: Sampling rate of the input signal (in Hz). + * This must be one of 8000, 12000, 16000, + * 24000, or 48000. + * @param channels <tt>int</tt>: Number of channels in the input signal. + * This must be at most 255. + * It may be greater than the number of + * coded channels (<code>streams + + * coupled_streams</code>). + * @param streams <tt>int</tt>: The total number of streams to encode from the + * input. 
+ * This must be no more than the number of channels. + * @param coupled_streams <tt>int</tt>: Number of coupled (2 channel) streams + * to encode. + * This must be no larger than the total + * number of streams. + * Additionally, The total number of + * encoded channels (<code>streams + + * coupled_streams</code>) must be no + * more than the number of input channels. + * @param[in] mapping <code>const unsigned char[channels]</code>: Mapping from + * encoded channels to input channels, as described in + * @ref opus_multistream. As an extra constraint, the + * multistream encoder does not allow encoding coupled + * streams for which one channel is unused since this + * is never a good idea. + * @param application <tt>int</tt>: The target encoder application. + * This must be one of the following: + * <dl> + * <dt>#OPUS_APPLICATION_VOIP</dt> + * <dd>Process signal for improved speech intelligibility.</dd> + * <dt>#OPUS_APPLICATION_AUDIO</dt> + * <dd>Favor faithfulness to the original input.</dd> + * <dt>#OPUS_APPLICATION_RESTRICTED_LOWDELAY</dt> + * <dd>Configure the minimum possible coding delay by disabling certain modes + * of operation.</dd> + * </dl> + * @param[out] error <tt>int *</tt>: Returns #OPUS_OK on success, or an error + * code (see @ref opus_errorcodes) on + * failure. + */ +OPUS_EXPORT OPUS_WARN_UNUSED_RESULT OpusMSEncoder *opus_multistream_encoder_create( + opus_int32 Fs, + int channels, + int streams, + int coupled_streams, + const unsigned char *mapping, + int application, + int *error +) OPUS_ARG_NONNULL(5); + +OPUS_EXPORT OPUS_WARN_UNUSED_RESULT OpusMSEncoder *opus_multistream_surround_encoder_create( + opus_int32 Fs, + int channels, + int mapping_family, + int *streams, + int *coupled_streams, + unsigned char *mapping, + int application, + int *error +) OPUS_ARG_NONNULL(5); + +/** Initialize a previously allocated multistream encoder state. 
+ * The memory pointed to by \a st must be at least the size returned by + * opus_multistream_encoder_get_size(). + * This is intended for applications which use their own allocator instead of + * malloc. + * To reset a previously initialized state, use the #OPUS_RESET_STATE CTL. + * @see opus_multistream_encoder_create + * @see opus_multistream_encoder_get_size + * @param st <tt>OpusMSEncoder*</tt>: Multistream encoder state to initialize. + * @param Fs <tt>opus_int32</tt>: Sampling rate of the input signal (in Hz). + * This must be one of 8000, 12000, 16000, + * 24000, or 48000. + * @param channels <tt>int</tt>: Number of channels in the input signal. + * This must be at most 255. + * It may be greater than the number of + * coded channels (<code>streams + + * coupled_streams</code>). + * @param streams <tt>int</tt>: The total number of streams to encode from the + * input. + * This must be no more than the number of channels. + * @param coupled_streams <tt>int</tt>: Number of coupled (2 channel) streams + * to encode. + * This must be no larger than the total + * number of streams. + * Additionally, The total number of + * encoded channels (<code>streams + + * coupled_streams</code>) must be no + * more than the number of input channels. + * @param[in] mapping <code>const unsigned char[channels]</code>: Mapping from + * encoded channels to input channels, as described in + * @ref opus_multistream. As an extra constraint, the + * multistream encoder does not allow encoding coupled + * streams for which one channel is unused since this + * is never a good idea. + * @param application <tt>int</tt>: The target encoder application. 
+ * This must be one of the following: + * <dl> + * <dt>#OPUS_APPLICATION_VOIP</dt> + * <dd>Process signal for improved speech intelligibility.</dd> + * <dt>#OPUS_APPLICATION_AUDIO</dt> + * <dd>Favor faithfulness to the original input.</dd> + * <dt>#OPUS_APPLICATION_RESTRICTED_LOWDELAY</dt> + * <dd>Configure the minimum possible coding delay by disabling certain modes + * of operation.</dd> + * </dl> + * @returns #OPUS_OK on success, or an error code (see @ref opus_errorcodes) + * on failure. + */ +OPUS_EXPORT int opus_multistream_encoder_init( + OpusMSEncoder *st, + opus_int32 Fs, + int channels, + int streams, + int coupled_streams, + const unsigned char *mapping, + int application +) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(6); + +OPUS_EXPORT int opus_multistream_surround_encoder_init( + OpusMSEncoder *st, + opus_int32 Fs, + int channels, + int mapping_family, + int *streams, + int *coupled_streams, + unsigned char *mapping, + int application +) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(6); + +/** Encodes a multistream Opus frame. + * @param st <tt>OpusMSEncoder*</tt>: Multistream encoder state. + * @param[in] pcm <tt>const opus_int16*</tt>: The input signal as interleaved + * samples. + * This must contain + * <code>frame_size*channels</code> + * samples. + * @param frame_size <tt>int</tt>: Number of samples per channel in the input + * signal. + * This must be an Opus frame size for the + * encoder's sampling rate. + * For example, at 48 kHz the permitted values + * are 120, 240, 480, 960, 1920, and 2880. + * Passing in a duration of less than 10 ms + * (480 samples at 48 kHz) will prevent the + * encoder from using the LPC or hybrid modes. + * @param[out] data <tt>unsigned char*</tt>: Output payload. + * This must contain storage for at + * least \a max_data_bytes. + * @param [in] max_data_bytes <tt>opus_int32</tt>: Size of the allocated + * memory for the output + * payload. 
This may be + * used to impose an upper limit on + * the instant bitrate, but should + * not be used as the only bitrate + * control. Use #OPUS_SET_BITRATE to + * control the bitrate. + * @returns The length of the encoded packet (in bytes) on success or a + * negative error code (see @ref opus_errorcodes) on failure. + */ +OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_multistream_encode( + OpusMSEncoder *st, + const opus_int16 *pcm, + int frame_size, + unsigned char *data, + opus_int32 max_data_bytes +) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(2) OPUS_ARG_NONNULL(4); + +/** Encodes a multistream Opus frame from floating point input. + * @param st <tt>OpusMSEncoder*</tt>: Multistream encoder state. + * @param[in] pcm <tt>const float*</tt>: The input signal as interleaved + * samples with a normal range of + * +/-1.0. + * Samples with a range beyond +/-1.0 + * are supported but will be clipped by + * decoders using the integer API and + * should only be used if it is known + * that the far end supports extended + * dynamic range. + * This must contain + * <code>frame_size*channels</code> + * samples. + * @param frame_size <tt>int</tt>: Number of samples per channel in the input + * signal. + * This must be an Opus frame size for the + * encoder's sampling rate. + * For example, at 48 kHz the permitted values + * are 120, 240, 480, 960, 1920, and 2880. + * Passing in a duration of less than 10 ms + * (480 samples at 48 kHz) will prevent the + * encoder from using the LPC or hybrid modes. + * @param[out] data <tt>unsigned char*</tt>: Output payload. + * This must contain storage for at + * least \a max_data_bytes. + * @param [in] max_data_bytes <tt>opus_int32</tt>: Size of the allocated + * memory for the output + * payload. This may be + * used to impose an upper limit on + * the instant bitrate, but should + * not be used as the only bitrate + * control. Use #OPUS_SET_BITRATE to + * control the bitrate. 
+ * @returns The length of the encoded packet (in bytes) on success or a + * negative error code (see @ref opus_errorcodes) on failure. + */ +OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_multistream_encode_float( + OpusMSEncoder *st, + const float *pcm, + int frame_size, + unsigned char *data, + opus_int32 max_data_bytes +) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(2) OPUS_ARG_NONNULL(4); + +/** Frees an <code>OpusMSEncoder</code> allocated by + * opus_multistream_encoder_create(). + * @param st <tt>OpusMSEncoder*</tt>: Multistream encoder state to be freed. + */ +OPUS_EXPORT void opus_multistream_encoder_destroy(OpusMSEncoder *st); + +/** Perform a CTL function on a multistream Opus encoder. + * + * Generally the request and subsequent arguments are generated by a + * convenience macro. + * @param st <tt>OpusMSEncoder*</tt>: Multistream encoder state. + * @param request This and all remaining parameters should be replaced by one + * of the convenience macros in @ref opus_genericctls, + * @ref opus_encoderctls, or @ref opus_multistream_ctls. + * @see opus_genericctls + * @see opus_encoderctls + * @see opus_multistream_ctls + */ +OPUS_EXPORT int opus_multistream_encoder_ctl(OpusMSEncoder *st, int request, ...) OPUS_ARG_NONNULL(1); + +/**@}*/ + +/**\name Multistream decoder functions */ +/**@{*/ + +/** Gets the size of an <code>OpusMSDecoder</code> structure. + * @param streams <tt>int</tt>: The total number of streams coded in the + * input. + * This must be no more than 255. + * @param coupled_streams <tt>int</tt>: Number streams to decode as coupled + * (2 channel) streams. + * This must be no larger than the total + * number of streams. + * Additionally, The total number of + * coded channels (<code>streams + + * coupled_streams</code>) must be no + * more than 255. + * @returns The size in bytes on success, or a negative error code + * (see @ref opus_errorcodes) on error. 
+ */ +OPUS_EXPORT OPUS_WARN_UNUSED_RESULT opus_int32 opus_multistream_decoder_get_size( + int streams, + int coupled_streams +); + +/** Allocates and initializes a multistream decoder state. + * Call opus_multistream_decoder_destroy() to release + * this object when finished. + * @param Fs <tt>opus_int32</tt>: Sampling rate to decode at (in Hz). + * This must be one of 8000, 12000, 16000, + * 24000, or 48000. + * @param channels <tt>int</tt>: Number of channels to output. + * This must be at most 255. + * It may be different from the number of coded + * channels (<code>streams + + * coupled_streams</code>). + * @param streams <tt>int</tt>: The total number of streams coded in the + * input. + * This must be no more than 255. + * @param coupled_streams <tt>int</tt>: Number of streams to decode as coupled + * (2 channel) streams. + * This must be no larger than the total + * number of streams. + * Additionally, The total number of + * coded channels (<code>streams + + * coupled_streams</code>) must be no + * more than 255. + * @param[in] mapping <code>const unsigned char[channels]</code>: Mapping from + * coded channels to output channels, as described in + * @ref opus_multistream. + * @param[out] error <tt>int *</tt>: Returns #OPUS_OK on success, or an error + * code (see @ref opus_errorcodes) on + * failure. + */ +OPUS_EXPORT OPUS_WARN_UNUSED_RESULT OpusMSDecoder *opus_multistream_decoder_create( + opus_int32 Fs, + int channels, + int streams, + int coupled_streams, + const unsigned char *mapping, + int *error +) OPUS_ARG_NONNULL(5); + +/** Initialize a previously allocated decoder state object. + * The memory pointed to by \a st must be at least the size returned by + * opus_multistream_decoder_get_size(). + * This is intended for applications which use their own allocator instead of + * malloc. + * To reset a previously initialized state, use the #OPUS_RESET_STATE CTL. 
+ * @see opus_multistream_decoder_create + * @see opus_multistream_decoder_get_size + * @param st <tt>OpusMSDecoder*</tt>: Multistream decoder state to initialize. + * @param Fs <tt>opus_int32</tt>: Sampling rate to decode at (in Hz). + * This must be one of 8000, 12000, 16000, + * 24000, or 48000. + * @param channels <tt>int</tt>: Number of channels to output. + * This must be at most 255. + * It may be different from the number of coded + * channels (<code>streams + + * coupled_streams</code>). + * @param streams <tt>int</tt>: The total number of streams coded in the + * input. + * This must be no more than 255. + * @param coupled_streams <tt>int</tt>: Number of streams to decode as coupled + * (2 channel) streams. + * This must be no larger than the total + * number of streams. + * Additionally, The total number of + * coded channels (<code>streams + + * coupled_streams</code>) must be no + * more than 255. + * @param[in] mapping <code>const unsigned char[channels]</code>: Mapping from + * coded channels to output channels, as described in + * @ref opus_multistream. + * @returns #OPUS_OK on success, or an error code (see @ref opus_errorcodes) + * on failure. + */ +OPUS_EXPORT int opus_multistream_decoder_init( + OpusMSDecoder *st, + opus_int32 Fs, + int channels, + int streams, + int coupled_streams, + const unsigned char *mapping +) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(6); + +/** Decode a multistream Opus packet. + * @param st <tt>OpusMSDecoder*</tt>: Multistream decoder state. + * @param[in] data <tt>const unsigned char*</tt>: Input payload. + * Use a <code>NULL</code> + * pointer to indicate packet + * loss. + * @param len <tt>opus_int32</tt>: Number of bytes in payload. + * @param[out] pcm <tt>opus_int16*</tt>: Output signal, with interleaved + * samples. + * This must contain room for + * <code>frame_size*channels</code> + * samples. + * @param frame_size <tt>int</tt>: The number of samples per channel of + * available space in \a pcm. 
+ * If this is less than the maximum packet duration + * (120 ms; 5760 for 48kHz), this function will not be capable + * of decoding some packets. In the case of PLC (data==NULL) + * or FEC (decode_fec=1), then frame_size needs to be exactly + * the duration of audio that is missing, otherwise the + * decoder will not be in the optimal state to decode the + * next incoming packet. For the PLC and FEC cases, frame_size + * <b>must</b> be a multiple of 2.5 ms. + * @param decode_fec <tt>int</tt>: Flag (0 or 1) to request that any in-band + * forward error correction data be decoded. + * If no such data is available, the frame is + * decoded as if it were lost. + * @returns Number of samples decoded on success or a negative error code + * (see @ref opus_errorcodes) on failure. + */ +OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_multistream_decode( + OpusMSDecoder *st, + const unsigned char *data, + opus_int32 len, + opus_int16 *pcm, + int frame_size, + int decode_fec +) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(4); + +/** Decode a multistream Opus packet with floating point output. + * @param st <tt>OpusMSDecoder*</tt>: Multistream decoder state. + * @param[in] data <tt>const unsigned char*</tt>: Input payload. + * Use a <code>NULL</code> + * pointer to indicate packet + * loss. + * @param len <tt>opus_int32</tt>: Number of bytes in payload. + * @param[out] pcm <tt>float*</tt>: Output signal, with interleaved + * samples. + * This must contain room for + * <code>frame_size*channels</code> + * samples. + * @param frame_size <tt>int</tt>: The number of samples per channel of + * available space in \a pcm. + * If this is less than the maximum packet duration + * (120 ms; 5760 for 48kHz), this function will not be capable + * of decoding some packets. 
In the case of PLC (data==NULL) + * or FEC (decode_fec=1), then frame_size needs to be exactly + * the duration of audio that is missing, otherwise the + * decoder will not be in the optimal state to decode the + * next incoming packet. For the PLC and FEC cases, frame_size + * <b>must</b> be a multiple of 2.5 ms. + * @param decode_fec <tt>int</tt>: Flag (0 or 1) to request that any in-band + * forward error correction data be decoded. + * If no such data is available, the frame is + * decoded as if it were lost. + * @returns Number of samples decoded on success or a negative error code + * (see @ref opus_errorcodes) on failure. + */ +OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_multistream_decode_float( + OpusMSDecoder *st, + const unsigned char *data, + opus_int32 len, + float *pcm, + int frame_size, + int decode_fec +) OPUS_ARG_NONNULL(1) OPUS_ARG_NONNULL(4); + +/** Perform a CTL function on a multistream Opus decoder. + * + * Generally the request and subsequent arguments are generated by a + * convenience macro. + * @param st <tt>OpusMSDecoder*</tt>: Multistream decoder state. + * @param request This and all remaining parameters should be replaced by one + * of the convenience macros in @ref opus_genericctls, + * @ref opus_decoderctls, or @ref opus_multistream_ctls. + * @see opus_genericctls + * @see opus_decoderctls + * @see opus_multistream_ctls + */ +OPUS_EXPORT int opus_multistream_decoder_ctl(OpusMSDecoder *st, int request, ...) OPUS_ARG_NONNULL(1); + +/** Frees an <code>OpusMSDecoder</code> allocated by + * opus_multistream_decoder_create(). + * @param st <tt>OpusMSDecoder</tt>: Multistream decoder state to be freed. 
+ */ +OPUS_EXPORT void opus_multistream_decoder_destroy(OpusMSDecoder *st); + +/**@}*/ + +/**@}*/ + +#ifdef __cplusplus +} +#endif + +#endif /* OPUS_MULTISTREAM_H */ diff --git a/drivers/opus/opus_multistream_decoder.c b/drivers/opus/opus_multistream_decoder.c new file mode 100644 index 0000000000..64a0c24067 --- /dev/null +++ b/drivers/opus/opus_multistream_decoder.c @@ -0,0 +1,537 @@ +/* Copyright (c) 2011 Xiph.Org Foundation + Written by Jean-Marc Valin */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "opus_multistream.h" +#include "opus.h" +#include "opus_private.h" +#include "stack_alloc.h" +#include <stdarg.h> +#include "float_cast.h" +#include "os_support.h" + +struct OpusMSDecoder { + ChannelLayout layout; + /* Decoder states go here */ +}; + + + + +/* DECODER */ + +opus_int32 opus_multistream_decoder_get_size(int nb_streams, int nb_coupled_streams) +{ + int coupled_size; + int mono_size; + + if(nb_streams<1||nb_coupled_streams>nb_streams||nb_coupled_streams<0)return 0; + coupled_size = opus_decoder_get_size(2); + mono_size = opus_decoder_get_size(1); + return align(sizeof(OpusMSDecoder)) + + nb_coupled_streams * align(coupled_size) + + (nb_streams-nb_coupled_streams) * align(mono_size); +} + +int opus_multistream_decoder_init( + OpusMSDecoder *st, + opus_int32 Fs, + int channels, + int streams, + int coupled_streams, + const unsigned char *mapping +) +{ + int coupled_size; + int mono_size; + int i, ret; + char *ptr; + + if ((channels>255) || (channels<1) || (coupled_streams>streams) || + (coupled_streams+streams>255) || (streams<1) || (coupled_streams<0)) + return OPUS_BAD_ARG; + + st->layout.nb_channels = channels; + st->layout.nb_streams = streams; + st->layout.nb_coupled_streams = coupled_streams; + + for (i=0;i<st->layout.nb_channels;i++) + st->layout.mapping[i] = mapping[i]; + if (!validate_layout(&st->layout)) + return OPUS_BAD_ARG; + + ptr = (char*)st + align(sizeof(OpusMSDecoder)); + coupled_size = opus_decoder_get_size(2); + mono_size = opus_decoder_get_size(1); + + for (i=0;i<st->layout.nb_coupled_streams;i++) + { + ret=opus_decoder_init((OpusDecoder*)ptr, Fs, 2); + if(ret!=OPUS_OK)return ret; + ptr += align(coupled_size); + } + for (;i<st->layout.nb_streams;i++) + { + ret=opus_decoder_init((OpusDecoder*)ptr, Fs, 1); + if(ret!=OPUS_OK)return ret; + ptr += align(mono_size); + } + return OPUS_OK; +} + + +OpusMSDecoder *opus_multistream_decoder_create( + opus_int32 Fs, + 
int channels, + int streams, + int coupled_streams, + const unsigned char *mapping, + int *error +) +{ + int ret; + OpusMSDecoder *st; + if ((channels>255) || (channels<1) || (coupled_streams>streams) || + (coupled_streams+streams>255) || (streams<1) || (coupled_streams<0)) + { + if (error) + *error = OPUS_BAD_ARG; + return NULL; + } + st = (OpusMSDecoder *)opus_alloc(opus_multistream_decoder_get_size(streams, coupled_streams)); + if (st==NULL) + { + if (error) + *error = OPUS_ALLOC_FAIL; + return NULL; + } + ret = opus_multistream_decoder_init(st, Fs, channels, streams, coupled_streams, mapping); + if (error) + *error = ret; + if (ret != OPUS_OK) + { + opus_free(st); + st = NULL; + } + return st; +} + +typedef void (*opus_copy_channel_out_func)( + void *dst, + int dst_stride, + int dst_channel, + const opus_val16 *src, + int src_stride, + int frame_size +); + +static int opus_multistream_packet_validate(const unsigned char *data, + opus_int32 len, int nb_streams, opus_int32 Fs) +{ + int s; + int count; + unsigned char toc; + opus_int16 size[48]; + int samples=0; + opus_int32 packet_offset; + + for (s=0;s<nb_streams;s++) + { + int tmp_samples; + if (len<=0) + return OPUS_INVALID_PACKET; + count = opus_packet_parse_impl(data, len, s!=nb_streams-1, &toc, NULL, + size, NULL, &packet_offset); + if (count<0) + return count; + tmp_samples = opus_packet_get_nb_samples(data, packet_offset, Fs); + if (s!=0 && samples != tmp_samples) + return OPUS_INVALID_PACKET; + samples = tmp_samples; + data += packet_offset; + len -= packet_offset; + } + return samples; +} + +static int opus_multistream_decode_native( + OpusMSDecoder *st, + const unsigned char *data, + opus_int32 len, + void *pcm, + opus_copy_channel_out_func copy_channel_out, + int frame_size, + int decode_fec, + int soft_clip +) +{ + opus_int32 Fs; + int coupled_size; + int mono_size; + int s, c; + char *ptr; + int do_plc=0; + VARDECL(opus_val16, buf); + ALLOC_STACK; + + /* Limit frame_size to avoid excessive stack 
allocations. */ + opus_multistream_decoder_ctl(st, OPUS_GET_SAMPLE_RATE(&Fs)); + /* A non-positive frame_size would ask ALLOC() below for a zero or negative sized stack buffer; refuse it before any allocation happens. */ + if (frame_size <= 0) + { + RESTORE_STACK; + return OPUS_BAD_ARG; + } + frame_size = IMIN(frame_size, Fs/25*3); + ALLOC(buf, 2*frame_size, opus_val16); + ptr = (char*)st + align(sizeof(OpusMSDecoder)); + coupled_size = opus_decoder_get_size(2); + mono_size = opus_decoder_get_size(1); + + if (len==0) + do_plc = 1; + if (len < 0) + { + RESTORE_STACK; + return OPUS_BAD_ARG; + } + if (!do_plc && len < 2*st->layout.nb_streams-1) + { + RESTORE_STACK; + return OPUS_INVALID_PACKET; + } + if (!do_plc) + { + int ret = opus_multistream_packet_validate(data, len, st->layout.nb_streams, Fs); + if (ret < 0) + { + RESTORE_STACK; + return ret; + } else if (ret > frame_size) + { + RESTORE_STACK; + return OPUS_BUFFER_TOO_SMALL; + } + } + for (s=0;s<st->layout.nb_streams;s++) + { + OpusDecoder *dec; + int packet_offset, ret; + + dec = (OpusDecoder*)ptr; + ptr += (s < st->layout.nb_coupled_streams) ? align(coupled_size) : align(mono_size); + + if (!do_plc && len<=0) + { + RESTORE_STACK; + return OPUS_INTERNAL_ERROR; + } + packet_offset = 0; + ret = opus_decode_native(dec, data, len, buf, frame_size, decode_fec, s!=st->layout.nb_streams-1, &packet_offset, soft_clip); + data += packet_offset; + len -= packet_offset; + if (ret <= 0) + { + RESTORE_STACK; + return ret; + } + frame_size = ret; + if (s < st->layout.nb_coupled_streams) + { + int chan, prev; + prev = -1; + /* Copy "left" audio to the channel(s) where it belongs */ + while ( (chan = get_left_channel(&st->layout, s, prev)) != -1) + { + (*copy_channel_out)(pcm, st->layout.nb_channels, chan, + buf, 2, frame_size); + prev = chan; + } + prev = -1; + /* Copy "right" audio to the channel(s) where it belongs */ + while ( (chan = get_right_channel(&st->layout, s, prev)) != -1) + { + (*copy_channel_out)(pcm, st->layout.nb_channels, chan, + buf+1, 2, frame_size); + prev = chan; + } + } else { + int chan, prev; + prev = -1; + /* Copy audio to the channel(s) where it belongs */ + while ( (chan = get_mono_channel(&st->layout, s, 
prev)) != -1) + { + (*copy_channel_out)(pcm, st->layout.nb_channels, chan, + buf, 1, frame_size); + prev = chan; + } + } + } + /* Handle muted channels */ + for (c=0;c<st->layout.nb_channels;c++) + { + if (st->layout.mapping[c] == 255) + { + (*copy_channel_out)(pcm, st->layout.nb_channels, c, + NULL, 0, frame_size); + } + } + RESTORE_STACK; + return frame_size; +} + +#if !defined(DISABLE_FLOAT_API) +static void opus_copy_channel_out_float( + void *dst, + int dst_stride, + int dst_channel, + const opus_val16 *src, + int src_stride, + int frame_size +) +{ + float *float_dst; + opus_int32 i; + float_dst = (float*)dst; + if (src != NULL) + { + for (i=0;i<frame_size;i++) +#if defined(OPUS_FIXED_POINT) + float_dst[i*dst_stride+dst_channel] = (1/32768.f)*src[i*src_stride]; +#else + float_dst[i*dst_stride+dst_channel] = src[i*src_stride]; +#endif + } + else + { + for (i=0;i<frame_size;i++) + float_dst[i*dst_stride+dst_channel] = 0; + } +} +#endif + +static void opus_copy_channel_out_short( + void *dst, + int dst_stride, + int dst_channel, + const opus_val16 *src, + int src_stride, + int frame_size +) +{ + opus_int16 *short_dst; + opus_int32 i; + short_dst = (opus_int16*)dst; + if (src != NULL) + { + for (i=0;i<frame_size;i++) +#if defined(OPUS_FIXED_POINT) + short_dst[i*dst_stride+dst_channel] = src[i*src_stride]; +#else + short_dst[i*dst_stride+dst_channel] = FLOAT2INT16(src[i*src_stride]); +#endif + } + else + { + for (i=0;i<frame_size;i++) + short_dst[i*dst_stride+dst_channel] = 0; + } +} + + + +#ifdef OPUS_FIXED_POINT +int opus_multistream_decode( + OpusMSDecoder *st, + const unsigned char *data, + opus_int32 len, + opus_int16 *pcm, + int frame_size, + int decode_fec +) +{ + return opus_multistream_decode_native(st, data, len, + pcm, opus_copy_channel_out_short, frame_size, decode_fec, 0); +} + +#ifndef DISABLE_FLOAT_API +int opus_multistream_decode_float(OpusMSDecoder *st, const unsigned char *data, + opus_int32 len, float *pcm, int frame_size, int decode_fec) +{ + 
return opus_multistream_decode_native(st, data, len, + pcm, opus_copy_channel_out_float, frame_size, decode_fec, 0); +} +#endif + +#else + +int opus_multistream_decode(OpusMSDecoder *st, const unsigned char *data, + opus_int32 len, opus_int16 *pcm, int frame_size, int decode_fec) +{ + return opus_multistream_decode_native(st, data, len, + pcm, opus_copy_channel_out_short, frame_size, decode_fec, 1); +} + +int opus_multistream_decode_float( + OpusMSDecoder *st, + const unsigned char *data, + opus_int32 len, + float *pcm, + int frame_size, + int decode_fec +) +{ + return opus_multistream_decode_native(st, data, len, + pcm, opus_copy_channel_out_float, frame_size, decode_fec, 0); +} +#endif + +int opus_multistream_decoder_ctl(OpusMSDecoder *st, int request, ...) +{ + va_list ap; + int coupled_size, mono_size; + char *ptr; + int ret = OPUS_OK; + + va_start(ap, request); + + coupled_size = opus_decoder_get_size(2); + mono_size = opus_decoder_get_size(1); + ptr = (char*)st + align(sizeof(OpusMSDecoder)); + switch (request) + { + case OPUS_GET_BANDWIDTH_REQUEST: + case OPUS_GET_SAMPLE_RATE_REQUEST: + case OPUS_GET_GAIN_REQUEST: + case OPUS_GET_LAST_PACKET_DURATION_REQUEST: + { + OpusDecoder *dec; + /* For int32* GET params, just query the first stream */ + opus_int32 *value = va_arg(ap, opus_int32*); + dec = (OpusDecoder*)ptr; + ret = opus_decoder_ctl(dec, request, value); + } + break; + case OPUS_GET_FINAL_RANGE_REQUEST: + { + int s; + opus_uint32 *value = va_arg(ap, opus_uint32*); + opus_uint32 tmp; + if (!value) + { + goto bad_arg; + } + *value = 0; + for (s=0;s<st->layout.nb_streams;s++) + { + OpusDecoder *dec; + dec = (OpusDecoder*)ptr; + if (s < st->layout.nb_coupled_streams) + ptr += align(coupled_size); + else + ptr += align(mono_size); + ret = opus_decoder_ctl(dec, request, &tmp); + if (ret != OPUS_OK) break; + *value ^= tmp; + } + } + break; + case OPUS_RESET_STATE: + { + int s; + for (s=0;s<st->layout.nb_streams;s++) + { + OpusDecoder *dec; + + dec = 
(OpusDecoder*)ptr; + if (s < st->layout.nb_coupled_streams) + ptr += align(coupled_size); + else + ptr += align(mono_size); + ret = opus_decoder_ctl(dec, OPUS_RESET_STATE); + if (ret != OPUS_OK) + break; + } + } + break; + case OPUS_MULTISTREAM_GET_DECODER_STATE_REQUEST: + { + int s; + opus_int32 stream_id; + OpusDecoder **value; + stream_id = va_arg(ap, opus_int32); + if (stream_id<0 || stream_id >= st->layout.nb_streams) + goto bad_arg; /* out-of-range id: fail now so *value is never set to a pointer past the last decoder state */ + value = va_arg(ap, OpusDecoder**); + if (!value) + { + goto bad_arg; + } + for (s=0;s<stream_id;s++) + { + if (s < st->layout.nb_coupled_streams) + ptr += align(coupled_size); + else + ptr += align(mono_size); + } + *value = (OpusDecoder*)ptr; + } + break; + case OPUS_SET_GAIN_REQUEST: + { + int s; + /* This works for int32 params */ + opus_int32 value = va_arg(ap, opus_int32); + for (s=0;s<st->layout.nb_streams;s++) + { + OpusDecoder *dec; + + dec = (OpusDecoder*)ptr; + if (s < st->layout.nb_coupled_streams) + ptr += align(coupled_size); + else + ptr += align(mono_size); + ret = opus_decoder_ctl(dec, request, value); + if (ret != OPUS_OK) + break; + } + } + break; + default: + ret = OPUS_UNIMPLEMENTED; + break; + } + + va_end(ap); + return ret; +bad_arg: + va_end(ap); + return OPUS_BAD_ARG; +} + + +void opus_multistream_decoder_destroy(OpusMSDecoder *st) +{ + opus_free(st); +} diff --git a/drivers/opus/opus_multistream_encoder.c b/drivers/opus/opus_multistream_encoder.c new file mode 100644 index 0000000000..8d559743ea --- /dev/null +++ b/drivers/opus/opus_multistream_encoder.c @@ -0,0 +1,1174 @@ +/* Copyright (c) 2011 Xiph.Org Foundation + Written by Jean-Marc Valin */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "opus_multistream.h" +#include "opus.h" +#include "opus_private.h" +#include "stack_alloc.h" +#include <stdarg.h> +#include "float_cast.h" +#include "os_support.h" +#include "mathops.h" +#include "mdct.h" +#include "opus_modes.h" +#include "bands.h" +#include "quant_bands.h" + +typedef struct { + int nb_streams; + int nb_coupled_streams; + unsigned char mapping[8]; +} VorbisLayout; + +/* Index is nb_channel-1*/ +static const VorbisLayout vorbis_mappings[8] = { + {1, 0, {0}}, /* 1: mono */ + {1, 1, {0, 1}}, /* 2: stereo */ + {2, 1, {0, 2, 1}}, /* 3: 1-d surround */ + {2, 2, {0, 1, 2, 3}}, /* 4: quadraphonic surround */ + {3, 2, {0, 4, 1, 2, 3}}, /* 5: 5-channel surround */ + {4, 2, {0, 4, 1, 2, 3, 5}}, /* 6: 5.1 surround */ + {4, 3, {0, 4, 1, 2, 3, 5, 6}}, /* 7: 6.1 surround */ + {5, 3, {0, 6, 1, 2, 3, 4, 5, 7}}, /* 8: 7.1 surround */ +}; + +typedef void (*opus_copy_channel_in_func)( + opus_val16 *dst, + int 
dst_stride, + const void *src, + int src_stride, + int src_channel, + int frame_size +); + +struct OpusMSEncoder { + ChannelLayout layout; + int lfe_stream; + int application; + int variable_duration; + int surround; + opus_int32 bitrate_bps; + float subframe_mem[3]; + /* Encoder states go here */ + /* then opus_val32 window_mem[channels*120]; */ + /* then opus_val32 preemph_mem[channels]; */ +}; + +static opus_val32 *ms_get_preemph_mem(OpusMSEncoder *st) +{ + int s; + char *ptr; + int coupled_size, mono_size; + + coupled_size = opus_encoder_get_size(2); + mono_size = opus_encoder_get_size(1); + ptr = (char*)st + align(sizeof(OpusMSEncoder)); + for (s=0;s<st->layout.nb_streams;s++) + { + if (s < st->layout.nb_coupled_streams) + ptr += align(coupled_size); + else + ptr += align(mono_size); + } + return (opus_val32*)(ptr+st->layout.nb_channels*120*sizeof(opus_val32)); +} + +static opus_val32 *ms_get_window_mem(OpusMSEncoder *st) +{ + int s; + char *ptr; + int coupled_size, mono_size; + + coupled_size = opus_encoder_get_size(2); + mono_size = opus_encoder_get_size(1); + ptr = (char*)st + align(sizeof(OpusMSEncoder)); + for (s=0;s<st->layout.nb_streams;s++) + { + if (s < st->layout.nb_coupled_streams) + ptr += align(coupled_size); + else + ptr += align(mono_size); + } + return (opus_val32*)ptr; +} + +static int validate_encoder_layout(const ChannelLayout *layout) +{ + int s; + for (s=0;s<layout->nb_streams;s++) + { + if (s < layout->nb_coupled_streams) + { + if (get_left_channel(layout, s, -1)==-1) + return 0; + if (get_right_channel(layout, s, -1)==-1) + return 0; + } else { + if (get_mono_channel(layout, s, -1)==-1) + return 0; + } + } + return 1; +} + +static void channel_pos(int channels, int pos[8]) +{ + /* Position in the mix: 0 don't mix, 1: left, 2: center, 3:right */ + if (channels==4) + { + pos[0]=1; + pos[1]=3; + pos[2]=1; + pos[3]=3; + } else if (channels==3||channels==5||channels==6) + { + pos[0]=1; + pos[1]=2; + pos[2]=3; + pos[3]=1; + pos[4]=3; + 
pos[5]=0; + } else if (channels==7) + { + pos[0]=1; + pos[1]=2; + pos[2]=3; + pos[3]=1; + pos[4]=3; + pos[5]=2; + pos[6]=0; + } else if (channels==8) + { + pos[0]=1; + pos[1]=2; + pos[2]=3; + pos[3]=1; + pos[4]=3; + pos[5]=1; + pos[6]=3; + pos[7]=0; + } +} + +#if 1 +/* Computes a rough approximation of log2(2^a + 2^b) */ +static opus_val16 logSum(opus_val16 a, opus_val16 b) +{ + opus_val16 max; + opus_val32 diff; + opus_val16 frac; + static const opus_val16 diff_table[17] = { + QCONST16(0.5000000f, DB_SHIFT), QCONST16(0.2924813f, DB_SHIFT), QCONST16(0.1609640f, DB_SHIFT), QCONST16(0.0849625f, DB_SHIFT), + QCONST16(0.0437314f, DB_SHIFT), QCONST16(0.0221971f, DB_SHIFT), QCONST16(0.0111839f, DB_SHIFT), QCONST16(0.0056136f, DB_SHIFT), + QCONST16(0.0028123f, DB_SHIFT) + }; + int low; + if (a>b) + { + max = a; + diff = SUB32(EXTEND32(a),EXTEND32(b)); + } else { + max = b; + diff = SUB32(EXTEND32(b),EXTEND32(a)); + } + if (diff >= QCONST16(8.f, DB_SHIFT)) + return max; +#ifdef OPUS_FIXED_POINT + low = SHR32(diff, DB_SHIFT-1); + frac = SHL16(diff - SHL16(low, DB_SHIFT-1), 16-DB_SHIFT); +#else + low = (int)floor(2*diff); + frac = 2*diff - low; +#endif + return max + diff_table[low] + MULT16_16_Q15(frac, SUB16(diff_table[low+1], diff_table[low])); +} +#else +opus_val16 logSum(opus_val16 a, opus_val16 b) +{ + return log2(pow(4, a)+ pow(4, b))/2; +} +#endif + +void surround_analysis(const CELTMode *celt_mode, const void *pcm, opus_val16 *bandLogE, opus_val32 *mem, opus_val32 *preemph_mem, + int len, int overlap, int channels, int rate, opus_copy_channel_in_func copy_channel_in +) +{ + int c; + int i; + int LM; + int pos[8] = {0}; + int upsample; + int frame_size; + opus_val16 channel_offset; + opus_val32 bandE[21]; + opus_val16 maskLogE[3][21]; + VARDECL(opus_val32, in); + VARDECL(opus_val16, x); + VARDECL(opus_val32, freq); + SAVE_STACK; + + upsample = resampling_factor(rate); + frame_size = len*upsample; + + for (LM=0;LM<celt_mode->maxLM;LM++) + if 
(celt_mode->shortMdctSize<<LM==frame_size) + break; + + ALLOC(in, frame_size+overlap, opus_val32); + ALLOC(x, len, opus_val16); + ALLOC(freq, frame_size, opus_val32); + + channel_pos(channels, pos); + + for (c=0;c<3;c++) + for (i=0;i<21;i++) + maskLogE[c][i] = -QCONST16(28.f, DB_SHIFT); + + for (c=0;c<channels;c++) + { + OPUS_COPY(in, mem+c*overlap, overlap); + (*copy_channel_in)(x, 1, pcm, channels, c, len); + celt_preemphasis(x, in+overlap, frame_size, 1, upsample, celt_mode->preemph, preemph_mem+c, 0); + clt_mdct_forward(&celt_mode->mdct, in, freq, celt_mode->window, overlap, celt_mode->maxLM-LM, 1); + if (upsample != 1) + { + int bound = len; + for (i=0;i<bound;i++) + freq[i] *= upsample; + for (;i<frame_size;i++) + freq[i] = 0; + } + + compute_band_energies(celt_mode, freq, bandE, 21, 1, 1<<LM); + amp2Log2(celt_mode, 21, 21, bandE, bandLogE+21*c, 1); + /* Apply spreading function with -6 dB/band going up and -12 dB/band going down. */ + for (i=1;i<21;i++) + bandLogE[21*c+i] = MAX16(bandLogE[21*c+i], bandLogE[21*c+i-1]-QCONST16(1.f, DB_SHIFT)); + for (i=19;i>=0;i--) + bandLogE[21*c+i] = MAX16(bandLogE[21*c+i], bandLogE[21*c+i+1]-QCONST16(2.f, DB_SHIFT)); + if (pos[c]==1) + { + for (i=0;i<21;i++) + maskLogE[0][i] = logSum(maskLogE[0][i], bandLogE[21*c+i]); + } else if (pos[c]==3) + { + for (i=0;i<21;i++) + maskLogE[2][i] = logSum(maskLogE[2][i], bandLogE[21*c+i]); + } else if (pos[c]==2) + { + for (i=0;i<21;i++) + { + maskLogE[0][i] = logSum(maskLogE[0][i], bandLogE[21*c+i]-QCONST16(.5f, DB_SHIFT)); + maskLogE[2][i] = logSum(maskLogE[2][i], bandLogE[21*c+i]-QCONST16(.5f, DB_SHIFT)); + } + } +#if 0 + for (i=0;i<21;i++) + printf("%f ", bandLogE[21*c+i]); + float sum=0; + for (i=0;i<21;i++) + sum += bandLogE[21*c+i]; + printf("%f ", sum/21); +#endif + OPUS_COPY(mem+c*overlap, in+frame_size, overlap); + } + for (i=0;i<21;i++) + maskLogE[1][i] = MIN32(maskLogE[0][i],maskLogE[2][i]); + channel_offset = HALF16(celt_log2(QCONST32(2.f,14)/(channels-1))); + for 
(c=0;c<3;c++) + for (i=0;i<21;i++) + maskLogE[c][i] += channel_offset; +#if 0 + for (c=0;c<3;c++) + { + for (i=0;i<21;i++) + printf("%f ", maskLogE[c][i]); + } +#endif + for (c=0;c<channels;c++) + { + opus_val16 *mask; + if (pos[c]!=0) + { + mask = &maskLogE[pos[c]-1][0]; + for (i=0;i<21;i++) + bandLogE[21*c+i] = bandLogE[21*c+i] - mask[i]; + } else { + for (i=0;i<21;i++) + bandLogE[21*c+i] = 0; + } +#if 0 + for (i=0;i<21;i++) + printf("%f ", bandLogE[21*c+i]); + printf("\n"); +#endif +#if 0 + float sum=0; + for (i=0;i<21;i++) + sum += bandLogE[21*c+i]; + printf("%f ", sum/(float)QCONST32(21.f, DB_SHIFT)); + printf("\n"); +#endif + } + RESTORE_STACK; +} + +opus_int32 opus_multistream_encoder_get_size(int nb_streams, int nb_coupled_streams) +{ + int coupled_size; + int mono_size; + + if(nb_streams<1||nb_coupled_streams>nb_streams||nb_coupled_streams<0)return 0; + coupled_size = opus_encoder_get_size(2); + mono_size = opus_encoder_get_size(1); + return align(sizeof(OpusMSEncoder)) + + nb_coupled_streams * align(coupled_size) + + (nb_streams-nb_coupled_streams) * align(mono_size); +} + +opus_int32 opus_multistream_surround_encoder_get_size(int channels, int mapping_family) +{ + int nb_streams; + int nb_coupled_streams; + opus_int32 size; + + if (mapping_family==0) + { + if (channels==1) + { + nb_streams=1; + nb_coupled_streams=0; + } else if (channels==2) + { + nb_streams=1; + nb_coupled_streams=1; + } else + return 0; + } else if (mapping_family==1 && channels<=8 && channels>=1) + { + nb_streams=vorbis_mappings[channels-1].nb_streams; + nb_coupled_streams=vorbis_mappings[channels-1].nb_coupled_streams; + } else if (mapping_family==255) + { + nb_streams=channels; + nb_coupled_streams=0; + } else + return 0; + size = opus_multistream_encoder_get_size(nb_streams, nb_coupled_streams); + if (channels>2) + { + size += channels*(120*sizeof(opus_val32) + sizeof(opus_val32)); + } + return size; +} + + +static int opus_multistream_encoder_init_impl( + OpusMSEncoder *st, + 
opus_int32 Fs, + int channels, + int streams, + int coupled_streams, + const unsigned char *mapping, + int application, + int surround +) +{ + int coupled_size; + int mono_size; + int i, ret; + char *ptr; + + if ((channels>255) || (channels<1) || (coupled_streams>streams) || + (coupled_streams+streams>255) || (streams<1) || (coupled_streams<0)) + return OPUS_BAD_ARG; + + st->layout.nb_channels = channels; + st->layout.nb_streams = streams; + st->layout.nb_coupled_streams = coupled_streams; + st->subframe_mem[0]=st->subframe_mem[1]=st->subframe_mem[2]=0; + if (!surround) + st->lfe_stream = -1; + st->bitrate_bps = OPUS_AUTO; + st->application = application; + st->variable_duration = OPUS_FRAMESIZE_ARG; + for (i=0;i<st->layout.nb_channels;i++) + st->layout.mapping[i] = mapping[i]; + if (!validate_layout(&st->layout) || !validate_encoder_layout(&st->layout)) + return OPUS_BAD_ARG; + ptr = (char*)st + align(sizeof(OpusMSEncoder)); + coupled_size = opus_encoder_get_size(2); + mono_size = opus_encoder_get_size(1); + + for (i=0;i<st->layout.nb_coupled_streams;i++) + { + ret = opus_encoder_init((OpusEncoder*)ptr, Fs, 2, application); + if(ret!=OPUS_OK)return ret; + if (i==st->lfe_stream) + opus_encoder_ctl((OpusEncoder*)ptr, OPUS_SET_LFE(1)); + ptr += align(coupled_size); + } + for (;i<st->layout.nb_streams;i++) + { + ret = opus_encoder_init((OpusEncoder*)ptr, Fs, 1, application); + if(ret!=OPUS_OK)return ret; /* check the init result first, as the coupled loop does, so no ctl is issued on a state that failed to initialise */ + if (i==st->lfe_stream) + opus_encoder_ctl((OpusEncoder*)ptr, OPUS_SET_LFE(1)); + ptr += align(mono_size); + } + if (surround) + { + OPUS_CLEAR(ms_get_preemph_mem(st), channels); + OPUS_CLEAR(ms_get_window_mem(st), channels*120); + } + st->surround = surround; + return OPUS_OK; +} + +int opus_multistream_encoder_init( + OpusMSEncoder *st, + opus_int32 Fs, + int channels, + int streams, + int coupled_streams, + const unsigned char *mapping, + int application +) +{ + return opus_multistream_encoder_init_impl(st, Fs, channels, streams, coupled_streams, mapping, 
application, 0); +} + +int opus_multistream_surround_encoder_init( + OpusMSEncoder *st, + opus_int32 Fs, + int channels, + int mapping_family, + int *streams, + int *coupled_streams, + unsigned char *mapping, + int application +) +{ + if ((channels>255) || (channels<1)) + return OPUS_BAD_ARG; + st->lfe_stream = -1; + if (mapping_family==0) + { + if (channels==1) + { + *streams=1; + *coupled_streams=0; + mapping[0]=0; + } else if (channels==2) + { + *streams=1; + *coupled_streams=1; + mapping[0]=0; + mapping[1]=1; + } else + return OPUS_UNIMPLEMENTED; + } else if (mapping_family==1 && channels<=8 && channels>=1) + { + int i; + *streams=vorbis_mappings[channels-1].nb_streams; + *coupled_streams=vorbis_mappings[channels-1].nb_coupled_streams; + for (i=0;i<channels;i++) + mapping[i] = vorbis_mappings[channels-1].mapping[i]; + if (channels>=6) + st->lfe_stream = *streams-1; + } else if (mapping_family==255) + { + int i; + *streams=channels; + *coupled_streams=0; + for(i=0;i<channels;i++) + mapping[i] = i; + } else + return OPUS_UNIMPLEMENTED; + return opus_multistream_encoder_init_impl(st, Fs, channels, *streams, *coupled_streams, + mapping, application, channels>2&&mapping_family==1); +} + +OpusMSEncoder *opus_multistream_encoder_create( + opus_int32 Fs, + int channels, + int streams, + int coupled_streams, + const unsigned char *mapping, + int application, + int *error +) +{ + int ret; + OpusMSEncoder *st; + if ((channels>255) || (channels<1) || (coupled_streams>streams) || + (coupled_streams+streams>255) || (streams<1) || (coupled_streams<0)) + { + if (error) + *error = OPUS_BAD_ARG; + return NULL; + } + st = (OpusMSEncoder *)opus_alloc(opus_multistream_encoder_get_size(streams, coupled_streams)); + if (st==NULL) + { + if (error) + *error = OPUS_ALLOC_FAIL; + return NULL; + } + ret = opus_multistream_encoder_init(st, Fs, channels, streams, coupled_streams, mapping, application); + if (ret != OPUS_OK) + { + opus_free(st); + st = NULL; + } + if (error) + *error = ret; 
+ return st; +} + +OpusMSEncoder *opus_multistream_surround_encoder_create( + opus_int32 Fs, + int channels, + int mapping_family, + int *streams, + int *coupled_streams, + unsigned char *mapping, + int application, + int *error +) +{ + int ret; + OpusMSEncoder *st; + if ((channels>255) || (channels<1)) + { + if (error) + *error = OPUS_BAD_ARG; + return NULL; + } + st = (OpusMSEncoder *)opus_alloc(opus_multistream_surround_encoder_get_size(channels, mapping_family)); + if (st==NULL) + { + if (error) + *error = OPUS_ALLOC_FAIL; + return NULL; + } + ret = opus_multistream_surround_encoder_init(st, Fs, channels, mapping_family, streams, coupled_streams, mapping, application); + if (ret != OPUS_OK) + { + opus_free(st); + st = NULL; + } + if (error) + *error = ret; + return st; +} + +static void surround_rate_allocation( + OpusMSEncoder *st, + opus_int32 *rate, + int frame_size + ) +{ + int i; + opus_int32 channel_rate; + opus_int32 Fs; + char *ptr; + int stream_offset; + int lfe_offset; + int coupled_ratio; /* Q8 */ + int lfe_ratio; /* Q8 */ + + ptr = (char*)st + align(sizeof(OpusMSEncoder)); + opus_encoder_ctl((OpusEncoder*)ptr, OPUS_GET_SAMPLE_RATE(&Fs)); + + if (st->bitrate_bps > st->layout.nb_channels*40000) + stream_offset = 20000; + else + stream_offset = st->bitrate_bps/st->layout.nb_channels/2; + stream_offset += 60*(Fs/frame_size-50); + /* We start by giving each stream (coupled or uncoupled) the same bitrate. + This models the main saving of coupled channels over uncoupled. */ + /* The LFE stream is an exception to the above and gets fewer bits. */ + lfe_offset = 3500 + 60*(Fs/frame_size-50); + /* Coupled streams get twice the mono rate after the first 20 kb/s. 
*/ + coupled_ratio = 512; + /* Should depend on the bitrate, for now we assume LFE gets 1/8 the bits of mono */ + lfe_ratio = 32; + + /* Compute bitrate allocation between streams */ + if (st->bitrate_bps==OPUS_AUTO) + { + channel_rate = Fs+60*Fs/frame_size; + } else if (st->bitrate_bps==OPUS_BITRATE_MAX) + { + channel_rate = 300000; + } else { + int nb_lfe; + int nb_uncoupled; + int nb_coupled; + int total; + nb_lfe = (st->lfe_stream!=-1); + nb_coupled = st->layout.nb_coupled_streams; + nb_uncoupled = st->layout.nb_streams-nb_coupled-nb_lfe; + total = (nb_uncoupled<<8) /* mono */ + + coupled_ratio*nb_coupled /* stereo */ + + nb_lfe*lfe_ratio; + channel_rate = 256*(st->bitrate_bps-lfe_offset*nb_lfe-stream_offset*(nb_coupled+nb_uncoupled))/total; + } +#ifndef OPUS_FIXED_POINT + if (st->variable_duration==OPUS_FRAMESIZE_VARIABLE && frame_size != Fs/50) + { + opus_int32 bonus; + bonus = 60*(Fs/frame_size-50); + channel_rate += bonus; + } +#endif + + for (i=0;i<st->layout.nb_streams;i++) + { + if (i<st->layout.nb_coupled_streams) + rate[i] = stream_offset+(channel_rate*coupled_ratio>>8); + else if (i!=st->lfe_stream) + rate[i] = stream_offset+channel_rate; + else + rate[i] = lfe_offset+(channel_rate*lfe_ratio>>8); + } +} + +/* Max size in case the encoder decides to return three frames */ +#define MS_FRAME_TMP (3*1275+7) +static int opus_multistream_encode_native +( + OpusMSEncoder *st, + opus_copy_channel_in_func copy_channel_in, + const void *pcm, + int analysis_frame_size, + unsigned char *data, + opus_int32 max_data_bytes, + int lsb_depth, + downmix_func downmix +) +{ + opus_int32 Fs; + int coupled_size; + int mono_size; + int s; + char *ptr; + int tot_size; + VARDECL(opus_val16, buf); + VARDECL(opus_val16, bandSMR); + unsigned char tmp_data[MS_FRAME_TMP]; + OpusRepacketizer rp; + opus_int32 vbr; + const CELTMode *celt_mode; + opus_int32 bitrates[256]; + opus_val16 bandLogE[42]; + opus_val32 *mem = NULL; + opus_val32 *preemph_mem=NULL; + int frame_size; + 
ALLOC_STACK; + + if (st->surround) + { + preemph_mem = ms_get_preemph_mem(st); + mem = ms_get_window_mem(st); + } + + ptr = (char*)st + align(sizeof(OpusMSEncoder)); + opus_encoder_ctl((OpusEncoder*)ptr, OPUS_GET_SAMPLE_RATE(&Fs)); + opus_encoder_ctl((OpusEncoder*)ptr, OPUS_GET_VBR(&vbr)); + opus_encoder_ctl((OpusEncoder*)ptr, CELT_GET_MODE(&celt_mode)); + + { + opus_int32 delay_compensation; + int channels; + + channels = st->layout.nb_streams + st->layout.nb_coupled_streams; + opus_encoder_ctl((OpusEncoder*)ptr, OPUS_GET_LOOKAHEAD(&delay_compensation)); + delay_compensation -= Fs/400; + frame_size = compute_frame_size(pcm, analysis_frame_size, + st->variable_duration, channels, Fs, st->bitrate_bps, + delay_compensation, downmix +#ifndef DISABLE_FLOAT_API + , st->subframe_mem +#endif + ); + } + + if (400*frame_size < Fs) + { + RESTORE_STACK; + return OPUS_BAD_ARG; + } + /* Validate frame_size before using it to allocate stack space. + This mirrors the checks in opus_encode[_float](). */ + if (400*frame_size != Fs && 200*frame_size != Fs && + 100*frame_size != Fs && 50*frame_size != Fs && + 25*frame_size != Fs && 50*frame_size != 3*Fs) + { + RESTORE_STACK; + return OPUS_BAD_ARG; + } + ALLOC(buf, 2*frame_size, opus_val16); + coupled_size = opus_encoder_get_size(2); + mono_size = opus_encoder_get_size(1); + + ALLOC(bandSMR, 21*st->layout.nb_channels, opus_val16); + if (st->surround) + { + surround_analysis(celt_mode, pcm, bandSMR, mem, preemph_mem, frame_size, 120, st->layout.nb_channels, Fs, copy_channel_in); + } + + if (max_data_bytes < 4*st->layout.nb_streams-1) + { + RESTORE_STACK; + return OPUS_BUFFER_TOO_SMALL; + } + + /* Compute bitrate allocation between streams (this could be a lot better) */ + surround_rate_allocation(st, bitrates, frame_size); + + if (!vbr) + max_data_bytes = IMIN(max_data_bytes, 3*st->bitrate_bps/(3*8*Fs/frame_size)); + + ptr = (char*)st + align(sizeof(OpusMSEncoder)); + for (s=0;s<st->layout.nb_streams;s++) + { + OpusEncoder *enc; + enc 
= (OpusEncoder*)ptr; + if (s < st->layout.nb_coupled_streams) + ptr += align(coupled_size); + else + ptr += align(mono_size); + opus_encoder_ctl(enc, OPUS_SET_BITRATE(bitrates[s])); + if (st->surround) + { + opus_int32 equiv_rate; + equiv_rate = st->bitrate_bps; + if (frame_size*50 < Fs) + equiv_rate -= 60*(Fs/frame_size - 50)*st->layout.nb_channels; + if (equiv_rate > 10000*st->layout.nb_channels) + opus_encoder_ctl(enc, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_FULLBAND)); + else if (equiv_rate > 7000*st->layout.nb_channels) + opus_encoder_ctl(enc, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_SUPERWIDEBAND)); + else if (equiv_rate > 5000*st->layout.nb_channels) + opus_encoder_ctl(enc, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_WIDEBAND)); + else + opus_encoder_ctl(enc, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_NARROWBAND)); + if (s < st->layout.nb_coupled_streams) + { + /* To preserve the spatial image, force stereo CELT on coupled streams */ + opus_encoder_ctl(enc, OPUS_SET_FORCE_MODE(MODE_CELT_ONLY)); + opus_encoder_ctl(enc, OPUS_SET_FORCE_CHANNELS(2)); + } + } + } + + ptr = (char*)st + align(sizeof(OpusMSEncoder)); + /* Counting ToC */ + tot_size = 0; + for (s=0;s<st->layout.nb_streams;s++) + { + OpusEncoder *enc; + int len; + int curr_max; + int c1, c2; + + opus_repacketizer_init(&rp); + enc = (OpusEncoder*)ptr; + if (s < st->layout.nb_coupled_streams) + { + int i; + int left, right; + left = get_left_channel(&st->layout, s, -1); + right = get_right_channel(&st->layout, s, -1); + (*copy_channel_in)(buf, 2, + pcm, st->layout.nb_channels, left, frame_size); + (*copy_channel_in)(buf+1, 2, + pcm, st->layout.nb_channels, right, frame_size); + ptr += align(coupled_size); + if (st->surround) + { + for (i=0;i<21;i++) + { + bandLogE[i] = bandSMR[21*left+i]; + bandLogE[21+i] = bandSMR[21*right+i]; + } + } + c1 = left; + c2 = right; + } else { + int i; + int chan = get_mono_channel(&st->layout, s, -1); + (*copy_channel_in)(buf, 1, + pcm, st->layout.nb_channels, chan, frame_size); + ptr += 
align(mono_size); + if (st->surround) + { + for (i=0;i<21;i++) + bandLogE[i] = bandSMR[21*chan+i]; + } + c1 = chan; + c2 = -1; + } + if (st->surround) + opus_encoder_ctl(enc, OPUS_SET_ENERGY_MASK(bandLogE)); + /* number of bytes left (+Toc) */ + curr_max = max_data_bytes - tot_size; + /* Reserve three bytes for the last stream and four for the others */ + curr_max -= IMAX(0,4*(st->layout.nb_streams-s-1)-1); + curr_max = IMIN(curr_max,MS_FRAME_TMP); + if (!vbr && s == st->layout.nb_streams-1) + opus_encoder_ctl(enc, OPUS_SET_BITRATE(curr_max*(8*Fs/frame_size))); + len = opus_encode_native(enc, buf, frame_size, tmp_data, curr_max, lsb_depth, + pcm, analysis_frame_size, c1, c2, st->layout.nb_channels, downmix); + if (len<0) + { + RESTORE_STACK; + return len; + } + /* We need to use the repacketizer to add the self-delimiting lengths + while taking into account the fact that the encoder can now return + more than one frame at a time (e.g. 60 ms CELT-only) */ + opus_repacketizer_cat(&rp, tmp_data, len); + len = opus_repacketizer_out_range_impl(&rp, 0, opus_repacketizer_get_nb_frames(&rp), + data, max_data_bytes-tot_size, s != st->layout.nb_streams-1, !vbr && s == st->layout.nb_streams-1); + data += len; + tot_size += len; + } + /*printf("\n");*/ + RESTORE_STACK; + return tot_size; +} + +#if !defined(DISABLE_FLOAT_API) +static void opus_copy_channel_in_float( + opus_val16 *dst, + int dst_stride, + const void *src, + int src_stride, + int src_channel, + int frame_size +) +{ + const float *float_src; + opus_int32 i; + float_src = (const float *)src; + for (i=0;i<frame_size;i++) +#if defined(OPUS_FIXED_POINT) + dst[i*dst_stride] = FLOAT2INT16(float_src[i*src_stride+src_channel]); +#else + dst[i*dst_stride] = float_src[i*src_stride+src_channel]; +#endif +} +#endif + +static void opus_copy_channel_in_short( + opus_val16 *dst, + int dst_stride, + const void *src, + int src_stride, + int src_channel, + int frame_size +) +{ + const opus_int16 *short_src; + opus_int32 i; + 
short_src = (const opus_int16 *)src; + for (i=0;i<frame_size;i++) +#if defined(OPUS_FIXED_POINT) + dst[i*dst_stride] = short_src[i*src_stride+src_channel]; +#else + dst[i*dst_stride] = (1/32768.f)*short_src[i*src_stride+src_channel]; +#endif +} + + +#ifdef OPUS_FIXED_POINT +int opus_multistream_encode( + OpusMSEncoder *st, + const opus_val16 *pcm, + int frame_size, + unsigned char *data, + opus_int32 max_data_bytes +) +{ + return opus_multistream_encode_native(st, opus_copy_channel_in_short, + pcm, frame_size, data, max_data_bytes, 16, downmix_int); +} + +#ifndef DISABLE_FLOAT_API +int opus_multistream_encode_float( + OpusMSEncoder *st, + const float *pcm, + int frame_size, + unsigned char *data, + opus_int32 max_data_bytes +) +{ + return opus_multistream_encode_native(st, opus_copy_channel_in_float, + pcm, frame_size, data, max_data_bytes, 16, downmix_float); +} +#endif + +#else + +int opus_multistream_encode_float +( + OpusMSEncoder *st, + const opus_val16 *pcm, + int frame_size, + unsigned char *data, + opus_int32 max_data_bytes +) +{ + return opus_multistream_encode_native(st, opus_copy_channel_in_float, + pcm, frame_size, data, max_data_bytes, 24, downmix_float); +} + +int opus_multistream_encode( + OpusMSEncoder *st, + const opus_int16 *pcm, + int frame_size, + unsigned char *data, + opus_int32 max_data_bytes +) +{ + return opus_multistream_encode_native(st, opus_copy_channel_in_short, + pcm, frame_size, data, max_data_bytes, 16, downmix_int); +} +#endif + +int opus_multistream_encoder_ctl(OpusMSEncoder *st, int request, ...) 
+{ + va_list ap; + int coupled_size, mono_size; + char *ptr; + int ret = OPUS_OK; + + va_start(ap, request); + + coupled_size = opus_encoder_get_size(2); + mono_size = opus_encoder_get_size(1); + ptr = (char*)st + align(sizeof(OpusMSEncoder)); + switch (request) + { + case OPUS_SET_BITRATE_REQUEST: + { + opus_int32 value = va_arg(ap, opus_int32); + if (value<0 && value!=OPUS_AUTO && value!=OPUS_BITRATE_MAX) + { + goto bad_arg; + } + st->bitrate_bps = value; + } + break; + case OPUS_GET_BITRATE_REQUEST: + { + int s; + opus_int32 *value = va_arg(ap, opus_int32*); + if (!value) + { + goto bad_arg; + } + *value = 0; + for (s=0;s<st->layout.nb_streams;s++) + { + opus_int32 rate; + OpusEncoder *enc; + enc = (OpusEncoder*)ptr; + if (s < st->layout.nb_coupled_streams) + ptr += align(coupled_size); + else + ptr += align(mono_size); + opus_encoder_ctl(enc, request, &rate); + *value += rate; + } + } + break; + case OPUS_GET_LSB_DEPTH_REQUEST: + case OPUS_GET_VBR_REQUEST: + case OPUS_GET_APPLICATION_REQUEST: + case OPUS_GET_BANDWIDTH_REQUEST: + case OPUS_GET_COMPLEXITY_REQUEST: + case OPUS_GET_PACKET_LOSS_PERC_REQUEST: + case OPUS_GET_DTX_REQUEST: + case OPUS_GET_VOICE_RATIO_REQUEST: + case OPUS_GET_VBR_CONSTRAINT_REQUEST: + case OPUS_GET_SIGNAL_REQUEST: + case OPUS_GET_LOOKAHEAD_REQUEST: + case OPUS_GET_SAMPLE_RATE_REQUEST: + case OPUS_GET_INBAND_FEC_REQUEST: + case OPUS_GET_FORCE_CHANNELS_REQUEST: + case OPUS_GET_PREDICTION_DISABLED_REQUEST: + { + OpusEncoder *enc; + /* For int32* GET params, just query the first stream */ + opus_int32 *value = va_arg(ap, opus_int32*); + enc = (OpusEncoder*)ptr; + ret = opus_encoder_ctl(enc, request, value); + } + break; + case OPUS_GET_FINAL_RANGE_REQUEST: + { + int s; + opus_uint32 *value = va_arg(ap, opus_uint32*); + opus_uint32 tmp; + if (!value) + { + goto bad_arg; + } + *value=0; + for (s=0;s<st->layout.nb_streams;s++) + { + OpusEncoder *enc; + enc = (OpusEncoder*)ptr; + if (s < st->layout.nb_coupled_streams) + ptr += 
align(coupled_size); + else + ptr += align(mono_size); + ret = opus_encoder_ctl(enc, request, &tmp); + if (ret != OPUS_OK) break; + *value ^= tmp; + } + } + break; + case OPUS_SET_LSB_DEPTH_REQUEST: + case OPUS_SET_COMPLEXITY_REQUEST: + case OPUS_SET_VBR_REQUEST: + case OPUS_SET_VBR_CONSTRAINT_REQUEST: + case OPUS_SET_MAX_BANDWIDTH_REQUEST: + case OPUS_SET_BANDWIDTH_REQUEST: + case OPUS_SET_SIGNAL_REQUEST: + case OPUS_SET_APPLICATION_REQUEST: + case OPUS_SET_INBAND_FEC_REQUEST: + case OPUS_SET_PACKET_LOSS_PERC_REQUEST: + case OPUS_SET_DTX_REQUEST: + case OPUS_SET_FORCE_MODE_REQUEST: + case OPUS_SET_FORCE_CHANNELS_REQUEST: + case OPUS_SET_PREDICTION_DISABLED_REQUEST: + { + int s; + /* This works for int32 params */ + opus_int32 value = va_arg(ap, opus_int32); + for (s=0;s<st->layout.nb_streams;s++) + { + OpusEncoder *enc; + + enc = (OpusEncoder*)ptr; + if (s < st->layout.nb_coupled_streams) + ptr += align(coupled_size); + else + ptr += align(mono_size); + ret = opus_encoder_ctl(enc, request, value); + if (ret != OPUS_OK) + break; + } + } + break; + case OPUS_MULTISTREAM_GET_ENCODER_STATE_REQUEST: + { + int s; + opus_int32 stream_id; + OpusEncoder **value; + stream_id = va_arg(ap, opus_int32); + if (stream_id<0 || stream_id >= st->layout.nb_streams) + ret = OPUS_BAD_ARG; + value = va_arg(ap, OpusEncoder**); + if (!value) + { + goto bad_arg; + } + for (s=0;s<stream_id;s++) + { + if (s < st->layout.nb_coupled_streams) + ptr += align(coupled_size); + else + ptr += align(mono_size); + } + *value = (OpusEncoder*)ptr; + } + break; + case OPUS_SET_EXPERT_FRAME_DURATION_REQUEST: + { + opus_int32 value = va_arg(ap, opus_int32); + st->variable_duration = value; + } + break; + case OPUS_GET_EXPERT_FRAME_DURATION_REQUEST: + { + opus_int32 *value = va_arg(ap, opus_int32*); + if (!value) + { + goto bad_arg; + } + *value = st->variable_duration; + } + break; + case OPUS_RESET_STATE: + { + int s; + st->subframe_mem[0] = st->subframe_mem[1] = st->subframe_mem[2] = 0; + if 
(st->surround) + { + OPUS_CLEAR(ms_get_preemph_mem(st), st->layout.nb_channels); + OPUS_CLEAR(ms_get_window_mem(st), st->layout.nb_channels*120); + } + for (s=0;s<st->layout.nb_streams;s++) + { + OpusEncoder *enc; + enc = (OpusEncoder*)ptr; + if (s < st->layout.nb_coupled_streams) + ptr += align(coupled_size); + else + ptr += align(mono_size); + ret = opus_encoder_ctl(enc, OPUS_RESET_STATE); + if (ret != OPUS_OK) + break; + } + } + break; + default: + ret = OPUS_UNIMPLEMENTED; + break; + } + + va_end(ap); + return ret; +bad_arg: + va_end(ap); + return OPUS_BAD_ARG; +} + +void opus_multistream_encoder_destroy(OpusMSEncoder *st) +{ + opus_free(st); +} diff --git a/drivers/opus/opus_private.h b/drivers/opus/opus_private.h new file mode 100644 index 0000000000..83225f2b6c --- /dev/null +++ b/drivers/opus/opus_private.h @@ -0,0 +1,129 @@ +/* Copyright (c) 2012 Xiph.Org Foundation + Written by Jean-Marc Valin */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + + +#ifndef OPUS_PRIVATE_H +#define OPUS_PRIVATE_H + +#include "arch.h" +#include "opus.h" +#include "celt.h" + +struct OpusRepacketizer { + unsigned char toc; + int nb_frames; + const unsigned char *frames[48]; + opus_int16 len[48]; + int framesize; +}; + +typedef struct ChannelLayout { + int nb_channels; + int nb_streams; + int nb_coupled_streams; + unsigned char mapping[256]; +} ChannelLayout; + +int validate_layout(const ChannelLayout *layout); +int get_left_channel(const ChannelLayout *layout, int stream_id, int prev); +int get_right_channel(const ChannelLayout *layout, int stream_id, int prev); +int get_mono_channel(const ChannelLayout *layout, int stream_id, int prev); + + + +#define MODE_SILK_ONLY 1000 +#define MODE_HYBRID 1001 +#define MODE_CELT_ONLY 1002 + +#define OPUS_SET_VOICE_RATIO_REQUEST 11018 +#define OPUS_GET_VOICE_RATIO_REQUEST 11019 + +/** Configures the encoder's expected percentage of voice + * opposed to music or other signals. + * + * @note This interface is currently more aspiration than actuality. It's + * ultimately expected to bias an automatic signal classifier, but it currently + * just shifts the static bitrate to mode mapping around a little bit. + * + * @param[in] x <tt>int</tt>: Voice percentage in the range 0-100, inclusive. 
+ * @hideinitializer */ +#define OPUS_SET_VOICE_RATIO(x) OPUS_SET_VOICE_RATIO_REQUEST, __opus_check_int(x) +/** Gets the encoder's configured voice ratio value, @see OPUS_SET_VOICE_RATIO + * + * @param[out] x <tt>int*</tt>: Voice percentage in the range 0-100, inclusive. + * @hideinitializer */ +#define OPUS_GET_VOICE_RATIO(x) OPUS_GET_VOICE_RATIO_REQUEST, __opus_check_int_ptr(x) + + +#define OPUS_SET_FORCE_MODE_REQUEST 11002 +#define OPUS_SET_FORCE_MODE(x) OPUS_SET_FORCE_MODE_REQUEST, __opus_check_int(x) + +typedef void (*downmix_func)(const void *, opus_val32 *, int, int, int, int, int); +void downmix_float(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C); +void downmix_int(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C); + +int optimize_framesize(const opus_val16 *x, int len, int C, opus_int32 Fs, + int bitrate, opus_val16 tonality, float *mem, int buffering, + downmix_func downmix); + +int encode_size(int size, unsigned char *data); + +opus_int32 frame_size_select(opus_int32 frame_size, int variable_duration, opus_int32 Fs); + +opus_int32 compute_frame_size(const void *analysis_pcm, int frame_size, + int variable_duration, int C, opus_int32 Fs, int bitrate_bps, + int delay_compensation, downmix_func downmix +#ifndef DISABLE_FLOAT_API + , float *subframe_mem +#endif + ); + +opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_size, + unsigned char *data, opus_int32 out_data_bytes, int lsb_depth, + const void *analysis_pcm, opus_int32 analysis_size, int c1, int c2, int analysis_channels, downmix_func downmix); + +int opus_decode_native(OpusDecoder *st, const unsigned char *data, opus_int32 len, + opus_val16 *pcm, int frame_size, int decode_fec, int self_delimited, + opus_int32 *packet_offset, int soft_clip); + +/* Make sure everything's aligned to sizeof(void *) bytes */ +static OPUS_INLINE int align(int i) +{ + return (i+(int)sizeof(void *)-1)&-(int)sizeof(void *); +} + 
+int opus_packet_parse_impl(const unsigned char *data, opus_int32 len, + int self_delimited, unsigned char *out_toc, + const unsigned char *frames[48], opus_int16 size[48], + int *payload_offset, opus_int32 *packet_offset); + +opus_int32 opus_repacketizer_out_range_impl(OpusRepacketizer *rp, int begin, int end, + unsigned char *data, opus_int32 maxlen, int self_delimited, int pad); + +int pad_frame(unsigned char *data, opus_int32 len, opus_int32 new_len); + +#endif /* OPUS_PRIVATE_H */ diff --git a/drivers/opus/opus_types.h b/drivers/opus/opus_types.h new file mode 100644 index 0000000000..b28e03aea2 --- /dev/null +++ b/drivers/opus/opus_types.h @@ -0,0 +1,159 @@ +/* (C) COPYRIGHT 1994-2002 Xiph.Org Foundation */ +/* Modified by Jean-Marc Valin */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ +/* opus_types.h based on ogg_types.h from libogg */ + +/** + @file opus_types.h + @brief Opus reference implementation types +*/ +#ifndef OPUS_TYPES_H +#define OPUS_TYPES_H + +/* Use the real stdint.h if it's there (taken from Paul Hsieh's pstdint.h) */ +#if (defined(__STDC__) && __STDC__ && __STDC_VERSION__ >= 199901L) || (defined(__GNUC__) && (defined(_STDINT_H) || defined(_STDINT_H_)) || defined (HAVE_STDINT_H)) +#include <stdint.h> + + typedef int16_t opus_int16; + typedef uint16_t opus_uint16; + typedef int32_t opus_int32; + typedef uint32_t opus_uint32; +#elif defined(_WIN32) + +# if defined(__CYGWIN__) +# include <_G_config.h> + typedef _G_int32_t opus_int32; + typedef _G_uint32_t opus_uint32; + typedef _G_int16 opus_int16; + typedef _G_uint16 opus_uint16; +# elif defined(__MINGW32__) + typedef short opus_int16; + typedef unsigned short opus_uint16; + typedef int opus_int32; + typedef unsigned int opus_uint32; +# elif defined(__MWERKS__) + typedef int opus_int32; + typedef unsigned int opus_uint32; + typedef short opus_int16; + typedef unsigned short opus_uint16; +# else + /* MSVC/Borland */ + typedef __int32 opus_int32; + typedef unsigned __int32 opus_uint32; + typedef __int16 opus_int16; + typedef unsigned __int16 opus_uint16; +# endif + +#elif defined(__MACOS__) + +# include <sys/types.h> + typedef SInt16 opus_int16; + typedef UInt16 opus_uint16; + typedef SInt32 opus_int32; + typedef UInt32 opus_uint32; + +#elif (defined(__APPLE__) 
&& defined(__MACH__)) /* MacOS X Framework build */ + +# include <sys/types.h> + typedef int16_t opus_int16; + typedef u_int16_t opus_uint16; + typedef int32_t opus_int32; + typedef u_int32_t opus_uint32; + +#elif defined(__BEOS__) + + /* Be */ +# include <inttypes.h> + typedef int16 opus_int16; + typedef u_int16 opus_uint16; + typedef int32_t opus_int32; + typedef u_int32_t opus_uint32; + +#elif defined (__EMX__) + + /* OS/2 GCC */ + typedef short opus_int16; + typedef unsigned short opus_uint16; + typedef int opus_int32; + typedef unsigned int opus_uint32; + +#elif defined (DJGPP) + + /* DJGPP */ + typedef short opus_int16; + typedef unsigned short opus_uint16; + typedef int opus_int32; + typedef unsigned int opus_uint32; + +#elif defined(R5900) + + /* PS2 EE */ + typedef int opus_int32; + typedef unsigned opus_uint32; + typedef short opus_int16; + typedef unsigned short opus_uint16; + +#elif defined(__SYMBIAN32__) + + /* Symbian GCC */ + typedef signed short opus_int16; + typedef unsigned short opus_uint16; + typedef signed int opus_int32; + typedef unsigned int opus_uint32; + +#elif defined(CONFIG_TI_C54X) || defined (CONFIG_TI_C55X) + + typedef short opus_int16; + typedef unsigned short opus_uint16; + typedef long opus_int32; + typedef unsigned long opus_uint32; + +#elif defined(CONFIG_TI_C6X) + + typedef short opus_int16; + typedef unsigned short opus_uint16; + typedef int opus_int32; + typedef unsigned int opus_uint32; + +#else + + /* Give up, take a reasonable guess */ + typedef short opus_int16; + typedef unsigned short opus_uint16; + typedef int opus_int32; + typedef unsigned int opus_uint32; + +#endif + +#define opus_int int /* used for counters etc; at least 16 bits */ +#define opus_int64 long long +#define opus_int8 signed char + +#define opus_uint unsigned int /* used for counters etc; at least 16 bits */ +#define opus_uint64 unsigned long long +#define opus_uint8 unsigned char + +#endif /* OPUS_TYPES_H */ diff --git a/drivers/opus/opusfile.c 
b/drivers/opus/opusfile.c new file mode 100644 index 0000000000..1e7497f6cd --- /dev/null +++ b/drivers/opus/opusfile.c @@ -0,0 +1,3158 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE libopusfile SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * + * * + * THE libopusfile SOURCE CODE IS (C) COPYRIGHT 1994-2012 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: stdio-based convenience library for opening/seeking/decoding + last mod: $Id: vorbisfile.c 17573 2010-10-27 14:53:59Z xiphmont $ + + ********************************************************************/ +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "internal.h" +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include <limits.h> +#include <string.h> +#include <math.h> + +#include "opusfile.h" + +/*This implementation is largely based off of libvorbisfile. + All of the Ogg bits work roughly the same, though I have made some + "improvements" that have not been folded back there, yet.*/ + +/*A 'chained bitstream' is an Ogg Opus bitstream that contains more than one + logical bitstream arranged end to end (the only form of Ogg multiplexing + supported by this library. + Grouping (parallel multiplexing) is not supported, except to the extent that + if there are multiple logical Ogg streams in a single link of the chain, we + will ignore all but the first Opus stream we find.*/ + +/*An Ogg Opus file can be played beginning to end (streamed) without worrying + ahead of time about chaining (see opusdec from the opus-tools package). 
+ If we have the whole file, however, and want random access + (seeking/scrubbing) or desire to know the total length/time of a file, we + need to account for the possibility of chaining.*/ + +/*We can handle things a number of ways. + We can determine the entire bitstream structure right off the bat, or find + pieces on demand. + This library determines and caches structure for the entire bitstream, but + builds a virtual decoder on the fly when moving between links in the chain.*/ + +/*There are also different ways to implement seeking. + Enough information exists in an Ogg bitstream to seek to sample-granularity + positions in the output. + Or, one can seek by picking some portion of the stream roughly in the desired + area if we only want coarse navigation through the stream. + We implement and expose both strategies.*/ + +/*The maximum number of bytes in a page (including the page headers).*/ +#define OP_PAGE_SIZE_MAX (65307) +/*The default amount to seek backwards per step when trying to find the + previous page. + This must be at least as large as the maximum size of a page.*/ +#define OP_CHUNK_SIZE (65536) +/*The maximum amount to seek backwards per step when trying to find the + previous page.*/ +#define OP_CHUNK_SIZE_MAX (1024*(opus_int32)1024) +/*A smaller read size is needed for low-rate streaming.*/ +#define OP_READ_SIZE (2048) + +int op_test(OpusHead *_head, + const unsigned char *_initial_data,size_t _initial_bytes){ + ogg_sync_state oy; + char *data; + int err; + /*The first page of a normal Opus file will be at most 57 bytes (27 Ogg + page header bytes + 1 lacing value + 21 Opus header bytes + 8 channel + mapping bytes). + It will be at least 47 bytes (27 Ogg page header bytes + 1 lacing value + + 19 Opus header bytes using channel mapping family 0). + If we don't have at least that much data, give up now.*/ + if(_initial_bytes<47)return OP_FALSE; + /*Only proceed if we start with the magic OggS string. 
+ This is to prevent us spending a lot of time allocating memory and looking + for Ogg pages in non-Ogg files.*/ + if(memcmp(_initial_data,"OggS",4)!=0)return OP_ENOTFORMAT; + ogg_sync_init(&oy); + data=ogg_sync_buffer(&oy,_initial_bytes); + if(data!=NULL){ + ogg_stream_state os; + ogg_page og; + int ret; + memcpy(data,_initial_data,_initial_bytes); + ogg_sync_wrote(&oy,_initial_bytes); + ogg_stream_init(&os,-1); + err=OP_FALSE; + do{ + ogg_packet op; + ret=ogg_sync_pageout(&oy,&og); + /*Ignore holes.*/ + if(ret<0)continue; + /*Stop if we run out of data.*/ + if(!ret)break; + ogg_stream_reset_serialno(&os,ogg_page_serialno(&og)); + ogg_stream_pagein(&os,&og); + /*Only process the first packet on this page (if it's a BOS packet, + it's required to be the only one).*/ + if(ogg_stream_packetout(&os,&op)==1){ + if(op.b_o_s){ + ret=opus_head_parse(_head,op.packet,op.bytes); + /*If this didn't look like Opus, keep going.*/ + if(ret==OP_ENOTFORMAT)continue; + /*Otherwise we're done, one way or another.*/ + err=ret; + } + /*We finished parsing the headers. + There is no Opus to be found.*/ + else err=OP_ENOTFORMAT; + } + } + while(err==OP_FALSE); + ogg_stream_clear(&os); + } + else err=OP_EFAULT; + ogg_sync_clear(&oy); + return err; +} + +/*Many, many internal helpers. + The intention is not to be confusing. + Rampant duplication and monolithic function implementation (though we do have + some large, omnibus functions still) would be harder to understand anyway. + The high level functions are last. + Begin grokking near the end of the file if you prefer to read things + top-down.*/ + +/*The read/seek functions track absolute position within the stream.*/ + +/*Read a little more data from the file/pipe into the ogg_sync framer. + _nbytes: The maximum number of bytes to read. 
+ Return: A positive number of bytes read on success, 0 on end-of-file, or a + negative value on failure.*/ +static int op_get_data(OggOpusFile *_of,int _nbytes){ + unsigned char *buffer; + int nbytes; + OP_ASSERT(_nbytes>0); + buffer=(unsigned char *)ogg_sync_buffer(&_of->oy,_nbytes); + nbytes=(int)(*_of->callbacks.read)(_of->source,buffer,_nbytes); + OP_ASSERT(nbytes<=_nbytes); + if(OP_LIKELY(nbytes>0))ogg_sync_wrote(&_of->oy,nbytes); + return nbytes; +} + +/*Save a tiny smidge of verbosity to make the code more readable.*/ +static int op_seek_helper(OggOpusFile *_of,opus_int64 _offset){ + if(_offset==_of->offset)return 0; + if(_of->callbacks.seek==NULL + ||(*_of->callbacks.seek)(_of->source,_offset,SEEK_SET)){ + return OP_EREAD; + } + _of->offset=_offset; + ogg_sync_reset(&_of->oy); + return 0; +} + +/*Get the current position indicator of the underlying source. + This should be the same as the value reported by tell().*/ +static opus_int64 op_position(const OggOpusFile *_of){ + /*The current position indicator is _not_ simply offset. + We may also have unprocessed, buffered data in the sync state.*/ + return _of->offset+_of->oy.fill-_of->oy.returned; +} + +/*From the head of the stream, get the next page. + _boundary specifies if the function is allowed to fetch more data from the + stream (and how much) or only use internally buffered data. + _boundary: -1: Unbounded search. + 0: Read no additional data. + Use only cached data. + n: Search for the start of a new page up to file position n. + Return: n>=0: Found a page at absolute offset n. + OP_FALSE: Hit the _boundary limit. + OP_EREAD: An underlying read operation failed. 
+ OP_BADLINK: We hit end-of-file before reaching _boundary.*/ +static opus_int64 op_get_next_page(OggOpusFile *_of,ogg_page *_og, + opus_int64 _boundary){ + while(_boundary<=0||_of->offset<_boundary){ + int more; + more=ogg_sync_pageseek(&_of->oy,_og); + /*Skipped (-more) bytes.*/ + if(OP_UNLIKELY(more<0))_of->offset-=more; + else if(more==0){ + int read_nbytes; + int ret; + /*Send more paramedics.*/ + if(!_boundary)return OP_FALSE; + if(_boundary<0)read_nbytes=OP_READ_SIZE; + else{ + opus_int64 position; + position=op_position(_of); + if(position>=_boundary)return OP_FALSE; + read_nbytes=(int)OP_MIN(_boundary-position,OP_READ_SIZE); + } + ret=op_get_data(_of,read_nbytes); + if(OP_UNLIKELY(ret<0))return OP_EREAD; + if(OP_UNLIKELY(ret==0)){ + /*Only fail cleanly on EOF if we didn't have a known boundary. + Otherwise, we should have been able to reach that boundary, and this + is a fatal error.*/ + return OP_UNLIKELY(_boundary<0)?OP_FALSE:OP_EBADLINK; + } + } + else{ + /*Got a page. + Return the page start offset and advance the internal offset past the + page end.*/ + opus_int64 page_offset; + page_offset=_of->offset; + _of->offset+=more; + OP_ASSERT(page_offset>=0); + return page_offset; + } + } + return OP_FALSE; +} + +static int op_add_serialno(const ogg_page *_og, + ogg_uint32_t **_serialnos,int *_nserialnos,int *_cserialnos){ + ogg_uint32_t *serialnos; + int nserialnos; + int cserialnos; + ogg_uint32_t s; + s=ogg_page_serialno(_og); + serialnos=*_serialnos; + nserialnos=*_nserialnos; + cserialnos=*_cserialnos; + if(OP_UNLIKELY(nserialnos>=cserialnos)){ + if(OP_UNLIKELY(cserialnos>INT_MAX-1>>1))return OP_EFAULT; + cserialnos=2*cserialnos+1; + OP_ASSERT(nserialnos<cserialnos); + serialnos=(ogg_uint32_t *)_ogg_realloc(serialnos, + sizeof(*serialnos)*cserialnos); + if(OP_UNLIKELY(serialnos==NULL))return OP_EFAULT; + } + serialnos[nserialnos++]=s; + *_serialnos=serialnos; + *_nserialnos=nserialnos; + *_cserialnos=cserialnos; + return 0; +} + +/*Returns nonzero if 
found.*/ +static int op_lookup_serialno(ogg_uint32_t _s, + const ogg_uint32_t *_serialnos,int _nserialnos){ + int i; + for(i=0;i<_nserialnos&&_serialnos[i]!=_s;i++); + return i<_nserialnos; +} + +static int op_lookup_page_serialno(const ogg_page *_og, + const ogg_uint32_t *_serialnos,int _nserialnos){ + return op_lookup_serialno(ogg_page_serialno(_og),_serialnos,_nserialnos); +} + +typedef struct OpusSeekRecord OpusSeekRecord; + +/*We use this to remember the pages we found while enumerating the links of a + chained stream. + We keep track of the starting and ending offsets, as well as the point we + started searching from, so we know where to bisect. + We also keep the serial number, so we can tell if the page belonged to the + current link or not, as well as the granule position, to aid in estimating + the start of the link.*/ +struct OpusSeekRecord{ + /*The earliest byte we know of such that reading forward from it causes + capture to be regained at this page.*/ + opus_int64 search_start; + /*The offset of this page.*/ + opus_int64 offset; + /*The size of this page.*/ + opus_int32 size; + /*The serial number of this page.*/ + ogg_uint32_t serialno; + /*The granule position of this page.*/ + ogg_int64_t gp; +}; + +/*Find the last page beginning before _offset with a valid granule position. + There is no '_boundary' parameter as it will always have to read more data. + This is much dirtier than the above, as Ogg doesn't have any backward search + linkage. + This search prefers pages of the specified serial number. + If a page of the specified serial number is spotted during the + seek-back-and-read-forward, it will return the info of last page of the + matching serial number, instead of the very last page, unless the very last + page belongs to a different link than preferred serial number. + If no page of the specified serial number is seen, it will return the info of + the last page. + [out] _sr: Returns information about the page that was found on success. 
+ _offset: The _offset before which to find a page. + Any page returned will consist of data entirely before _offset. + _serialno: The preferred serial number. + If a page with this serial number is found, it will be returned + even if another page in the same link is found closer to + _offset. + This is purely opportunistic: there is no guarantee such a page + will be found if it exists. + _serialnos: The list of serial numbers in the link that contains the + preferred serial number. + _nserialnos: The number of serial numbers in the current link. + Return: 0 on success, or a negative value on failure. + OP_EREAD: Failed to read more data (error or EOF). + OP_EBADLINK: We couldn't find a page even after seeking back to the + start of the stream.*/ +static int op_get_prev_page_serial(OggOpusFile *_of,OpusSeekRecord *_sr, + opus_int64 _offset,ogg_uint32_t _serialno, + const ogg_uint32_t *_serialnos,int _nserialnos){ + OpusSeekRecord preferred_sr; + ogg_page og; + opus_int64 begin; + opus_int64 end; + opus_int64 original_end; + opus_int32 chunk_size; + int preferred_found; + original_end=end=begin=_offset; + preferred_found=0; + _offset=-1; + chunk_size=OP_CHUNK_SIZE; + do{ + opus_int64 search_start; + int ret; + OP_ASSERT(chunk_size>=OP_PAGE_SIZE_MAX); + begin=OP_MAX(begin-chunk_size,0); + ret=op_seek_helper(_of,begin); + if(OP_UNLIKELY(ret<0))return ret; + search_start=begin; + while(_of->offset<end){ + opus_int64 llret; + ogg_uint32_t serialno; + llret=op_get_next_page(_of,&og,end); + if(OP_UNLIKELY(llret<OP_FALSE))return (int)llret; + else if(llret==OP_FALSE)break; + serialno=ogg_page_serialno(&og); + /*Save the information for this page. + We're not interested in the page itself... 
just the serial number, byte + offset, page size, and granule position.*/ + _sr->search_start=search_start; + _sr->offset=_offset=llret; + _sr->serialno=serialno; + OP_ASSERT(_of->offset-_offset>=0); + OP_ASSERT(_of->offset-_offset<=OP_PAGE_SIZE_MAX); + _sr->size=(opus_int32)(_of->offset-_offset); + _sr->gp=ogg_page_granulepos(&og); + /*If this page is from the stream we're looking for, remember it.*/ + if(serialno==_serialno){ + preferred_found=1; + *&preferred_sr=*_sr; + } + if(!op_lookup_serialno(serialno,_serialnos,_nserialnos)){ + /*We fell off the end of the link, which means we seeked back too far + and shouldn't have been looking in that link to begin with. + If we found the preferred serial number, forget that we saw it.*/ + preferred_found=0; + } + search_start=llret+1; + } + /*We started from the beginning of the stream and found nothing. + This should be impossible unless the contents of the source changed out + from under us after we read from it.*/ + if(OP_UNLIKELY(!begin)&&OP_UNLIKELY(_offset<0))return OP_EBADLINK; + /*Bump up the chunk size. + This is mildly helpful when seeks are very expensive (http).*/ + chunk_size=OP_MIN(2*chunk_size,OP_CHUNK_SIZE_MAX); + /*Avoid quadratic complexity if we hit an invalid patch of the file.*/ + end=OP_MIN(begin+OP_PAGE_SIZE_MAX-1,original_end); + } + while(_offset<0); + if(preferred_found)*_sr=*&preferred_sr; + return 0; +} + +/*Find the last page beginning before _offset with the given serial number and + a valid granule position. + Unlike the above search, this continues until it finds such a page, but does + not stray outside the current link. + We could implement it (inefficiently) by calling op_get_prev_page_serial() + repeatedly until it returned a page that had both our preferred serial + number and a valid granule position, but doing it with a separate function + allows us to avoid repeatedly re-scanning valid pages from other streams as + we seek-back-and-read-forward. 
+  [out] _gp:       Returns the granule position of the page that was found on
+                    success.
+  _offset:         The _offset before which to find a page.
+                   Any page returned will consist of data entirely before _offset.
+  _serialno:       The target serial number.
+  _serialnos:      The list of serial numbers in the link that contains the
+                    preferred serial number.
+  _nserialnos:     The number of serial numbers in the current link.
+  Return: The offset of the page on success, or a negative value on failure.
+          OP_EREAD:    Failed to read more data (error or EOF).
+          OP_EBADLINK: We couldn't find a page even after seeking back past the
+                        beginning of the link.*/
+static opus_int64 op_get_last_page(OggOpusFile *_of,ogg_int64_t *_gp,
+ opus_int64 _offset,ogg_uint32_t _serialno,
+ const ogg_uint32_t *_serialnos,int _nserialnos){
+  ogg_page    og;
+  ogg_int64_t gp;
+  opus_int64  begin;
+  opus_int64  end;
+  opus_int64  original_end;
+  opus_int32  chunk_size;
+  /*The target serial number must belong to the current link.*/
+  OP_ASSERT(op_lookup_serialno(_serialno,_serialnos,_nserialnos));
+  original_end=end=begin=_offset;
+  _offset=-1;
+  /*We shouldn't have to initialize gp, but gcc is too dumb to figure out that
+     ret>=0 implies we entered the if(page_gp!=-1) block at least once.*/
+  gp=-1;
+  chunk_size=OP_CHUNK_SIZE;
+  do{
+    int left_link;
+    int ret;
+    OP_ASSERT(chunk_size>=OP_PAGE_SIZE_MAX);
+    begin=OP_MAX(begin-chunk_size,0); /*Back up one chunk, clamped at 0.*/
+    ret=op_seek_helper(_of,begin);
+    if(OP_UNLIKELY(ret<0))return ret;
+    left_link=0;
+    while(_of->offset<end){
+      opus_int64   llret;
+      ogg_uint32_t serialno;
+      llret=op_get_next_page(_of,&og,end);
+      if(OP_UNLIKELY(llret<OP_FALSE))return llret;
+      else if(llret==OP_FALSE)break;
+      serialno=ogg_page_serialno(&og);
+      if(serialno==_serialno){
+        ogg_int64_t page_gp;
+        /*The page is from the right stream...*/
+        page_gp=ogg_page_granulepos(&og);
+        if(page_gp!=-1){
+          /*And has a valid granule position.
+            Let's remember it.
+            A later match in the same forward scan overwrites this one, so the
+             last qualifying page found before end is the one reported.*/
+          _offset=llret;
+          gp=page_gp;
+        }
+      }
+      else if(OP_UNLIKELY(!op_lookup_serialno(serialno,
+       _serialnos,_nserialnos))){
+        /*We fell off the start of the link, which means we don't need to keep
+           seeking any farther back.*/
+        left_link=1;
+      }
+    }
+    /*We started from at or before the beginning of the link and found nothing.
+      This should be impossible unless the contents of the source changed out
+       from under us after we read from it.*/
+    if((OP_UNLIKELY(left_link)||OP_UNLIKELY(!begin))&&OP_UNLIKELY(_offset<0)){
+      return OP_EBADLINK;
+    }
+    /*Bump up the chunk size.
+      This is mildly helpful when seeks are very expensive (http).*/
+    chunk_size=OP_MIN(2*chunk_size,OP_CHUNK_SIZE_MAX);
+    /*Avoid quadratic complexity if we hit an invalid patch of the file.*/
+    end=OP_MIN(begin+OP_PAGE_SIZE_MAX-1,original_end);
+  }
+  while(_offset<0);
+  *_gp=gp; /*Reached only on success; every error path returns earlier.*/
+  return _offset;
+}
+
+/*Uses the local ogg_stream storage in _of.
+  This is important for non-streaming input sources.
+  Parses the header packets of one link: first the Opus ID header, taken from
+   the BOS pages at the start of the link, then the comment header, taken from
+   the pages of the same stream that follow.
+  _head:       Filled in by opus_head_parse() from the first packet of the
+                first BOS page that holds a recognized Opus ID header.
+  _tags:       Filled in by opus_tags_parse() from the comment header packet.
+               Cleared again before returning OP_EBADHEADER for a comment
+                header that does not end its page.
+  _serialnos:  If non-NULL, the list that receives the serial number of every
+                BOS page seen; *_nserialnos is reset to 0 first, and
+                _nserialnos and _cserialnos are passed on to op_add_serialno().
+               If NULL, no serial numbers are collected.
+  _og:         On entry, the first page of the link.
+               It is reused as the buffer for every page read afterwards.
+  Return: 0 on success, or a negative value on failure.
+          OP_ENOTFORMAT: No Opus stream was found among the BOS pages.
+          OP_EBADHEADER: A BOS serial number was repeated, a page could not be
+                          read or a new link began after the ID header was
+                          found, there was a hole in the headers, or the
+                          comment header did not end its page.
+          Failures from op_add_serialno(), opus_head_parse() (other than
+           OP_ENOTFORMAT), and opus_tags_parse() are returned unchanged.*/
+static int op_fetch_headers_impl(OggOpusFile *_of,OpusHead *_head,
+ OpusTags *_tags,ogg_uint32_t **_serialnos,int *_nserialnos,
+ int *_cserialnos,ogg_page *_og){
+  ogg_packet op;
+  int        ret;
+  if(_serialnos!=NULL)*_nserialnos=0;
+  /*Extract the serialnos of all BOS pages plus the first set of Opus headers
+     we see in the link.*/
+  while(ogg_page_bos(_og)){
+    if(_serialnos!=NULL){
+      if(OP_UNLIKELY(op_lookup_page_serialno(_og,*_serialnos,*_nserialnos))){
+        /*A dupe serialnumber in an initial header packet set==invalid stream.*/
+        return OP_EBADHEADER;
+      }
+      ret=op_add_serialno(_og,_serialnos,_nserialnos,_cserialnos);
+      if(OP_UNLIKELY(ret<0))return ret;
+    }
+    if(_of->ready_state<OP_STREAMSET){
+      /*We don't have an Opus stream in this link yet, so begin prospective
+         stream setup.
+        We need a stream to get packets.*/
+      ogg_stream_reset_serialno(&_of->os,ogg_page_serialno(_og));
+      ogg_stream_pagein(&_of->os,_og);
+      if(OP_LIKELY(ogg_stream_packetout(&_of->os,&op)>0)){
+        ret=opus_head_parse(_head,op.packet,op.bytes);
+        /*Found a valid Opus header.
+          Continue setup.*/
+        if(OP_LIKELY(ret>=0))_of->ready_state=OP_STREAMSET;
+        /*If it's just a stream type we don't recognize, ignore it.
+          Everything else is fatal.*/
+        else if(ret!=OP_ENOTFORMAT)return ret;
+      }
+    }
+    /*Get the next page.
+      No need to clamp the boundary offset against _of->end, as all errors
+       become OP_ENOTFORMAT or OP_EBADHEADER.*/
+    if(OP_UNLIKELY(op_get_next_page(_of,_og,
+     OP_ADV_OFFSET(_of->offset,OP_CHUNK_SIZE))<0)){
+      return _of->ready_state<OP_STREAMSET?OP_ENOTFORMAT:OP_EBADHEADER;
+    }
+  }
+  if(OP_UNLIKELY(_of->ready_state!=OP_STREAMSET))return OP_ENOTFORMAT;
+  /*If the first non-header page belonged to our Opus stream, submit it.*/
+  if(_of->os.serialno==ogg_page_serialno(_og))ogg_stream_pagein(&_of->os,_og);
+  /*Loop getting packets.*/
+  for(;;){
+    switch(ogg_stream_packetout(&_of->os,&op)){
+      case 0:{
+        /*Loop getting pages.*/
+        for(;;){
+          /*No need to clamp the boundary offset against _of->end, as all
+             errors become OP_EBADHEADER.*/
+          if(OP_UNLIKELY(op_get_next_page(_of,_og,
+           OP_ADV_OFFSET(_of->offset,OP_CHUNK_SIZE))<0)){
+            return OP_EBADHEADER;
+          }
+          /*If this page belongs to the correct stream, go parse it.*/
+          if(_of->os.serialno==ogg_page_serialno(_og)){
+            ogg_stream_pagein(&_of->os,_og);
+            break;
+          }
+          /*If the link ends before we see the Opus comment header, abort.*/
+          if(OP_UNLIKELY(ogg_page_bos(_og)))return OP_EBADHEADER;
+          /*Otherwise, keep looking.*/
+        }
+      }break;
+      /*We shouldn't get a hole in the headers!*/
+      case -1:return OP_EBADHEADER;
+      default:{
+        /*Got a packet.
+          It should be the comment header.*/
+        ret=opus_tags_parse(_tags,op.packet,op.bytes);
+        if(OP_UNLIKELY(ret<0))return ret;
+        /*Make sure the page terminated at the end of the comment header.
+          If there is another packet on the page, or part of a packet, then
+           reject the stream.
+          Otherwise seekable sources won't be able to seek back to the start
+           properly.*/
+        ret=ogg_stream_packetout(&_of->os,&op);
+        if(OP_UNLIKELY(ret!=0)
+         ||OP_UNLIKELY(_og->header[_og->header_len-1]==255)){
+          /*If we fail, the caller assumes our tags are uninitialized.*/
+          opus_tags_clear(_tags);
+          return OP_EBADHEADER;
+        }
+        return 0;
+      }
+    }
+  }
+}
+/*Fetches the headers of a link by calling op_fetch_headers_impl().
+  _og: The first page of the link, or NULL to have that page read from the
+        source here.
+  The remaining parameters are passed to op_fetch_headers_impl() unchanged.
+  ready_state is set to OP_OPENED before parsing, and is put back to OP_OPENED
+   if parsing fails.
+  Return: The result of op_fetch_headers_impl(), or OP_ENOTFORMAT if _og was
+           NULL and no page could be read.*/
+static int op_fetch_headers(OggOpusFile *_of,OpusHead *_head,
+ OpusTags *_tags,ogg_uint32_t **_serialnos,int *_nserialnos,
+ int *_cserialnos,ogg_page *_og){
+  ogg_page og;
+  int      ret;
+  if(!_og){
+    /*No need to clamp the boundary offset against _of->end, as all errors
+       become OP_ENOTFORMAT.*/
+    if(OP_UNLIKELY(op_get_next_page(_of,&og,
+     OP_ADV_OFFSET(_of->offset,OP_CHUNK_SIZE))<0)){
+      return OP_ENOTFORMAT;
+    }
+    _og=&og;
+  }
+  _of->ready_state=OP_OPENED;
+  ret=op_fetch_headers_impl(_of,_head,_tags,_serialnos,_nserialnos,
+   _cserialnos,_og);
+  /*Revert back from OP_STREAMSET to OP_OPENED on failure, to prevent
+     double-free of the tags in an unseekable stream.*/
+  if(OP_UNLIKELY(ret<0))_of->ready_state=OP_OPENED;
+  return ret;
+}
+
+/*Granule position manipulation routines.
+  A granule position is defined to be an unsigned 64-bit integer, with the
+   special value -1 in two's complement indicating an unset or invalid granule
+   position.
+  We are not guaranteed to have an unsigned 64-bit type, so we construct the
+   following routines that
+   a) Properly order negative numbers as larger than positive numbers, and
+   b) Check for underflow or overflow past the special -1 value.
+  This lets us operate on the full, valid range of granule positions in a
+   consistent and safe manner.
+  This full range is organized into distinct regions:
+   [ -1 (invalid) ][ 0 ... OP_INT64_MAX ][ OP_INT64_MIN ...
-2 ][-1 (invalid) ] + + No one should actually use granule positions so large that they're negative, + even if they are technically valid, as very little software handles them + correctly (including most of Xiph.Org's). + This library also refuses to support durations so large they won't fit in a + signed 64-bit integer (to avoid exposing this mess to the application, and + to simplify a good deal of internal arithmetic), so the only way to use them + successfully is if pcm_start is very large. + This means there isn't anything you can do with negative granule positions + that you couldn't have done with purely non-negative ones. + The main purpose of these routines is to allow us to think very explicitly + about the possible failure cases of all granule position manipulations.*/ + +/*Safely adds a small signed integer to a valid (not -1) granule position. + The result can use the full 64-bit range of values (both positive and + negative), but will fail on overflow (wrapping past -1; wrapping past + OP_INT64_MAX is explicitly okay). + [out] _dst_gp: The resulting granule position. + Only modified on success. + _src_gp: The granule position to add to. + This must not be -1. + _delta: The amount to add. + This is allowed to be up to 32 bits to support the maximum + duration of a single Ogg page (255 packets * 120 ms per + packet == 1,468,800 samples at 48 kHz). 
+  Return: 0 on success, or OP_EINVAL if the result would wrap around past -1.*/
+static int op_granpos_add(ogg_int64_t *_dst_gp,ogg_int64_t _src_gp,
+ opus_int32 _delta){
+  ogg_int64_t base;
+  opus_int32  step;
+  /*-1 is never a legitimate input here, even though the arithmetic below
+     would cope with it.*/
+  OP_ASSERT(_src_gp!=-1);
+  base=_src_gp;
+  step=_delta;
+  if(step>0){
+    if(OP_UNLIKELY(base<0)){
+      /*Already in the wrapped (negative) half of the range: moving forward
+         by step has to stop short of the invalid value -1.*/
+      if(OP_UNLIKELY(base>=-1-step))return OP_EINVAL;
+    }
+    else if(OP_UNLIKELY(base>OP_INT64_MAX-step)){
+      /*The sum crosses from OP_INT64_MAX into the negative half, which is
+         explicitly allowed.
+        Signed overflow is undefined in C, so spend part of step on reaching
+         OP_INT64_MIN by hand and leave only the remainder for the final
+         addition.*/
+      step-=(opus_int32)(OP_INT64_MAX-base)+1;
+      base=OP_INT64_MIN;
+    }
+  }
+  else if(step<0){
+    if(base>=0){
+      /*In the non-negative half: moving backward must not drop below 0.*/
+      if(OP_UNLIKELY(base<-step))return OP_EINVAL;
+    }
+    else if(OP_UNLIKELY(base<OP_INT64_MIN-step)){
+      /*The result crosses from OP_INT64_MIN back into the positive half.
+        As above, take the wrap by hand so no undefined signed underflow
+         occurs.*/
+      step+=(opus_int32)(base-OP_INT64_MIN)+1;
+      base=OP_INT64_MAX;
+    }
+  }
+  /*A zero step, or any case that did not need special handling, is a plain
+     addition.*/
+  *_dst_gp=base+step;
+  return 0;
+}
+
+/*Safely computes the difference between two granule positions.
+  The difference must fit in a signed 64-bit integer, or the function fails.
+  It correctly handles the case where the granule position has wrapped around
+   from positive values to negative ones.
+  [out] _delta: The difference between the granule positions.
+                Only modified on success.
+  _gp_a:        The granule position to subtract from.
+                This must not be -1.
+  _gp_b:        The granule position to subtract.
+                This must not be -1.
+ Return: 0 on success, or OP_EINVAL if the result would not fit in a signed + 64-bit integer.*/ +static int op_granpos_diff(ogg_int64_t *_delta, + ogg_int64_t _gp_a,ogg_int64_t _gp_b){ + int gp_a_negative; + int gp_b_negative; + /*The code below handles these cases correctly, but there's no reason we + should ever be called with these values, so make sure we aren't.*/ + OP_ASSERT(_gp_a!=-1); + OP_ASSERT(_gp_b!=-1); + gp_a_negative=OP_UNLIKELY(_gp_a<0); + gp_b_negative=OP_UNLIKELY(_gp_b<0); + if(OP_UNLIKELY(gp_a_negative^gp_b_negative)){ + ogg_int64_t da; + ogg_int64_t db; + if(gp_a_negative){ + /*_gp_a has wrapped to a negative value but _gp_b hasn't: the difference + should be positive.*/ + /*Step 1: Handle wrapping.*/ + /*_gp_a < 0 => da < 0.*/ + da=(OP_INT64_MIN-_gp_a)-1; + /*_gp_b >= 0 => db >= 0.*/ + db=OP_INT64_MAX-_gp_b; + /*Step 2: Check for overflow.*/ + if(OP_UNLIKELY(OP_INT64_MAX+da<db))return OP_EINVAL; + *_delta=db-da; + } + else{ + /*_gp_b has wrapped to a negative value but _gp_a hasn't: the difference + should be negative.*/ + /*Step 1: Handle wrapping.*/ + /*_gp_a >= 0 => da <= 0*/ + da=_gp_a+OP_INT64_MIN; + /*_gp_b < 0 => db <= 0*/ + db=OP_INT64_MIN-_gp_b; + /*Step 2: Check for overflow.*/ + if(OP_UNLIKELY(da<OP_INT64_MIN-db))return OP_EINVAL; + *_delta=da+db; + } + } + else *_delta=_gp_a-_gp_b; + return 0; +} + +static int op_granpos_cmp(ogg_int64_t _gp_a,ogg_int64_t _gp_b){ + /*The invalid granule position -1 should behave like NaN: neither greater + than nor less than any other granule position, nor equal to any other + granule position, including itself. 
+ However, that means there isn't anything we could sensibly return from this + function for it.*/ + OP_ASSERT(_gp_a!=-1); + OP_ASSERT(_gp_b!=-1); + /*Handle the wrapping cases.*/ + if(OP_UNLIKELY(_gp_a<0)){ + if(_gp_b>=0)return 1; + /*Else fall through.*/ + } + else if(OP_UNLIKELY(_gp_b<0))return -1; + /*No wrapping case.*/ + return (_gp_a>_gp_b)-(_gp_b>_gp_a); +} + +/*Returns the duration of the packet (in samples at 48 kHz), or a negative + value on error.*/ +static int op_get_packet_duration(const unsigned char *_data,int _len){ + int nframes; + int frame_size; + int nsamples; + nframes=opus_packet_get_nb_frames(_data,_len); + if(OP_UNLIKELY(nframes<0))return OP_EBADPACKET; + frame_size=opus_packet_get_samples_per_frame(_data,48000); + nsamples=nframes*frame_size; + if(OP_UNLIKELY(nsamples>120*48))return OP_EBADPACKET; + return nsamples; +} + +/*This function more properly belongs in info.c, but we define it here to allow + the static granule position manipulation functions to remain static.*/ +ogg_int64_t opus_granule_sample(const OpusHead *_head,ogg_int64_t _gp){ + opus_int32 pre_skip; + pre_skip=_head->pre_skip; + if(_gp!=-1&&op_granpos_add(&_gp,_gp,-pre_skip))_gp=-1; + return _gp; +} + +/*Grab all the packets currently in the stream state, and compute their + durations. + _of->op_count is set to the number of packets collected. + [out] _durations: Returns the durations of the individual packets. + Return: The total duration of all packets, or OP_HOLE if there was a hole.*/ +static opus_int32 op_collect_audio_packets(OggOpusFile *_of, + int _durations[255]){ + opus_int32 total_duration; + int op_count; + /*Count the durations of all packets in the page.*/ + op_count=0; + total_duration=0; + for(;;){ + int ret; + /*This takes advantage of undocumented libogg behavior that returned + ogg_packet buffers are valid at least until the next page is + submitted. 
+ Relying on this is not too terrible, as _none_ of the Ogg memory + ownership/lifetime rules are well-documented. + But I can read its code and know this will work.*/ + ret=ogg_stream_packetout(&_of->os,_of->op+op_count); + if(!ret)break; + if(OP_UNLIKELY(ret<0)){ + /*We shouldn't get holes in the middle of pages.*/ + OP_ASSERT(op_count==0); + /*Set the return value and break out of the loop. + We want to make sure op_count gets set to 0, because we've ingested a + page, so any previously loaded packets are now invalid.*/ + total_duration=OP_HOLE; + break; + } + /*Unless libogg is broken, we can't get more than 255 packets from a + single page.*/ + OP_ASSERT(op_count<255); + _durations[op_count]=op_get_packet_duration(_of->op[op_count].packet, + _of->op[op_count].bytes); + if(OP_LIKELY(_durations[op_count]>0)){ + /*With at most 255 packets on a page, this can't overflow.*/ + total_duration+=_durations[op_count++]; + } + /*Ignore packets with an invalid TOC sequence.*/ + else if(op_count>0){ + /*But save the granule position, if there was one.*/ + _of->op[op_count-1].granulepos=_of->op[op_count].granulepos; + } + } + _of->op_pos=0; + _of->op_count=op_count; + return total_duration; +} + +/*Starting from current cursor position, get the initial PCM offset of the next + page. + This also validates the granule position on the first page with a completed + audio data packet, as required by the spec. + If this link is completely empty (no pages with completed packets), then this + function sets pcm_start=pcm_end=0 and returns the BOS page of the next link + (if any). + In the seekable case, we initialize pcm_end=-1 before calling this function, + so that later we can detect that the link was empty before calling + op_find_final_pcm_offset(). + [inout] _link: The link for which to find pcm_start. + [out] _og: Returns the BOS page of the next link if this link was empty. + In the unseekable case, we can then feed this to + op_fetch_headers() to start the next link. 
+                 The caller may pass NULL (e.g., for seekable streams), in
+                  which case this page will be discarded.
+  Return: 0 on success, 1 if there is a buffered BOS page available, or a
+           negative value on unrecoverable error.*/
+static int op_find_initial_pcm_offset(OggOpusFile *_of,
+ OggOpusLink *_link,ogg_page *_og){
+  ogg_page     og;
+  ogg_int64_t  pcm_start;
+  ogg_int64_t  prev_packet_gp;
+  ogg_int64_t  cur_page_gp;
+  ogg_uint32_t serialno;
+  opus_int32   total_duration;
+  int          durations[255];
+  int          cur_page_eos;
+  int          op_count;
+  int          pi;
+  if(_og==NULL)_og=&og; /*Pages land in local scratch storage instead.*/
+  serialno=_of->os.serialno;
+  op_count=0;
+  /*We shouldn't have to initialize total_duration, but gcc is too dumb to
+     figure out that op_count>0 implies we've been through the whole loop at
+     least once.*/
+  total_duration=0;
+  do{
+    opus_int64 llret;
+    llret=op_get_next_page(_of,_og,_of->end);
+    /*We should get a page unless the file is truncated or mangled.
+      Otherwise there are no audio data packets in the whole logical stream.*/
+    if(OP_UNLIKELY(llret<0)){
+      /*Fail if there was a read error.*/
+      if(llret<OP_FALSE)return (int)llret;
+      /*Fail if the pre-skip is non-zero, since it's asking us to skip more
+         samples than exist.*/
+      if(_link->head.pre_skip>0)return OP_EBADTIMESTAMP;
+      /*Set pcm_end and end_offset so we can skip the call to
+         op_find_final_pcm_offset().*/
+      _link->pcm_start=_link->pcm_end=0;
+      _link->end_offset=_link->data_offset;
+      return 0;
+    }
+    /*Similarly, if we hit the next link in the chain, we've gone too far.*/
+    if(OP_UNLIKELY(ogg_page_bos(_og))){
+      if(_link->head.pre_skip>0)return OP_EBADTIMESTAMP;
+      /*Set pcm_end and end_offset so we can skip the call to
+         op_find_final_pcm_offset().*/
+      _link->pcm_end=_link->pcm_start=0;
+      _link->end_offset=_link->data_offset;
+      /*Tell the caller we've got a buffered page for them.*/
+      return 1;
+    }
+    /*Ignore pages from other streams (not strictly necessary, because of the
+       checks in ogg_stream_pagein(), but saves some work).*/
+    if(serialno!=(ogg_uint32_t)ogg_page_serialno(_og))continue;
+    ogg_stream_pagein(&_of->os,_og);
+    /*Bitrate tracking: add the header's bytes here.
+      The body bytes are counted when we consume the packets.*/
+    _of->bytes_tracked+=_og->header_len;
+    /*Count the durations of all packets in the page.
+      op_collect_audio_packets() reports a hole as a negative total, in which
+       case we simply collect again.*/
+    do total_duration=op_collect_audio_packets(_of,durations);
+    /*Ignore holes.*/
+    while(OP_UNLIKELY(total_duration<0));
+    op_count=_of->op_count;
+  }
+  while(op_count<=0); /*Keep reading until a page yields at least one packet.*/
+  /*We found the first page with a completed audio data packet: actually look
+     at the granule position.
+    RFC 3533 says, "A special value of -1 (in two's complement) indicates that
+     no packets finish on this page," which does not say that a granule
+     position that is NOT -1 indicates that some packets DO finish on that page
+     (even though this was the intention, libogg itself violated this intention
+     for years before we fixed it).
+    The Ogg Opus specification only imposes its start-time requirements
+     on the granule position of the first page with completed packets,
+     so we ignore any set granule positions until then.*/
+  cur_page_gp=_of->op[op_count-1].granulepos;
+  /*But getting a packet without a valid granule position on the page is not
+     okay.*/
+  if(cur_page_gp==-1)return OP_EBADTIMESTAMP;
+  cur_page_eos=_of->op[op_count-1].e_o_s;
+  if(OP_LIKELY(!cur_page_eos)){
+    /*The EOS flag wasn't set.
+      Work backwards from the provided granule position to get the starting PCM
+       offset.*/
+    if(OP_UNLIKELY(op_granpos_add(&pcm_start,cur_page_gp,-total_duration)<0)){
+      /*The starting granule position MUST not be smaller than the amount of
+         audio on the first page with completed packets.*/
+      return OP_EBADTIMESTAMP;
+    }
+  }
+  else{
+    /*The first page with completed packets was also the last.*/
+    if(OP_LIKELY(op_granpos_add(&pcm_start,cur_page_gp,-total_duration)<0)){
+      /*If there's less audio on the page than indicated by the granule
+         position, then we're doing end-trimming, and the starting PCM offset
+         is zero by spec mandate.*/
+      pcm_start=0;
+      /*However, the end-trimming MUST not ask us to trim more samples than
+         exist after applying the pre-skip.*/
+      if(OP_UNLIKELY(op_granpos_cmp(cur_page_gp,_link->head.pre_skip)<0)){
+        return OP_EBADTIMESTAMP;
+      }
+    }
+  }
+  /*Timestamp the individual packets.*/
+  prev_packet_gp=pcm_start;
+  for(pi=0;pi<op_count;pi++){
+    if(cur_page_eos){
+      ogg_int64_t diff;
+      OP_ALWAYS_TRUE(!op_granpos_diff(&diff,cur_page_gp,prev_packet_gp));
+      diff=durations[pi]-diff; /*Samples of this packet past cur_page_gp.*/
+      /*If we have samples to trim...*/
+      if(diff>0){
+        /*If we trimmed the entire packet, stop (the spec says encoders
+           shouldn't do this, but we support it anyway).*/
+        if(OP_UNLIKELY(diff>durations[pi]))break;
+        _of->op[pi].granulepos=prev_packet_gp=cur_page_gp;
+        /*Move the EOS flag to this packet, if necessary, so we'll trim the
+           samples.*/
+        _of->op[pi].e_o_s=1;
+        continue;
+      }
+    }
+    /*Update the granule position as normal.*/
+    OP_ALWAYS_TRUE(!op_granpos_add(&_of->op[pi].granulepos,
+     prev_packet_gp,durations[pi]));
+    prev_packet_gp=_of->op[pi].granulepos;
+  }
+  /*Update the packet count after end-trimming.*/
+  _of->op_count=pi;
+  _of->cur_discard_count=_link->head.pre_skip;
+  _of->prev_packet_gp=_link->pcm_start=pcm_start;
+  return 0;
+}
+
+/*Starting from current cursor position, get the final PCM offset of the
+   previous page.
+ This also validates the duration of the link, which, while not strictly + required by the spec, we need to ensure duration calculations don't + overflow. + This is only done for seekable sources. + We must validate that op_find_initial_pcm_offset() succeeded for this link + before calling this function, otherwise it will scan the entire stream + backwards until it reaches the start, and then fail.*/ +static int op_find_final_pcm_offset(OggOpusFile *_of, + const ogg_uint32_t *_serialnos,int _nserialnos,OggOpusLink *_link, + opus_int64 _offset,ogg_uint32_t _end_serialno,ogg_int64_t _end_gp, + ogg_int64_t *_total_duration){ + ogg_int64_t total_duration; + ogg_int64_t duration; + ogg_uint32_t cur_serialno; + /*For the time being, fetch end PCM offset the simple way.*/ + cur_serialno=_link->serialno; + if(_end_serialno!=cur_serialno||_end_gp==-1){ + _offset=op_get_last_page(_of,&_end_gp,_offset, + cur_serialno,_serialnos,_nserialnos); + if(OP_UNLIKELY(_offset<0))return (int)_offset; + } + /*At worst we should have found the first page with completed packets.*/ + if(OP_UNLIKELY(_offset<_link->data_offset))return OP_EBADLINK; + /*This implementation requires that the difference between the first and last + granule positions in each link be representable in a signed, 64-bit + number, and that each link also have at least as many samples as the + pre-skip requires.*/ + if(OP_UNLIKELY(op_granpos_diff(&duration,_end_gp,_link->pcm_start)<0) + ||OP_UNLIKELY(duration<_link->head.pre_skip)){ + return OP_EBADTIMESTAMP; + } + /*We also require that the total duration be representable in a signed, + 64-bit number.*/ + duration-=_link->head.pre_skip; + total_duration=*_total_duration; + if(OP_UNLIKELY(OP_INT64_MAX-duration<total_duration))return OP_EBADTIMESTAMP; + *_total_duration=total_duration+duration; + _link->pcm_end=_end_gp; + _link->end_offset=_offset; + return 0; +} + +/*Rescale the number _x from the range [0,_from] to [0,_to]. 
+  _from and _to must be positive.*/
+static opus_int64 op_rescale64(opus_int64 _x,opus_int64 _from,opus_int64 _to){
+  opus_int64 frac;
+  opus_int64 ret;
+  int        i;
+  if(_x>=_from)return _to;
+  if(_x<=0)return 0;
+  frac=0; /*Collects _x/_from as 63 binary fraction bits, MSB first.*/
+  for(i=0;i<63;i++){
+    frac<<=1;
+    OP_ASSERT(_x<=_from);
+    if(_x>=_from>>1){
+      _x-=_from-_x; /*Equal to 2*_x-_from, without ever forming 2*_x.*/
+      frac|=1;
+    }
+    else _x<<=1;
+  }
+  ret=0; /*Now scale _to by that fraction, consuming its bits LSB first.*/
+  for(i=0;i<63;i++){
+    if(frac&1)ret=(ret&_to&1)+(ret>>1)+(_to>>1); /*(ret+_to)>>1, no overflow.*/
+    else ret>>=1;
+    frac>>=1;
+  }
+  return ret;
+}
+
+/*The minimum granule position spacing allowed for making predictions.
+  This corresponds to about 1 second of audio at 48 kHz for both Opus and
+   Vorbis, or one keyframe interval in Theora with the default keyframe spacing
+   of 256.*/
+#define OP_GP_SPACING_MIN (48000)
+
+/*Try to estimate the location of the next link using the current seek
+   records, assuming the initial granule position of any streams we've found is
+   0.
+  _sr:           The seek records.
+                 Each record is paired only with lower-indexed records that
+                  carry the same serial number and a granule position at least
+                  OP_GP_SPACING_MIN greater.
+  _nsr:          The number of seek records.
+  _searched:     Predictions below this offset are discarded.
+  _end_searched: Predictions must fall more than OP_CHUNK_SIZE before this
+                  offset.
+  _bias:         Added to each predicted offset.
+  Return: The smallest acceptable predicted offset, or -1 if no prediction
+           could be made.*/
+static opus_int64 op_predict_link_start(const OpusSeekRecord *_sr,int _nsr,
+ opus_int64 _searched,opus_int64 _end_searched,opus_int32 _bias){
+  opus_int64 bisect;
+  int        sri;
+  int        srj;
+  /*Require that we be at least OP_CHUNK_SIZE from the end.
+    We don't require that we be at least OP_CHUNK_SIZE from the beginning,
+     because if we are we'll just scan forward without seeking.*/
+  _end_searched-=OP_CHUNK_SIZE;
+  if(_searched>=_end_searched)return -1;
+  bisect=_end_searched;
+  for(sri=0;sri<_nsr;sri++){
+    ogg_int64_t  gp1;
+    ogg_int64_t  gp2_min;
+    ogg_uint32_t serialno1;
+    opus_int64   offset1;
+    /*If the granule position is negative, either it's invalid or we'd cause
+       overflow.*/
+    gp1=_sr[sri].gp;
+    if(gp1<0)continue;
+    /*We require some minimum distance between granule positions to make an
+       estimate.
+      We don't actually know what granule position scheme is being used,
+       because we have no idea what kind of stream these came from.
+      Therefore we require a minimum spacing between them, with the
+       expectation that while bitrates and granule position increments might
+       vary locally in quite complex ways, they are globally smooth.*/
+    if(OP_UNLIKELY(op_granpos_add(&gp2_min,gp1,OP_GP_SPACING_MIN)<0)){
+      /*No granule position would satisfy us.*/
+      continue;
+    }
+    offset1=_sr[sri].offset;
+    serialno1=_sr[sri].serialno;
+    for(srj=sri;srj-->0;){
+      ogg_int64_t gp2;
+      opus_int64  offset2;
+      opus_int64  num;
+      ogg_int64_t den;
+      ogg_int64_t ipart;
+      gp2=_sr[srj].gp;
+      if(gp2<gp2_min)continue;
+      /*Oh, and also make sure these came from the same stream.*/
+      if(_sr[srj].serialno!=serialno1)continue;
+      offset2=_sr[srj].offset;
+      /*For once, we can subtract with impunity.*/
+      den=gp2-gp1;
+      ipart=gp2/den;
+      num=offset2-offset1;
+      OP_ASSERT(num>0);
+      if(ipart>0&&(offset2-_searched)/ipart<num)continue;
+      offset2-=ipart*num;
+      gp2-=ipart*den;
+      offset2-=op_rescale64(gp2,den,num)-_bias;
+      if(offset2<_searched)continue;
+      bisect=OP_MIN(bisect,offset2);
+      break;
+    }
+  }
+  return bisect>=_end_searched?-1:bisect;
+}
+
+/*Finds each bitstream link, one at a time, using a bisection search.
+  This has to begin by knowing the offset of the first link's initial page.
+  _searched:   The offset from which the search for the next link starts.
+  _sr:         Storage for the seek records.
+               On entry _sr[0] must describe the last page in the file.
+  _csr:        The number of records _sr has room for.
+  _serialnos:  The serial number list of the most recently parsed link.
+               It is handed to op_fetch_headers() for each new link, and is
+                freed and set to NULL on success.
+  _nserialnos: The number of entries in *_serialnos (set to 0 on success).
+  _cserialnos: Passed through to op_fetch_headers() (set to 0 on success);
+                presumably the allocated capacity of *_serialnos.
+  Return: 0 on success, or a negative value on failure.
+          OP_EFAULT: The links array could not be grown.
+          Failures from op_seek_helper(), op_get_next_page(),
+           op_find_final_pcm_offset(), op_fetch_headers(), and
+           op_find_initial_pcm_offset() are returned unchanged.*/
+static int op_bisect_forward_serialno(OggOpusFile *_of,
+ opus_int64 _searched,OpusSeekRecord *_sr,int _csr,
+ ogg_uint32_t **_serialnos,int *_nserialnos,int *_cserialnos){
+  ogg_page      og;
+  OggOpusLink  *links;
+  int           nlinks;
+  int           clinks;
+  ogg_uint32_t *serialnos;
+  int           nserialnos;
+  ogg_int64_t   total_duration;
+  int           nsr;
+  int           ret;
+  links=_of->links;
+  nlinks=clinks=_of->nlinks;
+  total_duration=0;
+  /*We start with one seek record, for the last page in the file.
+    We build up a list of records for places we seek to during link
+     enumeration.
+    This list is kept sorted in reverse order.
+    We only care about seek locations that were _not_ in the current link,
+     therefore we can add them one at a time to the end of the list as we
+     improve the lower bound on the location where the next link starts.*/
+  nsr=1;
+  for(;;){
+    opus_int64  end_searched;
+    opus_int64  bisect;
+    opus_int64  next;
+    opus_int64  last;
+    ogg_int64_t end_offset;
+    ogg_int64_t end_gp;
+    int         sri;
+    serialnos=*_serialnos;
+    nserialnos=*_nserialnos;
+    if(OP_UNLIKELY(nlinks>=clinks)){
+      if(OP_UNLIKELY(clinks>INT_MAX-1>>1))return OP_EFAULT;
+      clinks=2*clinks+1;
+      OP_ASSERT(nlinks<clinks);
+      links=(OggOpusLink *)_ogg_realloc(links,sizeof(*links)*clinks);
+      if(OP_UNLIKELY(links==NULL))return OP_EFAULT;
+      _of->links=links;
+    }
+    /*Invariants:
+      We have the headers and serial numbers for the link beginning at 'begin'.
+      We have the offset and granule position of the last page in the file
+       (potentially not a page we care about).*/
+    /*Scan the seek records we already have to save us some bisection.*/
+    for(sri=0;sri<nsr;sri++){
+      if(op_lookup_serialno(_sr[sri].serialno,serialnos,nserialnos))break;
+    }
+    /*Is the last page in our current list of serial numbers?*/
+    if(sri<=0)break;
+    /*Last page wasn't found.
+      We have at least one more link.*/
+    last=-1;
+    end_searched=_sr[sri-1].search_start;
+    next=_sr[sri-1].offset;
+    end_gp=-1;
+    if(sri<nsr){
+      _searched=_sr[sri].offset+_sr[sri].size;
+      if(_sr[sri].serialno==links[nlinks-1].serialno){
+        end_gp=_sr[sri].gp;
+        end_offset=_sr[sri].offset;
+      }
+    }
+    nsr=sri;
+    bisect=-1;
+    /*If we've already found the end of at least one link, try to pick the
+       first bisection point at twice the average link size.
+      This is a good choice for files with lots of links that are all about the
+       same size.*/
+    if(nlinks>1){
+      opus_int64 last_offset;
+      opus_int64 avg_link_size;
+      opus_int64 upper_limit;
+      last_offset=links[nlinks-1].offset;
+      avg_link_size=last_offset/(nlinks-1);
+      upper_limit=end_searched-OP_CHUNK_SIZE-avg_link_size;
+      if(OP_LIKELY(last_offset>_searched-avg_link_size)
+       &&OP_LIKELY(last_offset<upper_limit)){
+        bisect=last_offset+avg_link_size;
+        if(OP_LIKELY(bisect<upper_limit))bisect+=avg_link_size;
+      }
+    }
+    /*We guard against garbage separating the last and first pages of two
+       links below.*/
+    while(_searched<end_searched){
+      opus_int32 next_bias;
+      /*If we don't have a better estimate, use simple bisection.*/
+      if(bisect==-1)bisect=_searched+(end_searched-_searched>>1);
+      /*If we're within OP_CHUNK_SIZE of the start, scan forward.*/
+      if(bisect-_searched<OP_CHUNK_SIZE)bisect=_searched;
+      /*Otherwise we're skipping data.
+        Forget the end page, if we saw one, as we might miss a later one.*/
+      else end_gp=-1;
+      ret=op_seek_helper(_of,bisect);
+      if(OP_UNLIKELY(ret<0))return ret;
+      last=op_get_next_page(_of,&og,_sr[nsr-1].offset);
+      if(OP_UNLIKELY(last<OP_FALSE))return (int)last;
+      next_bias=0;
+      if(last==OP_FALSE)end_searched=bisect;
+      else{
+        ogg_uint32_t serialno;
+        ogg_int64_t  gp;
+        serialno=ogg_page_serialno(&og);
+        gp=ogg_page_granulepos(&og);
+        if(!op_lookup_serialno(serialno,serialnos,nserialnos)){
+          end_searched=bisect;
+          next=last;
+          /*In reality we should always have enough room, but be paranoid.*/
+          if(OP_LIKELY(nsr<_csr)){
+            _sr[nsr].search_start=bisect;
+            _sr[nsr].offset=last;
+            OP_ASSERT(_of->offset-last>=0);
+            OP_ASSERT(_of->offset-last<=OP_PAGE_SIZE_MAX);
+            _sr[nsr].size=(opus_int32)(_of->offset-last);
+            _sr[nsr].serialno=serialno;
+            _sr[nsr].gp=gp;
+            nsr++;
+          }
+        }
+        else{
+          _searched=_of->offset;
+          next_bias=OP_CHUNK_SIZE;
+          if(serialno==links[nlinks-1].serialno){
+            /*This page was from the stream we want, remember it.
+              If it's the last such page in the link, we won't have to go back
+               looking for it later.*/
+            end_gp=gp;
+            end_offset=last;
+          }
+        }
+      }
+      bisect=op_predict_link_start(_sr,nsr,_searched,end_searched,next_bias);
+    }
+    /*Bisection point found.
+      Get the final granule position of the previous link, assuming
+       op_find_initial_pcm_offset() didn't already determine the link was
+       empty.*/
+    if(OP_LIKELY(links[nlinks-1].pcm_end==-1)){
+      if(end_gp==-1){
+        /*If we don't know where the end page is, we'll have to seek back and
+           look for it, starting from the end of the link.*/
+        end_offset=next;
+        /*Also forget the last page we read.
+          It won't be available after the seek.*/
+        last=-1;
+      }
+      ret=op_find_final_pcm_offset(_of,serialnos,nserialnos,
+       links+nlinks-1,end_offset,links[nlinks-1].serialno,end_gp,
+       &total_duration);
+      if(OP_UNLIKELY(ret<0))return ret;
+    }
+    if(last!=next){
+      /*The last page we read was not the first page of the next link.
+        Move the cursor position to the offset of that first page.
+        This only performs an actual seek if the first page of the next link
+         does not start at the end of the last page from the current Opus
+         stream with a valid granule position.*/
+      ret=op_seek_helper(_of,next);
+      if(OP_UNLIKELY(ret<0))return ret;
+    }
+    ret=op_fetch_headers(_of,&links[nlinks].head,&links[nlinks].tags,
+     _serialnos,_nserialnos,_cserialnos,last!=next?NULL:&og);
+    if(OP_UNLIKELY(ret<0))return ret;
+    links[nlinks].offset=next;
+    links[nlinks].data_offset=_of->offset;
+    links[nlinks].serialno=_of->os.serialno;
+    links[nlinks].pcm_end=-1;
+    /*This might consume a page from the next link, however the next bisection
+       always starts with a seek.*/
+    ret=op_find_initial_pcm_offset(_of,links+nlinks,NULL);
+    if(OP_UNLIKELY(ret<0))return ret;
+    _searched=_of->offset;
+    /*Mark the current link count so it can be cleaned up on error.*/
+    _of->nlinks=++nlinks;
+  }
+  /*Last page is in the starting serialno list, so we've reached the last link.
+    Now find the last granule position for it (if we didn't the first time we
+     looked at the end of the stream, and if op_find_initial_pcm_offset()
+     didn't already determine the link was empty).*/
+  if(OP_LIKELY(links[nlinks-1].pcm_end==-1)){
+    ret=op_find_final_pcm_offset(_of,serialnos,nserialnos,
+     links+nlinks-1,_sr[0].offset,_sr[0].serialno,_sr[0].gp,&total_duration);
+    if(OP_UNLIKELY(ret<0))return ret;
+  }
+  /*Trim back the links array if necessary.
+    If the shrinking realloc fails, the existing (larger) array is kept.*/
+  links=(OggOpusLink *)_ogg_realloc(links,sizeof(*links)*nlinks);
+  if(OP_LIKELY(links!=NULL))_of->links=links;
+  /*We also don't need these anymore.*/
+  _ogg_free(*_serialnos);
+  *_serialnos=NULL;
+  *_cserialnos=*_nserialnos=0;
+  return 0;
+}
+/*Recomputes the output gain for the current link and applies it to the
+   decoder with OPUS_SET_GAIN.
+  Does nothing while ready_state is below OP_INITSET.
+  The gain starts from gain_offset_q8; OP_HEADER_GAIN adds the header's
+   output_gain, OP_TRACK_GAIN adds the track gain from the tags and then the
+   header's output_gain, and OP_ABSOLUTE_GAIN adds nothing.
+  The sum is clamped to [-32768,32767].*/
+static void op_update_gain(OggOpusFile *_of){
+  OpusHead   *head;
+  opus_int32  gain_q8;
+  int         li;
+  /*If decode isn't ready, then we'll apply the gain when we initialize the
+     decoder.*/
+  if(_of->ready_state<OP_INITSET)return;
+  gain_q8=_of->gain_offset_q8;
+  li=_of->seekable?_of->cur_link:0;
+  head=&_of->links[li].head;
+  /*We don't have to worry about overflow here because the header gain and
+     track gain must lie in the range [-32768,32767], and the user-supplied
+     offset has been pre-clamped to [-98302,98303].*/
+  switch(_of->gain_type){
+    case OP_TRACK_GAIN:{
+      int track_gain_q8;
+      track_gain_q8=0;
+      opus_tags_get_track_gain(&_of->links[li].tags,&track_gain_q8);
+      gain_q8+=track_gain_q8;
+    }
+    /*Fall through.*/
+    case OP_HEADER_GAIN:gain_q8+=head->output_gain;break;
+    case OP_ABSOLUTE_GAIN:break;
+    default:OP_ASSERT(0);
+  }
+  gain_q8=OP_CLAMP(-32768,gain_q8,32767);
+  OP_ASSERT(_of->od!=NULL);
+#if defined(OPUS_SET_GAIN)
+  opus_multistream_decoder_ctl(_of->od,OPUS_SET_GAIN(gain_q8));
+#else
+/*A fallback that works with both float and fixed-point is a bunch of work,
+   so just force people to use a sufficiently new version.
+  This is deployed well enough at this point that this shouldn't be a burden.*/
+# error "libopus 1.0.1 or later required"
+#endif
+}
+/*Moves the handle from OP_STREAMSET to OP_INITSET by getting a multistream
+   decoder ready for the current link.
+  An existing decoder is kept and reset if its stream count, coupled count,
+   channel count, and channel mapping all match the link's header; otherwise it
+   is destroyed and a new one is created at 48000 Hz.
+  The byte and sample tracking counters are zeroed and the gain is re-applied.
+  Return: 0 on success, or if ready_state was already past OP_STREAMSET.
+          OP_EFAULT if ready_state was below OP_STREAMSET, or if the decoder
+           could not be created.*/
+static int op_make_decode_ready(OggOpusFile *_of){
+  const OpusHead *head;
+  int             li;
+  int             stream_count;
+  int             coupled_count;
+  int             channel_count;
+  if(_of->ready_state>OP_STREAMSET)return 0;
+  if(OP_UNLIKELY(_of->ready_state<OP_STREAMSET))return OP_EFAULT;
+  li=_of->seekable?_of->cur_link:0;
+  head=&_of->links[li].head;
+  stream_count=head->stream_count;
+  coupled_count=head->coupled_count;
+  channel_count=head->channel_count;
+  /*Check to see if the current decoder is compatible with the current link.*/
+  if(_of->od!=NULL&&_of->od_stream_count==stream_count
+   &&_of->od_coupled_count==coupled_count&&_of->od_channel_count==channel_count
+   &&memcmp(_of->od_mapping,head->mapping,
+   sizeof(*head->mapping)*channel_count)==0){
+    opus_multistream_decoder_ctl(_of->od,OPUS_RESET_STATE);
+  }
+  else{
+    int err;
+    opus_multistream_decoder_destroy(_of->od);
+    _of->od=opus_multistream_decoder_create(48000,channel_count,
+     stream_count,coupled_count,head->mapping,&err);
+    if(_of->od==NULL)return OP_EFAULT;
+    _of->od_stream_count=stream_count;
+    _of->od_coupled_count=coupled_count;
+    _of->od_channel_count=channel_count;
+    memcpy(_of->od_mapping,head->mapping,sizeof(*head->mapping)*channel_count);
+  }
+  _of->ready_state=OP_INITSET;
+  _of->bytes_tracked=0;
+  _of->samples_tracked=0;
+#if !defined(OPUS_FIXED_POINT)
+  _of->state_channel_count=0;
+  /*Use the serial number for the PRNG seed to get repeatable output for
+     straight play-throughs.*/
+  _of->dither_seed=_of->links[li].serialno;
+#endif
+  op_update_gain(_of);
+  return 0;
+}
+
+static int op_open_seekable2_impl(OggOpusFile *_of){
+  /*64 seek records should be enough for anybody.
+ Actually, with a bisection search in a 63-bit range down to OP_CHUNK_SIZE + granularity, much more than enough.*/ + OpusSeekRecord sr[64]; + opus_int64 data_offset; + int ret; + /*We can seek, so set out learning all about this file.*/ + (*_of->callbacks.seek)(_of->source,0,SEEK_END); + _of->offset=_of->end=(*_of->callbacks.tell)(_of->source); + if(OP_UNLIKELY(_of->end<0))return OP_EREAD; + data_offset=_of->links[0].data_offset; + if(OP_UNLIKELY(_of->end<data_offset))return OP_EBADLINK; + /*Get the offset of the last page of the physical bitstream, or, if we're + lucky, the last Opus page of the first link, as most Ogg Opus files will + contain a single logical bitstream.*/ + ret=op_get_prev_page_serial(_of,sr,_of->end, + _of->links[0].serialno,_of->serialnos,_of->nserialnos); + if(OP_UNLIKELY(ret<0))return ret; + /*If there's any trailing junk, forget about it.*/ + _of->end=sr[0].offset+sr[0].size; + if(OP_UNLIKELY(_of->end<data_offset))return OP_EBADLINK; + /*Now enumerate the bitstream structure.*/ + return op_bisect_forward_serialno(_of,data_offset,sr,sizeof(sr)/sizeof(*sr), + &_of->serialnos,&_of->nserialnos,&_of->cserialnos); +} + +static int op_open_seekable2(OggOpusFile *_of){ + ogg_sync_state oy_start; + ogg_stream_state os_start; + ogg_packet *op_start; + opus_int64 start_offset; + int start_op_count; + int ret; + /*We're partially open and have a first link header state in storage in _of. + Save off that stream state so we can come back to it. + It would be simpler to just dump all this state and seek back to + links[0].data_offset when we're done. + But we do the extra work to allow us to seek back to _exactly_ the same + stream position we're at now. + This allows, e.g., the HTTP backend to continue reading from the original + connection (if it's still available), instead of opening a new one. 
+ This means we can open and start playing a normal Opus file with a single + link and reasonable packet sizes using only two HTTP requests.*/ + start_op_count=_of->op_count; + /*This is a bit too large to put on the stack unconditionally.*/ + op_start=(ogg_packet *)_ogg_malloc(sizeof(*op_start)*start_op_count); + if(op_start==NULL)return OP_EFAULT; + *&oy_start=_of->oy; + *&os_start=_of->os; + start_offset=_of->offset; + memcpy(op_start,_of->op,sizeof(*op_start)*start_op_count); + OP_ASSERT((*_of->callbacks.tell)(_of->source)==op_position(_of)); + ogg_sync_init(&_of->oy); + ogg_stream_init(&_of->os,-1); + ret=op_open_seekable2_impl(_of); + /*Restore the old stream state.*/ + ogg_stream_clear(&_of->os); + ogg_sync_clear(&_of->oy); + *&_of->oy=*&oy_start; + *&_of->os=*&os_start; + _of->offset=start_offset; + _of->op_count=start_op_count; + memcpy(_of->op,op_start,sizeof(*_of->op)*start_op_count); + _ogg_free(op_start); + _of->prev_packet_gp=_of->links[0].pcm_start; + _of->cur_discard_count=_of->links[0].head.pre_skip; + if(OP_UNLIKELY(ret<0))return ret; + /*And restore the position indicator.*/ + ret=(*_of->callbacks.seek)(_of->source,op_position(_of),SEEK_SET); + return OP_UNLIKELY(ret<0)?OP_EREAD:0; +} + +/*Clear out the current logical bitstream decoder.*/ +static void op_decode_clear(OggOpusFile *_of){ + /*We don't actually free the decoder. 
+ We might be able to re-use it for the next link.*/ + _of->op_count=0; + _of->od_buffer_size=0; + _of->prev_packet_gp=-1; + if(!_of->seekable){ + OP_ASSERT(_of->ready_state>=OP_INITSET); + opus_tags_clear(&_of->links[0].tags); + } + _of->ready_state=OP_OPENED; +} + +static void op_clear(OggOpusFile *_of){ + OggOpusLink *links; + _ogg_free(_of->od_buffer); + if(_of->od!=NULL)opus_multistream_decoder_destroy(_of->od); + links=_of->links; + if(!_of->seekable){ + if(_of->ready_state>OP_OPENED||_of->ready_state==OP_PARTOPEN){ + opus_tags_clear(&links[0].tags); + } + } + else if(OP_LIKELY(links!=NULL)){ + int nlinks; + int link; + nlinks=_of->nlinks; + for(link=0;link<nlinks;link++)opus_tags_clear(&links[link].tags); + } + _ogg_free(links); + _ogg_free(_of->serialnos); + ogg_stream_clear(&_of->os); + ogg_sync_clear(&_of->oy); + if(_of->callbacks.close!=NULL)(*_of->callbacks.close)(_of->source); +} + +static int op_open1(OggOpusFile *_of, + void *_source,const OpusFileCallbacks *_cb, + const unsigned char *_initial_data,size_t _initial_bytes){ + ogg_page og; + ogg_page *pog; + int seekable; + int ret; + memset(_of,0,sizeof(*_of)); + _of->end=-1; + _of->source=_source; + *&_of->callbacks=*_cb; + /*At a minimum, we need to be able to read data.*/ + if(OP_UNLIKELY(_of->callbacks.read==NULL))return OP_EREAD; + /*Initialize the framing state.*/ + ogg_sync_init(&_of->oy); + /*Perhaps some data was previously read into a buffer for testing against + other stream types. + Allow initialization from this previously read data (especially as we may + be reading from a non-seekable stream). + This requires copying it into a buffer allocated by ogg_sync_buffer() and + doesn't support seeking, so this is not a good mechanism to use for + decoding entire files from RAM.*/ + if(_initial_bytes>0){ + char *buffer; + buffer=ogg_sync_buffer(&_of->oy,_initial_bytes); + memcpy(buffer,_initial_data,_initial_bytes*sizeof(*buffer)); + ogg_sync_wrote(&_of->oy,_initial_bytes); + } + /*Can we seek? 
+ Stevens suggests the seek test is portable.*/ + seekable=_cb->seek!=NULL&&(*_cb->seek)(_source,0,SEEK_CUR)!=-1; + /*If seek is implemented, tell must also be implemented.*/ + if(seekable){ + opus_int64 pos; + if(OP_UNLIKELY(_of->callbacks.tell==NULL))return OP_EINVAL; + pos=(*_of->callbacks.tell)(_of->source); + /*If the current position is not equal to the initial bytes consumed, + absolute seeking will not work.*/ + if(OP_UNLIKELY(pos!=(opus_int64)_initial_bytes))return OP_EINVAL; + } + _of->seekable=seekable; + /*Don't seek yet. + Set up a 'single' (current) logical bitstream entry for partial open.*/ + _of->links=(OggOpusLink *)_ogg_malloc(sizeof(*_of->links)); + /*The serialno gets filled in later by op_fetch_headers().*/ + ogg_stream_init(&_of->os,-1); + pog=NULL; + for(;;){ + /*Fetch all BOS pages, store the Opus header and all seen serial numbers, + and load subsequent Opus setup headers.*/ + ret=op_fetch_headers(_of,&_of->links[0].head,&_of->links[0].tags, + &_of->serialnos,&_of->nserialnos,&_of->cserialnos,pog); + if(OP_UNLIKELY(ret<0))break; + _of->nlinks=1; + _of->links[0].offset=0; + _of->links[0].data_offset=_of->offset; + _of->links[0].pcm_end=-1; + _of->links[0].serialno=_of->os.serialno; + /*Fetch the initial PCM offset.*/ + ret=op_find_initial_pcm_offset(_of,_of->links,&og); + if(seekable||OP_LIKELY(ret<=0))break; + /*This link was empty, but we already have the BOS page for the next one in + og. + We can't seek, so start processing the next link right now.*/ + opus_tags_clear(&_of->links[0].tags); + _of->nlinks=0; + if(!seekable)_of->cur_link++; + pog=&og; + } + if(OP_LIKELY(ret>=0))_of->ready_state=OP_PARTOPEN; + return ret; +} + +static int op_open2(OggOpusFile *_of){ + int ret; + OP_ASSERT(_of->ready_state==OP_PARTOPEN); + if(_of->seekable){ + _of->ready_state=OP_OPENED; + ret=op_open_seekable2(_of); + } + else ret=0; + if(OP_LIKELY(ret>=0)){ + /*We have buffered packets from op_find_initial_pcm_offset(). 
+ Move to OP_INITSET so we can use them.*/ + _of->ready_state=OP_STREAMSET; + ret=op_make_decode_ready(_of); + if(OP_LIKELY(ret>=0))return 0; + } + /*Don't auto-close the stream on failure.*/ + _of->callbacks.close=NULL; + op_clear(_of); + return ret; +} + +OggOpusFile *op_test_callbacks(void *_source,const OpusFileCallbacks *_cb, + const unsigned char *_initial_data,size_t _initial_bytes,int *_error){ + OggOpusFile *of; + int ret; + of=(OggOpusFile *)_ogg_malloc(sizeof(*of)); + ret=OP_EFAULT; + if(OP_LIKELY(of!=NULL)){ + ret=op_open1(of,_source,_cb,_initial_data,_initial_bytes); + if(OP_LIKELY(ret>=0)){ + if(_error!=NULL)*_error=0; + return of; + } + /*Don't auto-close the stream on failure.*/ + of->callbacks.close=NULL; + op_clear(of); + _ogg_free(of); + } + if(_error!=NULL)*_error=ret; + return NULL; +} + +OggOpusFile *op_open_callbacks(void *_source,const OpusFileCallbacks *_cb, + const unsigned char *_initial_data,size_t _initial_bytes,int *_error){ + OggOpusFile *of; + of=op_test_callbacks(_source,_cb,_initial_data,_initial_bytes,_error); + if(OP_LIKELY(of!=NULL)){ + int ret; + ret=op_open2(of); + if(OP_LIKELY(ret>=0))return of; + if(_error!=NULL)*_error=ret; + _ogg_free(of); + } + return NULL; +} + +/*Convenience routine to clean up from failure for the open functions that + create their own streams.*/ +static OggOpusFile *op_open_close_on_failure(void *_source, + const OpusFileCallbacks *_cb,int *_error){ + OggOpusFile *of; + if(OP_UNLIKELY(_source==NULL)){ + if(_error!=NULL)*_error=OP_EFAULT; + return NULL; + } + of=op_open_callbacks(_source,_cb,NULL,0,_error); + if(OP_UNLIKELY(of==NULL))(*_cb->close)(_source); + return of; +} + +OggOpusFile *op_open_file(const char *_path,int *_error){ + OpusFileCallbacks cb; + return op_open_close_on_failure(op_fopen(&cb,_path,"rb"),&cb,_error); +} + +OggOpusFile *op_open_memory(const unsigned char *_data,size_t _size, + int *_error){ + OpusFileCallbacks cb; + return 
op_open_close_on_failure(op_mem_stream_create(&cb,_data,_size),&cb, + _error); +} + +/*Convenience routine to clean up from failure for the open functions that + create their own streams.*/ +static OggOpusFile *op_test_close_on_failure(void *_source, + const OpusFileCallbacks *_cb,int *_error){ + OggOpusFile *of; + if(OP_UNLIKELY(_source==NULL)){ + if(_error!=NULL)*_error=OP_EFAULT; + return NULL; + } + of=op_test_callbacks(_source,_cb,NULL,0,_error); + if(OP_UNLIKELY(of==NULL))(*_cb->close)(_source); + return of; +} + +OggOpusFile *op_test_file(const char *_path,int *_error){ + OpusFileCallbacks cb; + return op_test_close_on_failure(op_fopen(&cb,_path,"rb"),&cb,_error); +} + +OggOpusFile *op_test_memory(const unsigned char *_data,size_t _size, + int *_error){ + OpusFileCallbacks cb; + return op_test_close_on_failure(op_mem_stream_create(&cb,_data,_size),&cb, + _error); +} + +int op_test_open(OggOpusFile *_of){ + int ret; + if(OP_UNLIKELY(_of->ready_state!=OP_PARTOPEN))return OP_EINVAL; + ret=op_open2(_of); + /*op_open2() will clear this structure on failure. 
+ Reset its contents to prevent double-frees in op_free().*/ + if(OP_UNLIKELY(ret<0))memset(_of,0,sizeof(*_of)); + return ret; +} + +void op_free(OggOpusFile *_of){ + if(OP_LIKELY(_of!=NULL)){ + op_clear(_of); + _ogg_free(_of); + } +} + +int op_seekable(const OggOpusFile *_of){ + return _of->seekable; +} + +int op_link_count(const OggOpusFile *_of){ + return _of->nlinks; +} + +ogg_uint32_t op_serialno(const OggOpusFile *_of,int _li){ + if(OP_UNLIKELY(_li>=_of->nlinks))_li=_of->nlinks-1; + if(!_of->seekable)_li=0; + return _of->links[_li<0?_of->cur_link:_li].serialno; +} + +int op_channel_count(const OggOpusFile *_of,int _li){ + return op_head(_of,_li)->channel_count; +} + +opus_int64 op_raw_total(const OggOpusFile *_of,int _li){ + if(OP_UNLIKELY(_of->ready_state<OP_OPENED) + ||OP_UNLIKELY(!_of->seekable) + ||OP_UNLIKELY(_li>=_of->nlinks)){ + return OP_EINVAL; + } + if(_li<0)return _of->end-_of->links[0].offset; + return (_li+1>=_of->nlinks?_of->end:_of->links[_li+1].offset) + -_of->links[_li].offset; +} + +ogg_int64_t op_pcm_total(const OggOpusFile *_of,int _li){ + OggOpusLink *links; + ogg_int64_t diff; + int nlinks; + nlinks=_of->nlinks; + if(OP_UNLIKELY(_of->ready_state<OP_OPENED) + ||OP_UNLIKELY(!_of->seekable) + ||OP_UNLIKELY(_li>=nlinks)){ + return OP_EINVAL; + } + links=_of->links; + /*We verify that the granule position differences are larger than the + pre-skip and that the total duration does not overflow during link + enumeration, so we don't have to check here.*/ + if(_li<0){ + ogg_int64_t pcm_total; + int li; + pcm_total=0; + for(li=0;li<nlinks;li++){ + OP_ALWAYS_TRUE(!op_granpos_diff(&diff, + links[li].pcm_end,links[li].pcm_start)); + pcm_total+=diff-links[li].head.pre_skip; + } + return pcm_total; + } + OP_ALWAYS_TRUE(!op_granpos_diff(&diff, + links[_li].pcm_end,links[_li].pcm_start)); + return diff-links[_li].head.pre_skip; +} + +const OpusHead *op_head(const OggOpusFile *_of,int _li){ + if(OP_UNLIKELY(_li>=_of->nlinks))_li=_of->nlinks-1; + 
if(!_of->seekable)_li=0; + return &_of->links[_li<0?_of->cur_link:_li].head; +} + +const OpusTags *op_tags(const OggOpusFile *_of,int _li){ + if(OP_UNLIKELY(_li>=_of->nlinks))_li=_of->nlinks-1; + if(!_of->seekable){ + if(_of->ready_state<OP_STREAMSET&&_of->ready_state!=OP_PARTOPEN){ + return NULL; + } + _li=0; + } + else if(_li<0)_li=_of->ready_state>=OP_STREAMSET?_of->cur_link:0; + return &_of->links[_li].tags; +} + +int op_current_link(const OggOpusFile *_of){ + if(OP_UNLIKELY(_of->ready_state<OP_OPENED))return OP_EINVAL; + return _of->cur_link; +} + +/*Compute an average bitrate given a byte and sample count. + Return: The bitrate in bits per second.*/ +static opus_int32 op_calc_bitrate(opus_int64 _bytes,ogg_int64_t _samples){ + /*These rates are absurd, but let's handle them anyway.*/ + if(OP_UNLIKELY(_bytes>(OP_INT64_MAX-(_samples>>1))/(48000*8))){ + ogg_int64_t den; + if(OP_UNLIKELY(_bytes/(OP_INT32_MAX/(48000*8))>=_samples)){ + return OP_INT32_MAX; + } + den=_samples/(48000*8); + return (opus_int32)((_bytes+(den>>1))/den); + } + if(OP_UNLIKELY(_samples<=0))return OP_INT32_MAX; + /*This can't actually overflow in normal operation: even with a pre-skip of + 545 2.5 ms frames with 8 streams running at 1282*8+1 bytes per packet + (1275 byte frames + Opus framing overhead + Ogg lacing values), that all + produce a single sample of decoded output, we still don't top 45 Mbps. 
+ The only way to get bitrates larger than that is with excessive Opus + padding, more encoded streams than output channels, or lots and lots of + Ogg pages with no packets on them.*/ + return (opus_int32)OP_MIN((_bytes*48000*8+(_samples>>1))/_samples, + OP_INT32_MAX); +} + +opus_int32 op_bitrate(const OggOpusFile *_of,int _li){ + if(OP_UNLIKELY(_of->ready_state<OP_OPENED)||OP_UNLIKELY(!_of->seekable) + ||OP_UNLIKELY(_li>=_of->nlinks)){ + return OP_EINVAL; + } + return op_calc_bitrate(op_raw_total(_of,_li),op_pcm_total(_of,_li)); +} + +opus_int32 op_bitrate_instant(OggOpusFile *_of){ + ogg_int64_t samples_tracked; + opus_int32 ret; + if(OP_UNLIKELY(_of->ready_state<OP_OPENED))return OP_EINVAL; + samples_tracked=_of->samples_tracked; + if(OP_UNLIKELY(samples_tracked==0))return OP_FALSE; + ret=op_calc_bitrate(_of->bytes_tracked,samples_tracked); + _of->bytes_tracked=0; + _of->samples_tracked=0; + return ret; +} + +/*Fetch and process a page. + This handles the case where we're at a bitstream boundary and dumps the + decoding machine. + If the decoding machine is unloaded, it loads it. + It also keeps prev_packet_gp up to date (seek and read both use this; seek + uses a special hack with _readp). + Return: <0) Error, OP_HOLE (lost packet), or OP_EOF. + 0) Need more data (only if _readp==0). 
+ 1) Got at least one audio data packet.*/ +static int op_fetch_and_process_page(OggOpusFile *_of, + ogg_page *_og,opus_int64 _page_pos,int _readp,int _spanp,int _ignore_holes){ + OggOpusLink *links; + ogg_uint32_t cur_serialno; + int seekable; + int cur_link; + int ret; + /*We shouldn't get here if we have unprocessed packets.*/ + OP_ASSERT(_of->ready_state<OP_INITSET||_of->op_pos>=_of->op_count); + if(!_readp)return 0; + seekable=_of->seekable; + links=_of->links; + cur_link=seekable?_of->cur_link:0; + cur_serialno=links[cur_link].serialno; + /*Handle one page.*/ + for(;;){ + ogg_page og; + OP_ASSERT(_of->ready_state>=OP_OPENED); + /*This loop is not strictly necessary, but there's no sense in doing the + extra checks of the larger loop for the common case in a multiplexed + bistream where the page is simply part of a different logical + bitstream.*/ + do{ + /*If we were given a page to use, use it.*/ + if(_og!=NULL){ + *&og=*_og; + _og=NULL; + } + /*Keep reading until we get a page with the correct serialno.*/ + else _page_pos=op_get_next_page(_of,&og,_of->end); + /*EOF: Leave uninitialized.*/ + if(_page_pos<0)return _page_pos<OP_FALSE?(int)_page_pos:OP_EOF; + if(OP_LIKELY(_of->ready_state>=OP_STREAMSET)){ + if(cur_serialno!=(ogg_uint32_t)ogg_page_serialno(&og)){ + /*Two possibilities: + 1) Another stream is multiplexed into this logical section, or*/ + if(OP_LIKELY(!ogg_page_bos(&og)))continue; + /* 2) Our decoding just traversed a bitstream boundary.*/ + if(!_spanp)return OP_EOF; + if(OP_LIKELY(_of->ready_state>=OP_INITSET))op_decode_clear(_of); + break; + } + } + /*Bitrate tracking: add the header's bytes here. + The body bytes are counted when we consume the packets.*/ + _of->bytes_tracked+=og.header_len; + } + while(0); + /*Do we need to load a new machine before submitting the page? + This is different in the seekable and non-seekable cases. + In the seekable case, we already have all the header information loaded + and cached. 
+ We just initialize the machine with it and continue on our merry way. + In the non-seekable (streaming) case, we'll only be at a boundary if we + just left the previous logical bitstream, and we're now nominally at the + header of the next bitstream.*/ + if(OP_UNLIKELY(_of->ready_state<OP_STREAMSET)){ + if(seekable){ + ogg_uint32_t serialno; + int nlinks; + int li; + serialno=ogg_page_serialno(&og); + /*Match the serialno to bitstream section. + We use this rather than offset positions to avoid problems near + logical bitstream boundaries.*/ + nlinks=_of->nlinks; + for(li=0;li<nlinks&&links[li].serialno!=serialno;li++); + /*Not a desired Opus bitstream section. + Keep trying.*/ + if(li>=nlinks)continue; + cur_serialno=serialno; + _of->cur_link=cur_link=li; + ogg_stream_reset_serialno(&_of->os,serialno); + _of->ready_state=OP_STREAMSET; + /*If we're at the start of this link, initialize the granule position + and pre-skip tracking.*/ + if(_page_pos<=links[cur_link].data_offset){ + _of->prev_packet_gp=links[cur_link].pcm_start; + _of->cur_discard_count=links[cur_link].head.pre_skip; + /*Ignore a hole at the start of a new link (this is common for + streams joined in the middle) or after seeking.*/ + _ignore_holes=1; + } + } + else{ + do{ + /*We're streaming. 
+ Fetch the two header packets, build the info struct.*/ + ret=op_fetch_headers(_of,&links[0].head,&links[0].tags, + NULL,NULL,NULL,&og); + if(OP_UNLIKELY(ret<0))return ret; + /*op_find_initial_pcm_offset() will suppress any initial hole for us, + so no need to set _ignore_holes.*/ + ret=op_find_initial_pcm_offset(_of,links,&og); + if(OP_UNLIKELY(ret<0))return ret; + _of->links[0].serialno=cur_serialno=_of->os.serialno; + _of->cur_link++; + } + /*If the link was empty, keep going, because we already have the + BOS page of the next one in og.*/ + while(OP_UNLIKELY(ret>0)); + /*If we didn't get any packets out of op_find_initial_pcm_offset(), + keep going (this is possible if end-trimming trimmed them all).*/ + if(_of->op_count<=0)continue; + /*Otherwise, we're done.*/ + ret=op_make_decode_ready(_of); + if(OP_UNLIKELY(ret<0))return ret; + return 1; + } + } + /*The buffered page is the data we want, and we're ready for it. + Add it to the stream state.*/ + if(OP_UNLIKELY(_of->ready_state==OP_STREAMSET)){ + ret=op_make_decode_ready(_of); + if(OP_UNLIKELY(ret<0))return ret; + } + /*Extract all the packets from the current page.*/ + ogg_stream_pagein(&_of->os,&og); + if(OP_LIKELY(_of->ready_state>=OP_INITSET)){ + opus_int32 total_duration; + int durations[255]; + int op_count; + total_duration=op_collect_audio_packets(_of,durations); + if(OP_UNLIKELY(total_duration<0)){ + /*Drain the packets from the page anyway.*/ + total_duration=op_collect_audio_packets(_of,durations); + OP_ASSERT(total_duration>=0); + /*Report holes to the caller.*/ + if(!_ignore_holes)return OP_HOLE; + } + op_count=_of->op_count; + /*If we found at least one audio data packet, compute per-packet granule + positions for them.*/ + if(op_count>0){ + ogg_int64_t diff; + ogg_int64_t prev_packet_gp; + ogg_int64_t cur_packet_gp; + ogg_int64_t cur_page_gp; + int cur_page_eos; + int pi; + cur_page_gp=_of->op[op_count-1].granulepos; + cur_page_eos=_of->op[op_count-1].e_o_s; + 
prev_packet_gp=_of->prev_packet_gp; + if(OP_UNLIKELY(prev_packet_gp==-1)){ + opus_int32 cur_discard_count; + /*This is the first call after a raw seek. + Try to reconstruct prev_packet_gp from scratch.*/ + OP_ASSERT(seekable); + if(OP_UNLIKELY(cur_page_eos)){ + /*If the first page we hit after our seek was the EOS page, and + we didn't start from data_offset or before, we don't have + enough information to do end-trimming. + Proceed to the next link, rather than risk playing back some + samples that shouldn't have been played.*/ + _of->op_count=0; + continue; + } + /*By default discard 80 ms of data after a seek, unless we seek + into the pre-skip region.*/ + cur_discard_count=80*48; + cur_page_gp=_of->op[op_count-1].granulepos; + /*Try to initialize prev_packet_gp. + If the current page had packets but didn't have a granule + position, or the granule position it had was too small (both + illegal), just use the starting granule position for the link.*/ + prev_packet_gp=links[cur_link].pcm_start; + if(OP_LIKELY(cur_page_gp!=-1)){ + op_granpos_add(&prev_packet_gp,cur_page_gp,-total_duration); + } + if(OP_LIKELY(!op_granpos_diff(&diff, + prev_packet_gp,links[cur_link].pcm_start))){ + opus_int32 pre_skip; + /*If we start at the beginning of the pre-skip region, or we're + at least 80 ms from the end of the pre-skip region, we discard + to the end of the pre-skip region. + Otherwise, we still use the 80 ms default, which will discard + past the end of the pre-skip region.*/ + pre_skip=links[cur_link].head.pre_skip; + if(diff>=0&&diff<=OP_MAX(0,pre_skip-80*48)){ + cur_discard_count=pre_skip-(int)diff; + } + } + _of->cur_discard_count=cur_discard_count; + } + if(OP_UNLIKELY(cur_page_gp==-1)){ + /*This page had completed packets but didn't have a valid granule + position. 
+ This is illegal, but we'll try to handle it by continuing to count + forwards from the previous page.*/ + if(op_granpos_add(&cur_page_gp,prev_packet_gp,total_duration)<0){ + /*The timestamp for this page overflowed.*/ + cur_page_gp=links[cur_link].pcm_end; + } + } + /*If we hit the last page, handle end-trimming.*/ + if(OP_UNLIKELY(cur_page_eos) + &&OP_LIKELY(!op_granpos_diff(&diff,cur_page_gp,prev_packet_gp)) + &&OP_LIKELY(diff<total_duration)){ + cur_packet_gp=prev_packet_gp; + for(pi=0;pi<op_count;pi++){ + diff=durations[pi]-diff; + /*If we have samples to trim...*/ + if(diff>0){ + /*If we trimmed the entire packet, stop (the spec says encoders + shouldn't do this, but we support it anyway).*/ + if(OP_UNLIKELY(diff>durations[pi]))break; + cur_packet_gp=cur_page_gp; + /*Move the EOS flag to this packet, if necessary, so we'll trim + the samples during decode.*/ + _of->op[pi].e_o_s=1; + } + else{ + /*Update the granule position as normal.*/ + OP_ALWAYS_TRUE(!op_granpos_add(&cur_packet_gp, + cur_packet_gp,durations[pi])); + } + _of->op[pi].granulepos=cur_packet_gp; + OP_ALWAYS_TRUE(!op_granpos_diff(&diff,cur_page_gp,cur_packet_gp)); + } + } + else{ + /*Propagate timestamps to earlier packets. + op_granpos_add(&prev_packet_gp,prev_packet_gp,total_duration) + should succeed and give prev_packet_gp==cur_page_gp. + But we don't bother to check that, as there isn't much we can do + if it's not true. + The only thing we guarantee is that the start and end granule + positions of the packets are valid, and that they are monotonic + within a page. + They might be completely out of range for this link (we'll check + that elsewhere), or non-monotonic between pages.*/ + if(OP_UNLIKELY(op_granpos_add(&prev_packet_gp, + cur_page_gp,-total_duration)<0)){ + /*The starting timestamp for the first packet on this page + underflowed. 
+ This is illegal, but we ignore it.*/ + prev_packet_gp=0; + } + for(pi=0;pi<op_count;pi++){ + if(OP_UNLIKELY(op_granpos_add(&cur_packet_gp, + cur_page_gp,-total_duration)<0)){ + /*The start timestamp for this packet underflowed. + This is illegal, but we ignore it.*/ + cur_packet_gp=0; + } + total_duration-=durations[pi]; + OP_ASSERT(total_duration>=0); + OP_ALWAYS_TRUE(!op_granpos_add(&cur_packet_gp, + cur_packet_gp,durations[pi])); + _of->op[pi].granulepos=cur_packet_gp; + } + OP_ASSERT(total_duration==0); + } + _of->prev_packet_gp=prev_packet_gp; + _of->op_count=pi; + /*If end-trimming didn't trim all the packets, we're done.*/ + if(OP_LIKELY(pi>0))return 1; + } + } + } +} + +int op_raw_seek(OggOpusFile *_of,opus_int64 _pos){ + int ret; + if(OP_UNLIKELY(_of->ready_state<OP_OPENED))return OP_EINVAL; + /*Don't dump the decoder state if we can't seek.*/ + if(OP_UNLIKELY(!_of->seekable))return OP_ENOSEEK; + if(OP_UNLIKELY(_pos<0)||OP_UNLIKELY(_pos>_of->end))return OP_EINVAL; + /*Clear out any buffered, decoded data.*/ + op_decode_clear(_of); + _of->bytes_tracked=0; + _of->samples_tracked=0; + ret=op_seek_helper(_of,_pos); + if(OP_UNLIKELY(ret<0))return OP_EREAD; + ret=op_fetch_and_process_page(_of,NULL,-1,1,1,1); + /*If we hit EOF, op_fetch_and_process_page() leaves us uninitialized. 
+ Instead, jump to the end.*/ + if(ret==OP_EOF){ + int cur_link; + op_decode_clear(_of); + cur_link=_of->nlinks-1; + _of->cur_link=cur_link; + _of->prev_packet_gp=_of->links[cur_link].pcm_end; + _of->cur_discard_count=0; + ret=0; + } + else if(ret>0)ret=0; + return ret; +} + +/*Convert a PCM offset relative to the start of the whole stream to a granule + position in an individual link.*/ +static ogg_int64_t op_get_granulepos(const OggOpusFile *_of, + ogg_int64_t _pcm_offset,int *_li){ + const OggOpusLink *links; + ogg_int64_t duration; + int nlinks; + int li; + OP_ASSERT(_pcm_offset>=0); + nlinks=_of->nlinks; + links=_of->links; + for(li=0;OP_LIKELY(li<nlinks);li++){ + ogg_int64_t pcm_start; + opus_int32 pre_skip; + pcm_start=links[li].pcm_start; + pre_skip=links[li].head.pre_skip; + OP_ALWAYS_TRUE(!op_granpos_diff(&duration,links[li].pcm_end,pcm_start)); + duration-=pre_skip; + if(_pcm_offset<duration){ + _pcm_offset+=pre_skip; + if(OP_UNLIKELY(pcm_start>OP_INT64_MAX-_pcm_offset)){ + /*Adding this amount to the granule position would overflow the positive + half of its 64-bit range. + Since signed overflow is undefined in C, do it in a way the compiler + isn't allowed to screw up.*/ + _pcm_offset-=OP_INT64_MAX-pcm_start+1; + pcm_start=OP_INT64_MIN; + } + pcm_start+=_pcm_offset; + *_li=li; + return pcm_start; + } + _pcm_offset-=duration; + } + return -1; +} + +/*This controls how close the target has to be to use the current stream + position to subdivide the initial range. 
+ Two minutes seems to be a good default.*/ +#define OP_CUR_TIME_THRESH (120*48*(opus_int32)1000) + +/*Note: The OP_SMALL_FOOTPRINT #define doesn't (currently) save much code size, + but it's meant to serve as documentation for portions of the seeking + algorithm that are purely optional, to aid others learning from/porting this + code to other contexts.*/ +/*#define OP_SMALL_FOOTPRINT (1)*/ + +/*Search within link _li for the page with the highest granule position + preceding (or equal to) _target_gp. + There is a danger here: missing pages or incorrect frame number information + in the bitstream could make our task impossible. + Account for that (and report it as an error condition).*/ +static int op_pcm_seek_page(OggOpusFile *_of, + ogg_int64_t _target_gp,int _li){ + const OggOpusLink *link; + ogg_page og; + ogg_int64_t pcm_pre_skip; + ogg_int64_t pcm_start; + ogg_int64_t pcm_end; + ogg_int64_t best_gp; + ogg_int64_t diff; + ogg_uint32_t serialno; + opus_int32 pre_skip; + opus_int64 begin; + opus_int64 end; + opus_int64 boundary; + opus_int64 best; + opus_int64 page_offset; + opus_int64 d0; + opus_int64 d1; + opus_int64 d2; + int force_bisect; + int ret; + _of->bytes_tracked=0; + _of->samples_tracked=0; + link=_of->links+_li; + best_gp=pcm_start=link->pcm_start; + pcm_end=link->pcm_end; + serialno=link->serialno; + best=begin=link->data_offset; + page_offset=-1; + /*We discard the first 80 ms of data after a seek, so seek back that much + farther. 
+ If we can't, simply seek to the beginning of the link.*/ + if(OP_UNLIKELY(op_granpos_add(&_target_gp,_target_gp,-80*48)<0) + ||OP_UNLIKELY(op_granpos_cmp(_target_gp,pcm_start)<0)){ + _target_gp=pcm_start; + } + /*Special case seeking to the start of the link.*/ + pre_skip=link->head.pre_skip; + OP_ALWAYS_TRUE(!op_granpos_add(&pcm_pre_skip,pcm_start,pre_skip)); + if(op_granpos_cmp(_target_gp,pcm_pre_skip)<0)end=boundary=begin; + else{ + end=boundary=link->end_offset; +#if !defined(OP_SMALL_FOOTPRINT) + /*If we were decoding from this link, we can narrow the range a bit.*/ + if(_li==_of->cur_link&&_of->ready_state>=OP_INITSET){ + opus_int64 offset; + int op_count; + op_count=_of->op_count; + /*The only way the offset can be invalid _and_ we can fail the granule + position checks below is if someone changed the contents of the last + page since we read it. + We'd be within our rights to just return OP_EBADLINK in that case, but + we'll simply ignore the current position instead.*/ + offset=_of->offset; + if(op_count>0&&OP_LIKELY(offset<=end)){ + ogg_int64_t gp; + /*Make sure the timestamp is valid. + The granule position might be -1 if we collected the packets from a + page without a granule position after reporting a hole.*/ + gp=_of->op[op_count-1].granulepos; + if(OP_LIKELY(gp!=-1)&&OP_LIKELY(op_granpos_cmp(pcm_start,gp)<0) + &&OP_LIKELY(op_granpos_cmp(pcm_end,gp)>0)){ + OP_ALWAYS_TRUE(!op_granpos_diff(&diff,gp,_target_gp)); + /*We only actually use the current time if either + a) We can cut off at least half the range, or + b) We're seeking sufficiently close to the current position that + it's likely to be informative. 
+ Otherwise it appears using the whole link range to estimate the + first seek location gives better results, on average.*/ + if(diff<0){ + OP_ASSERT(offset>=begin); + if(offset-begin>=end-begin>>1||diff>-OP_CUR_TIME_THRESH){ + best=begin=offset; + best_gp=pcm_start=gp; + } + } + else{ + ogg_int64_t prev_page_gp; + /*We might get lucky and already have the packet with the target + buffered. + Worth checking. + For very small files (with all of the data in a single page, + generally 1 second or less), we can loop them continuously + without seeking at all.*/ + OP_ALWAYS_TRUE(!op_granpos_add(&prev_page_gp,_of->op[0].granulepos, + op_get_packet_duration(_of->op[0].packet,_of->op[0].bytes))); + if(op_granpos_cmp(prev_page_gp,_target_gp)<=0){ + /*Don't call op_decode_clear(), because it will dump our + packets.*/ + _of->op_pos=0; + _of->od_buffer_size=0; + _of->prev_packet_gp=prev_page_gp; + _of->ready_state=OP_STREAMSET; + return op_make_decode_ready(_of); + } + /*No such luck. + Check if we can cut off at least half the range, though.*/ + if(offset-begin<=end-begin>>1||diff<OP_CUR_TIME_THRESH){ + /*We really want the page start here, but this will do.*/ + end=boundary=offset; + pcm_end=gp; + } + } + } + } + } +#endif + } + /*This code was originally based on the "new search algorithm by HB (Nicholas + Vinen)" from libvorbisfile. 
+ It has been modified substantially since.*/ + op_decode_clear(_of); + /*Initialize the interval size history.*/ + d2=d1=d0=end-begin; + force_bisect=0; + while(begin<end){ + opus_int64 bisect; + opus_int64 next_boundary; + opus_int32 chunk_size; + if(end-begin<OP_CHUNK_SIZE)bisect=begin; + else{ + /*Update the interval size history.*/ + d0=d1>>1; + d1=d2>>1; + d2=end-begin>>1; + if(force_bisect)bisect=begin+(end-begin>>1); + else{ + ogg_int64_t diff2; + OP_ALWAYS_TRUE(!op_granpos_diff(&diff,_target_gp,pcm_start)); + OP_ALWAYS_TRUE(!op_granpos_diff(&diff2,pcm_end,pcm_start)); + /*Take a (pretty decent) guess.*/ + bisect=begin+op_rescale64(diff,diff2,end-begin)-OP_CHUNK_SIZE; + } + if(bisect-OP_CHUNK_SIZE<begin)bisect=begin; + force_bisect=0; + } + if(bisect!=_of->offset){ + page_offset=-1; + ret=op_seek_helper(_of,bisect); + if(OP_UNLIKELY(ret<0))return ret; + } + chunk_size=OP_CHUNK_SIZE; + next_boundary=boundary; + while(begin<end){ + page_offset=op_get_next_page(_of,&og,boundary); + if(page_offset<0){ + if(page_offset<OP_FALSE)return (int)page_offset; + /*There are no more pages in our interval from our stream with a valid + timestamp that start at position bisect or later.*/ + /*If we scanned the whole interval, we're done.*/ + if(bisect<=begin+1)end=begin; + else{ + /*Otherwise, back up one chunk.*/ + bisect=OP_MAX(bisect-chunk_size,begin); + ret=op_seek_helper(_of,bisect); + if(OP_UNLIKELY(ret<0))return ret; + /*Bump up the chunk size.*/ + chunk_size=OP_MIN(2*chunk_size,OP_CHUNK_SIZE_MAX); + /*If we did find a page from another stream or without a timestamp, + don't read past it.*/ + boundary=next_boundary; + } + } + else{ + ogg_int64_t gp; + /*Save the offset of the first page we found after the seek, regardless + of the stream it came from or whether or not it has a timestamp.*/ + next_boundary=OP_MIN(page_offset,next_boundary); + if(serialno!=(ogg_uint32_t)ogg_page_serialno(&og))continue; + gp=ogg_page_granulepos(&og); + if(gp==-1)continue; + 
if(op_granpos_cmp(gp,_target_gp)<0){ + /*We found a page that ends before our target. + Advance to the raw offset of the next page.*/ + begin=_of->offset; + if(OP_UNLIKELY(op_granpos_cmp(pcm_start,gp)>0) + ||OP_UNLIKELY(op_granpos_cmp(pcm_end,gp)<0)){ + /*Don't let pcm_start get out of range! + That could happen with an invalid timestamp.*/ + break; + } + /*Save the byte offset of the end of the page with this granule + position.*/ + best=begin; + best_gp=pcm_start=gp; + OP_ALWAYS_TRUE(!op_granpos_diff(&diff,_target_gp,pcm_start)); + /*If we're more than a second away from our target, break out and + do another bisection.*/ + if(diff>48000)break; + /*Otherwise, keep scanning forward (do NOT use begin+1).*/ + bisect=begin; + } + else{ + /*We found a page that ends after our target.*/ + /*If we scanned the whole interval before we found it, we're done.*/ + if(bisect<=begin+1)end=begin; + else{ + end=bisect; + /*In later iterations, don't read past the first page we found.*/ + boundary=next_boundary; + /*If we're not making much progress shrinking the interval size, + start forcing straight bisection to limit the worst case.*/ + force_bisect=end-begin>d0*2; + /*Don't let pcm_end get out of range! + That could happen with an invalid timestamp.*/ + if(OP_LIKELY(op_granpos_cmp(pcm_end,gp)>0) + &&OP_LIKELY(op_granpos_cmp(pcm_start,gp)<=0)){ + pcm_end=gp; + } + break; + } + } + } + } + } + /*Found our page. + Seek to the end of it and update prev_packet_gp. + Our caller will set cur_discard_count. 
+ This is an easier case than op_raw_seek(), as we don't need to keep any + packets from the page we found.*/ + /*Seek, if necessary.*/ + if(best!=page_offset){ + page_offset=-1; + ret=op_seek_helper(_of,best); + if(OP_UNLIKELY(ret<0))return ret; + } + OP_ASSERT(op_granpos_cmp(best_gp,pcm_start)>=0); + _of->cur_link=_li; + _of->ready_state=OP_STREAMSET; + _of->prev_packet_gp=best_gp; + ogg_stream_reset_serialno(&_of->os,serialno); + ret=op_fetch_and_process_page(_of,page_offset<0?NULL:&og,page_offset,1,0,1); + if(OP_UNLIKELY(ret<=0))return OP_EBADLINK; + /*Verify result.*/ + if(OP_UNLIKELY(op_granpos_cmp(_of->prev_packet_gp,_target_gp)>0)){ + return OP_EBADLINK; + } + return 0; +} + +int op_pcm_seek(OggOpusFile *_of,ogg_int64_t _pcm_offset){ + const OggOpusLink *link; + ogg_int64_t pcm_start; + ogg_int64_t target_gp; + ogg_int64_t prev_packet_gp; + ogg_int64_t skip; + ogg_int64_t diff; + int op_count; + int op_pos; + int ret; + int li; + if(OP_UNLIKELY(_of->ready_state<OP_OPENED))return OP_EINVAL; + if(OP_UNLIKELY(!_of->seekable))return OP_ENOSEEK; + if(OP_UNLIKELY(_pcm_offset<0))return OP_EINVAL; + target_gp=op_get_granulepos(_of,_pcm_offset,&li); + if(OP_UNLIKELY(target_gp==-1))return OP_EINVAL; + link=_of->links+li; + pcm_start=link->pcm_start; + OP_ALWAYS_TRUE(!op_granpos_diff(&_pcm_offset,target_gp,pcm_start)); +#if !defined(OP_SMALL_FOOTPRINT) + /*For small (90 ms or less) forward seeks within the same link, just decode + forward. + This also optimizes the case of seeking to the current position.*/ + if(li==_of->cur_link&&_of->ready_state>=OP_INITSET){ + ogg_int64_t gp; + gp=_of->prev_packet_gp; + if(OP_LIKELY(gp!=-1)){ + int nbuffered; + nbuffered=OP_MAX(_of->od_buffer_size-_of->od_buffer_pos,0); + OP_ALWAYS_TRUE(!op_granpos_add(&gp,gp,-nbuffered)); + /*We do _not_ add cur_discard_count to gp. 
+ Otherwise the total amount to discard could grow without bound, and it + would be better just to do a full seek.*/ + if(OP_LIKELY(!op_granpos_diff(&diff,gp,pcm_start))){ + ogg_int64_t discard_count; + discard_count=_pcm_offset-diff; + /*We use a threshold of 90 ms instead of 80, since 80 ms is the + _minimum_ we would have discarded after a full seek. + Assuming 20 ms frames (the default), we'd discard 90 ms on average.*/ + if(discard_count>=0&&OP_UNLIKELY(discard_count<90*48)){ + _of->cur_discard_count=(opus_int32)discard_count; + return 0; + } + } + } + } +#endif + ret=op_pcm_seek_page(_of,target_gp,li); + if(OP_UNLIKELY(ret<0))return ret; + /*Now skip samples until we actually get to our target.*/ + /*Figure out where we should skip to.*/ + if(_pcm_offset<=link->head.pre_skip)skip=0; + else skip=OP_MAX(_pcm_offset-80*48,0); + OP_ASSERT(_pcm_offset-skip>=0); + OP_ASSERT(_pcm_offset-skip<OP_INT32_MAX-120*48); + /*Skip packets until we find one with samples past our skip target.*/ + for(;;){ + op_count=_of->op_count; + prev_packet_gp=_of->prev_packet_gp; + for(op_pos=_of->op_pos;op_pos<op_count;op_pos++){ + ogg_int64_t cur_packet_gp; + cur_packet_gp=_of->op[op_pos].granulepos; + if(OP_LIKELY(!op_granpos_diff(&diff,cur_packet_gp,pcm_start)) + &&diff>skip){ + break; + } + prev_packet_gp=cur_packet_gp; + } + _of->prev_packet_gp=prev_packet_gp; + _of->op_pos=op_pos; + if(op_pos<op_count)break; + /*We skipped all the packets on this page. + Fetch another.*/ + ret=op_fetch_and_process_page(_of,NULL,-1,1,0,1); + if(OP_UNLIKELY(ret<=0))return OP_EBADLINK; + } + OP_ALWAYS_TRUE(!op_granpos_diff(&diff,prev_packet_gp,pcm_start)); + /*We skipped too far. + Either the timestamps were illegal or there was a hole in the data.*/ + if(diff>skip)return OP_EBADLINK; + OP_ASSERT(_pcm_offset-diff<OP_INT32_MAX); + /*TODO: If there are further holes/illegal timestamps, we still won't decode + to the correct sample. 
+ However, at least op_pcm_tell() will report the correct value immediately + after returning.*/ + _of->cur_discard_count=(opus_int32)(_pcm_offset-diff); + return 0; +} + +opus_int64 op_raw_tell(const OggOpusFile *_of){ + if(OP_UNLIKELY(_of->ready_state<OP_OPENED))return OP_EINVAL; + return _of->offset; +} + +/*Convert a granule position from a given link to a PCM offset relative to the + start of the whole stream. + For unseekable sources, this gets reset to 0 at the beginning of each link.*/ +static ogg_int64_t op_get_pcm_offset(const OggOpusFile *_of, + ogg_int64_t _gp,int _li){ + const OggOpusLink *links; + ogg_int64_t pcm_offset; + ogg_int64_t delta; + int li; + links=_of->links; + pcm_offset=0; + OP_ASSERT(_li<_of->nlinks); + for(li=0;li<_li;li++){ + OP_ALWAYS_TRUE(!op_granpos_diff(&delta, + links[li].pcm_end,links[li].pcm_start)); + delta-=links[li].head.pre_skip; + pcm_offset+=delta; + } + OP_ASSERT(_li>=0); + if(_of->seekable&&OP_UNLIKELY(op_granpos_cmp(_gp,links[_li].pcm_end)>0)){ + _gp=links[_li].pcm_end; + } + if(OP_LIKELY(op_granpos_cmp(_gp,links[_li].pcm_start)>0)){ + if(OP_UNLIKELY(op_granpos_diff(&delta,_gp,links[_li].pcm_start)<0)){ + /*This means an unseekable stream claimed to have a page from more than + 2 billion days after we joined.*/ + OP_ASSERT(!_of->seekable); + return OP_INT64_MAX; + } + if(delta<links[_li].head.pre_skip)delta=0; + else delta-=links[_li].head.pre_skip; + /*In the seekable case, _gp was limited by pcm_end. 
+ In the unseekable case, pcm_offset should be 0.*/ + OP_ASSERT(pcm_offset<=OP_INT64_MAX-delta); + pcm_offset+=delta; + } + return pcm_offset; +} + +ogg_int64_t op_pcm_tell(const OggOpusFile *_of){ + ogg_int64_t gp; + int nbuffered; + int li; + if(OP_UNLIKELY(_of->ready_state<OP_OPENED))return OP_EINVAL; + gp=_of->prev_packet_gp; + if(gp==-1)return 0; + nbuffered=OP_MAX(_of->od_buffer_size-_of->od_buffer_pos,0); + OP_ALWAYS_TRUE(!op_granpos_add(&gp,gp,-nbuffered)); + li=_of->seekable?_of->cur_link:0; + if(op_granpos_add(&gp,gp,_of->cur_discard_count)<0){ + gp=_of->links[li].pcm_end; + } + return op_get_pcm_offset(_of,gp,li); +} + +void op_set_decode_callback(OggOpusFile *_of, + op_decode_cb_func _decode_cb,void *_ctx){ + _of->decode_cb=_decode_cb; + _of->decode_cb_ctx=_ctx; +} + +int op_set_gain_offset(OggOpusFile *_of, + int _gain_type,opus_int32 _gain_offset_q8){ + if(_gain_type!=OP_HEADER_GAIN&&_gain_type!=OP_TRACK_GAIN + &&_gain_type!=OP_ABSOLUTE_GAIN){ + return OP_EINVAL; + } + _of->gain_type=_gain_type; + /*The sum of header gain and track gain lies in the range [-65536,65534]. + These bounds allow the offset to set the final value to anywhere in the + range [-32768,32767], which is what we'll clamp it to before applying.*/ + _of->gain_offset_q8=OP_CLAMP(-98302,_gain_offset_q8,98303); + op_update_gain(_of); + return 0; +} + +void op_set_dither_enabled(OggOpusFile *_of,int _enabled){ +#if !defined(OPUS_FIXED_POINT) + _of->dither_disabled=!_enabled; + if(!_enabled)_of->dither_mute=65; +#endif +} + +/*Allocate the decoder scratch buffer. 
+ This is done lazily, since if the user provides large enough buffers, we'll + never need it.*/ +static int op_init_buffer(OggOpusFile *_of){ + int nchannels_max; + if(_of->seekable){ + const OggOpusLink *links; + int nlinks; + int li; + links=_of->links; + nlinks=_of->nlinks; + nchannels_max=1; + for(li=0;li<nlinks;li++){ + nchannels_max=OP_MAX(nchannels_max,links[li].head.channel_count); + } + } + else nchannels_max=OP_NCHANNELS_MAX; + _of->od_buffer=(op_sample *)_ogg_malloc( + sizeof(*_of->od_buffer)*nchannels_max*120*48); + if(_of->od_buffer==NULL)return OP_EFAULT; + return 0; +} + +/*Decode a single packet into the target buffer.*/ +static int op_decode(OggOpusFile *_of,op_sample *_pcm, + const ogg_packet *_op,int _nsamples,int _nchannels){ + int ret; + /*First we try using the application-provided decode callback.*/ + if(_of->decode_cb!=NULL){ +#if defined(OPUS_FIXED_POINT) + ret=(*_of->decode_cb)(_of->decode_cb_ctx,_of->od,_pcm,_op, + _nsamples,_nchannels,OP_DEC_FORMAT_SHORT,_of->cur_link); +#else + ret=(*_of->decode_cb)(_of->decode_cb_ctx,_of->od,_pcm,_op, + _nsamples,_nchannels,OP_DEC_FORMAT_FLOAT,_of->cur_link); +#endif + } + else ret=OP_DEC_USE_DEFAULT; + /*If the application didn't want to handle decoding, do it ourselves.*/ + if(ret==OP_DEC_USE_DEFAULT){ +#if defined(OPUS_FIXED_POINT) + ret=opus_multistream_decode(_of->od, + _op->packet,_op->bytes,_pcm,_nsamples,0); +#else + ret=opus_multistream_decode_float(_of->od, + _op->packet,_op->bytes,_pcm,_nsamples,0); +#endif + OP_ASSERT(ret<0||ret==_nsamples); + } + /*If the application returned a positive value other than 0 or + OP_DEC_USE_DEFAULT, fail.*/ + else if(OP_UNLIKELY(ret>0))return OP_EBADPACKET; + if(OP_UNLIKELY(ret<0))return OP_EBADPACKET; + return ret; +} + +/*Read more samples from the stream, using the same API as op_read() or + op_read_float().*/ +static int op_read_native(OggOpusFile *_of, + op_sample *_pcm,int _buf_size,int *_li){ + if(OP_UNLIKELY(_of->ready_state<OP_OPENED))return 
OP_EINVAL; + for(;;){ + int ret; + if(OP_LIKELY(_of->ready_state>=OP_INITSET)){ + int nchannels; + int od_buffer_pos; + int nsamples; + int op_pos; + nchannels=_of->links[_of->seekable?_of->cur_link:0].head.channel_count; + od_buffer_pos=_of->od_buffer_pos; + nsamples=_of->od_buffer_size-od_buffer_pos; + /*If we have buffered samples, return them.*/ + if(nsamples>0){ + if(nsamples*nchannels>_buf_size)nsamples=_buf_size/nchannels; + memcpy(_pcm,_of->od_buffer+nchannels*od_buffer_pos, + sizeof(*_pcm)*nchannels*nsamples); + od_buffer_pos+=nsamples; + _of->od_buffer_pos=od_buffer_pos; + if(_li!=NULL)*_li=_of->cur_link; + return nsamples; + } + /*If we have buffered packets, decode one.*/ + op_pos=_of->op_pos; + if(OP_LIKELY(op_pos<_of->op_count)){ + const ogg_packet *pop; + ogg_int64_t diff; + opus_int32 cur_discard_count; + int duration; + int trimmed_duration; + pop=_of->op+op_pos++; + _of->op_pos=op_pos; + cur_discard_count=_of->cur_discard_count; + duration=op_get_packet_duration(pop->packet,pop->bytes); + /*We don't buffer packets with an invalid TOC sequence.*/ + OP_ASSERT(duration>0); + trimmed_duration=duration; + /*Perform end-trimming.*/ + if(OP_UNLIKELY(pop->e_o_s)){ + if(OP_UNLIKELY(op_granpos_cmp(pop->granulepos, + _of->prev_packet_gp)<=0)){ + trimmed_duration=0; + } + else if(OP_LIKELY(!op_granpos_diff(&diff, + pop->granulepos,_of->prev_packet_gp))){ + trimmed_duration=(int)OP_MIN(diff,trimmed_duration); + } + } + _of->prev_packet_gp=pop->granulepos; + if(OP_UNLIKELY(duration*nchannels>_buf_size)){ + op_sample *buf; + /*If the user's buffer is too small, decode into a scratch buffer.*/ + buf=_of->od_buffer; + if(OP_UNLIKELY(buf==NULL)){ + ret=op_init_buffer(_of); + if(OP_UNLIKELY(ret<0))return ret; + buf=_of->od_buffer; + } + ret=op_decode(_of,buf,pop,duration,nchannels); + if(OP_UNLIKELY(ret<0))return ret; + /*Perform pre-skip/pre-roll.*/ + od_buffer_pos=(int)OP_MIN(trimmed_duration,cur_discard_count); + cur_discard_count-=od_buffer_pos; + 
_of->cur_discard_count=cur_discard_count; + _of->od_buffer_pos=od_buffer_pos; + _of->od_buffer_size=trimmed_duration; + /*Update bitrate tracking based on the actual samples we used from + what was decoded.*/ + _of->bytes_tracked+=pop->bytes; + _of->samples_tracked+=trimmed_duration-od_buffer_pos; + } + else{ + /*Otherwise decode directly into the user's buffer.*/ + ret=op_decode(_of,_pcm,pop,duration,nchannels); + if(OP_UNLIKELY(ret<0))return ret; + if(OP_LIKELY(trimmed_duration>0)){ + /*Perform pre-skip/pre-roll.*/ + od_buffer_pos=(int)OP_MIN(trimmed_duration,cur_discard_count); + cur_discard_count-=od_buffer_pos; + _of->cur_discard_count=cur_discard_count; + trimmed_duration-=od_buffer_pos; + if(OP_LIKELY(trimmed_duration>0) + &&OP_UNLIKELY(od_buffer_pos>0)){ + memmove(_pcm,_pcm+od_buffer_pos*nchannels, + sizeof(*_pcm)*trimmed_duration*nchannels); + } + /*Update bitrate tracking based on the actual samples we used from + what was decoded.*/ + _of->bytes_tracked+=pop->bytes; + _of->samples_tracked+=trimmed_duration; + if(OP_LIKELY(trimmed_duration>0)){ + if(_li!=NULL)*_li=_of->cur_link; + return trimmed_duration; + } + } + } + /*Don't grab another page yet. + This one might have more packets, or might have buffered data now.*/ + continue; + } + } + /*Suck in another page.*/ + ret=op_fetch_and_process_page(_of,NULL,-1,1,1,0); + if(OP_UNLIKELY(ret==OP_EOF)){ + if(_li!=NULL)*_li=_of->cur_link; + return 0; + } + if(OP_UNLIKELY(ret<0))return ret; + } +} + +/*A generic filter to apply to the decoded audio data. + _src is non-const because we will destructively modify the contents of the + source buffer that we consume in some cases.*/ +typedef int (*op_read_filter_func)(OggOpusFile *_of,void *_dst,int _dst_sz, + op_sample *_src,int _nsamples,int _nchannels); + +/*Decode some samples and then apply a custom filter to them. 
+ This is used to convert to different output formats.*/ +static int op_filter_read_native(OggOpusFile *_of,void *_dst,int _dst_sz, + op_read_filter_func _filter,int *_li){ + int ret; + /*Ensure we have some decoded samples in our buffer.*/ + ret=op_read_native(_of,NULL,0,_li); + /*Now apply the filter to them.*/ + if(OP_LIKELY(ret>=0)&&OP_LIKELY(_of->ready_state>=OP_INITSET)){ + int od_buffer_pos; + od_buffer_pos=_of->od_buffer_pos; + ret=_of->od_buffer_size-od_buffer_pos; + if(OP_LIKELY(ret>0)){ + int nchannels; + nchannels=_of->links[_of->seekable?_of->cur_link:0].head.channel_count; + ret=(*_filter)(_of,_dst,_dst_sz, + _of->od_buffer+nchannels*od_buffer_pos,ret,nchannels); + OP_ASSERT(ret>=0); + OP_ASSERT(ret<=_of->od_buffer_size-od_buffer_pos); + od_buffer_pos+=ret; + _of->od_buffer_pos=od_buffer_pos; + } + } + return ret; +} + +#if !defined(OPUS_FIXED_POINT)||!defined(OP_DISABLE_FLOAT_API) + +/*Matrices for downmixing from the supported channel counts to stereo. + The matrices with 5 or more channels are normalized to a total volume of 2.0, + since most mixes sound too quiet if normalized to 1.0 (as there is generally + little volume in the side/rear channels). + Each entry is a {left,right} gain pair for one input channel; the float + coefficients must agree with the Q14 ones in OP_STEREO_DOWNMIX_Q14.*/ +static const float OP_STEREO_DOWNMIX[OP_NCHANNELS_MAX-2][OP_NCHANNELS_MAX][2]={ + /*3.0*/ + { + {0.5858F,0.0F},{0.4142F,0.4142F},{0.0F,0.5858F} + }, + /*quadrophonic*/ + { + {0.4226F,0.0F},{0.0F,0.4226F},{0.366F,0.2114F},{0.2114F,0.366F} + }, + /*5.0*/ + { + {0.651F,0.0F},{0.46F,0.46F},{0.0F,0.651F},{0.5636F,0.3254F}, + {0.3254F,0.5636F} + }, + /*5.1*/ + { + {0.529F,0.0F},{0.3741F,0.3741F},{0.0F,0.529F},{0.4582F,0.2645F}, + {0.2645F,0.4582F},{0.3741F,0.3741F} + }, + /*6.1*/ + { + {0.4553F,0.0F},{0.322F,0.322F},{0.0F,0.4553F},{0.3943F,0.2277F}, + {0.2277F,0.3943F},{0.2788F,0.2788F},{0.322F,0.322F} + }, + /*7.1*/ + { + {0.3886F,0.0F},{0.2748F,0.2748F},{0.0F,0.3886F},{0.3366F,0.1943F}, + {0.1943F,0.3366F},{0.3366F,0.1943F},{0.1943F,0.3366F},{0.2748F,0.2748F} + } +}; + +#endif + +#if
defined(OPUS_FIXED_POINT) + +/*Matrices for downmixing from the supported channel counts to stereo. + The matrices with 5 or more channels are normalized to a total volume of 2.0, + since most mixes sound too quiet if normalized to 1.0 (as there is generally + little volume in the side/rear channels). + Hence we keep the coefficients in Q14, so the downmix values won't overflow a + 32-bit number.*/ +static const opus_int16 OP_STEREO_DOWNMIX_Q14 + [OP_NCHANNELS_MAX-2][OP_NCHANNELS_MAX][2]={ + /*3.0*/ + { + {9598,0},{6786,6786},{0,9598} + }, + /*quadrophonic*/ + { + {6924,0},{0,6924},{5996,3464},{3464,5996} + }, + /*5.0*/ + { + {10666,0},{7537,7537},{0,10666},{9234,5331},{5331,9234} + }, + /*5.1*/ + { + {8668,0},{6129,6129},{0,8668},{7507,4335},{4335,7507},{6129,6129} + }, + /*6.1*/ + { + {7459,0},{5275,5275},{0,7459},{6460,3731},{3731,6460},{4568,4568}, + {5275,5275} + }, + /*7.1*/ + { + {6368,0},{4502,4502},{0,6368},{5515,3183},{3183,5515},{5515,3183}, + {3183,5515},{4502,4502} + } +}; + +int op_read(OggOpusFile *_of,opus_int16 *_pcm,int _buf_size,int *_li){ + return op_read_native(_of,_pcm,_buf_size,_li); +} + +static int op_stereo_filter(OggOpusFile *_of,void *_dst,int _dst_sz, + op_sample *_src,int _nsamples,int _nchannels){ + (void)_of; + _nsamples=OP_MIN(_nsamples,_dst_sz>>1); + if(_nchannels==2)memcpy(_dst,_src,_nsamples*2*sizeof(*_src)); + else{ + opus_int16 *dst; + int i; + dst=(opus_int16 *)_dst; + if(_nchannels==1){ + for(i=0;i<_nsamples;i++)dst[2*i+0]=dst[2*i+1]=_src[i]; + } + else{ + for(i=0;i<_nsamples;i++){ + opus_int32 l; + opus_int32 r; + int ci; + l=r=0; + for(ci=0;ci<_nchannels;ci++){ + opus_int32 s; + s=_src[_nchannels*i+ci]; + l+=OP_STEREO_DOWNMIX_Q14[_nchannels-3][ci][0]*s; + r+=OP_STEREO_DOWNMIX_Q14[_nchannels-3][ci][1]*s; + } + /*TODO: For 5 or more channels, we should do soft clipping here.*/ + dst[2*i+0]=(opus_int16)OP_CLAMP(-32768,l+8192>>14,32767); + dst[2*i+1]=(opus_int16)OP_CLAMP(-32768,r+8192>>14,32767); + } + } + } + return _nsamples; 
+} + +int op_read_stereo(OggOpusFile *_of,opus_int16 *_pcm,int _buf_size){ + return op_filter_read_native(_of,_pcm,_buf_size,op_stereo_filter,NULL); +} + +# if !defined(OP_DISABLE_FLOAT_API) + +static int op_short2float_filter(OggOpusFile *_of,void *_dst,int _dst_sz, + op_sample *_src,int _nsamples,int _nchannels){ + float *dst; + int i; + (void)_of; + dst=(float *)_dst; + if(OP_UNLIKELY(_nsamples*_nchannels>_dst_sz))_nsamples=_dst_sz/_nchannels; + _dst_sz=_nsamples*_nchannels; + for(i=0;i<_dst_sz;i++)dst[i]=(1.0F/32768)*_src[i]; + return _nsamples; +} + +int op_read_float(OggOpusFile *_of,float *_pcm,int _buf_size,int *_li){ + return op_filter_read_native(_of,_pcm,_buf_size,op_short2float_filter,_li); +} + +static int op_short2float_stereo_filter(OggOpusFile *_of, + void *_dst,int _dst_sz,op_sample *_src,int _nsamples,int _nchannels){ + float *dst; + int i; + dst=(float *)_dst; + _nsamples=OP_MIN(_nsamples,_dst_sz>>1); + if(_nchannels==1){ + _nsamples=op_short2float_filter(_of,dst,_nsamples,_src,_nsamples,1); + for(i=_nsamples;i-->0;)dst[2*i+0]=dst[2*i+1]=dst[i]; + } + else if(_nchannels<5){ + /*For 3 or 4 channels, we can downmix in fixed point without risk of + clipping.*/ + if(_nchannels>2){ + _nsamples=op_stereo_filter(_of,_src,_nsamples*2, + _src,_nsamples,_nchannels); + } + return op_short2float_filter(_of,dst,_dst_sz,_src,_nsamples,2); + } + else{ + /*For 5 or more channels, we convert to floats and then downmix (so that we + don't risk clipping).*/ + for(i=0;i<_nsamples;i++){ + float l; + float r; + int ci; + l=r=0; + for(ci=0;ci<_nchannels;ci++){ + float s; + s=(1.0F/32768)*_src[_nchannels*i+ci]; + l+=OP_STEREO_DOWNMIX[_nchannels-3][ci][0]*s; + r+=OP_STEREO_DOWNMIX[_nchannels-3][ci][1]*s; + } + dst[2*i+0]=l; + dst[2*i+1]=r; + } + } + return _nsamples; +} + +int op_read_float_stereo(OggOpusFile *_of,float *_pcm,int _buf_size){ + return op_filter_read_native(_of,_pcm,_buf_size, + op_short2float_stereo_filter,NULL); +} + +# endif + +#else + +# if 
defined(OP_HAVE_LRINTF) +# include <math.h> +# define op_float2int(_x) (lrintf(_x)) +# else +# define op_float2int(_x) ((int)((_x)+((_x)<0?-0.5F:0.5F))) +# endif + +/*The dithering code here is adapted from opusdec, part of opus-tools. + It was originally written by Greg Maxwell.*/ + +static opus_uint32 op_rand(opus_uint32 _seed){ + return _seed*96314165+907633515&0xFFFFFFFFU; +} + +/*This implements 16-bit quantization with full triangular dither and IIR noise + shaping. + The noise shaping filters were designed by Sebastian Gesemann, and are based + on the LAME ATH curves with flattening to limit their peak gain to 20 dB. + Everyone else's noise shaping filters are mildly crazy. + The 48 kHz version of this filter is just a warped version of the 44.1 kHz + filter and probably could be improved by shifting the HF shelf up in + frequency a little bit, since 48 kHz has a bit more room and being more + conservative against bat-ears is probably more important than more noise + suppression. + This process can increase the peak level of the signal (in theory by the peak + error of 1.5 +20 dB, though that is unobservably rare). + To avoid clipping, the signal is attenuated by a couple thousandths of a dB. + Initially, the approach taken here was to only attenuate by the 99.9th + percentile, making clipping rare but not impossible (like SoX), but the + limited gain of the filter means that the worst case was only two + thousandths of a dB more, so this just uses the worst case. 
+ The attenuation is probably also helpful to prevent clipping in the DAC + reconstruction filters or downstream resampling, in any case.*/ + +# define OP_GAIN (32753.0F) + +# define OP_PRNG_GAIN (1.0F/0xFFFFFFFF) + +/*48 kHz noise shaping filter, sd=2.34.*/ + +static const float OP_FCOEF_B[4]={ + 2.2374F,-0.7339F,-0.1251F,-0.6033F +}; + +static const float OP_FCOEF_A[4]={ + 0.9030F,0.0116F,-0.5853F,-0.2571F +}; + +static int op_float2short_filter(OggOpusFile *_of,void *_dst,int _dst_sz, + float *_src,int _nsamples,int _nchannels){ + opus_int16 *dst; + int ci; + int i; + dst=(opus_int16 *)_dst; + if(OP_UNLIKELY(_nsamples*_nchannels>_dst_sz))_nsamples=_dst_sz/_nchannels; +# if defined(OP_SOFT_CLIP) + if(_of->state_channel_count!=_nchannels){ + for(ci=0;ci<_nchannels;ci++)_of->clip_state[ci]=0; + } + opus_pcm_soft_clip(_src,_nsamples,_nchannels,_of->clip_state); +# endif + if(_of->dither_disabled){ + for(i=0;i<_nchannels*_nsamples;i++){ + dst[i]=op_float2int(OP_CLAMP(-32768,32768.0F*_src[i],32767)); + } + } + else{ + opus_uint32 seed; + int mute; + seed=_of->dither_seed; + mute=_of->dither_mute; + if(_of->state_channel_count!=_nchannels)mute=65; + /*In order to avoid replacing digital silence with quiet dither noise, we + mute if the output has been silent for a while.*/ + if(mute>64)memset(_of->dither_a,0,sizeof(*_of->dither_a)*4*_nchannels); + for(i=0;i<_nsamples;i++){ + int silent; + silent=1; + for(ci=0;ci<_nchannels;ci++){ + float r; + float s; + float err; + int si; + int j; + s=_src[_nchannels*i+ci]; + silent&=s==0; + s*=OP_GAIN; + err=0; + for(j=0;j<4;j++){ + err+=OP_FCOEF_B[j]*_of->dither_b[ci*4+j] + -OP_FCOEF_A[j]*_of->dither_a[ci*4+j]; + } + for(j=3;j-->0;)_of->dither_a[ci*4+j+1]=_of->dither_a[ci*4+j]; + for(j=3;j-->0;)_of->dither_b[ci*4+j+1]=_of->dither_b[ci*4+j]; + _of->dither_a[ci*4]=err; + s-=err; + if(mute>16)r=0; + else{ + seed=op_rand(seed); + r=seed*OP_PRNG_GAIN; + seed=op_rand(seed); + r-=seed*OP_PRNG_GAIN; + } + /*Clamp in float out of paranoia 
that the input will be > 96 dBFS and + wrap if the integer is clamped.*/ + si=op_float2int(OP_CLAMP(-32768,s+r,32767)); + dst[_nchannels*i+ci]=(opus_int16)si; + /*Including clipping in the noise shaping is generally disastrous: the + futile effort to restore the clipped energy results in more clipping. + However, small amounts---at the level which could normally be created + by dither and rounding---are harmless and can even reduce clipping + somewhat due to the clipping sometimes reducing the dither + rounding + error.*/ + _of->dither_b[ci*4]=mute>16?0:OP_CLAMP(-1.5F,si-s,1.5F); + } + mute++; + if(!silent)mute=0; + } + _of->dither_mute=OP_MIN(mute,65); + _of->dither_seed=seed; + } + _of->state_channel_count=_nchannels; + return _nsamples; +} + +int op_read(OggOpusFile *_of,opus_int16 *_pcm,int _buf_size,int *_li){ + return op_filter_read_native(_of,_pcm,_buf_size,op_float2short_filter,_li); +} + +int op_read_float(OggOpusFile *_of,float *_pcm,int _buf_size,int *_li){ + _of->state_channel_count=0; + return op_read_native(_of,_pcm,_buf_size,_li); +} + +static int op_stereo_filter(OggOpusFile *_of,void *_dst,int _dst_sz, + op_sample *_src,int _nsamples,int _nchannels){ + (void)_of; + _nsamples=OP_MIN(_nsamples,_dst_sz>>1); + if(_nchannels==2)memcpy(_dst,_src,_nsamples*2*sizeof(*_src)); + else{ + float *dst; + int i; + dst=(float *)_dst; + if(_nchannels==1){ + for(i=0;i<_nsamples;i++)dst[2*i+0]=dst[2*i+1]=_src[i]; + } + else{ + for(i=0;i<_nsamples;i++){ + float l; + float r; + int ci; + l=r=0; + for(ci=0;ci<_nchannels;ci++){ + l+=OP_STEREO_DOWNMIX[_nchannels-3][ci][0]*_src[_nchannels*i+ci]; + r+=OP_STEREO_DOWNMIX[_nchannels-3][ci][1]*_src[_nchannels*i+ci]; + } + dst[2*i+0]=l; + dst[2*i+1]=r; + } + } + } + return _nsamples; +} + +static int op_float2short_stereo_filter(OggOpusFile *_of, + void *_dst,int _dst_sz,op_sample *_src,int _nsamples,int _nchannels){ + opus_int16 *dst; + dst=(opus_int16 *)_dst; + if(_nchannels==1){ + int i; + 
_nsamples=op_float2short_filter(_of,dst,_dst_sz>>1,_src,_nsamples,1); + for(i=_nsamples;i-->0;)dst[2*i+0]=dst[2*i+1]=dst[i]; + } + else{ + if(_nchannels>2){ + _nsamples=OP_MIN(_nsamples,_dst_sz>>1); + _nsamples=op_stereo_filter(_of,_src,_nsamples*2, + _src,_nsamples,_nchannels); + } + _nsamples=op_float2short_filter(_of,dst,_dst_sz,_src,_nsamples,2); + } + return _nsamples; +} + +int op_read_stereo(OggOpusFile *_of,opus_int16 *_pcm,int _buf_size){ + return op_filter_read_native(_of,_pcm,_buf_size, + op_float2short_stereo_filter,NULL); +} + +int op_read_float_stereo(OggOpusFile *_of,float *_pcm,int _buf_size){ + _of->state_channel_count=0; + return op_filter_read_native(_of,_pcm,_buf_size,op_stereo_filter,NULL); +} + +#endif diff --git a/drivers/opus/opusfile.h b/drivers/opus/opusfile.h new file mode 100644 index 0000000000..91d06aa9ba --- /dev/null +++ b/drivers/opus/opusfile.h @@ -0,0 +1,2102 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE libopusfile SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * + * * + * THE libopusfile SOURCE CODE IS (C) COPYRIGHT 1994-2012 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: stdio-based convenience library for opening/seeking/decoding + last mod: $Id: vorbisfile.h 17182 2010-04-29 03:48:32Z xiphmont $ + + ********************************************************************/ +#if !defined(_opusfile_h) +# define _opusfile_h (1) + +/**\mainpage + \section Introduction + + This is the documentation for the <tt>libopusfile</tt> C API. + + The <tt>libopusfile</tt> package provides a convenient high-level API for + decoding and basic manipulation of all Ogg Opus audio streams. 
+ <tt>libopusfile</tt> is implemented as a layer on top of Xiph.Org's + reference + <tt><a href="https://www.xiph.org/ogg/doc/libogg/reference.html">libogg</a></tt> + and + <tt><a href="https://mf4.xiph.org/jenkins/view/opus/job/opus/ws/doc/html/index.html">libopus</a></tt> + libraries. + + <tt>libopusfile</tt> provides several sets of built-in routines for + file/stream access, and may also use custom stream I/O routines provided by + the embedded environment. + There are built-in I/O routines provided for ANSI-compliant + <code>stdio</code> (<code>FILE *</code>), memory buffers, and URLs + (including <file:> URLs, plus optionally <http:> and <https:> URLs). + + \section Organization + + The main API is divided into several sections: + - \ref stream_open_close + - \ref stream_info + - \ref stream_decoding + - \ref stream_seeking + + Several additional sections are not tied to the main API. + - \ref stream_callbacks + - \ref header_info + - \ref error_codes + + \section Overview + + The <tt>libopusfile</tt> API always decodes files to 48 kHz. + The original sample rate is not preserved by the lossy compression, though + it is stored in the header to allow you to resample to it after decoding + (the <tt>libopusfile</tt> API does not currently provide a resampler, + but the + <a href="http://www.speex.org/docs/manual/speex-manual/node7.html#SECTION00760000000000000000">Speex + resampler</a> is a good choice if you need one). + In general, if you are playing back the audio, you should leave it at + 48 kHz, provided your audio hardware supports it. + When decoding to a file, it may be worth resampling back to the original + sample rate, so as not to surprise users who might not expect the sample + rate to change after encoding to Opus and decoding. + + Opus files can contain anywhere from 1 to 255 channels of audio.
+ The channel mappings for up to 8 channels are the same as the + <a href="http://www.xiph.org/vorbis/doc/Vorbis_I_spec.html#x1-800004.3.9">Vorbis + mappings</a>. + A special stereo API can convert everything to 2 channels, making it simple + to support multichannel files in an application which only has stereo + output. + Although the <tt>libopusfile</tt> ABI provides support for the theoretical + maximum number of channels, the current implementation does not support + files with more than 8 channels, as they do not have well-defined channel + mappings. + + Like all Ogg files, Opus files may be "chained". + That is, multiple Opus files may be combined into a single, longer file just + by concatenating the original files. + This is commonly done in internet radio streaming, as it allows the title + and artist to be updated each time the song changes, since each link in the + chain includes its own set of metadata. + + <tt>libopusfile</tt> fully supports chained files. + It will decode the first Opus stream found in each link of a chained file + (ignoring any other streams that might be concurrently multiplexed with it, + such as a video stream). + + The channel count can also change between links. + If your application is not prepared to deal with this, it can use the stereo + API to ensure the audio from all links will always get decoded into a + common format. + Since <tt>libopusfile</tt> always decodes to 48 kHz, you do not have to + worry about the sample rate changing between links (as was possible with + Vorbis). 
+ This makes application support for chained files with <tt>libopusfile</tt> + very easy.*/ + +# if defined(__cplusplus) +extern "C" { +# endif + +# include <stdarg.h> +# include <stdio.h> +# include <ogg/ogg.h> +# include <opus/opus_multistream.h> + +/**@cond PRIVATE*/ + +/*Enable special features for gcc and gcc-compatible compilers.*/ +# if !defined(OP_GNUC_PREREQ) +# if defined(__GNUC__)&&defined(__GNUC_MINOR__) +# define OP_GNUC_PREREQ(_maj,_min) \ + ((__GNUC__<<16)+__GNUC_MINOR__>=((_maj)<<16)+(_min)) +# else +# define OP_GNUC_PREREQ(_maj,_min) 0 +# endif +# endif + +# if OP_GNUC_PREREQ(4,0) +# pragma GCC visibility push(default) +# endif + +typedef struct OpusHead OpusHead; +typedef struct OpusTags OpusTags; +typedef struct OpusPictureTag OpusPictureTag; +typedef struct OpusServerInfo OpusServerInfo; +typedef struct OpusFileCallbacks OpusFileCallbacks; +typedef struct OggOpusFile OggOpusFile; + +/*Warning attributes for libopusfile functions.*/ +# if OP_GNUC_PREREQ(3,4) +# define OP_WARN_UNUSED_RESULT __attribute__((__warn_unused_result__)) +# else +# define OP_WARN_UNUSED_RESULT +# endif +# if OP_GNUC_PREREQ(3,4) +# define OP_ARG_NONNULL(_x) __attribute__((__nonnull__(_x))) +# else +# define OP_ARG_NONNULL(_x) +# endif + +/**@endcond*/ + +/**\defgroup error_codes Error Codes*/ +/*@{*/ +/**\name List of possible error codes + Many of the functions in this library return a negative error code when a + function fails. + This list provides a brief explanation of the common errors. 
+ See each individual function for more details on what a specific error code + means in that context.*/ +/*@{*/ + +/**A request did not succeed.*/ +#define OP_FALSE (-1) +/*Currently not used externally.*/ +#define OP_EOF (-2) +/**There was a hole in the page sequence numbers (e.g., a page was corrupt or + missing).*/ +#define OP_HOLE (-3) +/**An underlying read, seek, or tell operation failed when it should have + succeeded.*/ +#define OP_EREAD (-128) +/**A <code>NULL</code> pointer was passed where one was unexpected, or an + internal memory allocation failed, or an internal library error was + encountered.*/ +#define OP_EFAULT (-129) +/**The stream used a feature that is not implemented, such as an unsupported + channel family.*/ +#define OP_EIMPL (-130) +/**One or more parameters to a function were invalid.*/ +#define OP_EINVAL (-131) +/**A purported Ogg Opus stream did not begin with an Ogg page, a purported + header packet did not start with one of the required strings, "OpusHead" or + "OpusTags", or a link in a chained file was encountered that did not + contain any logical Opus streams.*/ +#define OP_ENOTFORMAT (-132) +/**A required header packet was not properly formatted, contained illegal + values, or was missing altogether.*/ +#define OP_EBADHEADER (-133) +/**The ID header contained an unrecognized version number.*/ +#define OP_EVERSION (-134) +/*Currently not used at all.*/ +#define OP_ENOTAUDIO (-135) +/**An audio packet failed to decode properly. 
+ This is usually caused by a multistream Ogg packet where the durations of + the individual Opus packets contained in it are not all the same.*/ +#define OP_EBADPACKET (-136) +/**We failed to find data we had seen before, or the bitstream structure was + sufficiently malformed that seeking to the target destination was + impossible.*/ +#define OP_EBADLINK (-137) +/**An operation that requires seeking was requested on an unseekable stream.*/ +#define OP_ENOSEEK (-138) +/**The first or last granule position of a link failed basic validity checks.*/ +#define OP_EBADTIMESTAMP (-139) + +/*@}*/ +/*@}*/ + +/**\defgroup header_info Header Information*/ +/*@{*/ + +/**The maximum number of channels in an Ogg Opus stream.*/ +#define OPUS_CHANNEL_COUNT_MAX (255) + +/**Ogg Opus bitstream information. + This contains the basic playback parameters for a stream, and corresponds to + the initial ID header packet of an Ogg Opus stream.*/ +struct OpusHead{ + /**The Ogg Opus format version, in the range 0...255. + The top 4 bits represent a "major" version, and the bottom four bits + represent backwards-compatible "minor" revisions. + The current specification describes version 1. + This library will recognize versions up through 15 as backwards compatible + with the current specification. + An earlier draft of the specification described a version 0, but the only + difference between version 1 and version 0 is that version 0 did + not specify the semantics for handling the version field.*/ + int version; + /**The number of channels, in the range 1...255.*/ + int channel_count; + /**The number of samples that should be discarded from the beginning of the + stream.*/ + unsigned pre_skip; + /**The sampling rate of the original input. + All Opus audio is coded at 48 kHz, and should also be decoded at 48 kHz + for playback (unless the target hardware does not support this sampling + rate). 
+ However, this field may be used to resample the audio back to the original + sampling rate, for example, when saving the output to a file.*/ + opus_uint32 input_sample_rate; + /**The gain to apply to the decoded output, in dB, as a Q8 value in the range + -32768...32767. + The <tt>libopusfile</tt> API will automatically apply this gain to the + decoded output before returning it, scaling it by + <code>pow(10,output_gain/(20.0*256))</code>.*/ + int output_gain; + /**The channel mapping family, in the range 0...255. + Channel mapping family 0 covers mono or stereo in a single stream. + Channel mapping family 1 covers 1 to 8 channels in one or more streams, + using the Vorbis speaker assignments. + Channel mapping family 255 covers 1 to 255 channels in one or more + streams, but without any defined speaker assignment.*/ + int mapping_family; + /**The number of Opus streams in each Ogg packet, in the range 1...255.*/ + int stream_count; + /**The number of coupled Opus streams in each Ogg packet, in the range + 0...127. + This must satisfy <code>0 <= coupled_count <= stream_count</code> and + <code>coupled_count + stream_count <= 255</code>. + The coupled streams appear first, before all uncoupled streams, in an Ogg + Opus packet.*/ + int coupled_count; + /**The mapping from coded stream channels to output channels. + Let <code>index=mapping[k]</code> be the value for channel <code>k</code>. + If <code>index<2*coupled_count</code>, then it refers to the left channel + from stream <code>(index/2)</code> if even, and the right channel from + stream <code>(index/2)</code> if odd. + Otherwise, it refers to the output of the uncoupled stream + <code>(index-coupled_count)</code>.*/ + unsigned char mapping[OPUS_CHANNEL_COUNT_MAX]; +}; + +/**The metadata from an Ogg Opus stream. + + This structure holds the in-stream metadata corresponding to the 'comment' + header packet of an Ogg Opus stream. 
+ The comment header is meant to be used much like someone jotting a quick + note on the label of a CD. + It should be a short, to the point text note that can be more than a couple + words, but not more than a short paragraph. + + The metadata is stored as a series of (tag, value) pairs, in length-encoded + string vectors, using the same format as Vorbis (without the final "framing + bit"), Theora, and Speex, except for the packet header. + The first occurrence of the '=' character delimits the tag and value. + A particular tag may occur more than once, and order is significant. + The character set encoding for the strings is always UTF-8, but the tag + names are limited to ASCII, and treated as case-insensitive. + See <a href="http://www.xiph.org/vorbis/doc/v-comment.html">the Vorbis + comment header specification</a> for details. + + In filling in this structure, <tt>libopusfile</tt> will null-terminate the + #user_comments strings for safety. + However, the bitstream format itself treats them as 8-bit clean vectors, + possibly containing NUL characters, so the #comment_lengths array should be + treated as their authoritative length. + + This structure is binary and source-compatible with a + <code>vorbis_comment</code>, and pointers to it may be freely cast to + <code>vorbis_comment</code> pointers, and vice versa. + It is provided as a separate type to avoid introducing a compile-time + dependency on the libvorbis headers.*/ +struct OpusTags{ + /**The array of comment string vectors.*/ + char **user_comments; + /**An array of the corresponding length of each vector, in bytes.*/ + int *comment_lengths; + /**The total number of comment strings.*/ + int comments; + /**The null-terminated vendor string.
+ This identifies the software used to encode the stream.*/ + char *vendor; +}; + +/**\name Picture tag image formats*/ +/*@{*/ + +/**The MIME type was not recognized, or the image data did not match the + declared MIME type.*/ +#define OP_PIC_FORMAT_UNKNOWN (-1) +/**The MIME type indicates the image data is really a URL.*/ +#define OP_PIC_FORMAT_URL (0) +/**The image is a JPEG.*/ +#define OP_PIC_FORMAT_JPEG (1) +/**The image is a PNG.*/ +#define OP_PIC_FORMAT_PNG (2) +/**The image is a GIF.*/ +#define OP_PIC_FORMAT_GIF (3) + +/*@}*/ + +/**The contents of a METADATA_BLOCK_PICTURE tag.*/ +struct OpusPictureTag{ + /**The picture type according to the ID3v2 APIC frame: + <ol start="0"> + <li>Other</li> + <li>32x32 pixels 'file icon' (PNG only)</li> + <li>Other file icon</li> + <li>Cover (front)</li> + <li>Cover (back)</li> + <li>Leaflet page</li> + <li>Media (e.g. label side of CD)</li> + <li>Lead artist/lead performer/soloist</li> + <li>Artist/performer</li> + <li>Conductor</li> + <li>Band/Orchestra</li> + <li>Composer</li> + <li>Lyricist/text writer</li> + <li>Recording Location</li> + <li>During recording</li> + <li>During performance</li> + <li>Movie/video screen capture</li> + <li>A bright colored fish</li> + <li>Illustration</li> + <li>Band/artist logotype</li> + <li>Publisher/Studio logotype</li> + </ol> + Others are reserved and should not be used. + There may only be one each of picture type 1 and 2 in a file.*/ + opus_int32 type; + /**The MIME type of the picture, in printable ASCII characters 0x20-0x7E. + The MIME type may also be <code>"-->"</code> to signify that the data part + is a URL pointing to the picture instead of the picture data itself. 
+ In this case, a terminating NUL is appended to the URL string in #data, + but #data_length is set to the length of the string excluding that + terminating NUL.*/ + char *mime_type; + /**The description of the picture, in UTF-8.*/ + char *description; + /**The width of the picture in pixels.*/ + opus_uint32 width; + /**The height of the picture in pixels.*/ + opus_uint32 height; + /**The color depth of the picture in bits-per-pixel (<em>not</em> + bits-per-channel).*/ + opus_uint32 depth; + /**For indexed-color pictures (e.g., GIF), the number of colors used, or 0 + for non-indexed pictures.*/ + opus_uint32 colors; + /**The length of the picture data in bytes.*/ + opus_uint32 data_length; + /**The binary picture data.*/ + unsigned char *data; + /**The format of the picture data, if known. + One of + <ul> + <li>#OP_PIC_FORMAT_UNKNOWN,</li> + <li>#OP_PIC_FORMAT_URL,</li> + <li>#OP_PIC_FORMAT_JPEG,</li> + <li>#OP_PIC_FORMAT_PNG, or</li> + <li>#OP_PIC_FORMAT_GIF.</li> + </ul>*/ + int format; +}; + +/**\name Functions for manipulating header data + + These functions manipulate the #OpusHead and #OpusTags structures, + which describe the audio parameters and tag-value metadata, respectively. + These can be used to query the headers returned by <tt>libopusfile</tt>, or + to parse Opus headers from sources other than an Ogg Opus stream, provided + they use the same format.*/ +/*@{*/ + +/**Parses the contents of the ID header packet of an Ogg Opus stream. + \param[out] _head Returns the contents of the parsed packet. + The contents of this structure are untouched on error. + This may be <code>NULL</code> to merely test the header + for validity. + \param[in] _data The contents of the ID header packet. + \param _len The number of bytes of data in the ID header packet. + \return 0 on success or a negative value on error. + \retval #OP_ENOTFORMAT If the data does not start with the "OpusHead" + string. 
+ \retval #OP_EVERSION If the version field signaled a version this library + does not know how to parse. + \retval #OP_EIMPL If the channel mapping family was 255, which general + purpose players should not attempt to play. + \retval #OP_EBADHEADER If the contents of the packet otherwise violate the + Ogg Opus specification: + <ul> + <li>Insufficient data,</li> + <li>Too much data for the known minor versions,</li> + <li>An unrecognized channel mapping family,</li> + <li>Zero channels or too many channels,</li> + <li>Zero coded streams,</li> + <li>Too many coupled streams, or</li> + <li>An invalid channel mapping index.</li> + </ul>*/ +OP_WARN_UNUSED_RESULT int opus_head_parse(OpusHead *_head, + const unsigned char *_data,size_t _len) OP_ARG_NONNULL(2); + +/**Converts a granule position to a sample offset for a given Ogg Opus stream. + The sample offset is simply <code>_gp-_head->pre_skip</code>. + Granule position values smaller than OpusHead#pre_skip correspond to audio + that should never be played, and thus have no associated sample offset. + This function returns -1 for such values. + This function also correctly handles extremely large granule positions, + which may have wrapped around to a negative number when stored in a signed + ogg_int64_t value. + \param _head The #OpusHead information from the ID header of the stream. + \param _gp The granule position to convert. + \return The sample offset associated with the given granule position + (counting at a 48 kHz sampling rate), or the special value -1 on + error (i.e., the granule position was smaller than the pre-skip + amount).*/ +ogg_int64_t opus_granule_sample(const OpusHead *_head,ogg_int64_t _gp) + OP_ARG_NONNULL(1); + +/**Parses the contents of the 'comment' header packet of an Ogg Opus stream. + \param[out] _tags An uninitialized #OpusTags structure. + This returns the contents of the parsed packet. + The contents of this structure are untouched on error. 
+ This may be <code>NULL</code> to merely test the header + for validity. + \param[in] _data The contents of the 'comment' header packet. + \param _len The number of bytes of data in the 'comment' header packet. + \retval 0 Success. + \retval #OP_ENOTFORMAT If the data does not start with the "OpusTags" + string. + \retval #OP_EBADHEADER If the contents of the packet otherwise violate the + Ogg Opus specification. + \retval #OP_EFAULT If there wasn't enough memory to store the tags.*/ +OP_WARN_UNUSED_RESULT int opus_tags_parse(OpusTags *_tags, + const unsigned char *_data,size_t _len) OP_ARG_NONNULL(2); + +/**Performs a deep copy of an #OpusTags structure. + \param _dst The #OpusTags structure to copy into. + If this function fails, the contents of this structure remain + untouched. + \param _src The #OpusTags structure to copy from. + \retval 0 Success. + \retval #OP_EFAULT If there wasn't enough memory to copy the tags.*/ +int opus_tags_copy(OpusTags *_dst,const OpusTags *_src) OP_ARG_NONNULL(1); + +/**Initializes an #OpusTags structure. + This should be called on a freshly allocated #OpusTags structure before + attempting to use it. + \param _tags The #OpusTags structure to initialize.*/ +void opus_tags_init(OpusTags *_tags) OP_ARG_NONNULL(1); + +/**Add a (tag, value) pair to an initialized #OpusTags structure. + \note Neither opus_tags_add() nor opus_tags_add_comment() support values + containing embedded NULs, although the bitstream format does support them. + To add such tags, you will need to manipulate the #OpusTags structure + directly. + \param _tags The #OpusTags structure to add the (tag, value) pair to. + \param _tag A NUL-terminated, case-insensitive, ASCII string containing + the tag to add (without an '=' character). + \param _value A NUL-terminated UTF-8 string containing the corresponding value. + \return 0 on success, or a negative value on failure.
+ \retval #OP_EFAULT An internal memory allocation failed.*/ +int opus_tags_add(OpusTags *_tags,const char *_tag,const char *_value) + OP_ARG_NONNULL(1) OP_ARG_NONNULL(2) OP_ARG_NONNULL(3); + +/**Add a comment to an initialized #OpusTags structure. + \note Neither opus_tags_add_comment() nor opus_tags_add() support comments + containing embedded NULs, although the bitstream format does support them. + To add such tags, you will need to manipulate the #OpusTags structure + directly. + \param _tags The #OpusTags structure to add the comment to. + \param _comment A NUL-terminated UTF-8 string containing the comment in + "TAG=value" form. + \return 0 on success, or a negative value on failure. + \retval #OP_EFAULT An internal memory allocation failed.*/ +int opus_tags_add_comment(OpusTags *_tags,const char *_comment) + OP_ARG_NONNULL(1) OP_ARG_NONNULL(2); + +/**Look up a comment value by its tag. + \param _tags An initialized #OpusTags structure. + \param _tag The tag to look up. + \param _count The instance of the tag. + The same tag can appear multiple times, each with a distinct + value, so an index is required to retrieve them all. + The order in which these values appear is significant and + should be preserved. + Use opus_tags_query_count() to get the legal range for the + \a _count parameter. + \return A pointer to the queried tag's value. + This points directly to data in the #OpusTags structure. + It should not be modified or freed by the application, and + modifications to the structure may invalidate the pointer. + \retval NULL If no matching tag is found.*/ +const char *opus_tags_query(const OpusTags *_tags,const char *_tag,int _count) + OP_ARG_NONNULL(1) OP_ARG_NONNULL(2); + +/**Look up the number of instances of a tag. + Call this first when querying for a specific tag and then iterate over the + number of instances with separate calls to opus_tags_query() to retrieve + all the values for that tag in order. 
+ \param _tags An initialized #OpusTags structure. + \param _tag The tag to look up. + \return The number of instances of this particular tag.*/ +int opus_tags_query_count(const OpusTags *_tags,const char *_tag) + OP_ARG_NONNULL(1) OP_ARG_NONNULL(2); + +/**Get the track gain from an R128_TRACK_GAIN tag, if one was specified. + This searches for the first R128_TRACK_GAIN tag with a valid signed, + 16-bit decimal integer value and returns the value. + This routine is exposed merely for convenience for applications which wish + to do something special with the track gain (i.e., display it). + If you simply wish to apply the track gain instead of the header gain, you + can use op_set_gain_offset() with an #OP_TRACK_GAIN type and no offset. + \param _tags An initialized #OpusTags structure. + \param[out] _gain_q8 The track gain, in 1/256ths of a dB. + This will lie in the range [-32768,32767], and should + be applied in <em>addition</em> to the header gain. + On error, no value is returned, and the previous + contents remain unchanged. + \return 0 on success, or a negative value on error. + \retval #OP_FALSE There was no track gain available in the given tags.*/ +int opus_tags_get_track_gain(const OpusTags *_tags,int *_gain_q8) + OP_ARG_NONNULL(1) OP_ARG_NONNULL(2); + +/**Clears the #OpusTags structure. + This should be called on an #OpusTags structure after it is no longer + needed. + It will free all memory used by the structure members. + \param _tags The #OpusTags structure to clear.*/ +void opus_tags_clear(OpusTags *_tags) OP_ARG_NONNULL(1); + +/**Check if \a _comment is an instance of a \a _tag_name tag. + \see opus_tagncompare + \param _tag_name A NUL-terminated, case-insensitive, ASCII string containing + the name of the tag to check for (without the terminating + '=' character). + \param _comment The comment string to check. 
+ \return An integer less than, equal to, or greater than zero if \a _comment + is found respectively, to be less than, to match, or be greater + than a "tag=value" string whose tag matches \a _tag_name.*/ +int opus_tagcompare(const char *_tag_name,const char *_comment); + +/**Check if \a _comment is an instance of a \a _tag_name tag. + This version is slightly more efficient than opus_tagcompare() if the length + of the tag name is already known (e.g., because it is a constant). + \see opus_tagcompare + \param _tag_name A case-insensitive ASCII string containing the name of the + tag to check for (without the terminating '=' character). + \param _tag_len The number of characters in the tag name. + This must be non-negative. + \param _comment The comment string to check. + \return An integer less than, equal to, or greater than zero if \a _comment + is found respectively, to be less than, to match, or be greater + than a "tag=value" string whose tag matches the first \a _tag_len + characters of \a _tag_name.*/ +int opus_tagncompare(const char *_tag_name,int _tag_len,const char *_comment); + +/**Parse a single METADATA_BLOCK_PICTURE tag. + This decodes the BASE64-encoded content of the tag and returns a structure + with the MIME type, description, image parameters (if known), and the + compressed image data. + If the MIME type indicates the presence of an image format we recognize + (JPEG, PNG, or GIF) and the actual image data contains the magic signature + associated with that format, then the OpusPictureTag::format field will be + set to the corresponding format. + This is provided as a convenience to avoid requiring applications to parse + the MIME type and/or do their own format detection for the commonly used + formats. + In this case, we also attempt to extract the image parameters directly from + the image data (overriding any that were present in the tag, which the + specification says applications are not meant to rely on). 
+ The application must still provide its own support for actually decoding the + image data and, if applicable, retrieving that data from URLs. + \param[out] _pic Returns the parsed picture data. + No sanitization is done on the type, MIME type, or + description fields, so these might return invalid values. + The contents of this structure are left unmodified on + failure. + \param _tag The METADATA_BLOCK_PICTURE tag contents. + The leading "METADATA_BLOCK_PICTURE=" portion is optional, + to allow the function to be used either directly on the + values in OpusTags::user_comments or on the return value + of opus_tags_query(). + \return 0 on success or a negative value on error. + \retval #OP_ENOTFORMAT The METADATA_BLOCK_PICTURE contents were not valid. + \retval #OP_EFAULT There was not enough memory to store the picture tag + contents.*/ +OP_WARN_UNUSED_RESULT int opus_picture_tag_parse(OpusPictureTag *_pic, + const char *_tag) OP_ARG_NONNULL(1) OP_ARG_NONNULL(2); + +/**Initializes an #OpusPictureTag structure. + This should be called on a freshly allocated #OpusPictureTag structure + before attempting to use it. + \param _pic The #OpusPictureTag structure to initialize.*/ +void opus_picture_tag_init(OpusPictureTag *_pic) OP_ARG_NONNULL(1); + +/**Clears the #OpusPictureTag structure. + This should be called on an #OpusPictureTag structure after it is no longer + needed. + It will free all memory used by the structure members. + \param _pic The #OpusPictureTag structure to clear.*/ +void opus_picture_tag_clear(OpusPictureTag *_pic) OP_ARG_NONNULL(1); + +/*@}*/ + +/*@}*/ + +/**\defgroup url_options URL Reading Options*/ +/*@{*/ +/**\name URL reading options + Options for op_url_stream_create() and associated functions. + These allow you to provide proxy configuration parameters, skip SSL + certificate checks, etc.
+ Options are processed in order, and if the same option is passed multiple + times, only the value specified by the last occurrence has an effect + (unless otherwise specified). + They may be expanded in the future.*/ +/*@{*/ + +/**@cond PRIVATE*/ + +/*These are the raw numbers used to define the request codes. + They should not be used directly.*/ +#define OP_SSL_SKIP_CERTIFICATE_CHECK_REQUEST (6464) +#define OP_HTTP_PROXY_HOST_REQUEST (6528) +#define OP_HTTP_PROXY_PORT_REQUEST (6592) +#define OP_HTTP_PROXY_USER_REQUEST (6656) +#define OP_HTTP_PROXY_PASS_REQUEST (6720) +#define OP_GET_SERVER_INFO_REQUEST (6784) + +#define OP_URL_OPT(_request) ((_request)+(char *)0) + +/*These macros trigger compilation errors or warnings if the wrong types are + provided to one of the URL options.*/ +#define OP_CHECK_INT(_x) ((void)((_x)==(opus_int32)0),(opus_int32)(_x)) +#define OP_CHECK_CONST_CHAR_PTR(_x) ((_x)+((_x)-(const char *)(_x))) +#define OP_CHECK_SERVER_INFO_PTR(_x) ((_x)+((_x)-(OpusServerInfo *)(_x))) + +/**@endcond*/ + +/**HTTP/Shoutcast/Icecast server information associated with a URL.*/ +struct OpusServerInfo{ + /**The name of the server (icy-name/ice-name). + This is <code>NULL</code> if there was no <code>icy-name</code> or + <code>ice-name</code> header.*/ + char *name; + /**A short description of the server (icy-description/ice-description). + This is <code>NULL</code> if there was no <code>icy-description</code> or + <code>ice-description</code> header.*/ + char *description; + /**The genre the server falls under (icy-genre/ice-genre). + This is <code>NULL</code> if there was no <code>icy-genre</code> or + <code>ice-genre</code> header.*/ + char *genre; + /**The homepage for the server (icy-url/ice-url). + This is <code>NULL</code> if there was no <code>icy-url</code> or + <code>ice-url</code> header.*/ + char *url; + /**The software used by the origin server (Server). 
+ This is <code>NULL</code> if there was no <code>Server</code> header.*/ + char *server; + /**The media type of the entity sent to the recipient (Content-Type). + This is <code>NULL</code> if there was no <code>Content-Type</code> + header.*/ + char *content_type; + /**The nominal stream bitrate in kbps (icy-br/ice-bitrate). + This is <code>-1</code> if there was no <code>icy-br</code> or + <code>ice-bitrate</code> header.*/ + opus_int32 bitrate_kbps; + /**Flag indicating whether the server is public (<code>1</code>) or not + (<code>0</code>) (icy-pub/ice-public). + This is <code>-1</code> if there was no <code>icy-pub</code> or + <code>ice-public</code> header.*/ + int is_public; + /**Flag indicating whether the server is using HTTPS instead of HTTP. + This is <code>0</code> unless HTTPS is being used. + This may not match the protocol used in the original URL if there were + redirections.*/ + int is_ssl; +}; + +/**Initializes an #OpusServerInfo structure. + All fields are set as if the corresponding header was not available. + \param _info The #OpusServerInfo structure to initialize. + \note If you use this function, you must link against <tt>libopusurl</tt>.*/ +void opus_server_info_init(OpusServerInfo *_info) OP_ARG_NONNULL(1); + +/**Clears the #OpusServerInfo structure. + This should be called on an #OpusServerInfo structure after it is no longer + needed. + It will free all memory used by the structure members. + \param _info The #OpusServerInfo structure to clear. + \note If you use this function, you must link against <tt>libopusurl</tt>.*/ +void opus_server_info_clear(OpusServerInfo *_info) OP_ARG_NONNULL(1); + +/**Skip the certificate check when connecting via TLS/SSL (https). + \param _b <code>opus_int32</code>: Whether or not to skip the certificate + check. + The check will be skipped if \a _b is non-zero, and will not be + skipped if \a _b is zero.
+ \hideinitializer*/ +#define OP_SSL_SKIP_CERTIFICATE_CHECK(_b) \ + OP_URL_OPT(OP_SSL_SKIP_CERTIFICATE_CHECK_REQUEST),OP_CHECK_INT(_b) + +/**Proxy connections through the given host. + If no port is specified via #OP_HTTP_PROXY_PORT, the port number defaults + to 8080 (http-alt). + All proxy parameters are ignored for non-http and non-https URLs. + \param _host <code>const char *</code>: The proxy server hostname. + This may be <code>NULL</code> to disable the use of a proxy + server. + \hideinitializer*/ +#define OP_HTTP_PROXY_HOST(_host) \ + OP_URL_OPT(OP_HTTP_PROXY_HOST_REQUEST),OP_CHECK_CONST_CHAR_PTR(_host) + +/**Use the given port when proxying connections. + This option only has an effect if #OP_HTTP_PROXY_HOST is specified with a + non-<code>NULL</code> \a _host. + If this option is not provided, the proxy port number defaults to 8080 + (http-alt). + All proxy parameters are ignored for non-http and non-https URLs. + \param _port <code>opus_int32</code>: The proxy server port. + This must be in the range 0...65535 (inclusive), or the + URL function this is passed to will fail. + \hideinitializer*/ +#define OP_HTTP_PROXY_PORT(_port) \ + OP_URL_OPT(OP_HTTP_PROXY_PORT_REQUEST),OP_CHECK_INT(_port) + +/**Use the given user name for authentication when proxying connections. + All proxy parameters are ignored for non-http and non-https URLs. + \param _user const char *: The proxy server user name. + This may be <code>NULL</code> to disable proxy + authentication. + A non-<code>NULL</code> value only has an effect + if #OP_HTTP_PROXY_HOST and #OP_HTTP_PROXY_PASS + are also specified with non-<code>NULL</code> + arguments. + \hideinitializer*/ +#define OP_HTTP_PROXY_USER(_user) \ + OP_URL_OPT(OP_HTTP_PROXY_USER_REQUEST),OP_CHECK_CONST_CHAR_PTR(_user) + +/**Use the given password for authentication when proxying connections. + All proxy parameters are ignored for non-http and non-https URLs. + \param _pass const char *: The proxy server password. 
+ This may be <code>NULL</code> to disable proxy + authentication. + A non-<code>NULL</code> value only has an effect + if #OP_HTTP_PROXY_HOST and #OP_HTTP_PROXY_USER + are also specified with non-<code>NULL</code> + arguments. + \hideinitializer*/ +#define OP_HTTP_PROXY_PASS(_pass) \ + OP_URL_OPT(OP_HTTP_PROXY_PASS_REQUEST),OP_CHECK_CONST_CHAR_PTR(_pass) + +/**Parse information about the streaming server (if any) and return it. + Very little validation is done. + In particular, OpusServerInfo::url may not be a valid URL, + OpusServerInfo::bitrate_kbps may not really be in kbps, and + OpusServerInfo::content_type may not be a valid MIME type. + The character set of the string fields is not specified anywhere, and should + not be assumed to be valid UTF-8. + \param _info OpusServerInfo *: Returns information about the server. + If there is any error opening the stream, the + contents of this structure remain + unmodified. + On success, fills in the structure with the + server information that was available, if + any. + After a successful return, the contents of + this structure should be freed by calling + opus_server_info_clear(). + \hideinitializer*/ +#define OP_GET_SERVER_INFO(_info) \ + OP_URL_OPT(OP_GET_SERVER_INFO_REQUEST),OP_CHECK_SERVER_INFO_PTR(_info) + +/*@}*/ +/*@}*/ + +/**\defgroup stream_callbacks Abstract Stream Reading Interface*/ +/*@{*/ +/**\name Functions for reading from streams + These functions define the interface used to read from and seek in a stream + of data. + A stream does not need to implement seeking, but the decoder will not be + able to seek if it does not do so. + These functions also include some convenience routines for working with + standard <code>FILE</code> pointers, complete streams stored in a single + block of memory, or URLs.*/ +/*@{*/ + +/**Reads up to \a _nbytes bytes of data from \a _stream. + \param _stream The stream to read from. + \param[out] _ptr The buffer to store the data in. 
+ \param _nbytes The maximum number of bytes to read. + This function may return fewer, though it will not + return zero unless it reaches end-of-file. + \return The number of bytes successfully read, or a negative value on + error.*/ +typedef int (*op_read_func)(void *_stream,unsigned char *_ptr,int _nbytes); + +/**Sets the position indicator for \a _stream. + The new position, measured in bytes, is obtained by adding \a _offset + bytes to the position specified by \a _whence. + If \a _whence is set to <code>SEEK_SET</code>, <code>SEEK_CUR</code>, or + <code>SEEK_END</code>, the offset is relative to the start of the stream, + the current position indicator, or end-of-file, respectively. + \retval 0 Success. + \retval -1 Seeking is not supported or an error occurred. + <code>errno</code> need not be set.*/ +typedef int (*op_seek_func)(void *_stream,opus_int64 _offset,int _whence); + +/**Obtains the current value of the position indicator for \a _stream. + \return The current position indicator.*/ +typedef opus_int64 (*op_tell_func)(void *_stream); + +/**Closes the underlying stream. + \retval 0 Success. + \retval EOF An error occurred. + <code>errno</code> need not be set.*/ +typedef int (*op_close_func)(void *_stream); + +/**The callbacks used to access non-<code>FILE</code> stream resources. + The function prototypes are basically the same as for the stdio functions + <code>fread()</code>, <code>fseek()</code>, <code>ftell()</code>, and + <code>fclose()</code>. + The differences are that the <code>FILE *</code> arguments have been + replaced with a <code>void *</code>, which is to be used as a pointer to + whatever internal data these functions might need, that #seek and #tell + take and return 64-bit offsets, and that #seek <em>must</em> return -1 if + the stream is unseekable.*/ +struct OpusFileCallbacks{ + /**Used to read data from the stream. + This must not be <code>NULL</code>.*/ + op_read_func read; + /**Used to seek in the stream. 
+ This may be <code>NULL</code> if seeking is not implemented.*/ + op_seek_func seek; + /**Used to return the current read position in the stream. + This may be <code>NULL</code> if seeking is not implemented.*/ + op_tell_func tell; + /**Used to close the stream when the decoder is freed. + This may be <code>NULL</code> to leave the stream open.*/ + op_close_func close; +}; + +/**Opens a stream with <code>fopen()</code> and fills in a set of callbacks + that can be used to access it. + This is useful to avoid writing your own portable 64-bit seeking wrappers, + and also avoids cross-module linking issues on Windows, where a + <code>FILE *</code> must be accessed by routines defined in the same module + that opened it. + \param[out] _cb The callbacks to use for this file. + If there is an error opening the file, nothing will be + filled in here. + \param _path The path to the file to open. + On Windows, this string must be UTF-8 (to allow access to + files whose names cannot be represented in the current + MBCS code page). + All other systems use the native character encoding. + \param _mode The mode to open the file in. + \return A stream handle to use with the callbacks, or <code>NULL</code> on + error.*/ +OP_WARN_UNUSED_RESULT void *op_fopen(OpusFileCallbacks *_cb, + const char *_path,const char *_mode) OP_ARG_NONNULL(1) OP_ARG_NONNULL(2) + OP_ARG_NONNULL(3); + +/**Opens a stream with <code>fdopen()</code> and fills in a set of callbacks + that can be used to access it. + This is useful to avoid writing your own portable 64-bit seeking wrappers, + and also avoids cross-module linking issues on Windows, where a + <code>FILE *</code> must be accessed by routines defined in the same module + that opened it. + \param[out] _cb The callbacks to use for this file. + If there is an error opening the file, nothing will be + filled in here. + \param _fd The file descriptor to open. + \param _mode The mode to open the file in. 
+ \return A stream handle to use with the callbacks, or <code>NULL</code> on + error.*/ +OP_WARN_UNUSED_RESULT void *op_fdopen(OpusFileCallbacks *_cb, + int _fd,const char *_mode) OP_ARG_NONNULL(1) OP_ARG_NONNULL(3); + +/**Opens a stream with <code>freopen()</code> and fills in a set of callbacks + that can be used to access it. + This is useful to avoid writing your own portable 64-bit seeking wrappers, + and also avoids cross-module linking issues on Windows, where a + <code>FILE *</code> must be accessed by routines defined in the same module + that opened it. + \param[out] _cb The callbacks to use for this file. + If there is an error opening the file, nothing will be + filled in here. + \param _path The path to the file to open. + On Windows, this string must be UTF-8 (to allow access + to files whose names cannot be represented in the + current MBCS code page). + All other systems use the native character encoding. + \param _mode The mode to open the file in. + \param _stream A stream previously returned by op_fopen(), op_fdopen(), + or op_freopen(). + \return A stream handle to use with the callbacks, or <code>NULL</code> on + error.*/ +OP_WARN_UNUSED_RESULT void *op_freopen(OpusFileCallbacks *_cb, + const char *_path,const char *_mode,void *_stream) OP_ARG_NONNULL(1) + OP_ARG_NONNULL(2) OP_ARG_NONNULL(3) OP_ARG_NONNULL(4); + +/**Creates a stream that reads from the given block of memory. + This block of memory must contain the complete stream to decode. + This is useful for caching small streams (e.g., sound effects) in RAM. + \param[out] _cb The callbacks to use for this stream. + If there is an error creating the stream, nothing will be + filled in here. + \param _data The block of memory to read from. + \param _size The size of the block of memory. 
+ \return A stream handle to use with the callbacks, or <code>NULL</code> on + error.*/ +OP_WARN_UNUSED_RESULT void *op_mem_stream_create(OpusFileCallbacks *_cb, + const unsigned char *_data,size_t _size) OP_ARG_NONNULL(1); + +/**Creates a stream that reads from the given URL. + This function behaves identically to op_url_stream_create(), except that it + takes a va_list instead of a variable number of arguments. + It does not call the <code>va_end</code> macro, and because it invokes the + <code>va_arg</code> macro, the value of \a _ap is undefined after the call. + \note If you use this function, you must link against <tt>libopusurl</tt>. + \param[out] _cb The callbacks to use for this stream. + If there is an error creating the stream, nothing will + be filled in here. + \param _url The URL to read from. + Currently only the <file:>, <http:>, and <https:> + schemes are supported. + Both <http:> and <https:> may be disabled at compile + time, in which case opening such URLs will always fail. + Currently this only supports URIs. + IRIs should be converted to UTF-8 and URL-escaped, with + internationalized domain names encoded in punycode, + before passing them to this function. + \param[in,out] _ap A list of the \ref url_options "optional flags" to use. + This is a variable-length list of options terminated + with <code>NULL</code>. + \return A stream handle to use with the callbacks, or <code>NULL</code> on + error.*/ +OP_WARN_UNUSED_RESULT void *op_url_stream_vcreate(OpusFileCallbacks *_cb, + const char *_url,va_list _ap) OP_ARG_NONNULL(1) OP_ARG_NONNULL(2); + +/**Creates a stream that reads from the given URL. + \note If you use this function, you must link against <tt>libopusurl</tt>. + \param[out] _cb The callbacks to use for this stream. + If there is an error creating the stream, nothing will be + filled in here. + \param _url The URL to read from. + Currently only the <file:>, <http:>, and <https:> schemes + are supported. 
+ Both <http:> and <https:> may be disabled at compile time, + in which case opening such URLs will always fail. + Currently this only supports URIs. + IRIs should be converted to UTF-8 and URL-escaped, with + internationalized domain names encoded in punycode, before + passing them to this function. + \param ... The \ref url_options "optional flags" to use. + This is a variable-length list of options terminated with + <code>NULL</code>. + \return A stream handle to use with the callbacks, or <code>NULL</code> on + error.*/ +OP_WARN_UNUSED_RESULT void *op_url_stream_create(OpusFileCallbacks *_cb, + const char *_url,...) OP_ARG_NONNULL(1) OP_ARG_NONNULL(2); + +/*@}*/ +/*@}*/ + +/**\defgroup stream_open_close Opening and Closing*/ +/*@{*/ +/**\name Functions for opening and closing streams + + These functions allow you to test a stream to see if it is Opus, open it, + and close it. + Several flavors are provided for each of the built-in stream types, plus a + more general version which takes a set of application-provided callbacks.*/ +/*@{*/ + +/**Test to see if this is an Opus stream. + For good results, you will need at least 57 bytes (for a pure Opus-only + stream). + Something like 512 bytes will give more reliable results for multiplexed + streams. + This function is meant to be a quick-rejection filter. + Its purpose is not to guarantee that a stream is a valid Opus stream, but to + ensure that it looks enough like Opus that it isn't going to be recognized + as some other format (except possibly an Opus stream that is also + multiplexed with other codecs, such as video). + \param[out] _head The parsed ID header contents. + You may pass <code>NULL</code> if you do not need + this information. + If the function fails, the contents of this structure + remain untouched. + \param _initial_data An initial buffer of data from the start of the + stream. + \param _initial_bytes The number of bytes in \a _initial_data. 
+ \return 0 if the data appears to be Opus, or a negative value on error. + \retval #OP_FALSE There was not enough data to tell if this was an Opus + stream or not. + \retval #OP_EFAULT An internal memory allocation failed. + \retval #OP_EIMPL The stream used a feature that is not implemented, + such as an unsupported channel family. + \retval #OP_ENOTFORMAT If the data did not contain a recognizable ID + header for an Opus stream. + \retval #OP_EVERSION If the version field signaled a version this library + does not know how to parse. + \retval #OP_EBADHEADER The ID header was not properly formatted or contained + illegal values.*/ +int op_test(OpusHead *_head, + const unsigned char *_initial_data,size_t _initial_bytes); + +/**Open a stream from the given file path. + \param _path The path to the file to open. + \param[out] _error Returns 0 on success, or a failure code on error. + You may pass in <code>NULL</code> if you don't want the + failure code. + The failure code will be #OP_EFAULT if the file could not + be opened, or one of the other failure codes from + op_open_callbacks() otherwise. + \return A freshly opened \c OggOpusFile, or <code>NULL</code> on error.*/ +OP_WARN_UNUSED_RESULT OggOpusFile *op_open_file(const char *_path,int *_error) + OP_ARG_NONNULL(1); + +/**Open a stream from a memory buffer. + \param _data The memory buffer to open. + \param _size The number of bytes in the buffer. + \param[out] _error Returns 0 on success, or a failure code on error. + You may pass in <code>NULL</code> if you don't want the + failure code. + See op_open_callbacks() for a full list of failure codes. + \return A freshly opened \c OggOpusFile, or <code>NULL</code> on error.*/ +OP_WARN_UNUSED_RESULT OggOpusFile *op_open_memory(const unsigned char *_data, + size_t _size,int *_error); + +/**Open a stream from a URL. + This function behaves identically to op_open_url(), except that it + takes a va_list instead of a variable number of arguments. 
+ It does not call the <code>va_end</code> macro, and because it invokes the + <code>va_arg</code> macro, the value of \a _ap is undefined after the call. + \note If you use this function, you must link against <tt>libopusurl</tt>. + \param _url The URL to open. + Currently only the <file:>, <http:>, and <https:> + schemes are supported. + Both <http:> and <https:> may be disabled at compile + time, in which case opening such URLs will always + fail. + Currently this only supports URIs. + IRIs should be converted to UTF-8 and URL-escaped, + with internationalized domain names encoded in + punycode, before passing them to this function. + \param[out] _error Returns 0 on success, or a failure code on error. + You may pass in <code>NULL</code> if you don't want + the failure code. + See op_open_callbacks() for a full list of failure + codes. + \param[in,out] _ap A list of the \ref url_options "optional flags" to + use. + This is a variable-length list of options terminated + with <code>NULL</code>. + \return A freshly opened \c OggOpusFile, or <code>NULL</code> on error.*/ +OP_WARN_UNUSED_RESULT OggOpusFile *op_vopen_url(const char *_url, + int *_error,va_list _ap) OP_ARG_NONNULL(1); + +/**Open a stream from a URL. + \note If you use this function, you must link against <tt>libopusurl</tt>. + \param _url The URL to open. + Currently only the <file:>, <http:>, and <https:> schemes + are supported. + Both <http:> and <https:> may be disabled at compile + time, in which case opening such URLs will always fail. + Currently this only supports URIs. + IRIs should be converted to UTF-8 and URL-escaped, with + internationalized domain names encoded in punycode, + before passing them to this function. + \param[out] _error Returns 0 on success, or a failure code on error. + You may pass in <code>NULL</code> if you don't want the + failure code. + See op_open_callbacks() for a full list of failure codes. + \param ... The \ref url_options "optional flags" to use. 
+ This is a variable-length list of options terminated with + <code>NULL</code>. + \return A freshly opened \c OggOpusFile, or <code>NULL</code> on error.*/ +OP_WARN_UNUSED_RESULT OggOpusFile *op_open_url(const char *_url, + int *_error,...) OP_ARG_NONNULL(1); + +/**Open a stream using the given set of callbacks to access it. + \param _source The stream to read from (e.g., a <code>FILE *</code>). + \param _cb The callbacks with which to access the stream. + <code><a href="#op_read_func">read()</a></code> must + be implemented. + <code><a href="#op_seek_func">seek()</a></code> and + <code><a href="#op_tell_func">tell()</a></code> may + be <code>NULL</code>, or may always return -1 to + indicate a source is unseekable, but if + <code><a href="#op_seek_func">seek()</a></code> is + implemented and succeeds on a particular source, then + <code><a href="#op_tell_func">tell()</a></code> must + also. + <code><a href="#op_close_func">close()</a></code> may + be <code>NULL</code>, but if it is not, it will be + called when the \c OggOpusFile is destroyed by + op_free(). + It will not be called if op_open_callbacks() fails + with an error. + \param _initial_data An initial buffer of data from the start of the + stream. + Applications can read some number of bytes from the + start of the stream to help identify this as an Opus + stream, and then provide them here to allow the + stream to be opened, even if it is unseekable. + \param _initial_bytes The number of bytes in \a _initial_data. + If the stream is seekable, its current position (as + reported by + <code><a href="#op_tell_func">tell()</a></code> + at the start of this function) must be equal to + \a _initial_bytes. + Otherwise, seeking to absolute positions will + generate inconsistent results. + \param[out] _error Returns 0 on success, or a failure code on error. + You may pass in <code>NULL</code> if you don't want + the failure code.
+ The failure code will be one of + <dl> + <dt>#OP_EREAD</dt> + <dd>An underlying read, seek, or tell operation + failed when it should have succeeded, or we failed + to find data in the stream we had seen before.</dd> + <dt>#OP_EFAULT</dt> + <dd>There was a memory allocation failure, or an + internal library error.</dd> + <dt>#OP_EIMPL</dt> + <dd>The stream used a feature that is not + implemented, such as an unsupported channel + family.</dd> + <dt>#OP_EINVAL</dt> + <dd><code><a href="#op_seek_func">seek()</a></code> + was implemented and succeeded on this source, but + <code><a href="#op_tell_func">tell()</a></code> + did not, or the starting position indicator was + not equal to \a _initial_bytes.</dd> + <dt>#OP_ENOTFORMAT</dt> + <dd>The stream contained a link that did not have + any logical Opus streams in it.</dd> + <dt>#OP_EBADHEADER</dt> + <dd>A required header packet was not properly + formatted, contained illegal values, or was missing + altogether.</dd> + <dt>#OP_EVERSION</dt> + <dd>An ID header contained an unrecognized version + number.</dd> + <dt>#OP_EBADLINK</dt> + <dd>We failed to find data we had seen before after + seeking.</dd> + <dt>#OP_EBADTIMESTAMP</dt> + <dd>The first or last timestamp in a link failed + basic validity checks.</dd> + </dl> + \return A freshly opened \c OggOpusFile, or <code>NULL</code> on error. + <tt>libopusfile</tt> does <em>not</em> take ownership of the source + if the call fails. + The calling application is responsible for closing the source if + this call returns an error.*/ +OP_WARN_UNUSED_RESULT OggOpusFile *op_open_callbacks(void *_source, + const OpusFileCallbacks *_cb,const unsigned char *_initial_data, + size_t _initial_bytes,int *_error) OP_ARG_NONNULL(2); + +/**Partially open a stream from the given file path. + \see op_test_callbacks + \param _path The path to the file to open. + \param[out] _error Returns 0 on success, or a failure code on error. 
+ You may pass in <code>NULL</code> if you don't want the + failure code. + The failure code will be #OP_EFAULT if the file could not + be opened, or one of the other failure codes from + op_open_callbacks() otherwise. + \return A partially opened \c OggOpusFile, or <code>NULL</code> on error.*/ +OP_WARN_UNUSED_RESULT OggOpusFile *op_test_file(const char *_path,int *_error) + OP_ARG_NONNULL(1); + +/**Partially open a stream from a memory buffer. + \see op_test_callbacks + \param _data The memory buffer to open. + \param _size The number of bytes in the buffer. + \param[out] _error Returns 0 on success, or a failure code on error. + You may pass in <code>NULL</code> if you don't want the + failure code. + See op_open_callbacks() for a full list of failure codes. + \return A partially opened \c OggOpusFile, or <code>NULL</code> on error.*/ +OP_WARN_UNUSED_RESULT OggOpusFile *op_test_memory(const unsigned char *_data, + size_t _size,int *_error); + +/**Partially open a stream from a URL. + This function behaves identically to op_test_url(), except that it + takes a va_list instead of a variable number of arguments. + It does not call the <code>va_end</code> macro, and because it invokes the + <code>va_arg</code> macro, the value of \a _ap is undefined after the call. + \note If you use this function, you must link against <tt>libopusurl</tt>. + \see op_test_url + \see op_test_callbacks + \param _url The URL to open. + Currently only the <file:>, <http:>, and <https:> + schemes are supported. + Both <http:> and <https:> may be disabled at compile + time, in which case opening such URLs will always + fail. + Currently this only supports URIs. + IRIs should be converted to UTF-8 and URL-escaped, + with internationalized domain names encoded in + punycode, before passing them to this function. + \param[out] _error Returns 0 on success, or a failure code on error. + You may pass in <code>NULL</code> if you don't want + the failure code. 
+ See op_open_callbacks() for a full list of failure + codes. + \param[in,out] _ap A list of the \ref url_options "optional flags" to + use. + This is a variable-length list of options terminated + with <code>NULL</code>. + \return A partially opened \c OggOpusFile, or <code>NULL</code> on error.*/ +OP_WARN_UNUSED_RESULT OggOpusFile *op_vtest_url(const char *_url, + int *_error,va_list _ap) OP_ARG_NONNULL(1); + +/**Partially open a stream from a URL. + \note If you use this function, you must link against <tt>libopusurl</tt>. + \see op_test_callbacks + \param _url The URL to open. + Currently only the <file:>, <http:>, and <https:> + schemes are supported. + Both <http:> and <https:> may be disabled at compile + time, in which case opening such URLs will always fail. + Currently this only supports URIs. + IRIs should be converted to UTF-8 and URL-escaped, with + internationalized domain names encoded in punycode, + before passing them to this function. + \param[out] _error Returns 0 on success, or a failure code on error. + You may pass in <code>NULL</code> if you don't want the + failure code. + See op_open_callbacks() for a full list of failure + codes. + \param ... The \ref url_options "optional flags" to use. + This is a variable-length list of options terminated + with <code>NULL</code>. + \return A partially opened \c OggOpusFile, or <code>NULL</code> on error.*/ +OP_WARN_UNUSED_RESULT OggOpusFile *op_test_url(const char *_url, + int *_error,...) OP_ARG_NONNULL(1); + +/**Partially open a stream using the given set of callbacks to access it. + This tests for Opusness and loads the headers for the first link. + It does not seek (although it tests for seekability). + You can query a partially open stream for the few pieces of basic + information returned by op_serialno(), op_channel_count(), op_head(), and + op_tags() (but only for the first link). + You may also determine if it is seekable via a call to op_seekable(). 
+ You cannot read audio from the stream, seek, get the size or duration, + get information from links other than the first one, or even get the total + number of links until you finish opening the stream with op_test_open(). + If you do not need to do any of these things, you can dispose of it with + op_free() instead. + + This function is provided mostly to simplify porting existing code that used + <tt>libvorbisfile</tt>. + For new code, you are likely better off using op_test() instead, which + is less resource-intensive, requires less data to succeed, and imposes a + hard limit on the amount of data it examines (important for unseekable + sources, where all such data must be buffered until you are sure of the + stream type). + \param _source The stream to read from (e.g., a <code>FILE *</code>). + \param _cb The callbacks with which to access the stream. + <code><a href="#op_read_func">read()</a></code> must + be implemented. + <code><a href="#op_seek_func">seek()</a></code> and + <code><a href="#op_tell_func">tell()</a></code> may + be <code>NULL</code>, or may always return -1 to + indicate a source is unseekable, but if + <code><a href="#op_seek_func">seek()</a></code> is + implemented and succeeds on a particular source, then + <code><a href="#op_tell_func">tell()</a></code> must + also. + <code><a href="#op_close_func">close()</a></code> may + be <code>NULL</code>, but if it is not, it will be + called when the \c OggOpusFile is destroyed by + op_free(). + It will not be called if op_open_callbacks() fails + with an error. + \param _initial_data An initial buffer of data from the start of the + stream. + Applications can read some number of bytes from the + start of the stream to help identify this as an Opus + stream, and then provide them here to allow the + stream to be tested more thoroughly, even if it is + unseekable. + \param _initial_bytes The number of bytes in \a _initial_data. 
+ If the stream is seekable, its current position (as + reported by + <code><a href="#op_tell_func">tell()</a></code> + at the start of this function) must be equal to + \a _initial_bytes. + Otherwise, seeking to absolute positions will + generate inconsistent results. + \param[out] _error Returns 0 on success, or a failure code on error. + You may pass in <code>NULL</code> if you don't want + the failure code. + See op_open_callbacks() for a full list of failure + codes. + \return A partially opened \c OggOpusFile, or <code>NULL</code> on error. + <tt>libopusfile</tt> does <em>not</em> take ownership of the source + if the call fails. + The calling application is responsible for closing the source if + this call returns an error.*/ +OP_WARN_UNUSED_RESULT OggOpusFile *op_test_callbacks(void *_source, + const OpusFileCallbacks *_cb,const unsigned char *_initial_data, + size_t _initial_bytes,int *_error) OP_ARG_NONNULL(2); + +/**Finish opening a stream partially opened with op_test_callbacks() or one of + the associated convenience functions. + If this function fails, you are still responsible for freeing the + \c OggOpusFile with op_free(). + \param _of The \c OggOpusFile to finish opening. + \return 0 on success, or a negative value on error. + \retval #OP_EREAD An underlying read, seek, or tell operation failed + when it should have succeeded. + \retval #OP_EFAULT There was a memory allocation failure, or an + internal library error. + \retval #OP_EIMPL The stream used a feature that is not implemented, + such as an unsupported channel family. + \retval #OP_EINVAL The stream was not partially opened with + op_test_callbacks() or one of the associated + convenience functions. + \retval #OP_ENOTFORMAT The stream contained a link that did not have any + logical Opus streams in it. + \retval #OP_EBADHEADER A required header packet was not properly + formatted, contained illegal values, or was + missing altogether.
+ \retval #OP_EVERSION An ID header contained an unrecognized version + number. + \retval #OP_EBADLINK We failed to find data we had seen before after + seeking. + \retval #OP_EBADTIMESTAMP The first or last timestamp in a link failed basic + validity checks.*/ +int op_test_open(OggOpusFile *_of) OP_ARG_NONNULL(1); + +/**Release all memory used by an \c OggOpusFile. + \param _of The \c OggOpusFile to free.*/ +void op_free(OggOpusFile *_of); + +/*@}*/ +/*@}*/ + +/**\defgroup stream_info Stream Information*/ +/*@{*/ +/**\name Functions for obtaining information about streams + + These functions allow you to get basic information about a stream, including + seekability, the number of links (for chained streams), plus the size, + duration, bitrate, header parameters, and meta information for each link + (or, where available, the stream as a whole). + Some of these (size, duration) are only available for seekable streams. + You can also query the current stream position, link, and playback time, + and instantaneous bitrate during playback. + + Some of these functions may be used successfully on the partially open + streams returned by op_test_callbacks() or one of the associated + convenience functions. + Their documentation will indicate so explicitly.*/ +/*@{*/ + +/**Returns whether or not the data source being read is seekable. + This is true if + <ol> + <li>The <code><a href="#op_seek_func">seek()</a></code> and + <code><a href="#op_tell_func">tell()</a></code> callbacks are both + non-<code>NULL</code>,</li> + <li>The <code><a href="#op_seek_func">seek()</a></code> callback was + successfully executed at least once, and</li> + <li>The <code><a href="#op_tell_func">tell()</a></code> callback was + successfully able to report the position indicator afterwards.</li> + </ol> + This function may be called on partially-opened streams. + \param _of The \c OggOpusFile whose seekable status is to be returned.
+ \return A non-zero value if seekable, and 0 if unseekable.*/ +int op_seekable(const OggOpusFile *_of) OP_ARG_NONNULL(1); + +/**Returns the number of links in this chained stream. + This function may be called on partially-opened streams, but it will always + return 1. + The actual number of links is not known until the stream is fully opened. + \param _of The \c OggOpusFile from which to retrieve the link count. + \return For fully-open seekable sources, this returns the total number of + links in the whole stream, which will be at least 1. + For partially-open or unseekable sources, this always returns 1.*/ +int op_link_count(const OggOpusFile *_of) OP_ARG_NONNULL(1); + +/**Get the serial number of the given link in a (possibly-chained) Ogg Opus + stream. + This function may be called on partially-opened streams, but it will always + return the serial number of the Opus stream in the first link. + \param _of The \c OggOpusFile from which to retrieve the serial number. + \param _li The index of the link whose serial number should be retrieved. + Use a negative number to get the serial number of the current + link. + \return The serial number of the given link. + If \a _li is greater than the total number of links, this returns + the serial number of the last link. + If the source is not seekable, this always returns the serial number + of the current link.*/ +opus_uint32 op_serialno(const OggOpusFile *_of,int _li) OP_ARG_NONNULL(1); + +/**Get the channel count of the given link in a (possibly-chained) Ogg Opus + stream. + This is equivalent to <code>op_head(_of,_li)->channel_count</code>, but + is provided for convenience. + This function may be called on partially-opened streams, but it will always + return the channel count of the Opus stream in the first link. + \param _of The \c OggOpusFile from which to retrieve the channel count. + \param _li The index of the link whose channel count should be retrieved. 
+ Use a negative number to get the channel count of the current + link. + \return The channel count of the given link. + If \a _li is greater than the total number of links, this returns + the channel count of the last link. + If the source is not seekable, this always returns the channel count + of the current link.*/ +int op_channel_count(const OggOpusFile *_of,int _li) OP_ARG_NONNULL(1); + +/**Get the total (compressed) size of the stream, or of an individual link in + a (possibly-chained) Ogg Opus stream, including all headers and Ogg muxing + overhead. + \warning If the Opus stream (or link) is concurrently multiplexed with other + logical streams (e.g., video), this returns the size of the entire stream + (or link), not just the number of bytes in the first logical Opus stream. + Returning the latter would require scanning the entire file. + \param _of The \c OggOpusFile from which to retrieve the compressed size. + \param _li The index of the link whose compressed size should be computed. + Use a negative number to get the compressed size of the entire + stream. + \return The compressed size of the entire stream if \a _li is negative, the + compressed size of link \a _li if it is non-negative, or a negative + value on error. + The compressed size of the entire stream may be smaller than that + of the underlying source if trailing garbage was detected in the + file. + \retval #OP_EINVAL The source is not seekable (so we can't know the length), + \a _li wasn't less than the total number of links in + the stream, or the stream was only partially open.*/ +opus_int64 op_raw_total(const OggOpusFile *_of,int _li) OP_ARG_NONNULL(1); + +/**Get the total PCM length (number of samples at 48 kHz) of the stream, or of + an individual link in a (possibly-chained) Ogg Opus stream. + Users looking for <code>op_time_total()</code> should use op_pcm_total() + instead. 
+ Because timestamps in Opus are fixed at 48 kHz, there is no need for a + separate function to convert this to seconds (and leaving it out avoids + introducing floating point to the API, for those that wish to avoid it). + \param _of The \c OggOpusFile from which to retrieve the PCM offset. + \param _li The index of the link whose PCM length should be computed. + Use a negative number to get the PCM length of the entire stream. + \return The PCM length of the entire stream if \a _li is negative, the PCM + length of link \a _li if it is non-negative, or a negative value on + error. + \retval #OP_EINVAL The source is not seekable (so we can't know the length), + \a _li wasn't less than the total number of links in + the stream, or the stream was only partially open.*/ +ogg_int64_t op_pcm_total(const OggOpusFile *_of,int _li) OP_ARG_NONNULL(1); + +/**Get the ID header information for the given link in a (possibly chained) Ogg + Opus stream. + This function may be called on partially-opened streams, but it will always + return the ID header information of the Opus stream in the first link. + \param _of The \c OggOpusFile from which to retrieve the ID header + information. + \param _li The index of the link whose ID header information should be + retrieved. + Use a negative number to get the ID header information of the + current link. + For an unseekable stream, \a _li is ignored, and the ID header + information for the current link is always returned, if + available. + \return The contents of the ID header for the given link.*/ +const OpusHead *op_head(const OggOpusFile *_of,int _li) OP_ARG_NONNULL(1); + +/**Get the comment header information for the given link in a (possibly + chained) Ogg Opus stream. + This function may be called on partially-opened streams, but it will always + return the tags from the Opus stream in the first link. + \param _of The \c OggOpusFile from which to retrieve the comment header + information. 
+ \param _li The index of the link whose comment header information should be + retrieved. + Use a negative number to get the comment header information of + the current link. + For an unseekable stream, \a _li is ignored, and the comment + header information for the current link is always returned, if + available. + \return The contents of the comment header for the given link, or + <code>NULL</code> if this is an unseekable stream that encountered + an invalid link.*/ +const OpusTags *op_tags(const OggOpusFile *_of,int _li) OP_ARG_NONNULL(1); + +/**Retrieve the index of the current link. + This is the link that produced the data most recently read by + op_read_float() or its associated functions, or, after a seek, the link + that the seek target landed in. + Reading more data may advance the link index (even on the first read after a + seek). + \param _of The \c OggOpusFile from which to retrieve the current link index. + \return The index of the current link on success, or a negative value on + failure. + For seekable streams, this is a number between 0 and the value + returned by op_link_count(). + For unseekable streams, this value starts at 0 and increments by one + each time a new link is encountered (even though op_link_count() + always returns 1). + \retval #OP_EINVAL The stream was only partially open.*/ +int op_current_link(const OggOpusFile *_of) OP_ARG_NONNULL(1); + +/**Computes the bitrate of the stream, or of an individual link in a + (possibly-chained) Ogg Opus stream. + The stream must be seekable to compute the bitrate. + For unseekable streams, use op_bitrate_instant() to get periodic estimates. + \warning If the Opus stream (or link) is concurrently multiplexed with other + logical streams (e.g., video), this uses the size of the entire stream (or + link) to compute the bitrate, not just the number of bytes in the first + logical Opus stream. 
+ Returning the latter requires scanning the entire file, but this may be done + by decoding the whole file and calling op_bitrate_instant() once at the + end. + Install a trivial decoding callback with op_set_decode_callback() if you + wish to skip actual decoding during this process. + \param _of The \c OggOpusFile from which to retrieve the bitrate. + \param _li The index of the link whose bitrate should be computed. + Use a negative number to get the bitrate of the whole stream. + \return The bitrate on success, or a negative value on error. + \retval #OP_EINVAL The stream was only partially open, the stream was not + seekable, or \a _li was larger than the number of + links.*/ +opus_int32 op_bitrate(const OggOpusFile *_of,int _li) OP_ARG_NONNULL(1); + +/**Compute the instantaneous bitrate, measured as the ratio of bits to playable + samples decoded since a) the last call to op_bitrate_instant(), b) the last + seek, or c) the start of playback, whichever was most recent. + This will spike somewhat after a seek or at the start/end of a chain + boundary, as pre-skip, pre-roll, and end-trimming causes samples to be + decoded but not played. + \param _of The \c OggOpusFile from which to retrieve the bitrate. + \return The bitrate, in bits per second, or a negative value on error. + \retval #OP_FALSE No data has been decoded since any of the events + described above. + \retval #OP_EINVAL The stream was only partially open.*/ +opus_int32 op_bitrate_instant(OggOpusFile *_of) OP_ARG_NONNULL(1); + +/**Obtain the current value of the position indicator for \a _of. + \param _of The \c OggOpusFile from which to retrieve the position indicator. + \return The byte position that is currently being read from. + \retval #OP_EINVAL The stream was only partially open.*/ +opus_int64 op_raw_tell(const OggOpusFile *_of) OP_ARG_NONNULL(1); + +/**Obtain the PCM offset of the next sample to be read. 
+ If the stream is not properly timestamped, this might not increment by the + proper amount between reads, or even return monotonically increasing + values. + \param _of The \c OggOpusFile from which to retrieve the PCM offset. + \return The PCM offset of the next sample to be read. + \retval #OP_EINVAL The stream was only partially open.*/ +ogg_int64_t op_pcm_tell(const OggOpusFile *_of) OP_ARG_NONNULL(1); + +/*@}*/ +/*@}*/ + +/**\defgroup stream_seeking Seeking*/ +/*@{*/ +/**\name Functions for seeking in Opus streams + + These functions let you seek in Opus streams, if the underlying source + supports it. + Seeking is implemented for all built-in stream I/O routines, though some + individual sources may not be seekable (pipes, live HTTP streams, or HTTP + streams from a server that does not support <code>Range</code> requests). + + op_raw_seek() is the fastest: it is guaranteed to perform at most one + physical seek, but, since the target is a byte position, makes no guarantee + how close to a given time it will come. + op_pcm_seek() provides sample-accurate seeking. + The number of physical seeks it requires is still quite small (often 1 or + 2, even in highly variable bitrate streams). + + Seeking in Opus requires decoding some pre-roll amount before playback to + allow the internal state to converge (as if recovering from packet loss). + This is handled internally by <tt>libopusfile</tt>, but means there is + little extra overhead for decoding up to the exact position requested + (since it must decode some amount of audio anyway). + It also means that decoding after seeking may not return exactly the same + values as would be obtained by decoding the stream straight through. + However, such differences are expected to be smaller than the loss + introduced by Opus's lossy compression.*/ +/*@{*/ + +/**Seek to a byte offset relative to the <b>compressed</b> data. + This also scans packets to update the PCM cursor. 
+ It will cross a logical bitstream boundary, but only if it can't get any + packets out of the tail of the link to which it seeks. + \param _of The \c OggOpusFile in which to seek. + \param _byte_offset The byte position to seek to. + \return 0 on success, or a negative error code on failure. + \retval #OP_EREAD The underlying seek operation failed. + \retval #OP_EINVAL The stream was only partially open, or the target was + outside the valid range for the stream. + \retval #OP_ENOSEEK This stream is not seekable. + \retval #OP_EBADLINK Failed to initialize a decoder for a stream for an + unknown reason.*/ +int op_raw_seek(OggOpusFile *_of,opus_int64 _byte_offset) OP_ARG_NONNULL(1); + +/**Seek to the specified PCM offset, such that decoding will begin at exactly + the requested position. + \param _of The \c OggOpusFile in which to seek. + \param _pcm_offset The PCM offset to seek to. + This is in samples at 48 kHz relative to the start of the + stream. + \return 0 on success, or a negative value on error. + \retval #OP_EREAD An underlying read or seek operation failed. + \retval #OP_EINVAL The stream was only partially open, or the target was + outside the valid range for the stream. + \retval #OP_ENOSEEK This stream is not seekable. + \retval #OP_EBADLINK We failed to find data we had seen before, or the + bitstream structure was sufficiently malformed that + seeking to the target destination was impossible.*/ +int op_pcm_seek(OggOpusFile *_of,ogg_int64_t _pcm_offset) OP_ARG_NONNULL(1); + +/*@}*/ +/*@}*/ + +/**\defgroup stream_decoding Decoding*/ +/*@{*/ +/**\name Functions for decoding audio data + + These functions retrieve actual decoded audio data from the stream. + The general functions, op_read() and op_read_float() return 16-bit or + floating-point output, both using native endian ordering. + The number of channels returned can change from link to link in a chained + stream. 
+ There are special functions, op_read_stereo() and op_read_float_stereo(), + which always output two channels, to simplify applications which do not + wish to handle multichannel audio. + These downmix multichannel files to two channels, so they can always return + samples in the same format for every link in a chained file. + + If the rest of your audio processing chain can handle floating point, those + routines should be preferred, as floating point output avoids introducing + clipping and other issues which might be avoided entirely if, e.g., you + scale down the volume at some other stage. + However, if you intend to directly consume 16-bit samples, the conversion in + <tt>libopusfile</tt> provides noise-shaping dithering and, if compiled + against <tt>libopus</tt> 1.1 or later, soft-clipping prevention. + + <tt>libopusfile</tt> can also be configured at compile time to use the + fixed-point <tt>libopus</tt> API. + If so, <tt>libopusfile</tt>'s floating-point API may also be disabled. + In that configuration, nothing in <tt>libopusfile</tt> will use any + floating-point operations, to simplify support on devices without an + adequate FPU. + + \warning HTTPS streams may be vulnerable to truncation attacks if you do + not check the error return code from op_read_float() or its associated + functions. + If the remote peer does not close the connection gracefully (with a TLS + "close notify" message), these functions will return #OP_EREAD instead of 0 + when they reach the end of the file. 
+ If you are reading from an <https:> URL (particularly if seeking is not + supported), you should make sure to check for this error and warn the user + appropriately.*/ +/*@{*/ + +/**Indicates that the decoding callback should produce signed 16-bit + native-endian output samples.*/ +#define OP_DEC_FORMAT_SHORT (7008) +/**Indicates that the decoding callback should produce 32-bit native-endian + float samples.*/ +#define OP_DEC_FORMAT_FLOAT (7040) + +/**Indicates that the decoding callback did not decode anything, and that + <tt>libopusfile</tt> should decode normally instead.*/ +#define OP_DEC_USE_DEFAULT (6720) + +/**Called to decode an Opus packet. + This should invoke the functional equivalent of opus_multistream_decode() or + opus_multistream_decode_float(), except that it returns 0 on success + instead of the number of decoded samples (which is known a priori). + \param _ctx The application-provided callback context. + \param _decoder The decoder to use to decode the packet. + \param[out] _pcm The buffer to decode into. + This will always have enough room for \a _nchannels of + \a _nsamples samples, which should be placed into this + buffer interleaved. + \param _op The packet to decode. + This will always have its granule position set to a valid + value. + \param _nsamples The number of samples expected from the packet. + \param _nchannels The number of channels expected from the packet. + \param _format The desired sample output format. + This is either #OP_DEC_FORMAT_SHORT or + #OP_DEC_FORMAT_FLOAT. + \param _li The index of the link from which this packet was decoded. + \return A non-negative value on success, or a negative value on error. + The error codes should be the same as those returned by + opus_multistream_decode() or opus_multistream_decode_float(). + \retval 0 Decoding was successful. + The application has filled the buffer with + exactly <code>\a _nsamples*\a + _nchannels</code> samples in the requested + format. 
+ \retval #OP_DEC_USE_DEFAULT No decoding was done. + <tt>libopusfile</tt> should decode normally + instead.*/ +typedef int (*op_decode_cb_func)(void *_ctx,OpusMSDecoder *_decoder,void *_pcm, + const ogg_packet *_op,int _nsamples,int _nchannels,int _format,int _li); + +/**Sets the packet decode callback function. + This is called once for each packet that needs to be decoded. + A call to this function is no guarantee that the audio will eventually be + delivered to the application. + Some or all of the data from the packet may be discarded (i.e., at the + beginning or end of a link, or after a seek), however the callback is + required to provide all of it. + \param _of The \c OggOpusFile on which to set the decode callback. + \param _decode_cb The callback function to call. + This may be <code>NULL</code> to disable calling the + callback. + \param _ctx The application-provided context pointer to pass to the + callback on each call.*/ +void op_set_decode_callback(OggOpusFile *_of, + op_decode_cb_func _decode_cb,void *_ctx) OP_ARG_NONNULL(1); + +/**Gain offset type that indicates that the provided offset is relative to the + header gain. + This is the default.*/ +#define OP_HEADER_GAIN (0) + +/**Gain offset type that indicates that the provided offset is relative to the + R128_TRACK_GAIN value (if any), in addition to the header gain.*/ +#define OP_TRACK_GAIN (3008) + +/**Gain offset type that indicates that the provided offset should be used as + the gain directly, without applying any of the header or track gains.*/ +#define OP_ABSOLUTE_GAIN (3009) + +/**Sets the gain to be used for decoded output. + By default, the gain in the header is applied with no additional offset. + The total gain (including header gain and/or track gain, if applicable, and + this offset), will be clamped to [-32768,32767]/256 dB. + This is more than enough to saturate or underflow 16-bit PCM. + \note The new gain will not be applied to any already buffered, decoded + output. 
+ This means you cannot change it sample-by-sample, as at best it will be + updated packet-by-packet. + It is meant for setting a target volume level, rather than applying smooth + fades, etc. + \param _of The \c OggOpusFile on which to set the gain offset. + \param _gain_type One of #OP_HEADER_GAIN, #OP_TRACK_GAIN, or + #OP_ABSOLUTE_GAIN. + \param _gain_offset_q8 The gain offset to apply, in 1/256ths of a dB. + \return 0 on success or a negative value on error. + \retval #OP_EINVAL The \a _gain_type was unrecognized.*/ +int op_set_gain_offset(OggOpusFile *_of, + int _gain_type,opus_int32 _gain_offset_q8) OP_ARG_NONNULL(1); + +/**Sets whether or not dithering is enabled for 16-bit decoding. + By default, when <tt>libopusfile</tt> is compiled to use floating-point + internally, calling op_read() or op_read_stereo() will first decode to + float, and then convert to fixed-point using noise-shaping dithering. + This flag can be used to disable that dithering. + When the application uses op_read_float() or op_read_float_stereo(), or when + the library has been compiled to decode directly to fixed point, this flag + has no effect. + \param _of The \c OggOpusFile on which to enable or disable dithering. + \param _enabled A non-zero value to enable dithering, or 0 to disable it.*/ +void op_set_dither_enabled(OggOpusFile *_of,int _enabled) OP_ARG_NONNULL(1); + +/**Reads more samples from the stream. + \note Although \a _buf_size must indicate the total number of values that + can be stored in \a _pcm, the return value is the number of samples + <em>per channel</em>. 
+ This is done because + <ol> + <li>The channel count cannot be known a priori (reading more samples might + advance us into the next link, with a different channel count), so + \a _buf_size cannot also be in units of samples per channel,</li> + <li>Returning the samples per channel matches the <code>libopus</code> API + as closely as we're able,</li> + <li>Returning the total number of values instead of samples per channel + would mean the caller would need a division to compute the samples per + channel, and might worry about the possibility of getting back samples + for some channels and not others, and</li> + <li>This approach is relatively fool-proof: if an application passes too + small a value to \a _buf_size, they will simply get fewer samples back, + and if they assume the return value is the total number of values, then + they will simply read too few (rather than reading too many and going + off the end of the buffer).</li> + </ol> + \param _of The \c OggOpusFile from which to read. + \param[out] _pcm A buffer in which to store the output PCM samples, as + signed native-endian 16-bit values at 48 kHz + with a nominal range of <code>[-32768,32767)</code>. + Multiple channels are interleaved using the + <a href="http://www.xiph.org/vorbis/doc/Vorbis_I_spec.html#x1-800004.3.9">Vorbis + channel ordering</a>. + This must have room for at least \a _buf_size values. + \param _buf_size The number of values that can be stored in \a _pcm. + It is recommended that this be large enough for at + least 120 ms of data at 48 kHz per channel (5760 + values per channel). + Smaller buffers will simply return less data, possibly + consuming more memory to buffer the data internally. + <tt>libopusfile</tt> may return less data than + requested. + If so, there is no guarantee that the remaining data + in \a _pcm will be unmodified. + \param[out] _li The index of the link this data was decoded from. + You may pass <code>NULL</code> if you do not need this + information. 
+ If this function fails (returning a negative value), + this parameter is left unset. + \return The number of samples read per channel on success, or a negative + value on failure. + The channel count can be retrieved on success by calling + <code>op_head(_of,*_li)</code>. + The number of samples returned may be 0 if the buffer was too small + to store even a single sample for all channels, or if end-of-file + was reached. + The list of possible failure codes follows. + Most of them can only be returned by unseekable, chained streams + that encounter a new link. + \retval #OP_HOLE There was a hole in the data, and some samples + may have been skipped. + Call this function again to continue decoding + past the hole. + \retval #OP_EREAD An underlying read operation failed. + This may signal a truncation attack from an + <https:> source. + \retval #OP_EFAULT An internal memory allocation failed. + \retval #OP_EIMPL An unseekable stream encountered a new link that + used a feature that is not implemented, such as + an unsupported channel family. + \retval #OP_EINVAL The stream was only partially open. + \retval #OP_ENOTFORMAT An unseekable stream encountered a new link that + did not have any logical Opus streams in it. + \retval #OP_EBADHEADER An unseekable stream encountered a new link with a + required header packet that was not properly + formatted, contained illegal values, or was + missing altogether. + \retval #OP_EVERSION An unseekable stream encountered a new link with + an ID header that contained an unrecognized + version number. + \retval #OP_EBADPACKET Failed to properly decode the next packet. + \retval #OP_EBADLINK We failed to find data we had seen before. + \retval #OP_EBADTIMESTAMP An unseekable stream encountered a new link with + a starting timestamp that failed basic validity + checks.*/ +OP_WARN_UNUSED_RESULT int op_read(OggOpusFile *_of, + opus_int16 *_pcm,int _buf_size,int *_li) OP_ARG_NONNULL(1); + +/**Reads more samples from the stream. 
+ \note Although \a _buf_size must indicate the total number of values that + can be stored in \a _pcm, the return value is the number of samples + <em>per channel</em>. + This is done because + <ol> + <li>The channel count cannot be known a priori (reading more samples might + advance us into the next link, with a different channel count), so + \a _buf_size cannot also be in units of samples per channel,</li> + <li>Returning the samples per channel matches the <code>libopus</code> API + as closely as we're able,</li> + <li>Returning the total number of values instead of samples per channel + would mean the caller would need a division to compute the samples per + channel, and might worry about the possibility of getting back samples + for some channels and not others, and</li> + <li>This approach is relatively fool-proof: if an application passes too + small a value to \a _buf_size, they will simply get fewer samples back, + and if they assume the return value is the total number of values, then + they will simply read too few (rather than reading too many and going + off the end of the buffer).</li> + </ol> + \param _of The \c OggOpusFile from which to read. + \param[out] _pcm A buffer in which to store the output PCM samples as + signed floats at 48 kHz with a nominal range of + <code>[-1.0,1.0]</code>. + Multiple channels are interleaved using the + <a href="http://www.xiph.org/vorbis/doc/Vorbis_I_spec.html#x1-800004.3.9">Vorbis + channel ordering</a>. + This must have room for at least \a _buf_size floats. + \param _buf_size The number of floats that can be stored in \a _pcm. + It is recommended that this be large enough for at + least 120 ms of data at 48 kHz per channel (5760 + samples per channel). + Smaller buffers will simply return less data, possibly + consuming more memory to buffer the data internally. + If less than \a _buf_size values are returned, + <tt>libopusfile</tt> makes no guarantee that the + remaining data in \a _pcm will be unmodified. 
+ \param[out] _li The index of the link this data was decoded from. + You may pass <code>NULL</code> if you do not need this + information. + If this function fails (returning a negative value), + this parameter is left unset. + \return The number of samples read per channel on success, or a negative + value on failure. + The channel count can be retrieved on success by calling + <code>op_head(_of,*_li)</code>. + The number of samples returned may be 0 if the buffer was too small + to store even a single sample for all channels, or if end-of-file + was reached. + The list of possible failure codes follows. + Most of them can only be returned by unseekable, chained streams + that encounter a new link. + \retval #OP_HOLE There was a hole in the data, and some samples + may have been skipped. + Call this function again to continue decoding + past the hole. + \retval #OP_EREAD An underlying read operation failed. + This may signal a truncation attack from an + <https:> source. + \retval #OP_EFAULT An internal memory allocation failed. + \retval #OP_EIMPL An unseekable stream encountered a new link that + used a feature that is not implemented, such as + an unsupported channel family. + \retval #OP_EINVAL The stream was only partially open. + \retval #OP_ENOTFORMAT An unseekable stream encountered a new link that + did not have any logical Opus streams in it. + \retval #OP_EBADHEADER An unseekable stream encountered a new link with a + required header packet that was not properly + formatted, contained illegal values, or was + missing altogether. + \retval #OP_EVERSION An unseekable stream encountered a new link with + an ID header that contained an unrecognized + version number. + \retval #OP_EBADPACKET Failed to properly decode the next packet. + \retval #OP_EBADLINK We failed to find data we had seen before. 
+ \retval #OP_EBADTIMESTAMP An unseekable stream encountered a new link with + a starting timestamp that failed basic validity + checks.*/ +OP_WARN_UNUSED_RESULT int op_read_float(OggOpusFile *_of, + float *_pcm,int _buf_size,int *_li) OP_ARG_NONNULL(1); + +/**Reads more samples from the stream and downmixes to stereo, if necessary. + This function is intended for simple players that want a uniform output + format, even if the channel count changes between links in a chained + stream. + \note \a _buf_size indicates the total number of values that can be stored + in \a _pcm, while the return value is the number of samples <em>per + channel</em>, even though the channel count is known, for consistency with + op_read(). + \param _of The \c OggOpusFile from which to read. + \param[out] _pcm A buffer in which to store the output PCM samples, as + signed native-endian 16-bit values at 48 kHz + with a nominal range of <code>[-32768,32767)</code>. + The left and right channels are interleaved in the + buffer. + This must have room for at least \a _buf_size values. + \param _buf_size The number of values that can be stored in \a _pcm. + It is recommended that this be large enough for at + least 120 ms of data at 48 kHz per channel (11520 + values total). + Smaller buffers will simply return less data, possibly + consuming more memory to buffer the data internally. + If less than \a _buf_size values are returned, + <tt>libopusfile</tt> makes no guarantee that the + remaining data in \a _pcm will be unmodified. + \return The number of samples read per channel on success, or a negative + value on failure. + The number of samples returned may be 0 if the buffer was too small + to store even a single sample for both channels, or if end-of-file + was reached. + The list of possible failure codes follows. + Most of them can only be returned by unseekable, chained streams + that encounter a new link. 
+ \retval #OP_HOLE There was a hole in the data, and some samples + may have been skipped. + Call this function again to continue decoding + past the hole. + \retval #OP_EREAD An underlying read operation failed. + This may signal a truncation attack from an + <https:> source. + \retval #OP_EFAULT An internal memory allocation failed. + \retval #OP_EIMPL An unseekable stream encountered a new link that + used a feature that is not implemented, such as + an unsupported channel family. + \retval #OP_EINVAL The stream was only partially open. + \retval #OP_ENOTFORMAT An unseekable stream encountered a new link that + did not have any logical Opus streams in it. + \retval #OP_EBADHEADER An unseekable stream encountered a new link with a + required header packet that was not properly + formatted, contained illegal values, or was + missing altogether. + \retval #OP_EVERSION An unseekable stream encountered a new link with + an ID header that contained an unrecognized + version number. + \retval #OP_EBADPACKET Failed to properly decode the next packet. + \retval #OP_EBADLINK We failed to find data we had seen before. + \retval #OP_EBADTIMESTAMP An unseekable stream encountered a new link with + a starting timestamp that failed basic validity + checks.*/ +OP_WARN_UNUSED_RESULT int op_read_stereo(OggOpusFile *_of, + opus_int16 *_pcm,int _buf_size) OP_ARG_NONNULL(1); + +/**Reads more samples from the stream and downmixes to stereo, if necessary. + This function is intended for simple players that want a uniform output + format, even if the channel count changes between links in a chained + stream. + \note \a _buf_size indicates the total number of values that can be stored + in \a _pcm, while the return value is the number of samples <em>per + channel</em>, even though the channel count is known, for consistency with + op_read_float(). + \param _of The \c OggOpusFile from which to read. 
+ \param[out] _pcm A buffer in which to store the output PCM samples, as + signed floats at 48 kHz with a nominal range of + <code>[-1.0,1.0]</code>. + The left and right channels are interleaved in the + buffer. + This must have room for at least \a _buf_size values. + \param _buf_size The number of values that can be stored in \a _pcm. + It is recommended that this be large enough for at + least 120 ms of data at 48 kHz per channel (11520 + values total). + Smaller buffers will simply return less data, possibly + consuming more memory to buffer the data internally. + If less than \a _buf_size values are returned, + <tt>libopusfile</tt> makes no guarantee that the + remaining data in \a _pcm will be unmodified. + \return The number of samples read per channel on success, or a negative + value on failure. + The number of samples returned may be 0 if the buffer was too small + to store even a single sample for both channels, or if end-of-file + was reached. + The list of possible failure codes follows. + Most of them can only be returned by unseekable, chained streams + that encounter a new link. + \retval #OP_HOLE There was a hole in the data, and some samples + may have been skipped. + Call this function again to continue decoding + past the hole. + \retval #OP_EREAD An underlying read operation failed. + This may signal a truncation attack from an + <https:> source. + \retval #OP_EFAULT An internal memory allocation failed. + \retval #OP_EIMPL An unseekable stream encountered a new link that + used a feature that is not implemented, such as + an unsupported channel family. + \retval #OP_EINVAL The stream was only partially open. + \retval #OP_ENOTFORMAT An unseekable stream encountered a new link that + did not have any logical Opus streams in it. + \retval #OP_EBADHEADER An unseekable stream encountered a new link with a + required header packet that was not properly + formatted, contained illegal values, or was + missing altogether. 
+ \retval #OP_EVERSION An unseekable stream encountered a new link with + an ID header that contained an unrecognized + version number. + \retval #OP_EBADPACKET Failed to properly decode the next packet. + \retval #OP_EBADLINK We failed to find data we had seen before. + \retval #OP_EBADTIMESTAMP An unseekable stream encountered a new link with + a starting timestamp that failed basic validity + checks.*/ +OP_WARN_UNUSED_RESULT int op_read_float_stereo(OggOpusFile *_of, + float *_pcm,int _buf_size) OP_ARG_NONNULL(1); + +/*@}*/ +/*@}*/ + +# if OP_GNUC_PREREQ(4,0) +# pragma GCC visibility pop +# endif + +# if defined(__cplusplus) +} +# endif + +#endif diff --git a/drivers/opus/repacketizer.c b/drivers/opus/repacketizer.c new file mode 100644 index 0000000000..01406bb39b --- /dev/null +++ b/drivers/opus/repacketizer.c @@ -0,0 +1,345 @@ +/* Copyright (c) 2011 Xiph.Org Foundation + Written by Jean-Marc Valin */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "opus.h" +#include "opus_private.h" +#include "os_support.h" + + +int opus_repacketizer_get_size(void) +{ + return sizeof(OpusRepacketizer); +} + +OpusRepacketizer *opus_repacketizer_init(OpusRepacketizer *rp) +{ + rp->nb_frames = 0; + return rp; +} + +OpusRepacketizer *opus_repacketizer_create(void) +{ + OpusRepacketizer *rp; + rp=(OpusRepacketizer *)opus_alloc(opus_repacketizer_get_size()); + if(rp==NULL)return NULL; + return opus_repacketizer_init(rp); +} + +void opus_repacketizer_destroy(OpusRepacketizer *rp) +{ + opus_free(rp); +} + +static int opus_repacketizer_cat_impl(OpusRepacketizer *rp, const unsigned char *data, opus_int32 len, int self_delimited) +{ + unsigned char tmp_toc; + int curr_nb_frames,ret; + /* Set or check the ToC: the first packet added records the ToC byte and + frame size, and every later packet must match it in the bits selected + by the 0xFC mask */ + if (len<1) return OPUS_INVALID_PACKET; + if (rp->nb_frames == 0) + { + rp->toc = data[0]; + rp->framesize = opus_packet_get_samples_per_frame(data, 8000); + } else if ((rp->toc&0xFC) != (data[0]&0xFC)) + { + /*fprintf(stderr, "toc mismatch: 0x%x vs 0x%x\n", rp->toc, data[0]);*/ + return OPUS_INVALID_PACKET; + } + curr_nb_frames = opus_packet_get_nb_frames(data, len); + if(curr_nb_frames<1) return OPUS_INVALID_PACKET; + + /* Check the 120 ms maximum packet size (framesize was computed for a rate + of 8000 Hz above, so 960 samples correspond to 120 ms) */ + if ((curr_nb_frames+rp->nb_frames)*rp->framesize > 960) + { + return OPUS_INVALID_PACKET; + } + + ret=opus_packet_parse_impl(data, len, self_delimited, &tmp_toc, 
&rp->frames[rp->nb_frames], &rp->len[rp->nb_frames], NULL, NULL); + if(ret<1)return ret; + + rp->nb_frames += curr_nb_frames; + return OPUS_OK; +} + +int opus_repacketizer_cat(OpusRepacketizer *rp, const unsigned char *data, opus_int32 len) +{ + return opus_repacketizer_cat_impl(rp, data, len, 0); +} + +int opus_repacketizer_get_nb_frames(OpusRepacketizer *rp) +{ + return rp->nb_frames; +} + +opus_int32 opus_repacketizer_out_range_impl(OpusRepacketizer *rp, int begin, int end, + unsigned char *data, opus_int32 maxlen, int self_delimited, int pad) +{ + int i, count; + opus_int32 tot_size; + opus_int16 *len; + const unsigned char **frames; + unsigned char * ptr; + + if (begin<0 || begin>=end || end>rp->nb_frames) + { + /*fprintf(stderr, "%d %d %d\n", begin, end, rp->nb_frames);*/ + return OPUS_BAD_ARG; + } + count = end-begin; + + len = rp->len+begin; + frames = rp->frames+begin; + if (self_delimited) + tot_size = 1 + (len[count-1]>=252); + else + tot_size = 0; + + ptr = data; + if (count==1) + { + /* Code 0: a single frame */ + tot_size += len[0]+1; + if (tot_size > maxlen) + return OPUS_BUFFER_TOO_SMALL; + *ptr++ = rp->toc&0xFC; + } else if (count==2) + { + if (len[1] == len[0]) + { + /* Code 1: two frames of equal size */ + tot_size += 2*len[0]+1; + if (tot_size > maxlen) + return OPUS_BUFFER_TOO_SMALL; + *ptr++ = (rp->toc&0xFC) | 0x1; + } else { + /* Code 2: two frames of different sizes; the size of the first one + is written explicitly after the ToC byte */ + tot_size += len[0]+len[1]+2+(len[0]>=252); + if (tot_size > maxlen) + return OPUS_BUFFER_TOO_SMALL; + *ptr++ = (rp->toc&0xFC) | 0x2; + ptr += encode_size(len[0], ptr); + } + } + if (count > 2 || (pad && tot_size < maxlen)) + { + /* Code 3: used for more than two frames, and for any packet that + still has room left when padding was requested */ + int vbr; + int pad_amount=0; + + /* Restart the process for the padding case */ + ptr = data; + if (self_delimited) + tot_size = 1 + (len[count-1]>=252); + else + tot_size = 0; + vbr = 0; + for (i=1;i<count;i++) + { + if (len[i] != len[0]) + { + vbr=1; + break; + } + } + if (vbr) + { + tot_size += 2; + for (i=0;i<count-1;i++) + tot_size += 1 + (len[i]>=252) + len[i]; + tot_size += len[count-1]; + + if 
(tot_size > maxlen) + return OPUS_BUFFER_TOO_SMALL; + *ptr++ = (rp->toc&0xFC) | 0x3; + *ptr++ = count | 0x80; + } else { + tot_size += count*len[0]+2; + if (tot_size > maxlen) + return OPUS_BUFFER_TOO_SMALL; + *ptr++ = (rp->toc&0xFC) | 0x3; + *ptr++ = count; + } + pad_amount = pad ? (maxlen-tot_size) : 0; + if (pad_amount != 0) + { + int nb_255s; + data[1] |= 0x40; + nb_255s = (pad_amount-1)/255; + for (i=0;i<nb_255s;i++) + *ptr++ = 255; + *ptr++ = pad_amount-255*nb_255s-1; + tot_size += pad_amount; + } + if (vbr) + { + for (i=0;i<count-1;i++) + ptr += encode_size(len[i], ptr); + } + } + if (self_delimited) { + int sdlen = encode_size(len[count-1], ptr); + ptr += sdlen; + } + /* Copy the actual data */ + for (i=0;i<count;i++) + { + /* Using OPUS_MOVE() instead of OPUS_COPY() in case we're doing in-place + padding from opus_packet_pad or opus_packet_unpad(). */ + celt_assert(frames[i] + len[i] <= data || ptr <= frames[i]); + OPUS_MOVE(ptr, frames[i], len[i]); + ptr += len[i]; + } + if (pad) + { + for (i=ptr-data;i<maxlen;i++) + data[i] = 0; + } + return tot_size; +} + +opus_int32 opus_repacketizer_out_range(OpusRepacketizer *rp, int begin, int end, unsigned char *data, opus_int32 maxlen) +{ + return opus_repacketizer_out_range_impl(rp, begin, end, data, maxlen, 0, 0); +} + +opus_int32 opus_repacketizer_out(OpusRepacketizer *rp, unsigned char *data, opus_int32 maxlen) +{ + return opus_repacketizer_out_range_impl(rp, 0, rp->nb_frames, data, maxlen, 0, 0); +} + +int opus_packet_pad(unsigned char *data, opus_int32 len, opus_int32 new_len) +{ + OpusRepacketizer rp; + opus_int32 ret; + if (len < 1) + return OPUS_BAD_ARG; + if (len==new_len) + return OPUS_OK; + else if (len > new_len) + return OPUS_BAD_ARG; + opus_repacketizer_init(&rp); + /* Moving payload to the end of the packet so we can do in-place padding */ + OPUS_MOVE(data+new_len-len, data, len); + opus_repacketizer_cat(&rp, data+new_len-len, len); + ret = opus_repacketizer_out_range_impl(&rp, 0, rp.nb_frames, 
data, new_len, 0, 1); + if (ret > 0) + return OPUS_OK; + else + return ret; +} + +opus_int32 opus_packet_unpad(unsigned char *data, opus_int32 len) +{ + OpusRepacketizer rp; + opus_int32 ret; + if (len < 1) + return OPUS_BAD_ARG; + opus_repacketizer_init(&rp); + ret = opus_repacketizer_cat(&rp, data, len); + if (ret < 0) + return ret; + ret = opus_repacketizer_out_range_impl(&rp, 0, rp.nb_frames, data, len, 0, 0); + celt_assert(ret > 0 && ret <= len); + return ret; +} + +int opus_multistream_packet_pad(unsigned char *data, opus_int32 len, opus_int32 new_len, int nb_streams) +{ + int s; + int count; + unsigned char toc; + opus_int16 size[48]; + opus_int32 packet_offset; + opus_int32 amount; + + if (len < 1) + return OPUS_BAD_ARG; + if (len==new_len) + return OPUS_OK; + else if (len > new_len) + return OPUS_BAD_ARG; + amount = new_len - len; + /* Seek to last stream */ + for (s=0;s<nb_streams-1;s++) + { + if (len<=0) + return OPUS_INVALID_PACKET; + count = opus_packet_parse_impl(data, len, 1, &toc, NULL, + size, NULL, &packet_offset); + if (count<0) + return count; + data += packet_offset; + len -= packet_offset; + } + return opus_packet_pad(data, len, len+amount); +} + +opus_int32 opus_multistream_packet_unpad(unsigned char *data, opus_int32 len, int nb_streams) +{ + int s; + unsigned char toc; + opus_int16 size[48]; + opus_int32 packet_offset; + OpusRepacketizer rp; + unsigned char *dst; + opus_int32 dst_len; + + if (len < 1) + return OPUS_BAD_ARG; + dst = data; + dst_len = 0; + /* Unpad all frames */ + for (s=0;s<nb_streams;s++) + { + opus_int32 ret; + int self_delimited = s!=nb_streams-1; + if (len<=0) + return OPUS_INVALID_PACKET; + opus_repacketizer_init(&rp); + ret = opus_packet_parse_impl(data, len, self_delimited, &toc, NULL, + size, NULL, &packet_offset); + if (ret<0) + return ret; + ret = opus_repacketizer_cat_impl(&rp, data, packet_offset, self_delimited); + if (ret < 0) + return ret; + ret = opus_repacketizer_out_range_impl(&rp, 0, rp.nb_frames, dst, 
len, self_delimited, 0); + if (ret < 0) + return ret; + else + dst_len += ret; + dst += ret; + data += packet_offset; + len -= packet_offset; + } + return dst_len; +} + diff --git a/drivers/opus/repacketizer_demo.c b/drivers/opus/repacketizer_demo.c new file mode 100644 index 0000000000..1ca9cc3c96 --- /dev/null +++ b/drivers/opus/repacketizer_demo.c @@ -0,0 +1,217 @@ +/* Copyright (c) 2011 Xiph.Org Foundation + Written by Jean-Marc Valin */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "opus.h" +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#define MAX_PACKETOUT 32000 + +void usage(char *argv0) +{ + fprintf(stderr, "usage: %s [options] input_file output_file\n", argv0); +} + +static void int_to_char(opus_uint32 i, unsigned char ch[4]) +{ + ch[0] = i>>24; + ch[1] = (i>>16)&0xFF; + ch[2] = (i>>8)&0xFF; + ch[3] = i&0xFF; +} + +static opus_uint32 char_to_int(unsigned char ch[4]) +{ + return ((opus_uint32)ch[0]<<24) | ((opus_uint32)ch[1]<<16) + | ((opus_uint32)ch[2]<< 8) | (opus_uint32)ch[3]; +} + +int main(int argc, char *argv[]) +{ + int i, eof=0; + FILE *fin, *fout; + unsigned char packets[48][1500]; + int len[48]; + int rng[48]; + OpusRepacketizer *rp; + unsigned char output_packet[MAX_PACKETOUT]; + int merge = 1, split=0; + + if (argc < 3) + { + usage(argv[0]); + return EXIT_FAILURE; + } + for (i=1;i<argc-2;i++) + { + if (strcmp(argv[i], "-merge")==0) + { + merge = atoi(argv[i+1]); + if(merge<1) + { + fprintf(stderr, "-merge parameter must be at least 1.\n"); + return EXIT_FAILURE; + } + if(merge>48) + { + fprintf(stderr, "-merge parameter must be less than 48.\n"); + return EXIT_FAILURE; + } + i++; + } else if (strcmp(argv[i], "-split")==0) + split = 1; + else + { + fprintf(stderr, "Unknown option: %s\n", argv[i]); + usage(argv[0]); + return EXIT_FAILURE; + } + } + fin = fopen(argv[argc-2], "r"); + if(fin==NULL) + { + fprintf(stderr, "Error opening input file: %s\n", argv[argc-2]); + return EXIT_FAILURE; + } + fout = fopen(argv[argc-1], "w"); + if(fout==NULL) + { + fprintf(stderr, "Error opening output file: %s\n", argv[argc-1]); + fclose(fin); + return EXIT_FAILURE; + } + + rp = opus_repacketizer_create(); + while (!eof) + { + int err; + int nb_packets=merge; + opus_repacketizer_init(rp); + for (i=0;i<nb_packets;i++) + { + unsigned char ch[4]; + err = fread(ch, 1, 4, fin); + len[i] = char_to_int(ch); + /*fprintf(stderr, "in len = %d\n", len[i]);*/ + 
if (len[i]>1500 || len[i]<0) + { + if (feof(fin)) + { + eof = 1; + } else { + fprintf(stderr, "Invalid payload length\n"); + fclose(fin); + fclose(fout); + return EXIT_FAILURE; + } + break; + } + err = fread(ch, 1, 4, fin); + rng[i] = char_to_int(ch); + err = fread(packets[i], 1, len[i], fin); + if (feof(fin)) + { + eof = 1; + break; + } + err = opus_repacketizer_cat(rp, packets[i], len[i]); + if (err!=OPUS_OK) + { + fprintf(stderr, "opus_repacketizer_cat() failed: %s\n", opus_strerror(err)); + break; + } + } + nb_packets = i; + + if (eof) + break; + + if (!split) + { + err = opus_repacketizer_out(rp, output_packet, MAX_PACKETOUT); + if (err>0) { + unsigned char int_field[4]; + int_to_char(err, int_field); + if(fwrite(int_field, 1, 4, fout)!=4){ + fprintf(stderr, "Error writing.\n"); + return EXIT_FAILURE; + } + int_to_char(rng[nb_packets-1], int_field); + if (fwrite(int_field, 1, 4, fout)!=4) { + fprintf(stderr, "Error writing.\n"); + return EXIT_FAILURE; + } + if (fwrite(output_packet, 1, err, fout)!=(unsigned)err) { + fprintf(stderr, "Error writing.\n"); + return EXIT_FAILURE; + } + /*fprintf(stderr, "out len = %d\n", err);*/ + } else { + fprintf(stderr, "opus_repacketizer_out() failed: %s\n", opus_strerror(err)); + } + } else { + int nb_frames = opus_repacketizer_get_nb_frames(rp); + for (i=0;i<nb_frames;i++) + { + err = opus_repacketizer_out_range(rp, i, i+1, output_packet, MAX_PACKETOUT); + if (err>0) { + unsigned char int_field[4]; + int_to_char(err, int_field); + if (fwrite(int_field, 1, 4, fout)!=4) { + fprintf(stderr, "Error writing.\n"); + return EXIT_FAILURE; + } + if (i==nb_frames-1) + int_to_char(rng[nb_packets-1], int_field); + else + int_to_char(0, int_field); + if (fwrite(int_field, 1, 4, fout)!=4) { + fprintf(stderr, "Error writing.\n"); + return EXIT_FAILURE; + } + if (fwrite(output_packet, 1, err, fout)!=(unsigned)err) { + fprintf(stderr, "Error writing.\n"); + return EXIT_FAILURE; + } + /*fprintf(stderr, "out len = %d\n", err);*/ + } else { + 
fprintf(stderr, "opus_repacketizer_out() failed: %s\n", opus_strerror(err)); + } + + } + } + } + + fclose(fin); + fclose(fout); + return EXIT_SUCCESS; +} diff --git a/drivers/opus/silk/A2NLSF.c b/drivers/opus/silk/A2NLSF.c new file mode 100644 index 0000000000..cec53a5cd8 --- /dev/null +++ b/drivers/opus/silk/A2NLSF.c @@ -0,0 +1,252 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+***********************************************************************/ + +/* Conversion between prediction filter coefficients and NLSFs */ +/* Requires the order to be an even number */ +/* A piecewise linear approximation maps LSF <-> cos(LSF) */ +/* Therefore the result is not accurate NLSFs, but the two */ +/* functions are accurate inverses of each other */ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "SigProc_FIX.h" +#include "tables.h" + +/* Number of binary divisions, when not in low complexity mode */ +#define BIN_DIV_STEPS_A2NLSF_FIX 3 /* must be no higher than 16 - log2( LSF_COS_TAB_SZ_FIX ) */ +#define MAX_ITERATIONS_A2NLSF_FIX 30 + +/* Helper function for A2NLSF(..) */ +/* Transforms polynomials from cos(n*f) to cos(f)^n */ +static OPUS_INLINE void silk_A2NLSF_trans_poly( + opus_int32 *p, /* I/O Polynomial */ + const opus_int dd /* I Polynomial order (= filter order / 2 ) */ +) +{ + opus_int k, n; + + for( k = 2; k <= dd; k++ ) { + for( n = dd; n > k; n-- ) { + p[ n - 2 ] -= p[ n ]; + } + p[ k - 2 ] -= silk_LSHIFT( p[ k ], 1 ); + } +} +/* Helper function for A2NLSF(..) 
*/ +/* Polynomial evaluation */ +static OPUS_INLINE opus_int32 silk_A2NLSF_eval_poly( /* return the polynomial evaluation, in Q16 */ + opus_int32 *p, /* I Polynomial, Q16 */ + const opus_int32 x, /* I Evaluation point, Q12 */ + const opus_int dd /* I Order */ +) +{ + opus_int n; + opus_int32 x_Q16, y32; + + y32 = p[ dd ]; /* Q16 */ + x_Q16 = silk_LSHIFT( x, 4 ); + for( n = dd - 1; n >= 0; n-- ) { + y32 = silk_SMLAWW( p[ n ], y32, x_Q16 ); /* Q16 */ + } + return y32; +} + +static OPUS_INLINE void silk_A2NLSF_init( + const opus_int32 *a_Q16, + opus_int32 *P, + opus_int32 *Q, + const opus_int dd +) +{ + opus_int k; + + /* Convert filter coefs to even and odd polynomials */ + P[dd] = silk_LSHIFT( 1, 16 ); + Q[dd] = silk_LSHIFT( 1, 16 ); + for( k = 0; k < dd; k++ ) { + P[ k ] = -a_Q16[ dd - k - 1 ] - a_Q16[ dd + k ]; /* Q16 */ + Q[ k ] = -a_Q16[ dd - k - 1 ] + a_Q16[ dd + k ]; /* Q16 */ + } + + /* Divide out zeros as we have that for even filter orders, */ + /* z = 1 is always a root in Q, and */ + /* z = -1 is always a root in P */ + for( k = dd; k > 0; k-- ) { + P[ k - 1 ] -= P[ k ]; + Q[ k - 1 ] += Q[ k ]; + } + + /* Transform polynomials from cos(n*f) to cos(f)^n */ + silk_A2NLSF_trans_poly( P, dd ); + silk_A2NLSF_trans_poly( Q, dd ); +} + +/* Compute Normalized Line Spectral Frequencies (NLSFs) from whitening filter coefficients */ +/* If not all roots are found, the a_Q16 coefficients are bandwidth expanded until convergence. 
*/ +void silk_A2NLSF( + opus_int16 *NLSF, /* O Normalized Line Spectral Frequencies in Q15 (0..2^15-1) [d] */ + opus_int32 *a_Q16, /* I/O Monic whitening filter coefficients in Q16 [d] */ + const opus_int d /* I Filter order (must be even) */ +) +{ + opus_int i, k, m, dd, root_ix, ffrac; + opus_int32 xlo, xhi, xmid; + opus_int32 ylo, yhi, ymid, thr; + opus_int32 nom, den; + opus_int32 P[ SILK_MAX_ORDER_LPC / 2 + 1 ]; + opus_int32 Q[ SILK_MAX_ORDER_LPC / 2 + 1 ]; + opus_int32 *PQ[ 2 ]; + opus_int32 *p; + + /* Store pointers to array */ + PQ[ 0 ] = P; + PQ[ 1 ] = Q; + + dd = silk_RSHIFT( d, 1 ); + + silk_A2NLSF_init( a_Q16, P, Q, dd ); + + /* Find roots, alternating between P and Q */ + p = P; /* Pointer to polynomial */ + + xlo = silk_LSFCosTab_FIX_Q12[ 0 ]; /* Q12*/ + ylo = silk_A2NLSF_eval_poly( p, xlo, dd ); + + if( ylo < 0 ) { + /* Set the first NLSF to zero and move on to the next */ + NLSF[ 0 ] = 0; + p = Q; /* Pointer to polynomial */ + ylo = silk_A2NLSF_eval_poly( p, xlo, dd ); + root_ix = 1; /* Index of current root */ + } else { + root_ix = 0; /* Index of current root */ + } + k = 1; /* Loop counter */ + i = 0; /* Counter for bandwidth expansions applied */ + thr = 0; + while( 1 ) { + /* Evaluate polynomial */ + xhi = silk_LSFCosTab_FIX_Q12[ k ]; /* Q12 */ + yhi = silk_A2NLSF_eval_poly( p, xhi, dd ); + + /* Detect zero crossing */ + if( ( ylo <= 0 && yhi >= thr ) || ( ylo >= 0 && yhi <= -thr ) ) { + if( yhi == 0 ) { + /* If the root lies exactly at the end of the current */ + /* interval, look for the next root in the next interval */ + thr = 1; + } else { + thr = 0; + } + /* Binary division */ + ffrac = -256; + for( m = 0; m < BIN_DIV_STEPS_A2NLSF_FIX; m++ ) { + /* Evaluate polynomial */ + xmid = silk_RSHIFT_ROUND( xlo + xhi, 1 ); + ymid = silk_A2NLSF_eval_poly( p, xmid, dd ); + + /* Detect zero crossing */ + if( ( ylo <= 0 && ymid >= 0 ) || ( ylo >= 0 && ymid <= 0 ) ) { + /* Reduce frequency */ + xhi = xmid; + yhi = ymid; + } else { + /* Increase 
frequency */ + xlo = xmid; + ylo = ymid; + ffrac = silk_ADD_RSHIFT( ffrac, 128, m ); + } + } + + /* Interpolate */ + if( silk_abs( ylo ) < 65536 ) { + /* Avoid dividing by zero */ + den = ylo - yhi; + nom = silk_LSHIFT( ylo, 8 - BIN_DIV_STEPS_A2NLSF_FIX ) + silk_RSHIFT( den, 1 ); + if( den != 0 ) { + ffrac += silk_DIV32( nom, den ); + } + } else { + /* No risk of dividing by zero because abs(ylo - yhi) >= abs(ylo) >= 65536 */ + ffrac += silk_DIV32( ylo, silk_RSHIFT( ylo - yhi, 8 - BIN_DIV_STEPS_A2NLSF_FIX ) ); + } + NLSF[ root_ix ] = (opus_int16)silk_min_32( silk_LSHIFT( (opus_int32)k, 8 ) + ffrac, silk_int16_MAX ); + + silk_assert( NLSF[ root_ix ] >= 0 ); + + root_ix++; /* Next root */ + if( root_ix >= d ) { + /* Found all roots */ + break; + } + /* Alternate pointer to polynomial */ + p = PQ[ root_ix & 1 ]; + + /* Evaluate polynomial */ + xlo = silk_LSFCosTab_FIX_Q12[ k - 1 ]; /* Q12*/ + ylo = silk_LSHIFT( 1 - ( root_ix & 2 ), 12 ); + } else { + /* Increment loop counter */ + k++; + xlo = xhi; + ylo = yhi; + thr = 0; + + if( k > LSF_COS_TAB_SZ_FIX ) { + i++; + if( i > MAX_ITERATIONS_A2NLSF_FIX ) { + /* Set NLSFs to white spectrum and exit */ + NLSF[ 0 ] = (opus_int16)silk_DIV32_16( 1 << 15, d + 1 ); + for( k = 1; k < d; k++ ) { + NLSF[ k ] = (opus_int16)silk_SMULBB( k + 1, NLSF[ 0 ] ); + } + return; + } + + /* Error: Apply progressively more bandwidth expansion and run again */ + silk_bwexpander_32( a_Q16, d, 65536 - silk_SMULBB( 10 + i, i ) ); /* 10_Q16 = 0.00015*/ + + silk_A2NLSF_init( a_Q16, P, Q, dd ); + p = P; /* Pointer to polynomial */ + xlo = silk_LSFCosTab_FIX_Q12[ 0 ]; /* Q12*/ + ylo = silk_A2NLSF_eval_poly( p, xlo, dd ); + if( ylo < 0 ) { + /* Set the first NLSF to zero and move on to the next */ + NLSF[ 0 ] = 0; + p = Q; /* Pointer to polynomial */ + ylo = silk_A2NLSF_eval_poly( p, xlo, dd ); + root_ix = 1; /* Index of current root */ + } else { + root_ix = 0; /* Index of current root */ + } + k = 1; /* Reset loop counter */ + } + } + } +} diff --git 
a/drivers/opus/silk/API.h b/drivers/opus/silk/API.h new file mode 100644 index 0000000000..f0601bcf6b --- /dev/null +++ b/drivers/opus/silk/API.h @@ -0,0 +1,133 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+***********************************************************************/ + +#ifndef SILK_API_H +#define SILK_API_H + +#include "control.h" +#include "typedef.h" +#include "errors.h" +#include "entenc.h" +#include "entdec.h" + +#ifdef __cplusplus +extern "C" +{ +#endif + +#define SILK_MAX_FRAMES_PER_PACKET 3 + +/* Struct for TOC (Table of Contents) */ +typedef struct { + opus_int VADFlag; /* Voice activity for packet */ + opus_int VADFlags[ SILK_MAX_FRAMES_PER_PACKET ]; /* Voice activity for each frame in packet */ + opus_int inbandFECFlag; /* Flag indicating if packet contains in-band FEC */ +} silk_TOC_struct; + +/****************************************/ +/* Encoder functions */ +/****************************************/ + +/***********************************************/ +/* Get size in bytes of the Silk encoder state */ +/***********************************************/ +opus_int silk_Get_Encoder_Size( /* O Returns error code */ + opus_int *encSizeBytes /* O Number of bytes in SILK encoder state */ +); + +/*************************/ +/* Init or reset encoder */ +/*************************/ +opus_int silk_InitEncoder( /* O Returns error code */ + void *encState, /* I/O State */ + int arch, /* I Run-time architecture */ + silk_EncControlStruct *encStatus /* O Encoder Status */ +); + +/**************************/ +/* Encode frame with Silk */ +/**************************/ +/* Note: if prefillFlag is set, the input must contain 10 ms of audio, irrespective of what */ +/* encControl->payloadSize_ms is set to */ +opus_int silk_Encode( /* O Returns error code */ + void *encState, /* I/O State */ + silk_EncControlStruct *encControl, /* I Control status */ + const opus_int16 *samplesIn, /* I Speech sample input vector */ + opus_int nSamplesIn, /* I Number of samples in input vector */ + ec_enc *psRangeEnc, /* I/O Compressor data structure */ + opus_int32 *nBytesOut, /* I/O Number of bytes in payload (input: Max bytes) */ + const opus_int prefillFlag /* I Flag to 
indicate prefilling buffers no coding */ +); + +/****************************************/ +/* Decoder functions */ +/****************************************/ + +/***********************************************/ +/* Get size in bytes of the Silk decoder state */ +/***********************************************/ +opus_int silk_Get_Decoder_Size( /* O Returns error code */ + opus_int *decSizeBytes /* O Number of bytes in SILK decoder state */ +); + +/*************************/ +/* Init or Reset decoder */ +/*************************/ +opus_int silk_InitDecoder( /* O Returns error code */ + void *decState /* I/O State */ +); + +/******************/ +/* Decode a frame */ +/******************/ +opus_int silk_Decode( /* O Returns error code */ + void* decState, /* I/O State */ + silk_DecControlStruct* decControl, /* I/O Control Structure */ + opus_int lostFlag, /* I 0: no loss, 1 loss, 2 decode fec */ + opus_int newPacketFlag, /* I Indicates first decoder call for this packet */ + ec_dec *psRangeDec, /* I/O Compressor data structure */ + opus_int16 *samplesOut, /* O Decoded output speech vector */ + opus_int32 *nSamplesOut /* O Number of samples decoded */ +); + +#if 0 +/**************************************/ +/* Get table of contents for a packet */ +/**************************************/ +opus_int silk_get_TOC( + const opus_uint8 *payload, /* I Payload data */ + const opus_int nBytesIn, /* I Number of input bytes */ + const opus_int nFramesPerPayload, /* I Number of SILK frames per payload */ + silk_TOC_struct *Silk_TOC /* O Type of content */ +); +#endif + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/drivers/opus/silk/CNG.c b/drivers/opus/silk/CNG.c new file mode 100644 index 0000000000..8b8dbf882c --- /dev/null +++ b/drivers/opus/silk/CNG.c @@ -0,0 +1,172 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. 
+Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "silk_main.h" +#include "stack_alloc.h" + +/* Generates excitation for CNG LPC synthesis */ +static OPUS_INLINE void silk_CNG_exc( + opus_int32 residual_Q10[], /* O CNG residual signal Q10 */ + opus_int32 exc_buf_Q14[], /* I Random samples buffer Q10 */ + opus_int32 Gain_Q16, /* I Gain to apply */ + opus_int length, /* I Length */ + opus_int32 *rand_seed /* I/O Seed to random index generator */ +) +{ + opus_int32 seed; + opus_int i, idx, exc_mask; + + exc_mask = CNG_BUF_MASK_MAX; + while( exc_mask > length ) { + exc_mask = silk_RSHIFT( exc_mask, 1 ); + } + + seed = *rand_seed; + for( i = 0; i < length; i++ ) { + seed = silk_RAND( seed ); + idx = (opus_int)( silk_RSHIFT( seed, 24 ) & exc_mask ); + silk_assert( idx >= 0 ); + silk_assert( idx <= CNG_BUF_MASK_MAX ); + residual_Q10[ i ] = (opus_int16)silk_SAT16( silk_SMULWW( exc_buf_Q14[ idx ], Gain_Q16 >> 4 ) ); + } + *rand_seed = seed; +} + +void silk_CNG_Reset( + silk_decoder_state *psDec /* I/O Decoder state */ +) +{ + opus_int i, NLSF_step_Q15, NLSF_acc_Q15; + + NLSF_step_Q15 = silk_DIV32_16( silk_int16_MAX, psDec->LPC_order + 1 ); + NLSF_acc_Q15 = 0; + for( i = 0; i < psDec->LPC_order; i++ ) { + NLSF_acc_Q15 += NLSF_step_Q15; + psDec->sCNG.CNG_smth_NLSF_Q15[ i ] = NLSF_acc_Q15; + } + psDec->sCNG.CNG_smth_Gain_Q16 = 0; + psDec->sCNG.rand_seed = 3176576; +} + +/* Updates CNG estimate, and applies the CNG when packet was lost */ +void silk_CNG( + silk_decoder_state *psDec, /* I/O Decoder state */ + silk_decoder_control *psDecCtrl, /* I/O Decoder control */ + opus_int16 frame[], /* I/O Signal */ + opus_int length /* I Length of residual */ +) +{ + opus_int i, subfr; + opus_int32 sum_Q6, max_Gain_Q16; + opus_int16 A_Q12[ MAX_LPC_ORDER ]; + silk_CNG_struct *psCNG = &psDec->sCNG; + SAVE_STACK; + + if( psDec->fs_kHz != psCNG->fs_kHz ) { + /* Reset state */ + silk_CNG_Reset( 
psDec ); + + psCNG->fs_kHz = psDec->fs_kHz; + } + if( psDec->lossCnt == 0 && psDec->prevSignalType == TYPE_NO_VOICE_ACTIVITY ) { + /* Update CNG parameters */ + + /* Smoothing of LSF's */ + for( i = 0; i < psDec->LPC_order; i++ ) { + psCNG->CNG_smth_NLSF_Q15[ i ] += silk_SMULWB( (opus_int32)psDec->prevNLSF_Q15[ i ] - (opus_int32)psCNG->CNG_smth_NLSF_Q15[ i ], CNG_NLSF_SMTH_Q16 ); + } + /* Find the subframe with the highest gain */ + max_Gain_Q16 = 0; + subfr = 0; + for( i = 0; i < psDec->nb_subfr; i++ ) { + if( psDecCtrl->Gains_Q16[ i ] > max_Gain_Q16 ) { + max_Gain_Q16 = psDecCtrl->Gains_Q16[ i ]; + subfr = i; + } + } + /* Update CNG excitation buffer with excitation from this subframe */ + silk_memmove( &psCNG->CNG_exc_buf_Q14[ psDec->subfr_length ], psCNG->CNG_exc_buf_Q14, ( psDec->nb_subfr - 1 ) * psDec->subfr_length * sizeof( opus_int32 ) ); + silk_memcpy( psCNG->CNG_exc_buf_Q14, &psDec->exc_Q14[ subfr * psDec->subfr_length ], psDec->subfr_length * sizeof( opus_int32 ) ); + + /* Smooth gains */ + for( i = 0; i < psDec->nb_subfr; i++ ) { + psCNG->CNG_smth_Gain_Q16 += silk_SMULWB( psDecCtrl->Gains_Q16[ i ] - psCNG->CNG_smth_Gain_Q16, CNG_GAIN_SMTH_Q16 ); + } + } + + /* Add CNG when packet is lost or during DTX */ + if( psDec->lossCnt ) { + VARDECL( opus_int32, CNG_sig_Q10 ); + + ALLOC( CNG_sig_Q10, length + MAX_LPC_ORDER, opus_int32 ); + + /* Generate CNG excitation */ + silk_CNG_exc( CNG_sig_Q10 + MAX_LPC_ORDER, psCNG->CNG_exc_buf_Q14, psCNG->CNG_smth_Gain_Q16, length, &psCNG->rand_seed ); + + /* Convert CNG NLSF to filter representation */ + silk_NLSF2A( A_Q12, psCNG->CNG_smth_NLSF_Q15, psDec->LPC_order ); + + /* Generate CNG signal, by synthesis filtering */ + silk_memcpy( CNG_sig_Q10, psCNG->CNG_synth_state, MAX_LPC_ORDER * sizeof( opus_int32 ) ); + for( i = 0; i < length; i++ ) { + silk_assert( psDec->LPC_order == 10 || psDec->LPC_order == 16 ); + /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */ + sum_Q6 = silk_RSHIFT( 
psDec->LPC_order, 1 ); + sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 1 ], A_Q12[ 0 ] ); + sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 2 ], A_Q12[ 1 ] ); + sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 3 ], A_Q12[ 2 ] ); + sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 4 ], A_Q12[ 3 ] ); + sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 5 ], A_Q12[ 4 ] ); + sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 6 ], A_Q12[ 5 ] ); + sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 7 ], A_Q12[ 6 ] ); + sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 8 ], A_Q12[ 7 ] ); + sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 9 ], A_Q12[ 8 ] ); + sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 10 ], A_Q12[ 9 ] ); + if( psDec->LPC_order == 16 ) { + sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 11 ], A_Q12[ 10 ] ); + sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 12 ], A_Q12[ 11 ] ); + sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 13 ], A_Q12[ 12 ] ); + sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 14 ], A_Q12[ 13 ] ); + sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 15 ], A_Q12[ 14 ] ); + sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 16 ], A_Q12[ 15 ] ); + } + + /* Update states */ + CNG_sig_Q10[ MAX_LPC_ORDER + i ] = silk_ADD_LSHIFT( CNG_sig_Q10[ MAX_LPC_ORDER + i ], sum_Q6, 4 ); + + frame[ i ] = silk_ADD_SAT16( frame[ i ], silk_RSHIFT_ROUND( sum_Q6, 6 ) ); + } + silk_memcpy( psCNG->CNG_synth_state, &CNG_sig_Q10[ length ], MAX_LPC_ORDER * sizeof( opus_int32 ) ); + } else { + silk_memset( psCNG->CNG_synth_state, 0, psDec->LPC_order * sizeof( opus_int32 ) ); + } + RESTORE_STACK; +} diff --git a/drivers/opus/silk/HP_variable_cutoff.c b/drivers/opus/silk/HP_variable_cutoff.c new file mode 100644 index 0000000000..379752bb19 --- 
/dev/null +++ b/drivers/opus/silk/HP_variable_cutoff.c @@ -0,0 +1,77 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif +#ifdef OPUS_FIXED_POINT +#include "main_FIX.h" +#else +#include "main_FLP.h" +#endif +#include "tuning_parameters.h" + +/* High-pass filter with cutoff frequency adaptation based on pitch lag statistics */ +void silk_HP_variable_cutoff( + silk_encoder_state_Fxx state_Fxx[] /* I/O Encoder states */ +) +{ + opus_int quality_Q15; + opus_int32 pitch_freq_Hz_Q16, pitch_freq_log_Q7, delta_freq_Q7; + silk_encoder_state *psEncC1 = &state_Fxx[ 0 ].sCmn; + + /* Adaptive cutoff frequency: estimate low end of pitch frequency range */ + if( psEncC1->prevSignalType == TYPE_VOICED ) { + /* difference, in log domain */ + pitch_freq_Hz_Q16 = silk_DIV32_16( silk_LSHIFT( silk_MUL( psEncC1->fs_kHz, 1000 ), 16 ), psEncC1->prevLag ); + pitch_freq_log_Q7 = silk_lin2log( pitch_freq_Hz_Q16 ) - ( 16 << 7 ); + + /* adjustment based on quality */ + quality_Q15 = psEncC1->input_quality_bands_Q15[ 0 ]; + pitch_freq_log_Q7 = silk_SMLAWB( pitch_freq_log_Q7, silk_SMULWB( silk_LSHIFT( -quality_Q15, 2 ), quality_Q15 ), + pitch_freq_log_Q7 - ( silk_lin2log( SILK_FIX_CONST( VARIABLE_HP_MIN_CUTOFF_HZ, 16 ) ) - ( 16 << 7 ) ) ); + + /* delta_freq = pitch_freq_log - psEnc->variable_HP_smth1; */ + delta_freq_Q7 = pitch_freq_log_Q7 - silk_RSHIFT( psEncC1->variable_HP_smth1_Q15, 8 ); + if( delta_freq_Q7 < 0 ) { + /* less smoothing for decreasing pitch frequency, to track something close to the minimum */ + delta_freq_Q7 = silk_MUL( delta_freq_Q7, 3 ); + } + + /* limit delta, to reduce impact of outliers in pitch estimation */ + delta_freq_Q7 = silk_LIMIT_32( delta_freq_Q7, -SILK_FIX_CONST( VARIABLE_HP_MAX_DELTA_FREQ, 7 ), SILK_FIX_CONST( VARIABLE_HP_MAX_DELTA_FREQ, 7 ) ); + + /* update smoother */ + psEncC1->variable_HP_smth1_Q15 = silk_SMLAWB( psEncC1->variable_HP_smth1_Q15, + silk_SMULBB( psEncC1->speech_activity_Q8, delta_freq_Q7 ), SILK_FIX_CONST( 
VARIABLE_HP_SMTH_COEF1, 16 ) ); + + /* limit frequency range */ + psEncC1->variable_HP_smth1_Q15 = silk_LIMIT_32( psEncC1->variable_HP_smth1_Q15, + silk_LSHIFT( silk_lin2log( VARIABLE_HP_MIN_CUTOFF_HZ ), 8 ), + silk_LSHIFT( silk_lin2log( VARIABLE_HP_MAX_CUTOFF_HZ ), 8 ) ); + } +} diff --git a/drivers/opus/silk/Inlines.h b/drivers/opus/silk/Inlines.h new file mode 100644 index 0000000000..ec986cdfdd --- /dev/null +++ b/drivers/opus/silk/Inlines.h @@ -0,0 +1,188 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+/*! \file Inlines.h
+ *  \brief Inlines.h defines OPUS_INLINE signal processing functions:
+ *  leading-zero helpers, an approximate square root, and approximate
+ *  division / inversion with the result in a caller-chosen Q-domain.
+ */
+
+#ifndef SILK_FIX_INLINES_H
+#define SILK_FIX_INLINES_H
+
+#ifdef  __cplusplus
+extern "C"
+{
+#endif
+
+/* count leading zeros of opus_int64
+ *
+ * The upper 32 bits are examined first. If they are all zero the result is
+ * 32 plus the leading-zero count of the lower 32 bits, otherwise it is the
+ * leading-zero count of the upper 32 bits.
+ */
+static OPUS_INLINE opus_int32 silk_CLZ64( opus_int64 in )
+{
+    opus_int32 in_upper;
+
+    in_upper = (opus_int32)silk_RSHIFT64(in, 32);
+    if (in_upper == 0) {
+        /* Search in the lower 32 bits */
+        return 32 + silk_CLZ32( (opus_int32) in );
+    } else {
+        /* Search in the upper 32 bits */
+        return silk_CLZ32( in_upper );
+    }
+}
+
+/* get number of leading zeros and fractional part (the bits right after the leading one)
+ *
+ * Both results are written through the output pointers; nothing is returned.
+ */
+static OPUS_INLINE void silk_CLZ_FRAC(
+    opus_int32 in,            /* I  input                               */
+    opus_int32 *lz,           /* O  number of leading zeros             */
+    opus_int32 *frac_Q7       /* O  the 7 bits right after the leading one */
+)
+{
+    opus_int32 lzeros = silk_CLZ32(in);
+
+    * lz = lzeros;
+    * frac_Q7 = silk_ROR32(in, 24 - lzeros) & 0x7f;    /* rotate the 7 bits below the leading one down to bits 0..6, then mask */
+}
+
+/* Approximation of square root                                          */
+/* Accuracy: < +/- 10%  for output values > 15                           */
+/*           < +/- 2.5% for output values > 120
+ *
+ * Inputs that are zero or negative return 0.
+ */
+static OPUS_INLINE opus_int32 silk_SQRT_APPROX( opus_int32 x )
+{
+    opus_int32 y, lz, frac_Q7;
+
+    if( x <= 0 ) {
+        return 0;
+    }
+
+    silk_CLZ_FRAC(x, &lz, &frac_Q7);
+
+    /* Starting value depends on whether the leading-zero count is odd or even */
+    if( lz & 1 ) {
+        y = 32768;
+    } else {
+        y = 46214;        /* 46214 = sqrt(2) * 32768 */
+    }
+
+    /* get scaling right */
+    y >>= silk_RSHIFT(lz, 1);
+
+    /* increment using fractional part of input */
+    y = silk_SMLAWB(y, y, silk_SMULBB(213, frac_Q7));
+
+    return y;
+}
+
+/* Divide two int32 values and return result as int32 in a given Q-domain
+ *
+ * Both inputs are normalized by their head room, a reciprocal of the top 16
+ * bits of the normalized denominator gives a first approximation, and one
+ * refinement step is applied using the residual. The result is then shifted
+ * into the Qres domain: left shifts saturate, and a right shift of 32 bits
+ * or more returns 0.
+ */
+static OPUS_INLINE opus_int32 silk_DIV32_varQ(   /* O    returns a good approximation of "(a32 << Qres) / b32" */
+    const opus_int32     a32,               /* I    numerator (Q0)                  */
+    const opus_int32     b32,               /* I    denominator (Q0)                */
+    const opus_int       Qres               /* I    Q-domain of result (>= 0)       */
+)
+{
+    opus_int   a_headrm, b_headrm, lshift;
+    opus_int32 b32_inv, a32_nrm, b32_nrm, result;
+
+    silk_assert( b32 != 0 );
+    silk_assert( Qres >= 0 );
+
+    /* Compute number of bits head room and normalize inputs */
+    a_headrm = silk_CLZ32( silk_abs(a32) ) - 1;
+    a32_nrm = silk_LSHIFT(a32, a_headrm);                                       /* Q: a_headrm                  */
+    b_headrm = silk_CLZ32( silk_abs(b32) ) - 1;
+    b32_nrm = silk_LSHIFT(b32, b_headrm);                                       /* Q: b_headrm                  */
+
+    /* Inverse of b32, with 14 bits of precision */
+    b32_inv = silk_DIV32_16( silk_int32_MAX >> 2, silk_RSHIFT(b32_nrm, 16) );   /* Q: 29 + 16 - b_headrm        */
+
+    /* First approximation */
+    result = silk_SMULWB(a32_nrm, b32_inv);                                     /* Q: 29 + a_headrm - b_headrm  */
+
+    /* Compute residual by subtracting product of denominator and first approximation */
+    /* It's OK to overflow because the final value of a32_nrm should always be small */
+    a32_nrm = silk_SUB32_ovflw(a32_nrm, silk_LSHIFT_ovflw( silk_SMMUL(b32_nrm, result), 3 ));  /* Q: a_headrm   */
+
+    /* Refinement */
+    result = silk_SMLAWB(result, a32_nrm, b32_inv);                             /* Q: 29 + a_headrm - b_headrm  */
+
+    /* Convert to Qres domain */
+    lshift = 29 + a_headrm - b_headrm - Qres;
+    if( lshift < 0 ) {
+        return silk_LSHIFT_SAT32(result, -lshift);
+    } else {
+        if( lshift < 32){
+            return silk_RSHIFT(result, lshift);
+        } else {
+            /* Avoid undefined result */
+            return 0;
+        }
+    }
+}
+
+/* Invert int32 value and return result as int32 in a given Q-domain
+ *
+ * Same scheme as silk_DIV32_varQ with an implicit numerator of one: the
+ * denominator is normalized, a reciprocal of its top 16 bits is the first
+ * approximation, and one refinement step is applied before shifting into
+ * the Qres domain. Left shifts saturate; a right shift of 32 bits or more
+ * returns 0.
+ */
+static OPUS_INLINE opus_int32 silk_INVERSE32_varQ(   /* O    returns a good approximation of "(1 << Qres) / b32" */
+    const opus_int32     b32,                   /* I    denominator (Q0)                */
+    const opus_int       Qres                   /* I    Q-domain of result (> 0)        */
+)
+{
+    opus_int   b_headrm, lshift;
+    opus_int32 b32_inv, b32_nrm, err_Q32, result;
+
+    silk_assert( b32 != 0 );
+    silk_assert( Qres > 0 );
+
+    /* Compute number of bits head room and normalize input */
+    b_headrm = silk_CLZ32( silk_abs(b32) ) - 1;
+    b32_nrm = silk_LSHIFT(b32, b_headrm);                                       /* Q: b_headrm                */
+
+    /* Inverse of b32, with 14 bits of precision */
+    b32_inv = silk_DIV32_16( silk_int32_MAX >> 2, silk_RSHIFT(b32_nrm, 16) );   /* Q: 29 + 16 - b_headrm    */
+
+    /* First approximation */
+    result = silk_LSHIFT(b32_inv, 16);                                          /* Q: 61 - b_headrm            */
+
+    /* Compute residual by subtracting product of denominator and first approximation from one */
+    err_Q32 = silk_LSHIFT( ((opus_int32)1<<29) - silk_SMULWB(b32_nrm, b32_inv), 3 );        /* Q32                        */
+
+    /* Refinement */
+    result = silk_SMLAWW(result, err_Q32, b32_inv);                             /* Q: 61 - b_headrm            */
+
+    /* Convert to Qres domain */
+    lshift = 61 - b_headrm - Qres;
+    if( lshift <= 0 ) {
+        return silk_LSHIFT_SAT32(result, -lshift);
+    } else {
+        if( lshift < 32){
+            return silk_RSHIFT(result, lshift);
+        }else{
+            /* Avoid undefined result */
+            return 0;
+        }
+    }
+}
+
+#ifdef  __cplusplus
+}
+#endif
+
+#endif /* SILK_FIX_INLINES_H */
diff --git a/drivers/opus/silk/LPC_analysis_filter.c b/drivers/opus/silk/LPC_analysis_filter.c
new file mode 100644
index 0000000000..98ef509e4e
--- /dev/null
+++ b/drivers/opus/silk/LPC_analysis_filter.c
@@ -0,0 +1,106 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "SigProc_FIX.h" +#include "celt_lpc.h" + +/*******************************************/ +/* LPC analysis filter */ +/* NB! 
State is kept internally and the */ +/* filter always starts with zero state */ +/* first d output samples are set to zero */ +/*******************************************/ + +void silk_LPC_analysis_filter( + opus_int16 *out, /* O Output signal */ + const opus_int16 *in, /* I Input signal */ + const opus_int16 *B, /* I MA prediction coefficients, Q12 [order] */ + const opus_int32 len, /* I Signal length */ + const opus_int32 d /* I Filter order */ +) +{ + opus_int j; +#ifdef OPUS_FIXED_POINT + opus_int16 mem[SILK_MAX_ORDER_LPC]; + opus_int16 num[SILK_MAX_ORDER_LPC]; +#else + int ix; + opus_int32 out32_Q12, out32; + const opus_int16 *in_ptr; +#endif + + silk_assert( d >= 6 ); + silk_assert( (d & 1) == 0 ); + silk_assert( d <= len ); + +#ifdef OPUS_FIXED_POINT + silk_assert( d <= SILK_MAX_ORDER_LPC ); + for ( j = 0; j < d; j++ ) { + num[ j ] = -B[ j ]; + } + for (j=0;j<d;j++) { + mem[ j ] = in[ d - j - 1 ]; + } + celt_fir( in + d, num, out + d, len - d, d, mem ); + for ( j = 0; j < d; j++ ) { + out[ j ] = 0; + } +#else + for( ix = d; ix < len; ix++ ) { + in_ptr = &in[ ix - 1 ]; + + out32_Q12 = silk_SMULBB( in_ptr[ 0 ], B[ 0 ] ); + /* Allowing wrap around so that two wraps can cancel each other. 
The rare + cases where the result wraps around can only be triggered by invalid streams*/ + out32_Q12 = silk_SMLABB_ovflw( out32_Q12, in_ptr[ -1 ], B[ 1 ] ); + out32_Q12 = silk_SMLABB_ovflw( out32_Q12, in_ptr[ -2 ], B[ 2 ] ); + out32_Q12 = silk_SMLABB_ovflw( out32_Q12, in_ptr[ -3 ], B[ 3 ] ); + out32_Q12 = silk_SMLABB_ovflw( out32_Q12, in_ptr[ -4 ], B[ 4 ] ); + out32_Q12 = silk_SMLABB_ovflw( out32_Q12, in_ptr[ -5 ], B[ 5 ] ); + for( j = 6; j < d; j += 2 ) { + out32_Q12 = silk_SMLABB_ovflw( out32_Q12, in_ptr[ -j ], B[ j ] ); + out32_Q12 = silk_SMLABB_ovflw( out32_Q12, in_ptr[ -j - 1 ], B[ j + 1 ] ); + } + + /* Subtract prediction */ + out32_Q12 = silk_SUB32_ovflw( silk_LSHIFT( (opus_int32)in_ptr[ 1 ], 12 ), out32_Q12 ); + + /* Scale to Q0 */ + out32 = silk_RSHIFT_ROUND( out32_Q12, 12 ); + + /* Saturate output */ + out[ ix ] = (opus_int16)silk_SAT16( out32 ); + } + + /* Set first d output samples to zero */ + silk_memset( out, 0, d * sizeof( opus_int16 ) ); +#endif +} diff --git a/drivers/opus/silk/LPC_inv_pred_gain.c b/drivers/opus/silk/LPC_inv_pred_gain.c new file mode 100644 index 0000000000..6dc9a49861 --- /dev/null +++ b/drivers/opus/silk/LPC_inv_pred_gain.c @@ -0,0 +1,154 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. 
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "SigProc_FIX.h"
+
+#define QA                          24      /* Q-domain of the working coefficients */
+#define A_LIMIT                     SILK_FIX_CONST( 0.99975, QA )   /* magnitude bound used by the stability checks */
+
+#define MUL32_FRAC_Q(a32, b32, Q)   ((opus_int32)(silk_RSHIFT_ROUND64(silk_SMULL(a32, b32), Q)))
+
+/* Compute inverse of LPC prediction gain, and                           */
+/* test if LPC coefficients are stable (all poles within unit circle)
+ *
+ * The coefficients are read from A_QA[ order & 1 ]. Each pass of the loop
+ * takes the highest remaining coefficient, checks it against A_LIMIT,
+ * multiplies rc_mult1_Q30 into the inverse gain, and writes the coefficients
+ * of the next lower order into the other row of A_QA. The two rows act as
+ * ping-pong buffers and are overwritten.
+ * Returns 0 as soon as a checked coefficient lies outside
+ * [ -A_LIMIT, A_LIMIT ].
+ */
+static opus_int32 LPC_inverse_pred_gain_QA(                 /* O   Returns inverse prediction gain in energy domain, Q30    */
+    opus_int32           A_QA[ 2 ][ SILK_MAX_ORDER_LPC ],   /* I   Prediction coefficients                                  */
+    const opus_int       order                              /* I   Prediction order                                         */
+)
+{
+    opus_int   k, n, mult2Q;
+    opus_int32 invGain_Q30, rc_Q31, rc_mult1_Q30, rc_mult2, tmp_QA;
+    opus_int32 *Aold_QA, *Anew_QA;
+
+    Anew_QA = A_QA[ order & 1 ];
+
+    invGain_Q30 = (opus_int32)1 << 30;
+    for( k = order - 1; k > 0; k-- ) {
+        /* Check for stability */
+        if( ( Anew_QA[ k ] > A_LIMIT ) || ( Anew_QA[ k ] < -A_LIMIT ) ) {
+            return 0;
+        }
+
+        /* Set RC equal to negated AR coef */
+        rc_Q31 = -silk_LSHIFT( Anew_QA[ k ], 31 - QA );
+
+        /* rc_mult1_Q30 range: [ 1 : 2^30 ] */
+        rc_mult1_Q30 = ( (opus_int32)1 << 30 ) - silk_SMMUL( rc_Q31, rc_Q31 );
+        silk_assert( rc_mult1_Q30 > ( 1 << 15 ) );                   /* reduce A_LIMIT if fails */
+        silk_assert( rc_mult1_Q30 <= ( 1 << 30 ) );
+
+        /* rc_mult2 range: [ 2^30 : silk_int32_MAX ] */
+        mult2Q = 32 - silk_CLZ32( silk_abs( rc_mult1_Q30 ) );
+        rc_mult2 = silk_INVERSE32_varQ( rc_mult1_Q30, mult2Q + 30 );
+
+        /* Update inverse gain */
+        /* invGain_Q30 range: [ 0 : 2^30 ] */
+        invGain_Q30 = silk_LSHIFT( silk_SMMUL( invGain_Q30, rc_mult1_Q30 ), 2 );
+        silk_assert( invGain_Q30 >= 0           );
+        silk_assert( invGain_Q30 <= ( 1 << 30 ) );
+
+        /* Swap pointers: the row just read becomes the source, the other row the destination */
+        Aold_QA = Anew_QA;
+        Anew_QA = A_QA[ k & 1 ];
+
+        /* Update AR coefficient */
+        for( n = 0; n < k; n++ ) {
+            tmp_QA = Aold_QA[ n ] - MUL32_FRAC_Q( Aold_QA[ k - n - 1 ], rc_Q31, 31 );
+            Anew_QA[ n ] = MUL32_FRAC_Q( tmp_QA, rc_mult2 , mult2Q );
+        }
+    }
+
+    /* Check for stability (the single coefficient left after the loop) */
+    if( ( Anew_QA[ 0 ] > A_LIMIT ) || ( Anew_QA[ 0 ] < -A_LIMIT ) ) {
+        return 0;
+    }
+
+    /* Set RC equal to negated AR coef */
+    rc_Q31 = -silk_LSHIFT( Anew_QA[ 0 ], 31 - QA );
+
+    /* Range: [ 1 : 2^30 ] */
+    rc_mult1_Q30 = ( (opus_int32)1 << 30 ) - silk_SMMUL( rc_Q31, rc_Q31 );
+
+    /* Update inverse gain */
+    /* Range: [ 0 : 2^30 ] */
+    invGain_Q30 = silk_LSHIFT( silk_SMMUL( invGain_Q30, rc_mult1_Q30 ), 2 );
+    silk_assert( invGain_Q30 >= 0     );
+    silk_assert( invGain_Q30 <= 1<<30 );
+
+    return invGain_Q30;
+}
+
+/* For input in Q12 domain
+ *
+ * Copies the coefficients into a local QA-domain buffer and sums them in Q12.
+ * If the sum reaches 4096 the function returns 0 without running the
+ * recursion; otherwise it returns the result of LPC_inverse_pred_gain_QA().
+ */
+opus_int32 silk_LPC_inverse_pred_gain(              /* O   Returns inverse prediction gain in energy domain, Q30        */
+    const opus_int16            *A_Q12,             /* I   Prediction coefficients, Q12 [order]                         */
+    const opus_int              order               /* I   Prediction order                                             */
+)
+{
+    opus_int   k;
+    opus_int32 Atmp_QA[ 2 ][ SILK_MAX_ORDER_LPC ];
+    opus_int32 *Anew_QA;
+    opus_int32 DC_resp = 0;
+
+    Anew_QA = Atmp_QA[ order & 1 ];
+
+    /* Increase Q domain of the AR coefficients */
+    for( k = 0; k < order; k++ ) {
+        DC_resp += (opus_int32)A_Q12[ k ];
+        Anew_QA[ k ] = silk_LSHIFT32( (opus_int32)A_Q12[ k ], QA - 12 );
+    }
+    /* If the DC is unstable, we don't even need to do the full calculations */
+    if( DC_resp >= 4096 ) {
+        return 0;
+    }
+    return LPC_inverse_pred_gain_QA( Atmp_QA, order );
+}
+
+#ifdef OPUS_FIXED_POINT
+
+/* For input in Q24 domain
+ *
+ * Same as silk_LPC_inverse_pred_gain() but without the DC response check.
+ */
+opus_int32 silk_LPC_inverse_pred_gain_Q24(          /* O    Returns inverse prediction gain in energy domain, Q30       */
+    const opus_int32            *A_Q24,             /* I    Prediction coefficients [order]                             */
+    const opus_int              order               /* I    Prediction order                                            */
+)
+{
+    opus_int   k;
+    opus_int32 Atmp_QA[ 2 ][ SILK_MAX_ORDER_LPC ];
+    opus_int32 *Anew_QA;
+
+    Anew_QA = Atmp_QA[ order & 1 ];
+
+    /* Convert the AR coefficients from Q24 to QA: a right shift by 24 - QA bits, which is zero while QA is 24 */
+    for( k = 0; k < order; k++ ) {
+        Anew_QA[ k ] = silk_RSHIFT32( A_Q24[ k ], 24 - QA );
+    }
+
+    return LPC_inverse_pred_gain_QA( Atmp_QA, order );
+}
+#endif
diff --git a/drivers/opus/silk/LP_variable_cutoff.c b/drivers/opus/silk/LP_variable_cutoff.c
new file mode 100644
index 0000000000..098c19d34f
--- /dev/null
+++ b/drivers/opus/silk/LP_variable_cutoff.c
@@ -0,0 +1,135 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +/* + Elliptic/Cauer filters designed with 0.1 dB passband ripple, + 80 dB minimum stopband attenuation, and + [0.95 : 0.15 : 0.35] normalized cut off frequencies. 
+*/ + +#include "silk_main.h" + +/* Helper function, interpolates the filter taps */ +static OPUS_INLINE void silk_LP_interpolate_filter_taps( + opus_int32 B_Q28[ TRANSITION_NB ], + opus_int32 A_Q28[ TRANSITION_NA ], + const opus_int ind, + const opus_int32 fac_Q16 +) +{ + opus_int nb, na; + + if( ind < TRANSITION_INT_NUM - 1 ) { + if( fac_Q16 > 0 ) { + if( fac_Q16 < 32768 ) { /* fac_Q16 is in range of a 16-bit int */ + /* Piece-wise linear interpolation of B and A */ + for( nb = 0; nb < TRANSITION_NB; nb++ ) { + B_Q28[ nb ] = silk_SMLAWB( + silk_Transition_LP_B_Q28[ ind ][ nb ], + silk_Transition_LP_B_Q28[ ind + 1 ][ nb ] - + silk_Transition_LP_B_Q28[ ind ][ nb ], + fac_Q16 ); + } + for( na = 0; na < TRANSITION_NA; na++ ) { + A_Q28[ na ] = silk_SMLAWB( + silk_Transition_LP_A_Q28[ ind ][ na ], + silk_Transition_LP_A_Q28[ ind + 1 ][ na ] - + silk_Transition_LP_A_Q28[ ind ][ na ], + fac_Q16 ); + } + } else { /* ( fac_Q16 - ( 1 << 16 ) ) is in range of a 16-bit int */ + silk_assert( fac_Q16 - ( 1 << 16 ) == silk_SAT16( fac_Q16 - ( 1 << 16 ) ) ); + /* Piece-wise linear interpolation of B and A */ + for( nb = 0; nb < TRANSITION_NB; nb++ ) { + B_Q28[ nb ] = silk_SMLAWB( + silk_Transition_LP_B_Q28[ ind + 1 ][ nb ], + silk_Transition_LP_B_Q28[ ind + 1 ][ nb ] - + silk_Transition_LP_B_Q28[ ind ][ nb ], + fac_Q16 - ( (opus_int32)1 << 16 ) ); + } + for( na = 0; na < TRANSITION_NA; na++ ) { + A_Q28[ na ] = silk_SMLAWB( + silk_Transition_LP_A_Q28[ ind + 1 ][ na ], + silk_Transition_LP_A_Q28[ ind + 1 ][ na ] - + silk_Transition_LP_A_Q28[ ind ][ na ], + fac_Q16 - ( (opus_int32)1 << 16 ) ); + } + } + } else { + silk_memcpy( B_Q28, silk_Transition_LP_B_Q28[ ind ], TRANSITION_NB * sizeof( opus_int32 ) ); + silk_memcpy( A_Q28, silk_Transition_LP_A_Q28[ ind ], TRANSITION_NA * sizeof( opus_int32 ) ); + } + } else { + silk_memcpy( B_Q28, silk_Transition_LP_B_Q28[ TRANSITION_INT_NUM - 1 ], TRANSITION_NB * sizeof( opus_int32 ) ); + silk_memcpy( A_Q28, silk_Transition_LP_A_Q28[ 
TRANSITION_INT_NUM - 1 ], TRANSITION_NA * sizeof( opus_int32 ) ); + } +} + +/* Low-pass filter with variable cutoff frequency based on */ +/* piece-wise linear interpolation between elliptic filters */ +/* Start by setting psEncC->mode <> 0; */ +/* Deactivate by setting psEncC->mode = 0; */ +void silk_LP_variable_cutoff( + silk_LP_state *psLP, /* I/O LP filter state */ + opus_int16 *frame, /* I/O Low-pass filtered output signal */ + const opus_int frame_length /* I Frame length */ +) +{ + opus_int32 B_Q28[ TRANSITION_NB ], A_Q28[ TRANSITION_NA ], fac_Q16 = 0; + opus_int ind = 0; + + silk_assert( psLP->transition_frame_no >= 0 && psLP->transition_frame_no <= TRANSITION_FRAMES ); + + /* Run filter if needed */ + if( psLP->mode != 0 ) { + /* Calculate index and interpolation factor for interpolation */ +#if( TRANSITION_INT_STEPS == 64 ) + fac_Q16 = silk_LSHIFT( TRANSITION_FRAMES - psLP->transition_frame_no, 16 - 6 ); +#else + fac_Q16 = silk_DIV32_16( silk_LSHIFT( TRANSITION_FRAMES - psLP->transition_frame_no, 16 ), TRANSITION_FRAMES ); +#endif + ind = silk_RSHIFT( fac_Q16, 16 ); + fac_Q16 -= silk_LSHIFT( ind, 16 ); + + silk_assert( ind >= 0 ); + silk_assert( ind < TRANSITION_INT_NUM ); + + /* Interpolate filter coefficients */ + silk_LP_interpolate_filter_taps( B_Q28, A_Q28, ind, fac_Q16 ); + + /* Update transition frame number for next frame */ + psLP->transition_frame_no = silk_LIMIT( psLP->transition_frame_no + psLP->mode, 0, TRANSITION_FRAMES ); + + /* ARMA low-pass filtering */ + silk_assert( TRANSITION_NB == 3 && TRANSITION_NA == 2 ); + silk_biquad_alt( frame, B_Q28, A_Q28, psLP->In_LP_State, frame, frame_length, 1); + } +} diff --git a/drivers/opus/silk/MacroCount.h b/drivers/opus/silk/MacroCount.h new file mode 100644 index 0000000000..834817d058 --- /dev/null +++ b/drivers/opus/silk/MacroCount.h @@ -0,0 +1,718 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. 
+Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+***********************************************************************/ + +#ifndef SIGPROCFIX_API_MACROCOUNT_H +#define SIGPROCFIX_API_MACROCOUNT_H +#include <stdio.h> + +#ifdef silk_MACRO_COUNT +#define varDefine opus_int64 ops_count = 0; + +extern opus_int64 ops_count; + +static OPUS_INLINE opus_int64 silk_SaveCount(){ + return(ops_count); +} + +static OPUS_INLINE opus_int64 silk_SaveResetCount(){ + opus_int64 ret; + + ret = ops_count; + ops_count = 0; + return(ret); +} + +static OPUS_INLINE silk_PrintCount(){ + printf("ops_count = %d \n ", (opus_int32)ops_count); +} + +#undef silk_MUL +static OPUS_INLINE opus_int32 silk_MUL(opus_int32 a32, opus_int32 b32){ + opus_int32 ret; + ops_count += 4; + ret = a32 * b32; + return ret; +} + +#undef silk_MUL_uint +static OPUS_INLINE opus_uint32 silk_MUL_uint(opus_uint32 a32, opus_uint32 b32){ + opus_uint32 ret; + ops_count += 4; + ret = a32 * b32; + return ret; +} +#undef silk_MLA +static OPUS_INLINE opus_int32 silk_MLA(opus_int32 a32, opus_int32 b32, opus_int32 c32){ + opus_int32 ret; + ops_count += 4; + ret = a32 + b32 * c32; + return ret; +} + +#undef silk_MLA_uint +static OPUS_INLINE opus_int32 silk_MLA_uint(opus_uint32 a32, opus_uint32 b32, opus_uint32 c32){ + opus_uint32 ret; + ops_count += 4; + ret = a32 + b32 * c32; + return ret; +} + +#undef silk_SMULWB +static OPUS_INLINE opus_int32 silk_SMULWB(opus_int32 a32, opus_int32 b32){ + opus_int32 ret; + ops_count += 5; + ret = (a32 >> 16) * (opus_int32)((opus_int16)b32) + (((a32 & 0x0000FFFF) * (opus_int32)((opus_int16)b32)) >> 16); + return ret; +} +#undef silk_SMLAWB +static OPUS_INLINE opus_int32 silk_SMLAWB(opus_int32 a32, opus_int32 b32, opus_int32 c32){ + opus_int32 ret; + ops_count += 5; + ret = ((a32) + ((((b32) >> 16) * (opus_int32)((opus_int16)(c32))) + ((((b32) & 0x0000FFFF) * (opus_int32)((opus_int16)(c32))) >> 16))); + return ret; +} + +#undef silk_SMULWT +static OPUS_INLINE opus_int32 silk_SMULWT(opus_int32 a32, opus_int32 b32){ + opus_int32 ret; + 
ops_count += 4; + ret = (a32 >> 16) * (b32 >> 16) + (((a32 & 0x0000FFFF) * (b32 >> 16)) >> 16); + return ret; +} +#undef silk_SMLAWT +static OPUS_INLINE opus_int32 silk_SMLAWT(opus_int32 a32, opus_int32 b32, opus_int32 c32){ + opus_int32 ret; + ops_count += 4; + ret = a32 + ((b32 >> 16) * (c32 >> 16)) + (((b32 & 0x0000FFFF) * ((c32 >> 16)) >> 16)); + return ret; +} + +#undef silk_SMULBB +static OPUS_INLINE opus_int32 silk_SMULBB(opus_int32 a32, opus_int32 b32){ + opus_int32 ret; + ops_count += 1; + ret = (opus_int32)((opus_int16)a32) * (opus_int32)((opus_int16)b32); + return ret; +} +#undef silk_SMLABB +static OPUS_INLINE opus_int32 silk_SMLABB(opus_int32 a32, opus_int32 b32, opus_int32 c32){ + opus_int32 ret; + ops_count += 1; + ret = a32 + (opus_int32)((opus_int16)b32) * (opus_int32)((opus_int16)c32); + return ret; +} + +#undef silk_SMULBT +static OPUS_INLINE opus_int32 silk_SMULBT(opus_int32 a32, opus_int32 b32 ){ + opus_int32 ret; + ops_count += 4; + ret = ((opus_int32)((opus_int16)a32)) * (b32 >> 16); + return ret; +} + +#undef silk_SMLABT +static OPUS_INLINE opus_int32 silk_SMLABT(opus_int32 a32, opus_int32 b32, opus_int32 c32){ + opus_int32 ret; + ops_count += 1; + ret = a32 + ((opus_int32)((opus_int16)b32)) * (c32 >> 16); + return ret; +} + +#undef silk_SMULTT +static OPUS_INLINE opus_int32 silk_SMULTT(opus_int32 a32, opus_int32 b32){ + opus_int32 ret; + ops_count += 1; + ret = (a32 >> 16) * (b32 >> 16); + return ret; +} + +#undef silk_SMLATT +static OPUS_INLINE opus_int32 silk_SMLATT(opus_int32 a32, opus_int32 b32, opus_int32 c32){ + opus_int32 ret; + ops_count += 1; + ret = a32 + (b32 >> 16) * (c32 >> 16); + return ret; +} + + +/* multiply-accumulate macros that allow overflow in the addition (ie, no asserts in debug mode)*/ +#undef silk_MLA_ovflw +#define silk_MLA_ovflw silk_MLA + +#undef silk_SMLABB_ovflw +#define silk_SMLABB_ovflw silk_SMLABB + +#undef silk_SMLABT_ovflw +#define silk_SMLABT_ovflw silk_SMLABT + +#undef silk_SMLATT_ovflw +#define 
silk_SMLATT_ovflw silk_SMLATT + +#undef silk_SMLAWB_ovflw +#define silk_SMLAWB_ovflw silk_SMLAWB + +#undef silk_SMLAWT_ovflw +#define silk_SMLAWT_ovflw silk_SMLAWT + +#undef silk_SMULL +static OPUS_INLINE opus_int64 silk_SMULL(opus_int32 a32, opus_int32 b32){ + opus_int64 ret; + ops_count += 8; + ret = ((opus_int64)(a32) * /*(opus_int64)*/(b32)); + return ret; +} + +#undef silk_SMLAL +static OPUS_INLINE opus_int64 silk_SMLAL(opus_int64 a64, opus_int32 b32, opus_int32 c32){ + opus_int64 ret; + ops_count += 8; + ret = a64 + ((opus_int64)(b32) * /*(opus_int64)*/(c32)); + return ret; +} +#undef silk_SMLALBB +static OPUS_INLINE opus_int64 silk_SMLALBB(opus_int64 a64, opus_int16 b16, opus_int16 c16){ + opus_int64 ret; + ops_count += 4; + ret = a64 + ((opus_int64)(b16) * /*(opus_int64)*/(c16)); + return ret; +} + +#undef SigProcFIX_CLZ16 +static OPUS_INLINE opus_int32 SigProcFIX_CLZ16(opus_int16 in16) +{ + opus_int32 out32 = 0; + ops_count += 10; + if( in16 == 0 ) { + return 16; + } + /* test nibbles */ + if( in16 & 0xFF00 ) { + if( in16 & 0xF000 ) { + in16 >>= 12; + } else { + out32 += 4; + in16 >>= 8; + } + } else { + if( in16 & 0xFFF0 ) { + out32 += 8; + in16 >>= 4; + } else { + out32 += 12; + } + } + /* test bits and return */ + if( in16 & 0xC ) { + if( in16 & 0x8 ) + return out32 + 0; + else + return out32 + 1; + } else { + if( in16 & 0xE ) + return out32 + 2; + else + return out32 + 3; + } +} + +#undef SigProcFIX_CLZ32 +static OPUS_INLINE opus_int32 SigProcFIX_CLZ32(opus_int32 in32) +{ + /* test highest 16 bits and convert to opus_int16 */ + ops_count += 2; + if( in32 & 0xFFFF0000 ) { + return SigProcFIX_CLZ16((opus_int16)(in32 >> 16)); + } else { + return SigProcFIX_CLZ16((opus_int16)in32) + 16; + } +} + +#undef silk_DIV32 +static OPUS_INLINE opus_int32 silk_DIV32(opus_int32 a32, opus_int32 b32){ + ops_count += 64; + return a32 / b32; +} + +#undef silk_DIV32_16 +static OPUS_INLINE opus_int32 silk_DIV32_16(opus_int32 a32, opus_int32 b32){ + ops_count += 32; + return 
a32 / b32; +} + +#undef silk_SAT8 +static OPUS_INLINE opus_int8 silk_SAT8(opus_int64 a){ + opus_int8 tmp; + ops_count += 1; + tmp = (opus_int8)((a) > silk_int8_MAX ? silk_int8_MAX : \ + ((a) < silk_int8_MIN ? silk_int8_MIN : (a))); + return(tmp); +} + +#undef silk_SAT16 +static OPUS_INLINE opus_int16 silk_SAT16(opus_int64 a){ + opus_int16 tmp; + ops_count += 1; + tmp = (opus_int16)((a) > silk_int16_MAX ? silk_int16_MAX : \ + ((a) < silk_int16_MIN ? silk_int16_MIN : (a))); + return(tmp); +} +#undef silk_SAT32 +static OPUS_INLINE opus_int32 silk_SAT32(opus_int64 a){ + opus_int32 tmp; + ops_count += 1; + tmp = (opus_int32)((a) > silk_int32_MAX ? silk_int32_MAX : \ + ((a) < silk_int32_MIN ? silk_int32_MIN : (a))); + return(tmp); +} +#undef silk_POS_SAT32 +static OPUS_INLINE opus_int32 silk_POS_SAT32(opus_int64 a){ + opus_int32 tmp; + ops_count += 1; + tmp = (opus_int32)((a) > silk_int32_MAX ? silk_int32_MAX : (a)); + return(tmp); +} + +#undef silk_ADD_POS_SAT8 +static OPUS_INLINE opus_int8 silk_ADD_POS_SAT8(opus_int64 a, opus_int64 b){ + opus_int8 tmp; + ops_count += 1; + tmp = (opus_int8)((((a)+(b)) & 0x80) ? silk_int8_MAX : ((a)+(b))); + return(tmp); +} +#undef silk_ADD_POS_SAT16 +static OPUS_INLINE opus_int16 silk_ADD_POS_SAT16(opus_int64 a, opus_int64 b){ + opus_int16 tmp; + ops_count += 1; + tmp = (opus_int16)((((a)+(b)) & 0x8000) ? silk_int16_MAX : ((a)+(b))); + return(tmp); +} + +#undef silk_ADD_POS_SAT32 +static OPUS_INLINE opus_int32 silk_ADD_POS_SAT32(opus_int64 a, opus_int64 b){ + opus_int32 tmp; + ops_count += 1; + tmp = (opus_int32)((((a)+(b)) & 0x80000000) ? silk_int32_MAX : ((a)+(b))); + return(tmp); +} + +#undef silk_ADD_POS_SAT64 +static OPUS_INLINE opus_int64 silk_ADD_POS_SAT64(opus_int64 a, opus_int64 b){ + opus_int64 tmp; + ops_count += 1; + tmp = ((((a)+(b)) & 0x8000000000000000LL) ? 
silk_int64_MAX : ((a)+(b))); + return(tmp); +} + +#undef silk_LSHIFT8 +static OPUS_INLINE opus_int8 silk_LSHIFT8(opus_int8 a, opus_int32 shift){ + opus_int8 ret; + ops_count += 1; + ret = a << shift; + return ret; +} +#undef silk_LSHIFT16 +static OPUS_INLINE opus_int16 silk_LSHIFT16(opus_int16 a, opus_int32 shift){ + opus_int16 ret; + ops_count += 1; + ret = a << shift; + return ret; +} +#undef silk_LSHIFT32 +static OPUS_INLINE opus_int32 silk_LSHIFT32(opus_int32 a, opus_int32 shift){ + opus_int32 ret; + ops_count += 1; + ret = a << shift; + return ret; +} +#undef silk_LSHIFT64 +static OPUS_INLINE opus_int64 silk_LSHIFT64(opus_int64 a, opus_int shift){ + ops_count += 1; + return a << shift; +} + +#undef silk_LSHIFT_ovflw +static OPUS_INLINE opus_int32 silk_LSHIFT_ovflw(opus_int32 a, opus_int32 shift){ + ops_count += 1; + return a << shift; +} + +#undef silk_LSHIFT_uint +static OPUS_INLINE opus_uint32 silk_LSHIFT_uint(opus_uint32 a, opus_int32 shift){ + opus_uint32 ret; + ops_count += 1; + ret = a << shift; + return ret; +} + +#undef silk_RSHIFT8 +static OPUS_INLINE opus_int8 silk_RSHIFT8(opus_int8 a, opus_int32 shift){ + ops_count += 1; + return a >> shift; +} +#undef silk_RSHIFT16 +static OPUS_INLINE opus_int16 silk_RSHIFT16(opus_int16 a, opus_int32 shift){ + ops_count += 1; + return a >> shift; +} +#undef silk_RSHIFT32 +static OPUS_INLINE opus_int32 silk_RSHIFT32(opus_int32 a, opus_int32 shift){ + ops_count += 1; + return a >> shift; +} +#undef silk_RSHIFT64 +static OPUS_INLINE opus_int64 silk_RSHIFT64(opus_int64 a, opus_int64 shift){ + ops_count += 1; + return a >> shift; +} + +#undef silk_RSHIFT_uint +static OPUS_INLINE opus_uint32 silk_RSHIFT_uint(opus_uint32 a, opus_int32 shift){ + ops_count += 1; + return a >> shift; +} + +#undef silk_ADD_LSHIFT +static OPUS_INLINE opus_int32 silk_ADD_LSHIFT(opus_int32 a, opus_int32 b, opus_int32 shift){ + opus_int32 ret; + ops_count += 1; + ret = a + (b << shift); + return ret; /* shift >= 0*/ +} +#undef silk_ADD_LSHIFT32 
+static OPUS_INLINE opus_int32 silk_ADD_LSHIFT32(opus_int32 a, opus_int32 b, opus_int32 shift){ + opus_int32 ret; + ops_count += 1; + ret = a + (b << shift); + return ret; /* shift >= 0*/ +} +#undef silk_ADD_LSHIFT_uint +static OPUS_INLINE opus_uint32 silk_ADD_LSHIFT_uint(opus_uint32 a, opus_uint32 b, opus_int32 shift){ + opus_uint32 ret; + ops_count += 1; + ret = a + (b << shift); + return ret; /* shift >= 0*/ +} +#undef silk_ADD_RSHIFT +static OPUS_INLINE opus_int32 silk_ADD_RSHIFT(opus_int32 a, opus_int32 b, opus_int32 shift){ + opus_int32 ret; + ops_count += 1; + ret = a + (b >> shift); + return ret; /* shift > 0*/ +} +#undef silk_ADD_RSHIFT32 +static OPUS_INLINE opus_int32 silk_ADD_RSHIFT32(opus_int32 a, opus_int32 b, opus_int32 shift){ + opus_int32 ret; + ops_count += 1; + ret = a + (b >> shift); + return ret; /* shift > 0*/ +} +#undef silk_ADD_RSHIFT_uint +static OPUS_INLINE opus_uint32 silk_ADD_RSHIFT_uint(opus_uint32 a, opus_uint32 b, opus_int32 shift){ + opus_uint32 ret; + ops_count += 1; + ret = a + (b >> shift); + return ret; /* shift > 0*/ +} +#undef silk_SUB_LSHIFT32 +static OPUS_INLINE opus_int32 silk_SUB_LSHIFT32(opus_int32 a, opus_int32 b, opus_int32 shift){ + opus_int32 ret; + ops_count += 1; + ret = a - (b << shift); + return ret; /* shift >= 0*/ +} +#undef silk_SUB_RSHIFT32 +static OPUS_INLINE opus_int32 silk_SUB_RSHIFT32(opus_int32 a, opus_int32 b, opus_int32 shift){ + opus_int32 ret; + ops_count += 1; + ret = a - (b >> shift); + return ret; /* shift > 0*/ +} + +#undef silk_RSHIFT_ROUND +static OPUS_INLINE opus_int32 silk_RSHIFT_ROUND(opus_int32 a, opus_int32 shift){ + opus_int32 ret; + ops_count += 3; + ret = shift == 1 ? (a >> 1) + (a & 1) : ((a >> (shift - 1)) + 1) >> 1; + return ret; +} + +#undef silk_RSHIFT_ROUND64 +static OPUS_INLINE opus_int64 silk_RSHIFT_ROUND64(opus_int64 a, opus_int32 shift){ + opus_int64 ret; + ops_count += 6; + ret = shift == 1 ? 
(a >> 1) + (a & 1) : ((a >> (shift - 1)) + 1) >> 1; + return ret; +} + +#undef silk_abs_int64 +static OPUS_INLINE opus_int64 silk_abs_int64(opus_int64 a){ + ops_count += 1; + return (((a) > 0) ? (a) : -(a)); /* Be careful, silk_abs returns wrong when input equals to silk_intXX_MIN*/ +} + +#undef silk_abs_int32 +static OPUS_INLINE opus_int32 silk_abs_int32(opus_int32 a){ + ops_count += 1; + return silk_abs(a); +} + + +#undef silk_min /* counted replacement: returns the smaller of a and b; types are spelled out because implicit-int definitions are not valid C99 */ +static OPUS_INLINE opus_int silk_min(opus_int a, opus_int b){ + ops_count += 1; + return (((a) < (b)) ? (a) : (b)); +} +#undef silk_max /* counted replacement: returns the larger of a and b */ +static OPUS_INLINE opus_int silk_max(opus_int a, opus_int b){ + ops_count += 1; + return (((a) > (b)) ? (a) : (b)); +} +#undef silk_sign /* counted replacement: returns 1, -1 or 0 according to the sign of a */ +static OPUS_INLINE opus_int silk_sign(opus_int a){ + ops_count += 1; + return ((a) > 0 ? 1 : ( (a) < 0 ? -1 : 0 )); +} + +#undef silk_ADD16 +static OPUS_INLINE opus_int16 silk_ADD16(opus_int16 a, opus_int16 b){ + opus_int16 ret; + ops_count += 1; + ret = a + b; + return ret; +} + +#undef silk_ADD32 +static OPUS_INLINE opus_int32 silk_ADD32(opus_int32 a, opus_int32 b){ + opus_int32 ret; + ops_count += 1; + ret = a + b; + return ret; +} + +#undef silk_ADD64 +static OPUS_INLINE opus_int64 silk_ADD64(opus_int64 a, opus_int64 b){ + opus_int64 ret; + ops_count += 2; + ret = a + b; + return ret; +} + +#undef silk_SUB16 +static OPUS_INLINE opus_int16 silk_SUB16(opus_int16 a, opus_int16 b){ + opus_int16 ret; + ops_count += 1; + ret = a - b; + return ret; +} + +#undef silk_SUB32 +static OPUS_INLINE opus_int32 silk_SUB32(opus_int32 a, opus_int32 b){ + opus_int32 ret; + ops_count += 1; + ret = a - b; + return ret; +} + +#undef silk_SUB64 +static OPUS_INLINE opus_int64 silk_SUB64(opus_int64 a, opus_int64 b){ + opus_int64 ret; + ops_count += 2; + ret = a - b; + return ret; +} + +#undef silk_ADD_SAT16 +static OPUS_INLINE opus_int16 silk_ADD_SAT16( opus_int16 a16, opus_int16 b16 ) { + opus_int16 res; + /* Nb will be counted in AKP_add32 and silk_SAT16*/ + res = (opus_int16)silk_SAT16( silk_ADD32( (opus_int32)(a16), (b16) ) ); + return res; +} + +#undef silk_ADD_SAT32 +static OPUS_INLINE 
opus_int32 silk_ADD_SAT32(opus_int32 a32, opus_int32 b32){ + opus_int32 res; /* saturating 32-bit add; the sign probe below is formed in opus_uint32 so the probe itself cannot overflow a signed int */ + ops_count += 1; + res = ((((opus_uint32)(a32) + (opus_uint32)(b32)) & 0x80000000) == 0 ? \ + ((((a32) & (b32)) & 0x80000000) != 0 ? silk_int32_MIN : (a32)+(b32)) : \ + ((((a32) | (b32)) & 0x80000000) == 0 ? silk_int32_MAX : (a32)+(b32)) ); + return res; +} + +#undef silk_ADD_SAT64 +static OPUS_INLINE opus_int64 silk_ADD_SAT64( opus_int64 a64, opus_int64 b64 ) { + opus_int64 res; + ops_count += 1; + res = ((((a64) + (b64)) & 0x8000000000000000LL) == 0 ? \ + ((((a64) & (b64)) & 0x8000000000000000LL) != 0 ? silk_int64_MIN : (a64)+(b64)) : \ + ((((a64) | (b64)) & 0x8000000000000000LL) == 0 ? silk_int64_MAX : (a64)+(b64)) ); + return res; +} + +#undef silk_SUB_SAT16 +static OPUS_INLINE opus_int16 silk_SUB_SAT16( opus_int16 a16, opus_int16 b16 ) { + opus_int16 res; + silk_assert(0); /* NOTE(review): unconditional assert kept as-is; presumably flags this variant as unexpected in op-count builds - confirm */ + /* Nb will be counted in sub-macros*/ + res = (opus_int16)silk_SAT16( silk_SUB32( (opus_int32)(a16), (b16) ) ); + return res; +} + +#undef silk_SUB_SAT32 +static OPUS_INLINE opus_int32 silk_SUB_SAT32( opus_int32 a32, opus_int32 b32 ) { + opus_int32 res; /* saturating 32-bit subtract; the sign probe below is formed in opus_uint32 so the probe itself cannot overflow a signed int */ + ops_count += 1; + res = ((((opus_uint32)(a32)-(opus_uint32)(b32)) & 0x80000000) == 0 ? \ + (( (a32) & ((b32)^0x80000000) & 0x80000000) ? silk_int32_MIN : (a32)-(b32)) : \ + ((((a32)^0x80000000) & (b32) & 0x80000000) ? silk_int32_MAX : (a32)-(b32)) ); + return res; +} + +#undef silk_SUB_SAT64 +static OPUS_INLINE opus_int64 silk_SUB_SAT64( opus_int64 a64, opus_int64 b64 ) { + opus_int64 res; + ops_count += 1; + res = ((((a64)-(b64)) & 0x8000000000000000LL) == 0 ? \ + (( (a64) & ((b64)^0x8000000000000000LL) & 0x8000000000000000LL) ? silk_int64_MIN : (a64)-(b64)) : \ + ((((a64)^0x8000000000000000LL) & (b64) & 0x8000000000000000LL) ? 
silk_int64_MAX : (a64)-(b64)) ); + + return res; +} + +#undef silk_SMULWW /* counted replacement built from silk_MLA, silk_SMULWB and silk_RSHIFT_ROUND */ +static OPUS_INLINE opus_int32 silk_SMULWW(opus_int32 a32, opus_int32 b32){ + opus_int32 ret; + /* No ops_count update here: the count is added by the sub-macros */ + ret = silk_MLA(silk_SMULWB((a32), (b32)), (a32), silk_RSHIFT_ROUND((b32), 16)); + return ret; +} + +#undef silk_SMLAWW /* counted replacement built from silk_MLA, silk_SMLAWB and silk_RSHIFT_ROUND */ +static OPUS_INLINE opus_int32 silk_SMLAWW(opus_int32 a32, opus_int32 b32, opus_int32 c32){ + opus_int32 ret; + /* No ops_count update here: the count is added by the sub-macros */ + ret = silk_MLA(silk_SMLAWB((a32), (b32), (c32)), (b32), silk_RSHIFT_ROUND((c32), 16)); + return ret; +} + +#undef silk_min_int /* typed minimum, counted as one op */ +static OPUS_INLINE opus_int silk_min_int(opus_int a, opus_int b) +{ + ops_count += 1; + return (((a) < (b)) ? (a) : (b)); +} + +#undef silk_min_16 /* typed minimum, counted as one op */ +static OPUS_INLINE opus_int16 silk_min_16(opus_int16 a, opus_int16 b) +{ + ops_count += 1; + return (((a) < (b)) ? (a) : (b)); +} +#undef silk_min_32 /* typed minimum, counted as one op */ +static OPUS_INLINE opus_int32 silk_min_32(opus_int32 a, opus_int32 b) +{ + ops_count += 1; + return (((a) < (b)) ? (a) : (b)); +} +#undef silk_min_64 /* typed minimum, counted as one op */ +static OPUS_INLINE opus_int64 silk_min_64(opus_int64 a, opus_int64 b) +{ + ops_count += 1; + return (((a) < (b)) ? (a) : (b)); +} + +/* silk_max() versions with typecast in the function call */ +#undef silk_max_int /* typed maximum, counted as one op */ +static OPUS_INLINE opus_int silk_max_int(opus_int a, opus_int b) +{ + ops_count += 1; + return (((a) > (b)) ? (a) : (b)); +} +#undef silk_max_16 /* typed maximum, counted as one op */ +static OPUS_INLINE opus_int16 silk_max_16(opus_int16 a, opus_int16 b) +{ + ops_count += 1; + return (((a) > (b)) ? (a) : (b)); +} +#undef silk_max_32 /* typed maximum, counted as one op */ +static OPUS_INLINE opus_int32 silk_max_32(opus_int32 a, opus_int32 b) +{ + ops_count += 1; + return (((a) > (b)) ? (a) : (b)); +} + +#undef silk_max_64 /* typed maximum, counted as one op */ +static OPUS_INLINE opus_int64 silk_max_64(opus_int64 a, opus_int64 b) +{ + ops_count += 1; + return (((a) > (b)) ? 
(a) : (b)); +} + + +#undef silk_LIMIT_int +static OPUS_INLINE opus_int silk_LIMIT_int(opus_int a, opus_int limit1, opus_int limit2) +{ + opus_int ret; + ops_count += 6; + + ret = ((limit1) > (limit2) ? ((a) > (limit1) ? (limit1) : ((a) < (limit2) ? (limit2) : (a))) \ + : ((a) > (limit2) ? (limit2) : ((a) < (limit1) ? (limit1) : (a)))); + + return(ret); +} + +#undef silk_LIMIT_16 +static OPUS_INLINE opus_int16 silk_LIMIT_16(opus_int16 a, opus_int16 limit1, opus_int16 limit2) +{ + opus_int16 ret; + ops_count += 6; + + ret = ((limit1) > (limit2) ? ((a) > (limit1) ? (limit1) : ((a) < (limit2) ? (limit2) : (a))) \ + : ((a) > (limit2) ? (limit2) : ((a) < (limit1) ? (limit1) : (a)))); + +return(ret); +} + + +#undef silk_LIMIT_32 /* clamps a to the range spanned by limit1 and limit2 (given in either order); returns opus_int32 to match its operands, like the _int and _16 variants */ +static OPUS_INLINE opus_int32 silk_LIMIT_32(opus_int32 a, opus_int32 limit1, opus_int32 limit2) +{ + opus_int32 ret; + ops_count += 6; + + ret = ((limit1) > (limit2) ? ((a) > (limit1) ? (limit1) : ((a) < (limit2) ? (limit2) : (a))) \ + : ((a) > (limit2) ? (limit2) : ((a) < (limit1) ? (limit1) : (a)))); + return(ret); +} + +#else +#define varDefine +#define silk_SaveCount() + +#endif +#endif + diff --git a/drivers/opus/silk/MacroDebug.h b/drivers/opus/silk/MacroDebug.h new file mode 100644 index 0000000000..35aedc5c5f --- /dev/null +++ b/drivers/opus/silk/MacroDebug.h @@ -0,0 +1,952 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Copyright (C) 2012 Xiph.Org Foundation +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. 
+- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifndef MACRO_DEBUG_H +#define MACRO_DEBUG_H + +/* Redefine macro functions with extensive assertion in DEBUG mode. 
+ As functions can't be undefined, this file can't work with SigProcFIX_MacroCount.h */ + +#if ( defined (FIXED_DEBUG) || ( 0 && defined (_DEBUG) ) ) && !defined (silk_MACRO_COUNT) + +#undef silk_ADD16 +#define silk_ADD16(a,b) silk_ADD16_((a), (b), __FILE__, __LINE__) +static OPUS_INLINE opus_int16 silk_ADD16_(opus_int16 a, opus_int16 b, char *file, int line){ + opus_int16 ret; + + ret = a + b; + if ( ret != silk_ADD_SAT16( a, b ) ) + { + fprintf (stderr, "silk_ADD16(%d, %d) in %s: line %d\n", a, b, file, line); +#ifdef FIXED_DEBUG_ASSERT + silk_assert( 0 ); +#endif + } + return ret; +} + +#undef silk_ADD32 +#define silk_ADD32(a,b) silk_ADD32_((a), (b), __FILE__, __LINE__) +static OPUS_INLINE opus_int32 silk_ADD32_(opus_int32 a, opus_int32 b, char *file, int line){ + opus_int32 ret; + + ret = a + b; + if ( ret != silk_ADD_SAT32( a, b ) ) + { + fprintf (stderr, "silk_ADD32(%d, %d) in %s: line %d\n", a, b, file, line); +#ifdef FIXED_DEBUG_ASSERT + silk_assert( 0 ); +#endif + } + return ret; +} + +#undef silk_ADD64 +#define silk_ADD64(a,b) silk_ADD64_((a), (b), __FILE__, __LINE__) +static OPUS_INLINE opus_int64 silk_ADD64_(opus_int64 a, opus_int64 b, char *file, int line){ + opus_int64 ret; + + ret = a + b; + if ( ret != silk_ADD_SAT64( a, b ) ) + { + fprintf (stderr, "silk_ADD64(%lld, %lld) in %s: line %d\n", (long long)a, (long long)b, file, line); +#ifdef FIXED_DEBUG_ASSERT + silk_assert( 0 ); +#endif + } + return ret; +} + +#undef silk_SUB16 +#define silk_SUB16(a,b) silk_SUB16_((a), (b), __FILE__, __LINE__) +static OPUS_INLINE opus_int16 silk_SUB16_(opus_int16 a, opus_int16 b, char *file, int line){ + opus_int16 ret; + + ret = a - b; + if ( ret != silk_SUB_SAT16( a, b ) ) + { + fprintf (stderr, "silk_SUB16(%d, %d) in %s: line %d\n", a, b, file, line); +#ifdef FIXED_DEBUG_ASSERT + silk_assert( 0 ); +#endif + } + return ret; +} + +#undef silk_SUB32 +#define silk_SUB32(a,b) silk_SUB32_((a), (b), __FILE__, __LINE__) +static OPUS_INLINE opus_int32 
silk_SUB32_(opus_int32 a, opus_int32 b, char *file, int line){ + opus_int32 ret; + + ret = a - b; + if ( ret != silk_SUB_SAT32( a, b ) ) + { + fprintf (stderr, "silk_SUB32(%d, %d) in %s: line %d\n", a, b, file, line); +#ifdef FIXED_DEBUG_ASSERT + silk_assert( 0 ); +#endif + } + return ret; +} + +#undef silk_SUB64 +#define silk_SUB64(a,b) silk_SUB64_((a), (b), __FILE__, __LINE__) +static OPUS_INLINE opus_int64 silk_SUB64_(opus_int64 a, opus_int64 b, char *file, int line){ + opus_int64 ret; + + ret = a - b; + if ( ret != silk_SUB_SAT64( a, b ) ) + { + fprintf (stderr, "silk_SUB64(%lld, %lld) in %s: line %d\n", (long long)a, (long long)b, file, line); +#ifdef FIXED_DEBUG_ASSERT + silk_assert( 0 ); +#endif + } + return ret; +} + +#undef silk_ADD_SAT16 +#define silk_ADD_SAT16(a,b) silk_ADD_SAT16_((a), (b), __FILE__, __LINE__) +static OPUS_INLINE opus_int16 silk_ADD_SAT16_( opus_int16 a16, opus_int16 b16, char *file, int line) { + opus_int16 res; + res = (opus_int16)silk_SAT16( silk_ADD32( (opus_int32)(a16), (b16) ) ); + if ( res != silk_SAT16( (opus_int32)a16 + (opus_int32)b16 ) ) + { + fprintf (stderr, "silk_ADD_SAT16(%d, %d) in %s: line %d\n", a16, b16, file, line); +#ifdef FIXED_DEBUG_ASSERT + silk_assert( 0 ); +#endif + } + return res; +} + +#undef silk_ADD_SAT32 +#define silk_ADD_SAT32(a,b) silk_ADD_SAT32_((a), (b), __FILE__, __LINE__) +static OPUS_INLINE opus_int32 silk_ADD_SAT32_(opus_int32 a32, opus_int32 b32, char *file, int line){ + opus_int32 res; + res = ((((opus_uint32)(a32) + (opus_uint32)(b32)) & 0x80000000) == 0 ? \ + ((((a32) & (b32)) & 0x80000000) != 0 ? silk_int32_MIN : (a32)+(b32)) : \ + ((((a32) | (b32)) & 0x80000000) == 0 ? 
silk_int32_MAX : (a32)+(b32)) ); + if ( res != silk_SAT32( (opus_int64)a32 + (opus_int64)b32 ) ) + { + fprintf (stderr, "silk_ADD_SAT32(%d, %d) in %s: line %d\n", a32, b32, file, line); +#ifdef FIXED_DEBUG_ASSERT + silk_assert( 0 ); +#endif + } + return res; +} + +#undef silk_ADD_SAT64 +#define silk_ADD_SAT64(a,b) silk_ADD_SAT64_((a), (b), __FILE__, __LINE__) +static OPUS_INLINE opus_int64 silk_ADD_SAT64_( opus_int64 a64, opus_int64 b64, char *file, int line) { + opus_int64 res; + int fail = 0; + res = ((((a64) + (b64)) & 0x8000000000000000LL) == 0 ? \ + ((((a64) & (b64)) & 0x8000000000000000LL) != 0 ? silk_int64_MIN : (a64)+(b64)) : \ + ((((a64) | (b64)) & 0x8000000000000000LL) == 0 ? silk_int64_MAX : (a64)+(b64)) ); + if( res != a64 + b64 ) { + /* Check that we saturated to the correct extreme value */ + if ( !(( res == silk_int64_MAX && ( ( a64 >> 1 ) + ( b64 >> 1 ) > ( silk_int64_MAX >> 3 ) ) ) || + ( res == silk_int64_MIN && ( ( a64 >> 1 ) + ( b64 >> 1 ) < ( silk_int64_MIN >> 3 ) ) ) ) ) + { + fail = 1; + } + } else { + /* Saturation not necessary */ + fail = res != a64 + b64; + } + if ( fail ) + { + fprintf (stderr, "silk_ADD_SAT64(%lld, %lld) in %s: line %d\n", (long long)a64, (long long)b64, file, line); +#ifdef FIXED_DEBUG_ASSERT + silk_assert( 0 ); +#endif + } + return res; +} + +#undef silk_SUB_SAT16 +#define silk_SUB_SAT16(a,b) silk_SUB_SAT16_((a), (b), __FILE__, __LINE__) +static OPUS_INLINE opus_int16 silk_SUB_SAT16_( opus_int16 a16, opus_int16 b16, char *file, int line ) { + opus_int16 res; + res = (opus_int16)silk_SAT16( silk_SUB32( (opus_int32)(a16), (b16) ) ); + if ( res != silk_SAT16( (opus_int32)a16 - (opus_int32)b16 ) ) + { + fprintf (stderr, "silk_SUB_SAT16(%d, %d) in %s: line %d\n", a16, b16, file, line); +#ifdef FIXED_DEBUG_ASSERT + silk_assert( 0 ); +#endif + } + return res; +} + +#undef silk_SUB_SAT32 +#define silk_SUB_SAT32(a,b) silk_SUB_SAT32_((a), (b), __FILE__, __LINE__) +static OPUS_INLINE opus_int32 silk_SUB_SAT32_( opus_int32 a32, 
opus_int32 b32, char *file, int line ) { + opus_int32 res; + res = ((((opus_uint32)(a32)-(opus_uint32)(b32)) & 0x80000000) == 0 ? \ + (( (a32) & ((b32)^0x80000000) & 0x80000000) ? silk_int32_MIN : (a32)-(b32)) : \ + ((((a32)^0x80000000) & (b32) & 0x80000000) ? silk_int32_MAX : (a32)-(b32)) ); + if ( res != silk_SAT32( (opus_int64)a32 - (opus_int64)b32 ) ) + { + fprintf (stderr, "silk_SUB_SAT32(%d, %d) in %s: line %d\n", a32, b32, file, line); +#ifdef FIXED_DEBUG_ASSERT + silk_assert( 0 ); +#endif + } + return res; +} + +#undef silk_SUB_SAT64 /* debug wrapper: saturating 64-bit subtract that reports to stderr when the saturated value looks wrong */ +#define silk_SUB_SAT64(a,b) silk_SUB_SAT64_((a), (b), __FILE__, __LINE__) +static OPUS_INLINE opus_int64 silk_SUB_SAT64_( opus_int64 a64, opus_int64 b64, char *file, int line ) { + opus_int64 res; + int fail = 0; + res = ((((a64)-(b64)) & 0x8000000000000000LL) == 0 ? \ + (( (a64) & ((b64)^0x8000000000000000LL) & 0x8000000000000000LL) ? silk_int64_MIN : (a64)-(b64)) : \ + ((((a64)^0x8000000000000000LL) & (b64) & 0x8000000000000000LL) ? silk_int64_MAX : (a64)-(b64)) ); + if( res != a64 - b64 ) { + /* Check that we saturated to the correct extreme value; the halved operands are subtracted (not added) because this is a difference, and halving keeps the probe inside 64 bits */ + if( !(( res == silk_int64_MAX && ( ( a64 >> 1 ) - ( b64 >> 1 ) > ( silk_int64_MAX >> 3 ) ) ) || + ( res == silk_int64_MIN && ( ( a64 >> 1 ) - ( b64 >> 1 ) < ( silk_int64_MIN >> 3 ) ) ) )) + { + fail = 1; + } + } else { + /* Saturation not necessary */ + fail = res != a64 - b64; + } + if ( fail ) + { + fprintf (stderr, "silk_SUB_SAT64(%lld, %lld) in %s: line %d\n", (long long)a64, (long long)b64, file, line); +#ifdef FIXED_DEBUG_ASSERT + silk_assert( 0 ); +#endif + } + return res; +} + +#undef silk_MUL +#define silk_MUL(a,b) silk_MUL_((a), (b), __FILE__, __LINE__) +static OPUS_INLINE opus_int32 silk_MUL_(opus_int32 a32, opus_int32 b32, char *file, int line){ + opus_int32 ret; + opus_int64 ret64; + ret = a32 * b32; + ret64 = (opus_int64)a32 * (opus_int64)b32; + if ( (opus_int64)ret != ret64 ) + { + fprintf (stderr, "silk_MUL(%d, %d) in %s: line %d\n", a32, b32, file, line); 
+#ifdef FIXED_DEBUG_ASSERT + silk_assert( 0 ); +#endif + } + return ret; +} + +#undef silk_MUL_uint +#define silk_MUL_uint(a,b) silk_MUL_uint_((a), (b), __FILE__, __LINE__) +static OPUS_INLINE opus_uint32 silk_MUL_uint_(opus_uint32 a32, opus_uint32 b32, char *file, int line){ + opus_uint32 ret; + ret = a32 * b32; + if ( (opus_uint64)ret != (opus_uint64)a32 * (opus_uint64)b32 ) + { + fprintf (stderr, "silk_MUL_uint(%u, %u) in %s: line %d\n", a32, b32, file, line); +#ifdef FIXED_DEBUG_ASSERT + silk_assert( 0 ); +#endif + } + return ret; +} + +#undef silk_MLA +#define silk_MLA(a,b,c) silk_MLA_((a), (b), (c), __FILE__, __LINE__) +static OPUS_INLINE opus_int32 silk_MLA_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line){ + opus_int32 ret; + ret = a32 + b32 * c32; + if ( (opus_int64)ret != (opus_int64)a32 + (opus_int64)b32 * (opus_int64)c32 ) + { + fprintf (stderr, "silk_MLA(%d, %d, %d) in %s: line %d\n", a32, b32, c32, file, line); +#ifdef FIXED_DEBUG_ASSERT + silk_assert( 0 ); +#endif + } + return ret; +} + +#undef silk_MLA_uint /* debug wrapper: unsigned a32 + b32 * c32, reported to stderr when the 32-bit result differs from the 64-bit one */ +#define silk_MLA_uint(a,b,c) silk_MLA_uint_((a), (b), (c), __FILE__, __LINE__) +static OPUS_INLINE opus_uint32 silk_MLA_uint_(opus_uint32 a32, opus_uint32 b32, opus_uint32 c32, char *file, int line){ + opus_uint32 ret; /* unsigned result, returned as opus_uint32 like silk_MUL_uint_ */ + ret = a32 + b32 * c32; + if ( (opus_int64)ret != (opus_int64)a32 + (opus_int64)b32 * (opus_int64)c32 ) + { + fprintf (stderr, "silk_MLA_uint(%u, %u, %u) in %s: line %d\n", a32, b32, c32, file, line); +#ifdef FIXED_DEBUG_ASSERT + silk_assert( 0 ); +#endif + } + return ret; +} + +#undef silk_SMULWB +#define silk_SMULWB(a,b) silk_SMULWB_((a), (b), __FILE__, __LINE__) +static OPUS_INLINE opus_int32 silk_SMULWB_(opus_int32 a32, opus_int32 b32, char *file, int line){ + opus_int32 ret; + ret = (a32 >> 16) * (opus_int32)((opus_int16)b32) + (((a32 & 0x0000FFFF) * (opus_int32)((opus_int16)b32)) >> 16); + if ( (opus_int64)ret != ((opus_int64)a32 * (opus_int16)b32) >> 16 ) + { + fprintf (stderr, "silk_SMULWB(%d, %d) in 
%s: line %d\n", a32, b32, file, line); +#ifdef FIXED_DEBUG_ASSERT + silk_assert( 0 ); +#endif + } + return ret; +} + +#undef silk_SMLAWB +#define silk_SMLAWB(a,b,c) silk_SMLAWB_((a), (b), (c), __FILE__, __LINE__) +static OPUS_INLINE opus_int32 silk_SMLAWB_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line){ + opus_int32 ret; + ret = silk_ADD32( a32, silk_SMULWB( b32, c32 ) ); + if ( silk_ADD32( a32, silk_SMULWB( b32, c32 ) ) != silk_ADD_SAT32( a32, silk_SMULWB( b32, c32 ) ) ) + { + fprintf (stderr, "silk_SMLAWB(%d, %d, %d) in %s: line %d\n", a32, b32, c32, file, line); +#ifdef FIXED_DEBUG_ASSERT + silk_assert( 0 ); +#endif + } + return ret; +} + +#undef silk_SMULWT +#define silk_SMULWT(a,b) silk_SMULWT_((a), (b), __FILE__, __LINE__) +static OPUS_INLINE opus_int32 silk_SMULWT_(opus_int32 a32, opus_int32 b32, char *file, int line){ + opus_int32 ret; + ret = (a32 >> 16) * (b32 >> 16) + (((a32 & 0x0000FFFF) * (b32 >> 16)) >> 16); + if ( (opus_int64)ret != ((opus_int64)a32 * (b32 >> 16)) >> 16 ) + { + fprintf (stderr, "silk_SMULWT(%d, %d) in %s: line %d\n", a32, b32, file, line); +#ifdef FIXED_DEBUG_ASSERT + silk_assert( 0 ); +#endif + } + return ret; +} + +#undef silk_SMLAWT +#define silk_SMLAWT(a,b,c) silk_SMLAWT_((a), (b), (c), __FILE__, __LINE__) +static OPUS_INLINE opus_int32 silk_SMLAWT_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line){ + opus_int32 ret; + ret = a32 + ((b32 >> 16) * (c32 >> 16)) + (((b32 & 0x0000FFFF) * ((c32 >> 16)) >> 16)); + if ( (opus_int64)ret != (opus_int64)a32 + (((opus_int64)b32 * (c32 >> 16)) >> 16) ) + { + fprintf (stderr, "silk_SMLAWT(%d, %d, %d) in %s: line %d\n", a32, b32, c32, file, line); +#ifdef FIXED_DEBUG_ASSERT + silk_assert( 0 ); +#endif + } + return ret; +} + +#undef silk_SMULL +#define silk_SMULL(a,b) silk_SMULL_((a), (b), __FILE__, __LINE__) +static OPUS_INLINE opus_int64 silk_SMULL_(opus_int64 a64, opus_int64 b64, char *file, int line){ + opus_int64 ret64; + int fail = 0; + ret64 = 
a64 * b64; + if( b64 != 0 ) { + fail = a64 != (ret64 / b64); + } else if( a64 != 0 ) { + fail = b64 != (ret64 / a64); + } + if ( fail ) + { + fprintf (stderr, "silk_SMULL(%lld, %lld) in %s: line %d\n", (long long)a64, (long long)b64, file, line); +#ifdef FIXED_DEBUG_ASSERT + silk_assert( 0 ); +#endif + } + return ret64; +} + +/* no checking needed for silk_SMULBB */ +#undef silk_SMLABB +#define silk_SMLABB(a,b,c) silk_SMLABB_((a), (b), (c), __FILE__, __LINE__) +static OPUS_INLINE opus_int32 silk_SMLABB_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line){ + opus_int32 ret; + ret = a32 + (opus_int32)((opus_int16)b32) * (opus_int32)((opus_int16)c32); + if ( (opus_int64)ret != (opus_int64)a32 + (opus_int64)b32 * (opus_int16)c32 ) + { + fprintf (stderr, "silk_SMLABB(%d, %d, %d) in %s: line %d\n", a32, b32, c32, file, line); +#ifdef FIXED_DEBUG_ASSERT + silk_assert( 0 ); +#endif + } + return ret; +} + +/* no checking needed for silk_SMULBT */ +#undef silk_SMLABT +#define silk_SMLABT(a,b,c) silk_SMLABT_((a), (b), (c), __FILE__, __LINE__) +static OPUS_INLINE opus_int32 silk_SMLABT_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line){ + opus_int32 ret; + ret = a32 + ((opus_int32)((opus_int16)b32)) * (c32 >> 16); + if ( (opus_int64)ret != (opus_int64)a32 + (opus_int64)b32 * (c32 >> 16) ) + { + fprintf (stderr, "silk_SMLABT(%d, %d, %d) in %s: line %d\n", a32, b32, c32, file, line); +#ifdef FIXED_DEBUG_ASSERT + silk_assert( 0 ); +#endif + } + return ret; +} + +/* no checking needed for silk_SMULTT */ +#undef silk_SMLATT +#define silk_SMLATT(a,b,c) silk_SMLATT_((a), (b), (c), __FILE__, __LINE__) +static OPUS_INLINE opus_int32 silk_SMLATT_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line){ + opus_int32 ret; + ret = a32 + (b32 >> 16) * (c32 >> 16); + if ( (opus_int64)ret != (opus_int64)a32 + (b32 >> 16) * (c32 >> 16) ) + { + fprintf (stderr, "silk_SMLATT(%d, %d, %d) in %s: line %d\n", a32, b32, c32, file, line); +#ifdef 
FIXED_DEBUG_ASSERT + silk_assert( 0 ); +#endif + } + return ret; +} + +#undef silk_SMULWW /* debug wrapper: computes the product via silk_SMULWB and silk_MUL and cross-checks it against a 64-bit silk_SMULL result */ +#define silk_SMULWW(a,b) silk_SMULWW_((a), (b), __FILE__, __LINE__) +static OPUS_INLINE opus_int32 silk_SMULWW_(opus_int32 a32, opus_int32 b32, char *file, int line){ + opus_int32 ret, tmp1, tmp2; + opus_int64 ret64; + int fail = 0; + + ret = silk_SMULWB( a32, b32 ); + tmp1 = silk_RSHIFT_ROUND( b32, 16 ); + tmp2 = silk_MUL( a32, tmp1 ); + + fail |= (opus_int64)tmp2 != (opus_int64) a32 * (opus_int64) tmp1; + + tmp1 = ret; + ret = silk_ADD32( tmp1, tmp2 ); + fail |= silk_ADD32( tmp1, tmp2 ) != silk_ADD_SAT32( tmp1, tmp2 ); + + ret64 = silk_RSHIFT64( silk_SMULL( a32, b32 ), 16 ); + fail |= (opus_int64)ret != ret64; + + if ( fail ) + { + fprintf (stderr, "silk_SMULWW(%d, %d) in %s: line %d\n", a32, b32, file, line); /* message names this macro, not silk_SMULWT */ +#ifdef FIXED_DEBUG_ASSERT + silk_assert( 0 ); +#endif + } + + return ret; +} + +#undef silk_SMLAWW +#define silk_SMLAWW(a,b,c) silk_SMLAWW_((a), (b), (c), __FILE__, __LINE__) +static OPUS_INLINE opus_int32 silk_SMLAWW_(opus_int32 a32, opus_int32 b32, opus_int32 c32, char *file, int line){ + opus_int32 ret, tmp; + + tmp = silk_SMULWW( b32, c32 ); + ret = silk_ADD32( a32, tmp ); + if ( ret != silk_ADD_SAT32( a32, tmp ) ) + { + fprintf (stderr, "silk_SMLAWW(%d, %d, %d) in %s: line %d\n", a32, b32, c32, file, line); +#ifdef FIXED_DEBUG_ASSERT + silk_assert( 0 ); +#endif + } + return ret; +} + +/* Multiply-accumulate macros that allow overflow in the addition (ie, no asserts in debug mode) */ +#undef silk_MLA_ovflw +#define silk_MLA_ovflw(a32, b32, c32) ((a32) + ((b32) * (c32))) +#undef silk_SMLABB_ovflw +#define silk_SMLABB_ovflw(a32, b32, c32) ((a32) + ((opus_int32)((opus_int16)(b32))) * (opus_int32)((opus_int16)(c32))) + +/* no checking needed for silk_SMULL + no checking needed for silk_SMLAL + no checking needed for silk_SMLALBB + no checking needed for SigProcFIX_CLZ16 + no checking needed for SigProcFIX_CLZ32*/ + +#undef silk_DIV32 +#define silk_DIV32(a,b) 
silk_DIV32_((a), (b), __FILE__, __LINE__) +static OPUS_INLINE opus_int32 silk_DIV32_(opus_int32 a32, opus_int32 b32, char *file, int line){ + if ( b32 == 0 ) + { + fprintf (stderr, "silk_DIV32(%d, %d) in %s: line %d\n", a32, b32, file, line); +#ifdef FIXED_DEBUG_ASSERT + silk_assert( 0 ); +#endif + } + return a32 / b32; +} + +#undef silk_DIV32_16 +#define silk_DIV32_16(a,b) silk_DIV32_16_((a), (b), __FILE__, __LINE__) +static OPUS_INLINE opus_int32 silk_DIV32_16_(opus_int32 a32, opus_int32 b32, char *file, int line){ + int fail = 0; + fail |= b32 == 0; + fail |= b32 > silk_int16_MAX; + fail |= b32 < silk_int16_MIN; + if ( fail ) + { + fprintf (stderr, "silk_DIV32_16(%d, %d) in %s: line %d\n", a32, b32, file, line); +#ifdef FIXED_DEBUG_ASSERT + silk_assert( 0 ); +#endif + } + return a32 / b32; +} + +/* no checking needed for silk_SAT8 + no checking needed for silk_SAT16 + no checking needed for silk_SAT32 + no checking needed for silk_POS_SAT32 + no checking needed for silk_ADD_POS_SAT8 + no checking needed for silk_ADD_POS_SAT16 + no checking needed for silk_ADD_POS_SAT32 + no checking needed for silk_ADD_POS_SAT64 */ + +#undef silk_LSHIFT8 +#define silk_LSHIFT8(a,b) silk_LSHIFT8_((a), (b), __FILE__, __LINE__) +static OPUS_INLINE opus_int8 silk_LSHIFT8_(opus_int8 a, opus_int32 shift, char *file, int line){ + opus_int8 ret; + int fail = 0; + ret = a << shift; + fail |= shift < 0; + fail |= shift >= 8; + fail |= (opus_int64)ret != ((opus_int64)a) << shift; + if ( fail ) + { + fprintf (stderr, "silk_LSHIFT8(%d, %d) in %s: line %d\n", a, shift, file, line); +#ifdef FIXED_DEBUG_ASSERT + silk_assert( 0 ); +#endif + } + return ret; +} + +#undef silk_LSHIFT16 +#define silk_LSHIFT16(a,b) silk_LSHIFT16_((a), (b), __FILE__, __LINE__) +static OPUS_INLINE opus_int16 silk_LSHIFT16_(opus_int16 a, opus_int32 shift, char *file, int line){ + opus_int16 ret; + int fail = 0; + ret = a << shift; + fail |= shift < 0; + fail |= shift >= 16; + fail |= (opus_int64)ret != ((opus_int64)a) << 
shift; + if ( fail ) + { + fprintf (stderr, "silk_LSHIFT16(%d, %d) in %s: line %d\n", a, shift, file, line); +#ifdef FIXED_DEBUG_ASSERT + silk_assert( 0 ); +#endif + } + return ret; +} + +#undef silk_LSHIFT32 +#define silk_LSHIFT32(a,b) silk_LSHIFT32_((a), (b), __FILE__, __LINE__) +static OPUS_INLINE opus_int32 silk_LSHIFT32_(opus_int32 a, opus_int32 shift, char *file, int line){ + opus_int32 ret; + int fail = 0; + ret = a << shift; + fail |= shift < 0; + fail |= shift >= 32; + fail |= (opus_int64)ret != ((opus_int64)a) << shift; + if ( fail ) + { + fprintf (stderr, "silk_LSHIFT32(%d, %d) in %s: line %d\n", a, shift, file, line); +#ifdef FIXED_DEBUG_ASSERT + silk_assert( 0 ); +#endif + } + return ret; +} + +#undef silk_LSHIFT64 +#define silk_LSHIFT64(a,b) silk_LSHIFT64_((a), (b), __FILE__, __LINE__) +static OPUS_INLINE opus_int64 silk_LSHIFT64_(opus_int64 a, opus_int shift, char *file, int line){ + opus_int64 ret; + int fail = 0; + ret = a << shift; + fail |= shift < 0; + fail |= shift >= 64; + fail |= (ret>>shift) != ((opus_int64)a); + if ( fail ) + { + fprintf (stderr, "silk_LSHIFT64(%lld, %d) in %s: line %d\n", (long long)a, shift, file, line); +#ifdef FIXED_DEBUG_ASSERT + silk_assert( 0 ); +#endif + } + return ret; +} + +#undef silk_LSHIFT_ovflw +#define silk_LSHIFT_ovflw(a,b) silk_LSHIFT_ovflw_((a), (b), __FILE__, __LINE__) +static OPUS_INLINE opus_int32 silk_LSHIFT_ovflw_(opus_int32 a, opus_int32 shift, char *file, int line){ + if ( (shift < 0) || (shift >= 32) ) /* no check for overflow */ + { + fprintf (stderr, "silk_LSHIFT_ovflw(%d, %d) in %s: line %d\n", a, shift, file, line); +#ifdef FIXED_DEBUG_ASSERT + silk_assert( 0 ); +#endif + } + return a << shift; +} + +#undef silk_LSHIFT_uint +#define silk_LSHIFT_uint(a,b) silk_LSHIFT_uint_((a), (b), __FILE__, __LINE__) +static OPUS_INLINE opus_uint32 silk_LSHIFT_uint_(opus_uint32 a, opus_int32 shift, char *file, int line){ + opus_uint32 ret; + ret = a << shift; + if ( (shift < 0) || ((opus_int64)ret != 
((opus_int64)a) << shift)) + { + fprintf (stderr, "silk_LSHIFT_uint(%u, %d) in %s: line %d\n", a, shift, file, line); +#ifdef FIXED_DEBUG_ASSERT + silk_assert( 0 ); +#endif + } + return ret; +} + +#undef silk_RSHIFT8 +#define silk_RSHIFT8(a,b) silk_RSHIFT8_((a), (b), __FILE__, __LINE__) +static OPUS_INLINE opus_int8 silk_RSHIFT8_(opus_int8 a, opus_int32 shift, char *file, int line){ /* checked 8-bit right shift: logs to stderr when shift is outside [0, 7], then shifts anyway */ + if ( (shift < 0) || (shift>=8) ) + { + fprintf (stderr, "silk_RSHIFT8(%d, %d) in %s: line %d\n", a, shift, file, line); +#ifdef FIXED_DEBUG_ASSERT + silk_assert( 0 ); +#endif + } + return a >> shift; +} + +#undef silk_RSHIFT16 +#define silk_RSHIFT16(a,b) silk_RSHIFT16_((a), (b), __FILE__, __LINE__) +static OPUS_INLINE opus_int16 silk_RSHIFT16_(opus_int16 a, opus_int32 shift, char *file, int line){ /* checked 16-bit right shift: logs to stderr when shift is outside [0, 15], then shifts anyway */ + if ( (shift < 0) || (shift>=16) ) + { + fprintf (stderr, "silk_RSHIFT16(%d, %d) in %s: line %d\n", a, shift, file, line); +#ifdef FIXED_DEBUG_ASSERT + silk_assert( 0 ); +#endif + } + return a >> shift; +} + +#undef silk_RSHIFT32 +#define silk_RSHIFT32(a,b) silk_RSHIFT32_((a), (b), __FILE__, __LINE__) +static OPUS_INLINE opus_int32 silk_RSHIFT32_(opus_int32 a, opus_int32 shift, char *file, int line){ /* checked 32-bit right shift: logs to stderr when shift is outside [0, 31], then shifts anyway */ + if ( (shift < 0) || (shift>=32) ) + { + fprintf (stderr, "silk_RSHIFT32(%d, %d) in %s: line %d\n", a, shift, file, line); +#ifdef FIXED_DEBUG_ASSERT + silk_assert( 0 ); +#endif + } + return a >> shift; +} + +#undef silk_RSHIFT64 +#define silk_RSHIFT64(a,b) silk_RSHIFT64_((a), (b), __FILE__, __LINE__) +static OPUS_INLINE opus_int64 silk_RSHIFT64_(opus_int64 a, opus_int64 shift, char *file, int line){ /* checked 64-bit right shift: logs to stderr when shift is outside [0, 63], then shifts anyway */ + if ( (shift < 0) || (shift>=64) ) + { + fprintf (stderr, "silk_RSHIFT64(%lld, %lld) in %s: line %d\n", (long long)a, (long long)shift, file, line); +#ifdef FIXED_DEBUG_ASSERT + silk_assert( 0 ); +#endif + } + return a >> shift; +} + +#undef silk_RSHIFT_uint +#define silk_RSHIFT_uint(a,b) silk_RSHIFT_uint_((a), (b), __FILE__, __LINE__) +static OPUS_INLINE opus_uint32 silk_RSHIFT_uint_(opus_uint32 
a, opus_int32 shift, char *file, int line){ + if ( (shift < 0) || (shift>32) ) + { + fprintf (stderr, "silk_RSHIFT_uint(%u, %d) in %s: line %d\n", a, shift, file, line); +#ifdef FIXED_DEBUG_ASSERT + silk_assert( 0 ); +#endif + } + return a >> shift; +} + +#undef silk_ADD_LSHIFT +#define silk_ADD_LSHIFT(a,b,c) silk_ADD_LSHIFT_((a), (b), (c), __FILE__, __LINE__) +static OPUS_INLINE int silk_ADD_LSHIFT_(int a, int b, int shift, char *file, int line){ + opus_int16 ret; + ret = a + (b << shift); + if ( (shift < 0) || (shift>15) || ((opus_int64)ret != (opus_int64)a + (((opus_int64)b) << shift)) ) + { + fprintf (stderr, "silk_ADD_LSHIFT(%d, %d, %d) in %s: line %d\n", a, b, shift, file, line); +#ifdef FIXED_DEBUG_ASSERT + silk_assert( 0 ); +#endif + } + return ret; /* shift >= 0 */ +} + +#undef silk_ADD_LSHIFT32 +#define silk_ADD_LSHIFT32(a,b,c) silk_ADD_LSHIFT32_((a), (b), (c), __FILE__, __LINE__) +static OPUS_INLINE opus_int32 silk_ADD_LSHIFT32_(opus_int32 a, opus_int32 b, opus_int32 shift, char *file, int line){ + opus_int32 ret; + ret = a + (b << shift); + if ( (shift < 0) || (shift>31) || ((opus_int64)ret != (opus_int64)a + (((opus_int64)b) << shift)) ) + { + fprintf (stderr, "silk_ADD_LSHIFT32(%d, %d, %d) in %s: line %d\n", a, b, shift, file, line); +#ifdef FIXED_DEBUG_ASSERT + silk_assert( 0 ); +#endif + } + return ret; /* shift >= 0 */ +} + +#undef silk_ADD_LSHIFT_uint +#define silk_ADD_LSHIFT_uint(a,b,c) silk_ADD_LSHIFT_uint_((a), (b), (c), __FILE__, __LINE__) +static OPUS_INLINE opus_uint32 silk_ADD_LSHIFT_uint_(opus_uint32 a, opus_uint32 b, opus_int32 shift, char *file, int line){ + opus_uint32 ret; + ret = a + (b << shift); + if ( (shift < 0) || (shift>32) || ((opus_int64)ret != (opus_int64)a + (((opus_int64)b) << shift)) ) + { + fprintf (stderr, "silk_ADD_LSHIFT_uint(%u, %u, %d) in %s: line %d\n", a, b, shift, file, line); +#ifdef FIXED_DEBUG_ASSERT + silk_assert( 0 ); +#endif + } + return ret; /* shift >= 0 */ +} + +#undef silk_ADD_RSHIFT +#define 
silk_ADD_RSHIFT(a,b,c) silk_ADD_RSHIFT_((a), (b), (c), __FILE__, __LINE__) +static OPUS_INLINE int silk_ADD_RSHIFT_(int a, int b, int shift, char *file, int line){ + opus_int16 ret; + ret = a + (b >> shift); + if ( (shift < 0) || (shift>15) || ((opus_int64)ret != (opus_int64)a + (((opus_int64)b) >> shift)) ) + { + fprintf (stderr, "silk_ADD_RSHIFT(%d, %d, %d) in %s: line %d\n", a, b, shift, file, line); +#ifdef FIXED_DEBUG_ASSERT + silk_assert( 0 ); +#endif + } + return ret; /* shift > 0 */ +} + +#undef silk_ADD_RSHIFT32 +#define silk_ADD_RSHIFT32(a,b,c) silk_ADD_RSHIFT32_((a), (b), (c), __FILE__, __LINE__) +static OPUS_INLINE opus_int32 silk_ADD_RSHIFT32_(opus_int32 a, opus_int32 b, opus_int32 shift, char *file, int line){ + opus_int32 ret; + ret = a + (b >> shift); + if ( (shift < 0) || (shift>31) || ((opus_int64)ret != (opus_int64)a + (((opus_int64)b) >> shift)) ) + { + fprintf (stderr, "silk_ADD_RSHIFT32(%d, %d, %d) in %s: line %d\n", a, b, shift, file, line); +#ifdef FIXED_DEBUG_ASSERT + silk_assert( 0 ); +#endif + } + return ret; /* shift > 0 */ +} + +#undef silk_ADD_RSHIFT_uint +#define silk_ADD_RSHIFT_uint(a,b,c) silk_ADD_RSHIFT_uint_((a), (b), (c), __FILE__, __LINE__) +static OPUS_INLINE opus_uint32 silk_ADD_RSHIFT_uint_(opus_uint32 a, opus_uint32 b, opus_int32 shift, char *file, int line){ + opus_uint32 ret; + ret = a + (b >> shift); + if ( (shift < 0) || (shift>32) || ((opus_int64)ret != (opus_int64)a + (((opus_int64)b) >> shift)) ) + { + fprintf (stderr, "silk_ADD_RSHIFT_uint(%u, %u, %d) in %s: line %d\n", a, b, shift, file, line); +#ifdef FIXED_DEBUG_ASSERT + silk_assert( 0 ); +#endif + } + return ret; /* shift > 0 */ +} + +#undef silk_SUB_LSHIFT32 +#define silk_SUB_LSHIFT32(a,b,c) silk_SUB_LSHIFT32_((a), (b), (c), __FILE__, __LINE__) +static OPUS_INLINE opus_int32 silk_SUB_LSHIFT32_(opus_int32 a, opus_int32 b, opus_int32 shift, char *file, int line){ + opus_int32 ret; + ret = a - (b << shift); + if ( (shift < 0) || (shift>31) || ((opus_int64)ret != 
(opus_int64)a - (((opus_int64)b) << shift)) ) + { + fprintf (stderr, "silk_SUB_LSHIFT32(%d, %d, %d) in %s: line %d\n", a, b, shift, file, line); +#ifdef FIXED_DEBUG_ASSERT + silk_assert( 0 ); +#endif + } + return ret; /* shift >= 0 */ +} + +#undef silk_SUB_RSHIFT32 +#define silk_SUB_RSHIFT32(a,b,c) silk_SUB_RSHIFT32_((a), (b), (c), __FILE__, __LINE__) +static OPUS_INLINE opus_int32 silk_SUB_RSHIFT32_(opus_int32 a, opus_int32 b, opus_int32 shift, char *file, int line){ + opus_int32 ret; + ret = a - (b >> shift); + if ( (shift < 0) || (shift>31) || ((opus_int64)ret != (opus_int64)a - (((opus_int64)b) >> shift)) ) /* the 64-bit recomputation detects 32-bit overflow */ + { + fprintf (stderr, "silk_SUB_RSHIFT32(%d, %d, %d) in %s: line %d\n", a, b, shift, file, line); +#ifdef FIXED_DEBUG_ASSERT + silk_assert( 0 ); +#endif + } + return ret; /* shift > 0 */ +} + +#undef silk_RSHIFT_ROUND +#define silk_RSHIFT_ROUND(a,b) silk_RSHIFT_ROUND_((a), (b), __FILE__, __LINE__) +static OPUS_INLINE opus_int32 silk_RSHIFT_ROUND_(opus_int32 a, opus_int32 shift, char *file, int line){ + opus_int32 ret; + ret = shift == 1 ? (a >> 1) + (a & 1) : ((a >> (shift - 1)) + 1) >> 1; /* rounding right shift; shift == 1 is special-cased */ + /* the macro definition can't handle a shift of zero (the general branch would shift by shift - 1 == -1) */ + if ( (shift <= 0) || (shift>31) || ((opus_int64)ret != ((opus_int64)a + ((opus_int64)1 << (shift - 1))) >> shift) ) /* result is compared against a 64-bit reference computation */ + { + fprintf (stderr, "silk_RSHIFT_ROUND(%d, %d) in %s: line %d\n", a, shift, file, line); +#ifdef FIXED_DEBUG_ASSERT + silk_assert( 0 ); +#endif + } + return ret; +} + +#undef silk_RSHIFT_ROUND64 +#define silk_RSHIFT_ROUND64(a,b) silk_RSHIFT_ROUND64_((a), (b), __FILE__, __LINE__) +static OPUS_INLINE opus_int64 silk_RSHIFT_ROUND64_(opus_int64 a, opus_int32 shift, char *file, int line){ + opus_int64 ret; + /* the macro definition can't handle a shift of zero (the general branch would shift by shift - 1 == -1) */ + if ( (shift <= 0) || (shift>=64) ) /* only the shift range is validated here; no wider reference type is used */ + { + fprintf (stderr, "silk_RSHIFT_ROUND64(%lld, %d) in %s: line %d\n", (long long)a, shift, file, line); +#ifdef FIXED_DEBUG_ASSERT + silk_assert( 0 ); +#endif + } + ret = shift == 1 ? 
(a >> 1) + (a & 1) : ((a >> (shift - 1)) + 1) >> 1; + return ret; +} + +/* silk_abs is used on floats also, so doesn't work... */ +/*#undef silk_abs +static OPUS_INLINE opus_int32 silk_abs(opus_int32 a){ + silk_assert(a != 0x80000000); + return (((a) > 0) ? (a) : -(a)); // Be careful, silk_abs returns wrong when input equals to silk_intXX_MIN +}*/ + +#undef silk_abs_int64 +#define silk_abs_int64(a) silk_abs_int64_((a), __FILE__, __LINE__) +static OPUS_INLINE opus_int64 silk_abs_int64_(opus_int64 a, char *file, int line){ + if ( a == silk_int64_MIN ) + { + fprintf (stderr, "silk_abs_int64(%lld) in %s: line %d\n", (long long)a, file, line); +#ifdef FIXED_DEBUG_ASSERT + silk_assert( 0 ); +#endif + } + return (((a) > 0) ? (a) : -(a)); /* Be careful, silk_abs returns wrong when input equals to silk_intXX_MIN */ +} + +#undef silk_abs_int32 +#define silk_abs_int32(a) silk_abs_int32_((a), __FILE__, __LINE__) +static OPUS_INLINE opus_int32 silk_abs_int32_(opus_int32 a, char *file, int line){ + if ( a == silk_int32_MIN ) + { + fprintf (stderr, "silk_abs_int32(%d) in %s: line %d\n", a, file, line); +#ifdef FIXED_DEBUG_ASSERT + silk_assert( 0 ); +#endif + } + return silk_abs(a); +} + +#undef silk_CHECK_FIT8 +#define silk_CHECK_FIT8(a) silk_CHECK_FIT8_((a), __FILE__, __LINE__) +static OPUS_INLINE opus_int8 silk_CHECK_FIT8_( opus_int64 a, char *file, int line ){ + opus_int8 ret; + ret = (opus_int8)a; + if ( (opus_int64)ret != a ) + { + fprintf (stderr, "silk_CHECK_FIT8(%lld) in %s: line %d\n", (long long)a, file, line); +#ifdef FIXED_DEBUG_ASSERT + silk_assert( 0 ); +#endif + } + return( ret ); +} + +#undef silk_CHECK_FIT16 +#define silk_CHECK_FIT16(a) silk_CHECK_FIT16_((a), __FILE__, __LINE__) +static OPUS_INLINE opus_int16 silk_CHECK_FIT16_( opus_int64 a, char *file, int line ){ + opus_int16 ret; + ret = (opus_int16)a; + if ( (opus_int64)ret != a ) + { + fprintf (stderr, "silk_CHECK_FIT16(%lld) in %s: line %d\n", (long long)a, file, line); +#ifdef FIXED_DEBUG_ASSERT + 
silk_assert( 0 ); +#endif + } + return( ret ); +} + +#undef silk_CHECK_FIT32 +#define silk_CHECK_FIT32(a) silk_CHECK_FIT32_((a), __FILE__, __LINE__) +static OPUS_INLINE opus_int32 silk_CHECK_FIT32_( opus_int64 a, char *file, int line ){ + opus_int32 ret; + ret = (opus_int32)a; + if ( (opus_int64)ret != a ) + { + fprintf (stderr, "silk_CHECK_FIT32(%lld) in %s: line %d\n", (long long)a, file, line); +#ifdef FIXED_DEBUG_ASSERT + silk_assert( 0 ); +#endif + } + return( ret ); +} + +/* no checking for silk_NSHIFT_MUL_32_32 + no checking for silk_NSHIFT_MUL_16_16 + no checking needed for silk_min + no checking needed for silk_max + no checking needed for silk_sign +*/ + +#endif +#endif /* MACRO_DEBUG_H */ diff --git a/drivers/opus/silk/NLSF2A.c b/drivers/opus/silk/NLSF2A.c new file mode 100644 index 0000000000..2b6f685f49 --- /dev/null +++ b/drivers/opus/silk/NLSF2A.c @@ -0,0 +1,178 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +/* conversion between prediction filter coefficients and LSFs */ +/* order should be even */ +/* a piecewise linear approximation maps LSF <-> cos(LSF) */ +/* therefore the result is not accurate LSFs, but the two */ +/* functions are accurate inverses of each other */ + +#include "SigProc_FIX.h" +#include "tables.h" + +#define QA 16 + +/* helper function for NLSF2A(..) 
*/ +static OPUS_INLINE void silk_NLSF2A_find_poly( + opus_int32 *out, /* O intermediate polynomial, QA [dd+1] */ + const opus_int32 *cLSF, /* I vector of interleaved 2*cos(LSFs), QA [d] */ + opus_int dd /* I polynomial order (= 1/2 * filter order) */ +) +{ + opus_int k, n; + opus_int32 ftmp; + + out[0] = silk_LSHIFT( 1, QA ); + out[1] = -cLSF[0]; + for( k = 1; k < dd; k++ ) { + ftmp = cLSF[2*k]; /* QA*/ + out[k+1] = silk_LSHIFT( out[k-1], 1 ) - (opus_int32)silk_RSHIFT_ROUND64( silk_SMULL( ftmp, out[k] ), QA ); + for( n = k; n > 1; n-- ) { + out[n] += out[n-2] - (opus_int32)silk_RSHIFT_ROUND64( silk_SMULL( ftmp, out[n-1] ), QA ); + } + out[1] -= ftmp; + } +} + +/* compute whitening filter coefficients from normalized line spectral frequencies */ +void silk_NLSF2A( + opus_int16 *a_Q12, /* O monic whitening filter coefficients in Q12, [ d ] */ + const opus_int16 *NLSF, /* I normalized line spectral frequencies in Q15, [ d ] */ + const opus_int d /* I filter order (should be even) */ +) +{ + /* This ordering was found to maximize quality. It improves numerical accuracy of + silk_NLSF2A_find_poly() compared to "standard" ordering. */ + static const unsigned char ordering16[16] = { + 0, 15, 8, 7, 4, 11, 12, 3, 2, 13, 10, 5, 6, 9, 14, 1 + }; + static const unsigned char ordering10[10] = { + 0, 9, 6, 3, 4, 5, 8, 1, 2, 7 + }; + const unsigned char *ordering; + opus_int k, i, dd; + opus_int32 cos_LSF_QA[ SILK_MAX_ORDER_LPC ]; + opus_int32 P[ SILK_MAX_ORDER_LPC / 2 + 1 ], Q[ SILK_MAX_ORDER_LPC / 2 + 1 ]; + opus_int32 Ptmp, Qtmp, f_int, f_frac, cos_val, delta; + opus_int32 a32_QA1[ SILK_MAX_ORDER_LPC ]; + opus_int32 maxabs, absval, idx=0, sc_Q16; + + silk_assert( LSF_COS_TAB_SZ_FIX == 128 ); + silk_assert( d==10||d==16 ); + + /* convert LSFs to 2*cos(LSF), using piecewise linear curve from table */ + ordering = d == 16 ? 
ordering16 : ordering10; + for( k = 0; k < d; k++ ) { + silk_assert(NLSF[k] >= 0 ); + + /* f_int on a scale 0-127 (rounded down) */ + f_int = silk_RSHIFT( NLSF[k], 15 - 7 ); + + /* f_frac, range: 0..255 */ + f_frac = NLSF[k] - silk_LSHIFT( f_int, 15 - 7 ); + + silk_assert(f_int >= 0); + silk_assert(f_int < LSF_COS_TAB_SZ_FIX ); + + /* Read start and end value from table */ + cos_val = silk_LSFCosTab_FIX_Q12[ f_int ]; /* Q12 */ + delta = silk_LSFCosTab_FIX_Q12[ f_int + 1 ] - cos_val; /* Q12, with a range of 0..200 */ + + /* Linear interpolation */ + cos_LSF_QA[ordering[k]] = silk_RSHIFT_ROUND( silk_LSHIFT( cos_val, 8 ) + silk_MUL( delta, f_frac ), 20 - QA ); /* QA */ + } + + dd = silk_RSHIFT( d, 1 ); + + /* generate even and odd polynomials using convolution */ + silk_NLSF2A_find_poly( P, &cos_LSF_QA[ 0 ], dd ); + silk_NLSF2A_find_poly( Q, &cos_LSF_QA[ 1 ], dd ); + + /* convert even and odd polynomials to opus_int32 Q12 filter coefs */ + for( k = 0; k < dd; k++ ) { + Ptmp = P[ k+1 ] + P[ k ]; + Qtmp = Q[ k+1 ] - Q[ k ]; + + /* the Ptmp and Qtmp values at this stage need to fit in int32 */ + a32_QA1[ k ] = -Qtmp - Ptmp; /* QA+1 */ + a32_QA1[ d-k-1 ] = Qtmp - Ptmp; /* QA+1 */ + } + + /* Limit the maximum absolute value of the prediction coefficients, so that they'll fit in int16 */ + for( i = 0; i < 10; i++ ) { + /* Find maximum absolute value and its index */ + maxabs = 0; + for( k = 0; k < d; k++ ) { + absval = silk_abs( a32_QA1[k] ); + if( absval > maxabs ) { + maxabs = absval; + idx = k; + } + } + maxabs = silk_RSHIFT_ROUND( maxabs, QA + 1 - 12 ); /* QA+1 -> Q12 */ + + if( maxabs > silk_int16_MAX ) { + /* Reduce magnitude of prediction coefficients */ + maxabs = silk_min( maxabs, 163838 ); /* ( silk_int32_MAX >> 14 ) + silk_int16_MAX = 163838 */ + sc_Q16 = SILK_FIX_CONST( 0.999, 16 ) - silk_DIV32( silk_LSHIFT( maxabs - silk_int16_MAX, 14 ), + silk_RSHIFT32( silk_MUL( maxabs, idx + 1), 2 ) ); + silk_bwexpander_32( a32_QA1, d, sc_Q16 ); + } else { + break; + } + } + 
+ if( i == 10 ) { + /* Reached the last iteration, clip the coefficients */ + for( k = 0; k < d; k++ ) { + a_Q12[ k ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( a32_QA1[ k ], QA + 1 - 12 ) ); /* QA+1 -> Q12 */ + a32_QA1[ k ] = silk_LSHIFT( (opus_int32)a_Q12[ k ], QA + 1 - 12 ); + } + } else { + for( k = 0; k < d; k++ ) { + a_Q12[ k ] = (opus_int16)silk_RSHIFT_ROUND( a32_QA1[ k ], QA + 1 - 12 ); /* QA+1 -> Q12 */ + } + } + + for( i = 0; i < MAX_LPC_STABILIZE_ITERATIONS; i++ ) { + if( silk_LPC_inverse_pred_gain( a_Q12, d ) < SILK_FIX_CONST( 1.0 / MAX_PREDICTION_POWER_GAIN, 30 ) ) { + /* Prediction coefficients are (too close to) unstable; apply bandwidth expansion */ + /* on the unscaled coefficients, convert to Q12 and measure again */ + silk_bwexpander_32( a32_QA1, d, 65536 - silk_LSHIFT( 2, i ) ); + for( k = 0; k < d; k++ ) { + a_Q12[ k ] = (opus_int16)silk_RSHIFT_ROUND( a32_QA1[ k ], QA + 1 - 12 ); /* QA+1 -> Q12 */ + } + } else { + break; + } + } +} + diff --git a/drivers/opus/silk/NLSF_VQ.c b/drivers/opus/silk/NLSF_VQ.c new file mode 100644 index 0000000000..e4ca79fbfe --- /dev/null +++ b/drivers/opus/silk/NLSF_VQ.c @@ -0,0 +1,68 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. 
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "silk_main.h" + +/* Compute quantization errors for an LPC_order element input vector for a VQ codebook */ +void silk_NLSF_VQ( + opus_int32 err_Q26[], /* O Quantization errors [K] */ + const opus_int16 in_Q15[], /* I Input vectors to be quantized [LPC_order] */ + const opus_uint8 pCB_Q8[], /* I Codebook vectors [K*LPC_order] */ + const opus_int K, /* I Number of codebook vectors */ + const opus_int LPC_order /* I Number of LPCs */ +) +{ + opus_int i, m; + opus_int32 diff_Q15, sum_error_Q30, sum_error_Q26; + + silk_assert( LPC_order <= 16 ); + silk_assert( ( LPC_order & 1 ) == 0 ); + + /* Loop over codebook */ + for( i = 0; i < K; i++ ) { + sum_error_Q26 = 0; + for( m = 0; m < LPC_order; m += 2 ) { + /* Compute weighted squared quantization error for index m */ + diff_Q15 = silk_SUB_LSHIFT32( in_Q15[ m ], (opus_int32)*pCB_Q8++, 7 ); /* range: [ -32767 : 32767 ]*/ + sum_error_Q30 = silk_SMULBB( diff_Q15, diff_Q15 ); + + /* Compute weighted squared quantization error for index m + 1 */ + diff_Q15 = silk_SUB_LSHIFT32( in_Q15[m + 1], (opus_int32)*pCB_Q8++, 7 ); /* range: [ 
-32767 : 32767 ]*/ + sum_error_Q30 = silk_SMLABB( sum_error_Q30, diff_Q15, diff_Q15 ); + + sum_error_Q26 = silk_ADD_RSHIFT32( sum_error_Q26, sum_error_Q30, 4 ); + + silk_assert( sum_error_Q26 >= 0 ); + silk_assert( sum_error_Q30 >= 0 ); + } + err_Q26[ i ] = sum_error_Q26; + } +} diff --git a/drivers/opus/silk/NLSF_VQ_weights_laroia.c b/drivers/opus/silk/NLSF_VQ_weights_laroia.c new file mode 100644 index 0000000000..f461ba01c0 --- /dev/null +++ b/drivers/opus/silk/NLSF_VQ_weights_laroia.c @@ -0,0 +1,80 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "define.h" +#include "SigProc_FIX.h" + +/* +R. Laroia, N. Phamdo and N. Farvardin, "Robust and Efficient Quantization of Speech LSP +Parameters Using Structured Vector Quantization", Proc. IEEE Int. Conf. Acoust., Speech, +Signal Processing, pp. 641-644, 1991. +*/ + +/* Laroia low complexity NLSF weights */ +void silk_NLSF_VQ_weights_laroia( + opus_int16 *pNLSFW_Q_OUT, /* O Pointer to input vector weights [D] */ + const opus_int16 *pNLSF_Q15, /* I Pointer to input vector [D] */ + const opus_int D /* I Input vector dimension (even) */ +) +{ + opus_int k; + opus_int32 tmp1_int, tmp2_int; + + silk_assert( D > 0 ); + silk_assert( ( D & 1 ) == 0 ); + + /* First value */ + tmp1_int = silk_max_int( pNLSF_Q15[ 0 ], 1 ); + tmp1_int = silk_DIV32_16( (opus_int32)1 << ( 15 + NLSF_W_Q ), tmp1_int ); + tmp2_int = silk_max_int( pNLSF_Q15[ 1 ] - pNLSF_Q15[ 0 ], 1 ); + tmp2_int = silk_DIV32_16( (opus_int32)1 << ( 15 + NLSF_W_Q ), tmp2_int ); + pNLSFW_Q_OUT[ 0 ] = (opus_int16)silk_min_int( tmp1_int + tmp2_int, silk_int16_MAX ); + silk_assert( pNLSFW_Q_OUT[ 0 ] > 0 ); + + /* Main loop */ + for( k = 1; k < D - 1; k += 2 ) { + tmp1_int = silk_max_int( pNLSF_Q15[ k + 1 ] - pNLSF_Q15[ k ], 1 ); + tmp1_int = silk_DIV32_16( (opus_int32)1 << ( 15 + NLSF_W_Q ), tmp1_int ); + pNLSFW_Q_OUT[ k ] = (opus_int16)silk_min_int( tmp1_int + 
tmp2_int, silk_int16_MAX ); + silk_assert( pNLSFW_Q_OUT[ k ] > 0 ); + + tmp2_int = silk_max_int( pNLSF_Q15[ k + 2 ] - pNLSF_Q15[ k + 1 ], 1 ); + tmp2_int = silk_DIV32_16( (opus_int32)1 << ( 15 + NLSF_W_Q ), tmp2_int ); + pNLSFW_Q_OUT[ k + 1 ] = (opus_int16)silk_min_int( tmp1_int + tmp2_int, silk_int16_MAX ); + silk_assert( pNLSFW_Q_OUT[ k + 1 ] > 0 ); + } + + /* Last value */ + tmp1_int = silk_max_int( ( 1 << 15 ) - pNLSF_Q15[ D - 1 ], 1 ); + tmp1_int = silk_DIV32_16( (opus_int32)1 << ( 15 + NLSF_W_Q ), tmp1_int ); + pNLSFW_Q_OUT[ D - 1 ] = (opus_int16)silk_min_int( tmp1_int + tmp2_int, silk_int16_MAX ); + silk_assert( pNLSFW_Q_OUT[ D - 1 ] > 0 ); +} diff --git a/drivers/opus/silk/NLSF_decode.c b/drivers/opus/silk/NLSF_decode.c new file mode 100644 index 0000000000..786a62d278 --- /dev/null +++ b/drivers/opus/silk/NLSF_decode.c @@ -0,0 +1,101 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "silk_main.h" + +/* Predictive dequantizer for NLSF residuals */ +static OPUS_INLINE void silk_NLSF_residual_dequant( /* No return value; the result is written to x_Q10 */ + opus_int16 x_Q10[], /* O Output [ order ] */ + const opus_int8 indices[], /* I Quantization indices [ order ] */ + const opus_uint8 pred_coef_Q8[], /* I Backward predictor coefs [ order ] */ + const opus_int quant_step_size_Q16, /* I Quantization step size */ + const opus_int16 order /* I Number of input values */ +) +{ + opus_int i, out_Q10, pred_Q10; + + out_Q10 = 0; + for( i = order-1; i >= 0; i-- ) { /* runs from the last element to the first: each prediction uses the output of the previous iteration (index i+1) */ + pred_Q10 = silk_RSHIFT( silk_SMULBB( out_Q10, (opus_int16)pred_coef_Q8[ i ] ), 8 ); /* out_Q10 still holds the previous iteration's output here (0 on the first pass) */ + out_Q10 = silk_LSHIFT( indices[ i ], 10 ); + if( out_Q10 > 0 ) { /* move non-zero levels towards zero by NLSF_QUANT_LEVEL_ADJ; zero is left untouched */ + out_Q10 = silk_SUB16( out_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) ); + } else if( out_Q10 < 0 ) { + out_Q10 = silk_ADD16( out_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) ); + } + out_Q10 = silk_SMLAWB( pred_Q10, (opus_int32)out_Q10, quant_step_size_Q16 ); /* prediction plus the adjusted level scaled by the step size */ + x_Q10[ i ] = out_Q10; + } +} + + +/***********************/ +/* NLSF vector decoder */ +/***********************/ +void silk_NLSF_decode( + opus_int16 *pNLSF_Q15, /* O Quantized NLSF vector [ LPC_ORDER ] */ + opus_int8 *NLSFIndices, /* I Codebook path vector [ LPC_ORDER + 1 ] */ + const silk_NLSF_CB_struct *psNLSF_CB /* I Codebook object */ 
+) +{ + opus_int i; + opus_uint8 pred_Q8[ MAX_LPC_ORDER ]; + opus_int16 ec_ix[ MAX_LPC_ORDER ]; + opus_int16 res_Q10[ MAX_LPC_ORDER ]; + opus_int16 W_tmp_QW[ MAX_LPC_ORDER ]; + opus_int32 W_tmp_Q9, NLSF_Q15_tmp; + const opus_uint8 *pCB_element; + + /* Decode first stage */ + pCB_element = &psNLSF_CB->CB1_NLSF_Q8[ NLSFIndices[ 0 ] * psNLSF_CB->order ]; + for( i = 0; i < psNLSF_CB->order; i++ ) { + pNLSF_Q15[ i ] = silk_LSHIFT( (opus_int16)pCB_element[ i ], 7 ); + } + + /* Unpack entropy table indices and predictor for current CB1 index */ + silk_NLSF_unpack( ec_ix, pred_Q8, psNLSF_CB, NLSFIndices[ 0 ] ); + + /* Predictive residual dequantizer */ + silk_NLSF_residual_dequant( res_Q10, &NLSFIndices[ 1 ], pred_Q8, psNLSF_CB->quantStepSize_Q16, psNLSF_CB->order ); + + /* Weights from codebook vector */ + silk_NLSF_VQ_weights_laroia( W_tmp_QW, pNLSF_Q15, psNLSF_CB->order ); + + /* Apply inverse square-rooted weights and add to output */ + for( i = 0; i < psNLSF_CB->order; i++ ) { + W_tmp_Q9 = silk_SQRT_APPROX( silk_LSHIFT( (opus_int32)W_tmp_QW[ i ], 18 - NLSF_W_Q ) ); + NLSF_Q15_tmp = silk_ADD32( pNLSF_Q15[ i ], silk_DIV32_16( silk_LSHIFT( (opus_int32)res_Q10[ i ], 14 ), W_tmp_Q9 ) ); + pNLSF_Q15[ i ] = (opus_int16)silk_LIMIT( NLSF_Q15_tmp, 0, 32767 ); + } + + /* NLSF stabilization */ + silk_NLSF_stabilize( pNLSF_Q15, psNLSF_CB->deltaMin_Q15, psNLSF_CB->order ); +} diff --git a/drivers/opus/silk/NLSF_del_dec_quant.c b/drivers/opus/silk/NLSF_del_dec_quant.c new file mode 100644 index 0000000000..b74585370c --- /dev/null +++ b/drivers/opus/silk/NLSF_del_dec_quant.c @@ -0,0 +1,207 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. 
+Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "silk_main.h"
+
+/* Delayed-decision quantizer for NLSF residuals.
+ *
+ * Works backwards from index order-1 to 0. For every active state j the
+ * input minus the prediction from prev_out_Q10[ j ] is quantized to ind_tmp,
+ * and both ind_tmp and ind_tmp + 1 are kept as candidates (slots j and
+ * j + nStates), each with its own reconstructed output and RD value.
+ * The number of states is doubled per coefficient until it reaches
+ * NLSF_QUANT_DEL_DEC_STATES; from then on the
+ * 2 * NLSF_QUANT_DEL_DEC_STATES candidates are pruned back to
+ * NLSF_QUANT_DEL_DEC_STATES after every coefficient except the last one
+ * (i == 0). Finally the candidate with the smallest RD value is copied to
+ * indices[] and that RD value is returned.
+ */
+opus_int32 silk_NLSF_del_dec_quant(                     /* O    Returns RD value in Q25                     */
+          opus_int8     indices[],                      /* O    Quantization indices [ order ]              */
+    const opus_int16    x_Q10[],                        /* I    Input [ order ]                             */
+    const opus_int16    w_Q5[],                         /* I    Weights [ order ]                           */
+    const opus_uint8    pred_coef_Q8[],                 /* I    Backward predictor coefs [ order ]          */
+    const opus_int16    ec_ix[],                        /* I    Indices to entropy coding tables [ order ]  */
+    const opus_uint8    ec_rates_Q5[],                  /* I    Rates []                                    */
+    const opus_int      quant_step_size_Q16,            /* I    Quantization step size                      */
+    const opus_int16    inv_quant_step_size_Q6,         /* I    Inverse quantization step size              */
+    const opus_int32    mu_Q20,                         /* I    R/D tradeoff                                */
+    const opus_int16    order                           /* I    Number of input values                      */
+)
+{
+    opus_int         i, j, nStates, ind_tmp, ind_min_max, ind_max_min, in_Q10, res_Q10;
+    opus_int         pred_Q10, diff_Q10, out0_Q10, out1_Q10, rate0_Q5, rate1_Q5;
+    opus_int32       RD_tmp_Q25, min_Q25, min_max_Q25, max_min_Q25, pred_coef_Q16;
+    opus_int         ind_sort[     NLSF_QUANT_DEL_DEC_STATES ];
+    opus_int8        ind[          NLSF_QUANT_DEL_DEC_STATES ][ MAX_LPC_ORDER ];
+    opus_int16       prev_out_Q10[ 2 * NLSF_QUANT_DEL_DEC_STATES ];
+    opus_int32       RD_Q25[       2 * NLSF_QUANT_DEL_DEC_STATES ];
+    opus_int32       RD_min_Q25[   NLSF_QUANT_DEL_DEC_STATES ];
+    opus_int32       RD_max_Q25[   NLSF_QUANT_DEL_DEC_STATES ];
+    const opus_uint8 *rates_Q5;
+
+    silk_assert( (NLSF_QUANT_DEL_DEC_STATES & (NLSF_QUANT_DEL_DEC_STATES-1)) == 0 );     /* must be power of two */
+    /* Start with a single state: zero accumulated RD, zero previous output */
+    nStates = 1;
+    RD_Q25[ 0 ] = 0;
+    prev_out_Q10[ 0 ] = 0;
+    for( i = order - 1; ; i-- ) {   /* no loop condition: the loop is left through the break in the i == 0 branch below */
+        rates_Q5 = &ec_rates_Q5[ ec_ix[ i ] ];
+        pred_coef_Q16 = silk_LSHIFT( (opus_int32)pred_coef_Q8[ i ], 8 );
+        in_Q10 = x_Q10[ i ];
+        for( j = 0; j < nStates; j++ ) {
+            pred_Q10 = silk_SMULWB( pred_coef_Q16, prev_out_Q10[ j ] );
+            res_Q10  = silk_SUB16( in_Q10, pred_Q10 );
+            ind_tmp  = silk_SMULWB( (opus_int32)inv_quant_step_size_Q6, res_Q10 );
+            ind_tmp  = silk_LIMIT( ind_tmp, -NLSF_QUANT_MAX_AMPLITUDE_EXT, NLSF_QUANT_MAX_AMPLITUDE_EXT-1 );
+            ind[ j ][ i ] = (opus_int8)ind_tmp;
+
+            /* compute outputs for ind_tmp and ind_tmp + 1 */
+            out0_Q10 = silk_LSHIFT( ind_tmp, 10 );
+            out1_Q10 = silk_ADD16( out0_Q10, 1024 );
+            if( ind_tmp > 0 ) {
+                out0_Q10 = silk_SUB16( out0_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
+                out1_Q10 = silk_SUB16( out1_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
+            } else if( ind_tmp == 0 ) {
+                out1_Q10 = silk_SUB16( out1_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
+            } else if( ind_tmp == -1 ) {
+                out0_Q10 = silk_ADD16( out0_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
+            } else {
+                out0_Q10 = silk_ADD16( out0_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
+                out1_Q10 = silk_ADD16( out1_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
+            }
+            out0_Q10  = silk_SMULWB( (opus_int32)out0_Q10, quant_step_size_Q16 );
+            out1_Q10  = silk_SMULWB( (opus_int32)out1_Q10, quant_step_size_Q16 );
+            out0_Q10  = silk_ADD16( out0_Q10, pred_Q10 );
+            out1_Q10  = silk_ADD16( out1_Q10, pred_Q10 );
+            prev_out_Q10[ j           ] = out0_Q10;
+            prev_out_Q10[ j + nStates ] = out1_Q10;
+
+            /* compute RD for ind_tmp and ind_tmp + 1 */
+            if( ind_tmp + 1 >= NLSF_QUANT_MAX_AMPLITUDE ) {
+                if( ind_tmp + 1 == NLSF_QUANT_MAX_AMPLITUDE ) {
+                    rate0_Q5 = rates_Q5[ ind_tmp + NLSF_QUANT_MAX_AMPLITUDE ];
+                    rate1_Q5 = 280;
+                } else {
+                    rate0_Q5 = silk_SMLABB( 280 - 43 * NLSF_QUANT_MAX_AMPLITUDE, 43, ind_tmp );     /* presumably 280 plus 43 per step beyond the limit - confirm against silk_SMLABB */
+                    rate1_Q5 = silk_ADD16( rate0_Q5, 43 );
+                }
+            } else if( ind_tmp <= -NLSF_QUANT_MAX_AMPLITUDE ) {
+                if( ind_tmp == -NLSF_QUANT_MAX_AMPLITUDE ) {
+                    rate0_Q5 = 280;
+                    rate1_Q5 = rates_Q5[ ind_tmp + 1 + NLSF_QUANT_MAX_AMPLITUDE ];
+                } else {
+                    rate0_Q5 = silk_SMLABB( 280 - 43 * NLSF_QUANT_MAX_AMPLITUDE, -43, ind_tmp );
+                    rate1_Q5 = silk_SUB16( rate0_Q5, 43 );
+                }
+            } else {
+                rate0_Q5 = rates_Q5[ ind_tmp +     NLSF_QUANT_MAX_AMPLITUDE ];
+                rate1_Q5 = rates_Q5[ ind_tmp + 1 + NLSF_QUANT_MAX_AMPLITUDE ];
+            }
+            RD_tmp_Q25            = RD_Q25[ j ];    /* saved because RD_Q25[ j ] is overwritten before the second candidate is computed */
+            diff_Q10              = silk_SUB16( in_Q10, out0_Q10 );
+            RD_Q25[ j ]           = silk_SMLABB( silk_MLA( RD_tmp_Q25, silk_SMULBB( diff_Q10, diff_Q10 ), w_Q5[ i ] ), mu_Q20, rate0_Q5 );
+            diff_Q10              = silk_SUB16( in_Q10, out1_Q10 );
+            RD_Q25[ j + nStates ] = silk_SMLABB( silk_MLA( RD_tmp_Q25, silk_SMULBB( diff_Q10, diff_Q10 ), w_Q5[ i ] ), mu_Q20, rate1_Q5 );
+        }
+
+        if( nStates <= ( NLSF_QUANT_DEL_DEC_STATES >> 1 ) ) {
+            /* double number of states and copy */
+            for( j = 0; j < nStates; j++ ) {
+                ind[ j + nStates ][ i ] = ind[ j ][ i ] + 1;
+            }
+            nStates = silk_LSHIFT( nStates, 1 );
+            for( j = nStates; j < NLSF_QUANT_DEL_DEC_STATES; j++ ) {
+                ind[ j ][ i ] = ind[ j - nStates ][ i ];
+            }
+        } else if( i > 0 ) {
+            /* sort lower and upper half of RD_Q25, pairwise */
+            for( j = 0; j < NLSF_QUANT_DEL_DEC_STATES; j++ ) {
+                if( RD_Q25[ j ] > RD_Q25[ j + NLSF_QUANT_DEL_DEC_STATES ] ) {
+                    RD_max_Q25[ j ]                         = RD_Q25[ j ];
+                    RD_min_Q25[ j ]                         = RD_Q25[ j + NLSF_QUANT_DEL_DEC_STATES ];
+                    RD_Q25[ j ]                             = RD_min_Q25[ j ];
+                    RD_Q25[ j + NLSF_QUANT_DEL_DEC_STATES ] = RD_max_Q25[ j ];
+                    /* swap prev_out values */
+                    out0_Q10 = prev_out_Q10[ j ];
+                    prev_out_Q10[ j ] = prev_out_Q10[ j + NLSF_QUANT_DEL_DEC_STATES ];
+                    prev_out_Q10[ j + NLSF_QUANT_DEL_DEC_STATES ] = out0_Q10;
+                    ind_sort[ j ] = j + NLSF_QUANT_DEL_DEC_STATES;
+                } else {
+                    RD_min_Q25[ j ] = RD_Q25[ j ];
+                    RD_max_Q25[ j ] = RD_Q25[ j + NLSF_QUANT_DEL_DEC_STATES ];
+                    ind_sort[ j ] = j;
+                }
+            }
+            /* compare the highest RD values of the winning half with the lowest one in the losing half, and copy if necessary */
+            /* afterwards ind_sort[] will contain the indices of the NLSF_QUANT_DEL_DEC_STATES winning RD values */
+            while( 1 ) {
+                min_max_Q25 = silk_int32_MAX;
+                max_min_Q25 = 0;
+                ind_min_max = 0;
+                ind_max_min = 0;
+                for( j = 0; j < NLSF_QUANT_DEL_DEC_STATES; j++ ) {
+                    if( min_max_Q25 > RD_max_Q25[ j ] ) {
+                        min_max_Q25 = RD_max_Q25[ j ];
+                        ind_min_max = j;
+                    }
+                    if( max_min_Q25 < RD_min_Q25[ j ] ) {
+                        max_min_Q25 = RD_min_Q25[ j ];
+                        ind_max_min = j;
+                    }
+                }
+                if( min_max_Q25 >= max_min_Q25 ) {
+                    break;
+                }
+                /* copy ind_min_max to ind_max_min */
+                ind_sort[     ind_max_min ] = ind_sort[     ind_min_max ] ^ NLSF_QUANT_DEL_DEC_STATES;
+                RD_Q25[       ind_max_min ] = RD_Q25[       ind_min_max + NLSF_QUANT_DEL_DEC_STATES ];
+                prev_out_Q10[ ind_max_min ] = prev_out_Q10[ ind_min_max + NLSF_QUANT_DEL_DEC_STATES ];
+                RD_min_Q25[   ind_max_min ] = 0;                    /* takes this slot out of the next search for the largest minimum */
+                RD_max_Q25[   ind_min_max ] = silk_int32_MAX;       /* takes this slot out of the next search for the smallest maximum */
+                silk_memcpy( ind[ ind_max_min ], ind[ ind_min_max ], MAX_LPC_ORDER * sizeof( opus_int8 ) );
+            }
+            /* increment index if it comes from the upper half */
+            for( j = 0; j < NLSF_QUANT_DEL_DEC_STATES; j++ ) {
+                ind[ j ][ i ] += silk_RSHIFT( ind_sort[ j ], NLSF_QUANT_DEL_DEC_STATES_LOG2 );
+            }
+        } else {  /* i == 0 */
+            break;
+        }
+    }
+
+    /* last sample: find winner, copy indices and return RD value */
+    ind_tmp = 0;
+    min_Q25 = silk_int32_MAX;
+    for( j = 0; j < 2 * NLSF_QUANT_DEL_DEC_STATES; j++ ) {
+        if( min_Q25 > RD_Q25[ j ] ) {
+            min_Q25 = RD_Q25[ j ];
+            ind_tmp = j;
+        }
+    }
+    for( j = 0; j < order; j++ ) {
+        indices[ j ] = ind[ ind_tmp & ( NLSF_QUANT_DEL_DEC_STATES - 1 ) ][ j ];
+        silk_assert( indices[ j ] >= -NLSF_QUANT_MAX_AMPLITUDE_EXT );
+        silk_assert( indices[ j ] <=  NLSF_QUANT_MAX_AMPLITUDE_EXT );
+    }
+    indices[ 0 ] += silk_RSHIFT( ind_tmp, NLSF_QUANT_DEL_DEC_STATES_LOG2 );    /* a winner from the upper half of RD_Q25 stands for ind_tmp + 1 at index 0 */
+    silk_assert( indices[ 0 ] <= NLSF_QUANT_MAX_AMPLITUDE_EXT );
+    silk_assert( min_Q25 >= 0 );
+    return min_Q25;
+}
diff --git a/drivers/opus/silk/NLSF_encode.c b/drivers/opus/silk/NLSF_encode.c
new file mode 100644
index 0000000000..bf67bd5cf1
--- /dev/null
+++ b/drivers/opus/silk/NLSF_encode.c
@@ -0,0 +1,136 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "silk_main.h"
+#include "stack_alloc.h"
+
+/***********************/
+/* NLSF vector encoder */
+/***********************
+ * Stabilizes the input vector, computes the first-stage quantization error
+ * for every codebook vector (silk_NLSF_VQ) and keeps the nSurvivors entries
+ * selected by silk_insertion_sort_increasing(). For each survivor the
+ * weighted residual is run through the trellis quantizer and the rate of
+ * the first-stage index is added to its RD value. The survivor picked by
+ * the final sort is written to NLSFIndices[], pNLSF_Q15[] is replaced by
+ * the vector decoded from those indices, and RD_Q25[ 0 ] is returned.
+ ***********************/
+opus_int32 silk_NLSF_encode(                                    /* O    Returns RD value in Q25                     */
+          opus_int8             *NLSFIndices,                   /* O    Codebook path vector [ LPC_ORDER + 1 ]      */
+          opus_int16            *pNLSF_Q15,                     /* I/O  Quantized NLSF vector [ LPC_ORDER ]         */
+    const silk_NLSF_CB_struct   *psNLSF_CB,                     /* I    Codebook object                             */
+    const opus_int16            *pW_QW,                         /* I    NLSF weight vector [ LPC_ORDER ]            */
+    const opus_int              NLSF_mu_Q20,                    /* I    Rate weight for the RD optimization         */
+    const opus_int              nSurvivors,                     /* I    Max survivors after first stage             */
+    const opus_int              signalType                      /* I    Signal type: 0/1/2                          */
+)
+{
+    opus_int         i, s, ind1, bestIndex, prob_Q8, bits_q7;
+    opus_int32       W_tmp_Q9, ret;
+    VARDECL( opus_int32, err_Q26 );
+    VARDECL( opus_int32, RD_Q25 );
+    VARDECL( opus_int, tempIndices1 );
+    VARDECL( opus_int8, tempIndices2 );
+    opus_int16       res_Q15[      MAX_LPC_ORDER ];
+    opus_int16       res_Q10[      MAX_LPC_ORDER ];
+    opus_int16       NLSF_tmp_Q15[ MAX_LPC_ORDER ];
+    opus_int16       W_tmp_QW[     MAX_LPC_ORDER ];
+    opus_int16       W_adj_Q5[     MAX_LPC_ORDER ];
+    opus_uint8       pred_Q8[      MAX_LPC_ORDER ];
+    opus_int16       ec_ix[        MAX_LPC_ORDER ];
+    const opus_uint8 *pCB_element, *iCDF_ptr;
+    SAVE_STACK;
+
+    silk_assert( nSurvivors <= NLSF_VQ_MAX_SURVIVORS );
+    silk_assert( signalType >= 0 && signalType <= 2 );
+    silk_assert( NLSF_mu_Q20 <= 32767 && NLSF_mu_Q20 >= 0 );
+
+    /* NLSF stabilization */
+    silk_NLSF_stabilize( pNLSF_Q15, psNLSF_CB->deltaMin_Q15, psNLSF_CB->order );
+
+    /* First stage: VQ */
+    ALLOC( err_Q26, psNLSF_CB->nVectors, opus_int32 );
+    silk_NLSF_VQ( err_Q26, pNLSF_Q15, psNLSF_CB->CB1_NLSF_Q8, psNLSF_CB->nVectors, psNLSF_CB->order );
+
+    /* Sort the quantization errors */
+    ALLOC( tempIndices1, nSurvivors, opus_int );
+    silk_insertion_sort_increasing( err_Q26, tempIndices1, psNLSF_CB->nVectors, nSurvivors );
+
+    ALLOC( RD_Q25, nSurvivors, opus_int32 );
+    ALLOC( tempIndices2, nSurvivors * MAX_LPC_ORDER, opus_int8 );
+
+    /* Loop over survivors */
+    for( s = 0; s < nSurvivors; s++ ) {
+        ind1 = tempIndices1[ s ];
+
+        /* Residual after first stage */
+        pCB_element = &psNLSF_CB->CB1_NLSF_Q8[ ind1 * psNLSF_CB->order ];
+        for( i = 0; i < psNLSF_CB->order; i++ ) {
+            NLSF_tmp_Q15[ i ] = silk_LSHIFT16( (opus_int16)pCB_element[ i ], 7 );
+            res_Q15[ i ] = pNLSF_Q15[ i ] - NLSF_tmp_Q15[ i ];
+        }
+
+        /* Weights from codebook vector */
+        silk_NLSF_VQ_weights_laroia( W_tmp_QW, NLSF_tmp_Q15, psNLSF_CB->order );
+
+        /* Apply square-rooted weights */
+        for( i = 0; i < psNLSF_CB->order; i++ ) {
+            W_tmp_Q9 = silk_SQRT_APPROX( silk_LSHIFT( (opus_int32)W_tmp_QW[ i ], 18 - NLSF_W_Q ) );
+            res_Q10[ i ] = (opus_int16)silk_RSHIFT( silk_SMULBB( res_Q15[ i ], W_tmp_Q9 ), 14 );
+        }
+
+        /* Modify input weights accordingly */
+        for( i = 0; i < psNLSF_CB->order; i++ ) {
+            W_adj_Q5[ i ] = silk_DIV32_16( silk_LSHIFT( (opus_int32)pW_QW[ i ], 5 ), W_tmp_QW[ i ] );
+        }
+
+        /* Unpack entropy table indices and predictor for current CB1 index */
+        silk_NLSF_unpack( ec_ix, pred_Q8, psNLSF_CB, ind1 );
+
+        /* Trellis quantizer */
+        RD_Q25[ s ] = silk_NLSF_del_dec_quant( &tempIndices2[ s * MAX_LPC_ORDER ], res_Q10, W_adj_Q5, pred_Q8, ec_ix,
+            psNLSF_CB->ec_Rates_Q5, psNLSF_CB->quantStepSize_Q16, psNLSF_CB->invQuantStepSize_Q6, NLSF_mu_Q20, psNLSF_CB->order );
+
+        /* Add rate for first stage */
+        iCDF_ptr = &psNLSF_CB->CB1_iCDF[ ( signalType >> 1 ) * psNLSF_CB->nVectors ];
+        if( ind1 == 0 ) {
+            prob_Q8 = 256 - iCDF_ptr[ ind1 ];
+        } else {
+            prob_Q8 = iCDF_ptr[ ind1 - 1 ] - iCDF_ptr[ ind1 ];
+        }
+        bits_q7 = ( 8 << 7 ) - silk_lin2log( prob_Q8 );
+        RD_Q25[ s ] = silk_SMLABB( RD_Q25[ s ], bits_q7, silk_RSHIFT( NLSF_mu_Q20, 2 ) );
+    }
+
+    /* Find the lowest rate-distortion error */
+    silk_insertion_sort_increasing( RD_Q25, &bestIndex, nSurvivors, 1 );
+
+    NLSFIndices[ 0 ] = (opus_int8)tempIndices1[ bestIndex ];
+    silk_memcpy( &NLSFIndices[ 1 ], &tempIndices2[ bestIndex * MAX_LPC_ORDER ], psNLSF_CB->order * sizeof( opus_int8 ) );
+
+    /* Decode */
+    silk_NLSF_decode( pNLSF_Q15, NLSFIndices, psNLSF_CB );
+
+    /* RD_Q25 is ALLOC'd scratch: copy the result out before RESTORE_STACK */
+    /* so the return value never depends on storage that has been released */
+    ret = RD_Q25[ 0 ];
+    RESTORE_STACK;
+    return ret;
+}
diff --git a/drivers/opus/silk/NLSF_stabilize.c b/drivers/opus/silk/NLSF_stabilize.c
new file mode 100644
index 0000000000..a1bf20b8d4
--- /dev/null
+++ b/drivers/opus/silk/NLSF_stabilize.c
@@ -0,0 +1,142 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+/* NLSF stabilizer:                                                         */
+/*                                                                          */
+/* - Moves NLSFs further apart if they are too close                        */
+/* - Moves NLSFs away from borders if they are too close                    */
+/* - High effort to achieve a modification with minimum                     */
+/*     Euclidean distance to input vector                                   */
+/* - Output are sorted NLSF coefficients                                    */
+/*                                                                          */
+
+#include "SigProc_FIX.h"
+
+/* Constant Definitions */
+#define MAX_LOOPS        20     /* upper bound on the correction passes below before the fall-back path is used */
+
+/* NLSF stabilizer, for a single input data vector.
+ *
+ * Each pass looks for the position I with the smallest margin, where the
+ * margin is the spacing to the lower neighbour (or to the borders 0 and
+ * 1 << 15 for the first and last value) minus the matching NDeltaMin_Q15
+ * entry. If that margin is non-negative the function returns. Otherwise
+ * the offending value, or pair of values, is moved in place and the next
+ * pass starts. After MAX_LOOPS passes without success the fall-back path
+ * sorts the vector and enforces the minimum distances with one forward
+ * and one backward sweep.
+ */
+void silk_NLSF_stabilize(
+          opus_int16            *NLSF_Q15,          /* I/O   Unstable/stabilized normalized LSF vector in Q15 [L]       */
+    const opus_int16            *NDeltaMin_Q15,     /* I     Min distance vector, NDeltaMin_Q15[L] must be >= 1 [L+1]   */
+    const opus_int              L                   /* I     Number of NLSF parameters in the input vector              */
+)
+{
+    opus_int   i, I=0, k, loops;
+    opus_int16 center_freq_Q15;
+    opus_int32 diff_Q15, min_diff_Q15, min_center_Q15, max_center_Q15;
+
+    /* This is necessary to ensure an output within range of a opus_int16 */
+    silk_assert( NDeltaMin_Q15[L] >= 1 );
+
+    for( loops = 0; loops < MAX_LOOPS; loops++ ) {
+        /**************************/
+        /* Find smallest distance */
+        /**************************/
+        /* First element */
+        min_diff_Q15 = NLSF_Q15[0] - NDeltaMin_Q15[0];
+        I = 0;
+        /* Middle elements */
+        for( i = 1; i <= L-1; i++ ) {
+            diff_Q15 = NLSF_Q15[i] - ( NLSF_Q15[i-1] + NDeltaMin_Q15[i] );
+            if( diff_Q15 < min_diff_Q15 ) {
+                min_diff_Q15 = diff_Q15;
+                I = i;
+            }
+        }
+        /* Last element */
+        diff_Q15 = ( 1 << 15 ) - ( NLSF_Q15[L-1] + NDeltaMin_Q15[L] );
+        if( diff_Q15 < min_diff_Q15 ) {
+            min_diff_Q15 = diff_Q15;
+            I = L;
+        }
+
+        /***************************************************/
+        /* Now check if the smallest distance non-negative */
+        /***************************************************/
+        if( min_diff_Q15 >= 0 ) {
+            return;
+        }
+
+        if( I == 0 ) {
+            /* Move away from lower limit */
+            NLSF_Q15[0] = NDeltaMin_Q15[0];
+
+        } else if( I == L) {
+            /* Move away from higher limit */
+            NLSF_Q15[L-1] = ( 1 << 15 ) - NDeltaMin_Q15[L];
+
+        } else {
+            /* Find the lower extreme for the location of the current center frequency */
+            min_center_Q15 = 0;
+            for( k = 0; k < I; k++ ) {
+                min_center_Q15 += NDeltaMin_Q15[k];
+            }
+            min_center_Q15 += silk_RSHIFT( NDeltaMin_Q15[I], 1 );
+
+            /* Find the upper extreme for the location of the current center frequency */
+            max_center_Q15 = 1 << 15;
+            for( k = L; k > I; k-- ) {
+                max_center_Q15 -= NDeltaMin_Q15[k];
+            }
+            max_center_Q15 -= silk_RSHIFT( NDeltaMin_Q15[I], 1 );
+
+            /* Move apart, sorted by value, keeping the same center frequency */
+            center_freq_Q15 = (opus_int16)silk_LIMIT_32( silk_RSHIFT_ROUND( (opus_int32)NLSF_Q15[I-1] + (opus_int32)NLSF_Q15[I], 1 ),
+                min_center_Q15, max_center_Q15 );
+            NLSF_Q15[I-1] = center_freq_Q15 - silk_RSHIFT( NDeltaMin_Q15[I], 1 );
+            NLSF_Q15[I] = NLSF_Q15[I-1] + NDeltaMin_Q15[I];
+        }
+    }
+
+    /* Safe and simple fall back method, which is less ideal than the above */
+    if( loops == MAX_LOOPS )    /* always true when reached: the loop above has no break and only leaves early through return */
+    {
+        /* Insertion sort (fast for already almost sorted arrays):   */
+        /* Best case:  O(n)   for an already sorted array            */
+        /* Worst case: O(n^2) for an inversely sorted array          */
+        silk_insertion_sort_increasing_all_values_int16( &NLSF_Q15[0], L );
+
+        /* First NLSF should be no less than NDeltaMin[0] */
+        NLSF_Q15[0] = silk_max_int( NLSF_Q15[0], NDeltaMin_Q15[0] );
+
+        /* Keep delta_min distance between the NLSFs */
+        for( i = 1; i < L; i++ )
+            NLSF_Q15[i] = silk_max_int( NLSF_Q15[i], NLSF_Q15[i-1] + NDeltaMin_Q15[i] );
+
+        /* Last NLSF should be no higher than 1 - NDeltaMin[L] */
+        NLSF_Q15[L-1] = silk_min_int( NLSF_Q15[L-1], (1<<15) - NDeltaMin_Q15[L] );
+
+        /* Keep NDeltaMin distance between the NLSFs */
+        for( i = L-2; i >= 0; i-- )
+            NLSF_Q15[i] = silk_min_int( NLSF_Q15[i], NLSF_Q15[i+1] - NDeltaMin_Q15[i+1] );
+    }
+}
diff --git a/drivers/opus/silk/NLSF_unpack.c b/drivers/opus/silk/NLSF_unpack.c
new file mode 100644
index 0000000000..60242a3b52
--- /dev/null
+++ b/drivers/opus/silk/NLSF_unpack.c
@@ -0,0 +1,55 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "silk_main.h"
+
+/* Unpack the entropy-table offsets and predictor values that belong to  */
+/* one first-stage codebook entry. One selector byte is consumed per     */
+/* pair of coefficients.                                                 */
+void silk_NLSF_unpack(
+          opus_int16            ec_ix[],                        /* O    Indices to entropy tables [ LPC_ORDER ]     */
+          opus_uint8            pred_Q8[],                      /* O    LSF predictor [ LPC_ORDER ]                 */
+    const silk_NLSF_CB_struct   *psNLSF_CB,                     /* I    Codebook object                             */
+    const opus_int              CB1_index                       /* I    Index of vector in first LSF codebook       */
+)
+{
+    opus_int         k;
+    opus_uint8       packed;
+    const opus_uint8 *sel;
+
+    /* Selector bytes of the requested codebook vector */
+    sel = &psNLSF_CB->ec_sel[ CB1_index * psNLSF_CB->order / 2 ];
+
+    for( k = 0; k < psNLSF_CB->order; k += 2 ) {
+        packed = sel[ k >> 1 ];
+
+        /* Coefficient k: table selector from the value shifted by 1, predictor choice from bit 0 */
+        ec_ix  [ k ] = silk_SMULBB( silk_RSHIFT( packed, 1 ) & 7, 2 * NLSF_QUANT_MAX_AMPLITUDE + 1 );
+        pred_Q8[ k ] = psNLSF_CB->pred_Q8[ k + ( packed & 1 ) * ( psNLSF_CB->order - 1 ) ];
+
+        /* Coefficient k + 1: same layout, taken from the value shifted by 5 and by 4 */
+        ec_ix  [ k + 1 ] = silk_SMULBB( silk_RSHIFT( packed, 5 ) & 7, 2 * NLSF_QUANT_MAX_AMPLITUDE + 1 );
+        pred_Q8[ k + 1 ] = psNLSF_CB->pred_Q8[ k + ( silk_RSHIFT( packed, 4 ) & 1 ) * ( psNLSF_CB->order - 1 ) + 1 ];
+    }
+}
+
diff --git a/drivers/opus/silk/NSQ.c b/drivers/opus/silk/NSQ.c
new file mode 100644
index 0000000000..a08e34e893
--- /dev/null
+++ b/drivers/opus/silk/NSQ.c
@@ -0,0 +1,446 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "silk_main.h"
+#include "stack_alloc.h"
+
+static OPUS_INLINE void silk_nsq_scale_states(
+    const silk_encoder_state *psEncC,           /* I    Encoder State                   */
+    silk_nsq_state      *NSQ,                   /* I/O  NSQ state                       */
+    const opus_int32    x_Q3[],                 /* I    input in Q3                     */
+    opus_int32          x_sc_Q10[],             /* O    input scaled with 1/Gain        */
+    const opus_int16    sLTP[],                 /* I    re-whitened LTP state in Q0     */
+    opus_int32          sLTP_Q15[],             /* O    LTP state matching scaled input */
+    opus_int            subfr,                  /* I    subframe number                 */
+    const opus_int      LTP_scale_Q14,          /* I                                    */
+    const opus_int32    Gains_Q16[ MAX_NB_SUBFR ], /* I                                 */
+    const opus_int      pitchL[ MAX_NB_SUBFR ], /* I    Pitch lag                       */
+    const opus_int      signal_type             /* I    Signal type                     */
+);
+
+static OPUS_INLINE void silk_noise_shape_quantizer(
+    silk_nsq_state      *NSQ,                   /* I/O  NSQ state                       */
+    opus_int            signalType,             /* I    Signal type                     */
+    const opus_int32    x_sc_Q10[],             /* I                                    */
+    opus_int8           pulses[],               /* O                                    */
+    opus_int16          xq[],                   /* O                                    */
+    opus_int32          sLTP_Q15[],             /* I/O  LTP state                       */
+    const opus_int16    a_Q12[],                /* I    Short term prediction coefs     */
+    const opus_int16    b_Q14[],                /* I    Long term prediction coefs      */
+    const opus_int16    AR_shp_Q13[],           /* I    Noise shaping AR coefs          */
+    opus_int            lag,                    /* I    Pitch lag                       */
+    opus_int32          HarmShapeFIRPacked_Q14, /* I                                    */
+    opus_int            Tilt_Q14,               /* I    Spectral tilt                   */
+    opus_int32          LF_shp_Q14,             /* I                                    */
+    opus_int32          Gain_Q16,               /* I                                    */
+    opus_int            Lambda_Q10,             /* I                                    */
+    opus_int            offset_Q10,             /* I                                    */
+    opus_int            length,                 /* I    Input length                    */
+    opus_int            shapingLPCOrder,        /* I    Noise shaping AR filter order   */
+    opus_int            predictLPCOrder         /* I    Prediction filter order         */
+);
+
+/* Noise shaping quantizer for one frame.
+ *
+ * Takes the random seed from psIndices->Seed and then, for each of the
+ * psEncC->nb_subfr subframes, picks that subframe's prediction, LTP and
+ * shaping coefficients, re-whitens the LTP state when required (voiced
+ * frames only), scales the states with silk_nsq_scale_states() and
+ * quantizes psEncC->subfr_length samples with
+ * silk_noise_shape_quantizer(). Afterwards the last pitch lag is stored in
+ * NSQ->lagPrev and NSQ->xq / NSQ->sLTP_shp_Q14 are shifted down by
+ * frame_length so that ltp_mem_length entries of history remain.
+ */
+void silk_NSQ(
+    const silk_encoder_state    *psEncC,                                    /* I    Encoder State                   */
+    silk_nsq_state              *NSQ,                                       /* I/O  NSQ state                       */
+    SideInfoIndices             *psIndices,                                 /* I/O  Quantization Indices            */
+    const opus_int32            x_Q3[],                                     /* I    Prefiltered input signal        */
+    opus_int8                   pulses[],                                   /* O    Quantized pulse signal          */
+    const opus_int16            PredCoef_Q12[ 2 * MAX_LPC_ORDER ],          /* I    Short term prediction coefs     */
+    const opus_int16            LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ],    /* I    Long term prediction coefs      */
+    const opus_int16            AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs             */
+    const opus_int              HarmShapeGain_Q14[ MAX_NB_SUBFR ],          /* I    Long term shaping coefs         */
+    const opus_int              Tilt_Q14[ MAX_NB_SUBFR ],                   /* I    Spectral tilt                   */
+    const opus_int32            LF_shp_Q14[ MAX_NB_SUBFR ],                 /* I    Low frequency shaping coefs     */
+    const opus_int32            Gains_Q16[ MAX_NB_SUBFR ],                  /* I    Quantization step sizes         */
+    const opus_int              pitchL[ MAX_NB_SUBFR ],                     /* I    Pitch lags                      */
+    const opus_int              Lambda_Q10,                                 /* I    Rate/distortion tradeoff        */
+    const opus_int              LTP_scale_Q14                               /* I    LTP state scaling               */
+)
+{
+    opus_int            k, lag, start_idx, LSF_interpolation_flag;
+    const opus_int16    *A_Q12, *B_Q14, *AR_shp_Q13;
+    opus_int16          *pxq;
+    VARDECL( opus_int32, sLTP_Q15 );
+    VARDECL( opus_int16, sLTP );
+    opus_int32          HarmShapeFIRPacked_Q14;
+    opus_int            offset_Q10;
+    VARDECL( opus_int32, x_sc_Q10 );
+    SAVE_STACK;
+
+    NSQ->rand_seed = psIndices->Seed;
+
+    /* Set unvoiced lag to the previous one, overwrite later for voiced */
+    lag = NSQ->lagPrev;
+
+    silk_assert( NSQ->prev_gain_Q16 != 0 );
+
+    offset_Q10 = silk_Quantization_Offsets_Q10[ psIndices->signalType >> 1 ][ psIndices->quantOffsetType ];
+
+    if( psIndices->NLSFInterpCoef_Q2 == 4 ) {
+        LSF_interpolation_flag = 0;
+    } else {
+        LSF_interpolation_flag = 1;
+    }
+
+    ALLOC( sLTP_Q15,
+           psEncC->ltp_mem_length + psEncC->frame_length, opus_int32 );
+    ALLOC( sLTP, psEncC->ltp_mem_length + psEncC->frame_length, opus_int16 );
+    ALLOC( x_sc_Q10, psEncC->subfr_length, opus_int32 );
+    /* Set up pointers to start of sub frame */
+    NSQ->sLTP_shp_buf_idx = psEncC->ltp_mem_length;
+    NSQ->sLTP_buf_idx     = psEncC->ltp_mem_length;
+    pxq                   = &NSQ->xq[ psEncC->ltp_mem_length ];
+    for( k = 0; k < psEncC->nb_subfr; k++ ) {
+        /* With the interpolation flag cleared the index is forced to 1, i.e. the second coefficient set */
+        A_Q12      = &PredCoef_Q12[ (( k >> 1 ) | ( 1 - LSF_interpolation_flag )) * MAX_LPC_ORDER ];
+        B_Q14      = &LTPCoef_Q14[ k * LTP_ORDER ];
+        AR_shp_Q13 = &AR2_Q13[ k * MAX_SHAPE_LPC_ORDER ];
+
+        /* Noise shape parameters */
+        silk_assert( HarmShapeGain_Q14[ k ] >= 0 );
+        HarmShapeFIRPacked_Q14  =                          silk_RSHIFT( HarmShapeGain_Q14[ k ], 2 );
+        HarmShapeFIRPacked_Q14 |= silk_LSHIFT( (opus_int32)silk_RSHIFT( HarmShapeGain_Q14[ k ], 1 ), 16 );
+
+        NSQ->rewhite_flag = 0;
+        if( psIndices->signalType == TYPE_VOICED ) {
+            /* Voiced */
+            lag = pitchL[ k ];
+
+            /* Re-whitening */
+            if( ( k & ( 3 - silk_LSHIFT( LSF_interpolation_flag, 1 ) ) ) == 0 ) {
+                /* Rewhiten with new A coefs */
+                start_idx = psEncC->ltp_mem_length - lag - psEncC->predictLPCOrder - LTP_ORDER / 2;
+                silk_assert( start_idx > 0 );
+
+                silk_LPC_analysis_filter( &sLTP[ start_idx ], &NSQ->xq[ start_idx + k * psEncC->subfr_length ],
+                    A_Q12, psEncC->ltp_mem_length - start_idx, psEncC->predictLPCOrder );
+
+                NSQ->rewhite_flag = 1;
+                NSQ->sLTP_buf_idx = psEncC->ltp_mem_length;
+            }
+        }
+
+        silk_nsq_scale_states( psEncC, NSQ, x_Q3, x_sc_Q10, sLTP, sLTP_Q15, k, LTP_scale_Q14, Gains_Q16, pitchL, psIndices->signalType );
+
+        silk_noise_shape_quantizer( NSQ, psIndices->signalType, x_sc_Q10, pulses, pxq, sLTP_Q15, A_Q12, B_Q14,
+            AR_shp_Q13, lag, HarmShapeFIRPacked_Q14, Tilt_Q14[ k ], LF_shp_Q14[ k ], Gains_Q16[ k ], Lambda_Q10,
+            offset_Q10, psEncC->subfr_length, psEncC->shapingLPCOrder, psEncC->predictLPCOrder );
+
+        /* Advance input and output cursors to the next subframe */
+        x_Q3   += psEncC->subfr_length;
+        pulses += psEncC->subfr_length;
+        pxq    += psEncC->subfr_length;
+    }
+
+    /* Update lagPrev for next frame */
+    NSQ->lagPrev = pitchL[ psEncC->nb_subfr - 1 ];
+
+    /* Save quantized speech and noise shaping signals */
+    /* DEBUG_STORE_DATA( enc.pcm, &NSQ->xq[ psEncC->ltp_mem_length ], psEncC->frame_length * sizeof( opus_int16 ) ) */
+    silk_memmove( NSQ->xq,           &NSQ->xq[           psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int16 ) );
+    silk_memmove( NSQ->sLTP_shp_Q14, &NSQ->sLTP_shp_Q14[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int32 ) );
+    RESTORE_STACK;
+}
+
+/***********************************/
+/* silk_noise_shape_quantizer  */
+/***********************************/
+static OPUS_INLINE void silk_noise_shape_quantizer(
+    silk_nsq_state      *NSQ,                   /* I/O  NSQ state                       */
+    opus_int            signalType,             /* I    Signal type                     */
+    const opus_int32    x_sc_Q10[],             /* I                                    */
+    opus_int8           pulses[],               /* O                                    */
+    opus_int16          xq[],                   /* O                                    */
+    opus_int32          sLTP_Q15[],             /* I/O  LTP state                       */
+    const opus_int16    a_Q12[],                /* I    Short term prediction coefs     */
+    const opus_int16    b_Q14[],                /* I    Long term prediction coefs      */
+    const opus_int16    AR_shp_Q13[],           /* I    Noise shaping AR coefs          */
+    opus_int            lag,                    /* I    Pitch lag                       */
+    opus_int32          HarmShapeFIRPacked_Q14, /* I                                    */
+    opus_int            Tilt_Q14,               /* I    Spectral tilt                   */
+    opus_int32          LF_shp_Q14,             /* I                                    */
+    opus_int32          Gain_Q16,               /* I                                    */
+    opus_int            Lambda_Q10,             /* I                                    */
+    opus_int            offset_Q10,             /* I                                    */
+    opus_int            length,                 /* I    Input length                    */
+    opus_int            shapingLPCOrder,        /* I    Noise shaping AR filter order   */
+    opus_int            predictLPCOrder         /* I    Prediction filter order         */
+)
+{
+    opus_int     i, j;
+    opus_int32   LTP_pred_Q13, LPC_pred_Q10, n_AR_Q12, n_LTP_Q13;
+    opus_int32   n_LF_Q12, r_Q10, rr_Q10, q1_Q0, q1_Q10, q2_Q10, rd1_Q20, rd2_Q20;
+    opus_int32   exc_Q14, LPC_exc_Q14, xq_Q14, Gain_Q10;
+    opus_int32   tmp1, tmp2, sLF_AR_shp_Q14;
+    opus_int32   *psLPC_Q14, *shp_lag_ptr, *pred_lag_ptr;
+
+    shp_lag_ptr  = &NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - lag + HARM_SHAPE_FIR_TAPS / 2 ];
+    pred_lag_ptr = &sLTP_Q15[ NSQ->sLTP_buf_idx - lag + LTP_ORDER / 2 ];
+    Gain_Q10     = silk_RSHIFT( Gain_Q16, 6 );
+
+    /* Set up short term AR state */
+    psLPC_Q14 = &NSQ->sLPC_Q14[ NSQ_LPC_BUF_LENGTH - 1 ];
+
+    for( i = 0; i < length; i++ ) {
+        /* Generate dither */
+        NSQ->rand_seed = silk_RAND( NSQ->rand_seed );
+
+        /* Short-term prediction */
+        silk_assert( predictLPCOrder == 10 || predictLPCOrder == 16 );
+        /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */
+        LPC_pred_Q10 = silk_RSHIFT(
predictLPCOrder, 1 ); + LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ 0 ], a_Q12[ 0 ] ); + LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -1 ], a_Q12[ 1 ] ); + LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -2 ], a_Q12[ 2 ] ); + LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -3 ], a_Q12[ 3 ] ); + LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -4 ], a_Q12[ 4 ] ); + LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -5 ], a_Q12[ 5 ] ); + LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -6 ], a_Q12[ 6 ] ); + LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -7 ], a_Q12[ 7 ] ); + LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -8 ], a_Q12[ 8 ] ); + LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -9 ], a_Q12[ 9 ] ); + if( predictLPCOrder == 16 ) { + LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -10 ], a_Q12[ 10 ] ); + LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -11 ], a_Q12[ 11 ] ); + LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -12 ], a_Q12[ 12 ] ); + LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -13 ], a_Q12[ 13 ] ); + LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -14 ], a_Q12[ 14 ] ); + LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -15 ], a_Q12[ 15 ] ); + } + + /* Long-term prediction */ + if( signalType == TYPE_VOICED ) { + /* Unrolled loop */ + /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */ + LTP_pred_Q13 = 2; + LTP_pred_Q13 = silk_SMLAWB( LTP_pred_Q13, pred_lag_ptr[ 0 ], b_Q14[ 0 ] ); + LTP_pred_Q13 = silk_SMLAWB( LTP_pred_Q13, pred_lag_ptr[ -1 ], b_Q14[ 1 ] ); + LTP_pred_Q13 = silk_SMLAWB( LTP_pred_Q13, pred_lag_ptr[ -2 ], b_Q14[ 2 ] ); + LTP_pred_Q13 = silk_SMLAWB( LTP_pred_Q13, pred_lag_ptr[ -3 ], b_Q14[ 3 ] ); + LTP_pred_Q13 = silk_SMLAWB( LTP_pred_Q13, pred_lag_ptr[ -4 ], b_Q14[ 4 ] ); + pred_lag_ptr++; + } else { + LTP_pred_Q13 = 0; + } + + /* Noise shape feedback */ + silk_assert( ( shapingLPCOrder & 1 ) == 0 ); /* check that order is 
even */ + tmp2 = psLPC_Q14[ 0 ]; + tmp1 = NSQ->sAR2_Q14[ 0 ]; + NSQ->sAR2_Q14[ 0 ] = tmp2; + n_AR_Q12 = silk_RSHIFT( shapingLPCOrder, 1 ); + n_AR_Q12 = silk_SMLAWB( n_AR_Q12, tmp2, AR_shp_Q13[ 0 ] ); + for( j = 2; j < shapingLPCOrder; j += 2 ) { + tmp2 = NSQ->sAR2_Q14[ j - 1 ]; + NSQ->sAR2_Q14[ j - 1 ] = tmp1; + n_AR_Q12 = silk_SMLAWB( n_AR_Q12, tmp1, AR_shp_Q13[ j - 1 ] ); + tmp1 = NSQ->sAR2_Q14[ j + 0 ]; + NSQ->sAR2_Q14[ j + 0 ] = tmp2; + n_AR_Q12 = silk_SMLAWB( n_AR_Q12, tmp2, AR_shp_Q13[ j ] ); + } + NSQ->sAR2_Q14[ shapingLPCOrder - 1 ] = tmp1; + n_AR_Q12 = silk_SMLAWB( n_AR_Q12, tmp1, AR_shp_Q13[ shapingLPCOrder - 1 ] ); + + n_AR_Q12 = silk_LSHIFT32( n_AR_Q12, 1 ); /* Q11 -> Q12 */ + n_AR_Q12 = silk_SMLAWB( n_AR_Q12, NSQ->sLF_AR_shp_Q14, Tilt_Q14 ); + + n_LF_Q12 = silk_SMULWB( NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - 1 ], LF_shp_Q14 ); + n_LF_Q12 = silk_SMLAWT( n_LF_Q12, NSQ->sLF_AR_shp_Q14, LF_shp_Q14 ); + + silk_assert( lag > 0 || signalType != TYPE_VOICED ); + + /* Combine prediction and noise shaping signals */ + tmp1 = silk_SUB32( silk_LSHIFT32( LPC_pred_Q10, 2 ), n_AR_Q12 ); /* Q12 */ + tmp1 = silk_SUB32( tmp1, n_LF_Q12 ); /* Q12 */ + if( lag > 0 ) { + /* Symmetric, packed FIR coefficients */ + n_LTP_Q13 = silk_SMULWB( silk_ADD32( shp_lag_ptr[ 0 ], shp_lag_ptr[ -2 ] ), HarmShapeFIRPacked_Q14 ); + n_LTP_Q13 = silk_SMLAWT( n_LTP_Q13, shp_lag_ptr[ -1 ], HarmShapeFIRPacked_Q14 ); + n_LTP_Q13 = silk_LSHIFT( n_LTP_Q13, 1 ); + shp_lag_ptr++; + + tmp2 = silk_SUB32( LTP_pred_Q13, n_LTP_Q13 ); /* Q13 */ + tmp1 = silk_ADD_LSHIFT32( tmp2, tmp1, 1 ); /* Q13 */ + tmp1 = silk_RSHIFT_ROUND( tmp1, 3 ); /* Q10 */ + } else { + tmp1 = silk_RSHIFT_ROUND( tmp1, 2 ); /* Q10 */ + } + + r_Q10 = silk_SUB32( x_sc_Q10[ i ], tmp1 ); /* residual error Q10 */ + + /* Flip sign depending on dither */ + if ( NSQ->rand_seed < 0 ) { + r_Q10 = -r_Q10; + } + r_Q10 = silk_LIMIT_32( r_Q10, -(31 << 10), 30 << 10 ); + + /* Find two quantization level candidates and measure their 
rate-distortion */ + q1_Q10 = silk_SUB32( r_Q10, offset_Q10 ); + q1_Q0 = silk_RSHIFT( q1_Q10, 10 ); + if( q1_Q0 > 0 ) { + q1_Q10 = silk_SUB32( silk_LSHIFT( q1_Q0, 10 ), QUANT_LEVEL_ADJUST_Q10 ); + q1_Q10 = silk_ADD32( q1_Q10, offset_Q10 ); + q2_Q10 = silk_ADD32( q1_Q10, 1024 ); + rd1_Q20 = silk_SMULBB( q1_Q10, Lambda_Q10 ); + rd2_Q20 = silk_SMULBB( q2_Q10, Lambda_Q10 ); + } else if( q1_Q0 == 0 ) { + q1_Q10 = offset_Q10; + q2_Q10 = silk_ADD32( q1_Q10, 1024 - QUANT_LEVEL_ADJUST_Q10 ); + rd1_Q20 = silk_SMULBB( q1_Q10, Lambda_Q10 ); + rd2_Q20 = silk_SMULBB( q2_Q10, Lambda_Q10 ); + } else if( q1_Q0 == -1 ) { + q2_Q10 = offset_Q10; + q1_Q10 = silk_SUB32( q2_Q10, 1024 - QUANT_LEVEL_ADJUST_Q10 ); + rd1_Q20 = silk_SMULBB( -q1_Q10, Lambda_Q10 ); + rd2_Q20 = silk_SMULBB( q2_Q10, Lambda_Q10 ); + } else { /* Q1_Q0 < -1 */ + q1_Q10 = silk_ADD32( silk_LSHIFT( q1_Q0, 10 ), QUANT_LEVEL_ADJUST_Q10 ); + q1_Q10 = silk_ADD32( q1_Q10, offset_Q10 ); + q2_Q10 = silk_ADD32( q1_Q10, 1024 ); + rd1_Q20 = silk_SMULBB( -q1_Q10, Lambda_Q10 ); + rd2_Q20 = silk_SMULBB( -q2_Q10, Lambda_Q10 ); + } + rr_Q10 = silk_SUB32( r_Q10, q1_Q10 ); + rd1_Q20 = silk_SMLABB( rd1_Q20, rr_Q10, rr_Q10 ); + rr_Q10 = silk_SUB32( r_Q10, q2_Q10 ); + rd2_Q20 = silk_SMLABB( rd2_Q20, rr_Q10, rr_Q10 ); + + if( rd2_Q20 < rd1_Q20 ) { + q1_Q10 = q2_Q10; + } + + pulses[ i ] = (opus_int8)silk_RSHIFT_ROUND( q1_Q10, 10 ); + + /* Excitation */ + exc_Q14 = silk_LSHIFT( q1_Q10, 4 ); + if ( NSQ->rand_seed < 0 ) { + exc_Q14 = -exc_Q14; + } + + /* Add predictions */ + LPC_exc_Q14 = silk_ADD_LSHIFT32( exc_Q14, LTP_pred_Q13, 1 ); + xq_Q14 = silk_ADD_LSHIFT32( LPC_exc_Q14, LPC_pred_Q10, 4 ); + + /* Scale XQ back to normal level before saving */ + xq[ i ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( silk_SMULWW( xq_Q14, Gain_Q10 ), 8 ) ); + + /* Update states */ + psLPC_Q14++; + *psLPC_Q14 = xq_Q14; + sLF_AR_shp_Q14 = silk_SUB_LSHIFT32( xq_Q14, n_AR_Q12, 2 ); + NSQ->sLF_AR_shp_Q14 = sLF_AR_shp_Q14; + + NSQ->sLTP_shp_Q14[ 
NSQ->sLTP_shp_buf_idx ] = silk_SUB_LSHIFT32( sLF_AR_shp_Q14, n_LF_Q12, 2 ); + sLTP_Q15[ NSQ->sLTP_buf_idx ] = silk_LSHIFT( LPC_exc_Q14, 1 ); + NSQ->sLTP_shp_buf_idx++; + NSQ->sLTP_buf_idx++; + + /* Make dither dependent on quantized signal */ + NSQ->rand_seed = silk_ADD32_ovflw( NSQ->rand_seed, pulses[ i ] ); + } + + /* Update LPC synth buffer */ + silk_memcpy( NSQ->sLPC_Q14, &NSQ->sLPC_Q14[ length ], NSQ_LPC_BUF_LENGTH * sizeof( opus_int32 ) ); +} + +static OPUS_INLINE void silk_nsq_scale_states( + const silk_encoder_state *psEncC, /* I Encoder State */ + silk_nsq_state *NSQ, /* I/O NSQ state */ + const opus_int32 x_Q3[], /* I input in Q3 */ + opus_int32 x_sc_Q10[], /* O input scaled with 1/Gain */ + const opus_int16 sLTP[], /* I re-whitened LTP state in Q0 */ + opus_int32 sLTP_Q15[], /* O LTP state matching scaled input */ + opus_int subfr, /* I subframe number */ + const opus_int LTP_scale_Q14, /* I */ + const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I */ + const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lag */ + const opus_int signal_type /* I Signal type */ +) +{ + opus_int i, lag; + opus_int32 gain_adj_Q16, inv_gain_Q31, inv_gain_Q23; + + lag = pitchL[ subfr ]; + inv_gain_Q31 = silk_INVERSE32_varQ( silk_max( Gains_Q16[ subfr ], 1 ), 47 ); + silk_assert( inv_gain_Q31 != 0 ); + + /* Calculate gain adjustment factor */ + if( Gains_Q16[ subfr ] != NSQ->prev_gain_Q16 ) { + gain_adj_Q16 = silk_DIV32_varQ( NSQ->prev_gain_Q16, Gains_Q16[ subfr ], 16 ); + } else { + gain_adj_Q16 = (opus_int32)1 << 16; + } + + /* Scale input */ + inv_gain_Q23 = silk_RSHIFT_ROUND( inv_gain_Q31, 8 ); + for( i = 0; i < psEncC->subfr_length; i++ ) { + x_sc_Q10[ i ] = silk_SMULWW( x_Q3[ i ], inv_gain_Q23 ); + } + + /* Save inverse gain */ + NSQ->prev_gain_Q16 = Gains_Q16[ subfr ]; + + /* After rewhitening the LTP state is un-scaled, so scale with inv_gain_Q16 */ + if( NSQ->rewhite_flag ) { + if( subfr == 0 ) { + /* Do LTP downscaling */ + inv_gain_Q31 = silk_LSHIFT( silk_SMULWB( 
inv_gain_Q31, LTP_scale_Q14 ), 2 ); + } + for( i = NSQ->sLTP_buf_idx - lag - LTP_ORDER / 2; i < NSQ->sLTP_buf_idx; i++ ) { + silk_assert( i < MAX_FRAME_LENGTH ); + sLTP_Q15[ i ] = silk_SMULWB( inv_gain_Q31, sLTP[ i ] ); + } + } + + /* Adjust for changing gain */ + if( gain_adj_Q16 != (opus_int32)1 << 16 ) { + /* Scale long-term shaping state */ + for( i = NSQ->sLTP_shp_buf_idx - psEncC->ltp_mem_length; i < NSQ->sLTP_shp_buf_idx; i++ ) { + NSQ->sLTP_shp_Q14[ i ] = silk_SMULWW( gain_adj_Q16, NSQ->sLTP_shp_Q14[ i ] ); + } + + /* Scale long-term prediction state */ + if( signal_type == TYPE_VOICED && NSQ->rewhite_flag == 0 ) { + for( i = NSQ->sLTP_buf_idx - lag - LTP_ORDER / 2; i < NSQ->sLTP_buf_idx; i++ ) { + sLTP_Q15[ i ] = silk_SMULWW( gain_adj_Q16, sLTP_Q15[ i ] ); + } + } + + NSQ->sLF_AR_shp_Q14 = silk_SMULWW( gain_adj_Q16, NSQ->sLF_AR_shp_Q14 ); + + /* Scale short-term prediction and shaping states */ + for( i = 0; i < NSQ_LPC_BUF_LENGTH; i++ ) { + NSQ->sLPC_Q14[ i ] = silk_SMULWW( gain_adj_Q16, NSQ->sLPC_Q14[ i ] ); + } + for( i = 0; i < MAX_SHAPE_LPC_ORDER; i++ ) { + NSQ->sAR2_Q14[ i ] = silk_SMULWW( gain_adj_Q16, NSQ->sAR2_Q14[ i ] ); + } + } +} diff --git a/drivers/opus/silk/NSQ_del_dec.c b/drivers/opus/silk/NSQ_del_dec.c new file mode 100644 index 0000000000..8ac6311b11 --- /dev/null +++ b/drivers/opus/silk/NSQ_del_dec.c @@ -0,0 +1,719 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. 
+- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "silk_main.h" +#include "stack_alloc.h" + +typedef struct { + opus_int32 sLPC_Q14[ MAX_SUB_FRAME_LENGTH + NSQ_LPC_BUF_LENGTH ]; + opus_int32 RandState[ DECISION_DELAY ]; + opus_int32 Q_Q10[ DECISION_DELAY ]; + opus_int32 Xq_Q14[ DECISION_DELAY ]; + opus_int32 Pred_Q15[ DECISION_DELAY ]; + opus_int32 Shape_Q14[ DECISION_DELAY ]; + opus_int32 sAR2_Q14[ MAX_SHAPE_LPC_ORDER ]; + opus_int32 LF_AR_Q14; + opus_int32 Seed; + opus_int32 SeedInit; + opus_int32 RD_Q10; +} NSQ_del_dec_struct; + +typedef struct { + opus_int32 Q_Q10; + opus_int32 RD_Q10; + opus_int32 xq_Q14; + opus_int32 LF_AR_Q14; + opus_int32 sLTP_shp_Q14; + opus_int32 LPC_exc_Q14; +} NSQ_sample_struct; + +typedef NSQ_sample_struct NSQ_sample_pair[ 2 ]; + +static OPUS_INLINE void silk_nsq_del_dec_scale_states( + const silk_encoder_state *psEncC, /* I Encoder State */ + silk_nsq_state *NSQ, /* I/O NSQ 
state */ + NSQ_del_dec_struct psDelDec[], /* I/O Delayed decision states */ + const opus_int32 x_Q3[], /* I Input in Q3 */ + opus_int32 x_sc_Q10[], /* O Input scaled with 1/Gain in Q10 */ + const opus_int16 sLTP[], /* I Re-whitened LTP state in Q0 */ + opus_int32 sLTP_Q15[], /* O LTP state matching scaled input */ + opus_int subfr, /* I Subframe number */ + opus_int nStatesDelayedDecision, /* I Number of del dec states */ + const opus_int LTP_scale_Q14, /* I LTP state scaling */ + const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I */ + const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lag */ + const opus_int signal_type, /* I Signal type */ + const opus_int decisionDelay /* I Decision delay */ +); + +/******************************************/ +/* Noise shape quantizer for one subframe */ +/******************************************/ +static OPUS_INLINE void silk_noise_shape_quantizer_del_dec( + silk_nsq_state *NSQ, /* I/O NSQ state */ + NSQ_del_dec_struct psDelDec[], /* I/O Delayed decision states */ + opus_int signalType, /* I Signal type */ + const opus_int32 x_Q10[], /* I */ + opus_int8 pulses[], /* O */ + opus_int16 xq[], /* O */ + opus_int32 sLTP_Q15[], /* I/O LTP filter state */ + opus_int32 delayedGain_Q10[], /* I/O Gain delay buffer */ + const opus_int16 a_Q12[], /* I Short term prediction coefs */ + const opus_int16 b_Q14[], /* I Long term prediction coefs */ + const opus_int16 AR_shp_Q13[], /* I Noise shaping coefs */ + opus_int lag, /* I Pitch lag */ + opus_int32 HarmShapeFIRPacked_Q14, /* I */ + opus_int Tilt_Q14, /* I Spectral tilt */ + opus_int32 LF_shp_Q14, /* I */ + opus_int32 Gain_Q16, /* I */ + opus_int Lambda_Q10, /* I */ + opus_int offset_Q10, /* I */ + opus_int length, /* I Input length */ + opus_int subfr, /* I Subframe number */ + opus_int shapingLPCOrder, /* I Shaping LPC filter order */ + opus_int predictLPCOrder, /* I Prediction filter order */ + opus_int warping_Q16, /* I */ + opus_int nStatesDelayedDecision, /* I Number of states in 
decision tree */ + opus_int *smpl_buf_idx, /* I Index to newest samples in buffers */ + opus_int decisionDelay /* I */ +); + +void silk_NSQ_del_dec( + const silk_encoder_state *psEncC, /* I/O Encoder State */ + silk_nsq_state *NSQ, /* I/O NSQ state */ + SideInfoIndices *psIndices, /* I/O Quantization Indices */ + const opus_int32 x_Q3[], /* I Prefiltered input signal */ + opus_int8 pulses[], /* O Quantized pulse signal */ + const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ + const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ + const opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ + const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ + const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ + const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ + const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ + const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ + const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ + const opus_int LTP_scale_Q14 /* I LTP state scaling */ +) +{ + opus_int i, k, lag, start_idx, LSF_interpolation_flag, Winner_ind, subfr; + opus_int last_smple_idx, smpl_buf_idx, decisionDelay; + const opus_int16 *A_Q12, *B_Q14, *AR_shp_Q13; + opus_int16 *pxq; + VARDECL( opus_int32, sLTP_Q15 ); + VARDECL( opus_int16, sLTP ); + opus_int32 HarmShapeFIRPacked_Q14; + opus_int offset_Q10; + opus_int32 RDmin_Q10, Gain_Q10; + VARDECL( opus_int32, x_sc_Q10 ); + VARDECL( opus_int32, delayedGain_Q10 ); + VARDECL( NSQ_del_dec_struct, psDelDec ); + NSQ_del_dec_struct *psDD; + SAVE_STACK; + + /* Set unvoiced lag to the previous one, overwrite later for voiced */ + lag = NSQ->lagPrev; + + silk_assert( NSQ->prev_gain_Q16 != 0 ); + + /* Initialize delayed decision states */ + ALLOC( psDelDec, psEncC->nStatesDelayedDecision, NSQ_del_dec_struct ); + silk_memset( 
psDelDec, 0, psEncC->nStatesDelayedDecision * sizeof( NSQ_del_dec_struct ) ); + for( k = 0; k < psEncC->nStatesDelayedDecision; k++ ) { + psDD = &psDelDec[ k ]; + psDD->Seed = ( k + psIndices->Seed ) & 3; + psDD->SeedInit = psDD->Seed; + psDD->RD_Q10 = 0; + psDD->LF_AR_Q14 = NSQ->sLF_AR_shp_Q14; + psDD->Shape_Q14[ 0 ] = NSQ->sLTP_shp_Q14[ psEncC->ltp_mem_length - 1 ]; + silk_memcpy( psDD->sLPC_Q14, NSQ->sLPC_Q14, NSQ_LPC_BUF_LENGTH * sizeof( opus_int32 ) ); + silk_memcpy( psDD->sAR2_Q14, NSQ->sAR2_Q14, sizeof( NSQ->sAR2_Q14 ) ); + } + + offset_Q10 = silk_Quantization_Offsets_Q10[ psIndices->signalType >> 1 ][ psIndices->quantOffsetType ]; + smpl_buf_idx = 0; /* index of oldest samples */ + + decisionDelay = silk_min_int( DECISION_DELAY, psEncC->subfr_length ); + + /* For voiced frames limit the decision delay to lower than the pitch lag */ + if( psIndices->signalType == TYPE_VOICED ) { + for( k = 0; k < psEncC->nb_subfr; k++ ) { + decisionDelay = silk_min_int( decisionDelay, pitchL[ k ] - LTP_ORDER / 2 - 1 ); + } + } else { + if( lag > 0 ) { + decisionDelay = silk_min_int( decisionDelay, lag - LTP_ORDER / 2 - 1 ); + } + } + + if( psIndices->NLSFInterpCoef_Q2 == 4 ) { + LSF_interpolation_flag = 0; + } else { + LSF_interpolation_flag = 1; + } + + ALLOC( sLTP_Q15, + psEncC->ltp_mem_length + psEncC->frame_length, opus_int32 ); + ALLOC( sLTP, psEncC->ltp_mem_length + psEncC->frame_length, opus_int16 ); + ALLOC( x_sc_Q10, psEncC->subfr_length, opus_int32 ); + ALLOC( delayedGain_Q10, DECISION_DELAY, opus_int32 ); + /* Set up pointers to start of sub frame */ + pxq = &NSQ->xq[ psEncC->ltp_mem_length ]; + NSQ->sLTP_shp_buf_idx = psEncC->ltp_mem_length; + NSQ->sLTP_buf_idx = psEncC->ltp_mem_length; + subfr = 0; + for( k = 0; k < psEncC->nb_subfr; k++ ) { + A_Q12 = &PredCoef_Q12[ ( ( k >> 1 ) | ( 1 - LSF_interpolation_flag ) ) * MAX_LPC_ORDER ]; + B_Q14 = <PCoef_Q14[ k * LTP_ORDER ]; + AR_shp_Q13 = &AR2_Q13[ k * MAX_SHAPE_LPC_ORDER ]; + + /* Noise shape parameters */ + 
silk_assert( HarmShapeGain_Q14[ k ] >= 0 ); + HarmShapeFIRPacked_Q14 = silk_RSHIFT( HarmShapeGain_Q14[ k ], 2 ); + HarmShapeFIRPacked_Q14 |= silk_LSHIFT( (opus_int32)silk_RSHIFT( HarmShapeGain_Q14[ k ], 1 ), 16 ); + + NSQ->rewhite_flag = 0; + if( psIndices->signalType == TYPE_VOICED ) { + /* Voiced */ + lag = pitchL[ k ]; + + /* Re-whitening */ + if( ( k & ( 3 - silk_LSHIFT( LSF_interpolation_flag, 1 ) ) ) == 0 ) { + if( k == 2 ) { + /* RESET DELAYED DECISIONS */ + /* Find winner */ + RDmin_Q10 = psDelDec[ 0 ].RD_Q10; + Winner_ind = 0; + for( i = 1; i < psEncC->nStatesDelayedDecision; i++ ) { + if( psDelDec[ i ].RD_Q10 < RDmin_Q10 ) { + RDmin_Q10 = psDelDec[ i ].RD_Q10; + Winner_ind = i; + } + } + for( i = 0; i < psEncC->nStatesDelayedDecision; i++ ) { + if( i != Winner_ind ) { + psDelDec[ i ].RD_Q10 += ( silk_int32_MAX >> 4 ); + silk_assert( psDelDec[ i ].RD_Q10 >= 0 ); + } + } + + /* Copy final part of signals from winner state to output and long-term filter states */ + psDD = &psDelDec[ Winner_ind ]; + last_smple_idx = smpl_buf_idx + decisionDelay; + for( i = 0; i < decisionDelay; i++ ) { + last_smple_idx = ( last_smple_idx - 1 ) & DECISION_DELAY_MASK; + pulses[ i - decisionDelay ] = (opus_int8)silk_RSHIFT_ROUND( psDD->Q_Q10[ last_smple_idx ], 10 ); + pxq[ i - decisionDelay ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( + silk_SMULWW( psDD->Xq_Q14[ last_smple_idx ], Gains_Q16[ 1 ] ), 14 ) ); + NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - decisionDelay + i ] = psDD->Shape_Q14[ last_smple_idx ]; + } + + subfr = 0; + } + + /* Rewhiten with new A coefs */ + start_idx = psEncC->ltp_mem_length - lag - psEncC->predictLPCOrder - LTP_ORDER / 2; + silk_assert( start_idx > 0 ); + + silk_LPC_analysis_filter( &sLTP[ start_idx ], &NSQ->xq[ start_idx + k * psEncC->subfr_length ], + A_Q12, psEncC->ltp_mem_length - start_idx, psEncC->predictLPCOrder ); + + NSQ->sLTP_buf_idx = psEncC->ltp_mem_length; + NSQ->rewhite_flag = 1; + } + } + + silk_nsq_del_dec_scale_states( psEncC, NSQ, 
psDelDec, x_Q3, x_sc_Q10, sLTP, sLTP_Q15, k, + psEncC->nStatesDelayedDecision, LTP_scale_Q14, Gains_Q16, pitchL, psIndices->signalType, decisionDelay ); + + silk_noise_shape_quantizer_del_dec( NSQ, psDelDec, psIndices->signalType, x_sc_Q10, pulses, pxq, sLTP_Q15, + delayedGain_Q10, A_Q12, B_Q14, AR_shp_Q13, lag, HarmShapeFIRPacked_Q14, Tilt_Q14[ k ], LF_shp_Q14[ k ], + Gains_Q16[ k ], Lambda_Q10, offset_Q10, psEncC->subfr_length, subfr++, psEncC->shapingLPCOrder, + psEncC->predictLPCOrder, psEncC->warping_Q16, psEncC->nStatesDelayedDecision, &smpl_buf_idx, decisionDelay ); + + x_Q3 += psEncC->subfr_length; + pulses += psEncC->subfr_length; + pxq += psEncC->subfr_length; + } + + /* Find winner */ + RDmin_Q10 = psDelDec[ 0 ].RD_Q10; + Winner_ind = 0; + for( k = 1; k < psEncC->nStatesDelayedDecision; k++ ) { + if( psDelDec[ k ].RD_Q10 < RDmin_Q10 ) { + RDmin_Q10 = psDelDec[ k ].RD_Q10; + Winner_ind = k; + } + } + + /* Copy final part of signals from winner state to output and long-term filter states */ + psDD = &psDelDec[ Winner_ind ]; + psIndices->Seed = psDD->SeedInit; + last_smple_idx = smpl_buf_idx + decisionDelay; + Gain_Q10 = silk_RSHIFT32( Gains_Q16[ psEncC->nb_subfr - 1 ], 6 ); + for( i = 0; i < decisionDelay; i++ ) { + last_smple_idx = ( last_smple_idx - 1 ) & DECISION_DELAY_MASK; + pulses[ i - decisionDelay ] = (opus_int8)silk_RSHIFT_ROUND( psDD->Q_Q10[ last_smple_idx ], 10 ); + pxq[ i - decisionDelay ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( + silk_SMULWW( psDD->Xq_Q14[ last_smple_idx ], Gain_Q10 ), 8 ) ); + NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - decisionDelay + i ] = psDD->Shape_Q14[ last_smple_idx ]; + } + silk_memcpy( NSQ->sLPC_Q14, &psDD->sLPC_Q14[ psEncC->subfr_length ], NSQ_LPC_BUF_LENGTH * sizeof( opus_int32 ) ); + silk_memcpy( NSQ->sAR2_Q14, psDD->sAR2_Q14, sizeof( psDD->sAR2_Q14 ) ); + + /* Update states */ + NSQ->sLF_AR_shp_Q14 = psDD->LF_AR_Q14; + NSQ->lagPrev = pitchL[ psEncC->nb_subfr - 1 ]; + + /* Save quantized speech signal */ + /* 
DEBUG_STORE_DATA( enc.pcm, &NSQ->xq[psEncC->ltp_mem_length], psEncC->frame_length * sizeof( opus_int16 ) ) */ + silk_memmove( NSQ->xq, &NSQ->xq[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int16 ) ); + silk_memmove( NSQ->sLTP_shp_Q14, &NSQ->sLTP_shp_Q14[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int32 ) ); + RESTORE_STACK; +} + +/******************************************/ +/* Noise shape quantizer for one subframe */ +/******************************************/ +static OPUS_INLINE void silk_noise_shape_quantizer_del_dec( + silk_nsq_state *NSQ, /* I/O NSQ state */ + NSQ_del_dec_struct psDelDec[], /* I/O Delayed decision states */ + opus_int signalType, /* I Signal type */ + const opus_int32 x_Q10[], /* I */ + opus_int8 pulses[], /* O */ + opus_int16 xq[], /* O */ + opus_int32 sLTP_Q15[], /* I/O LTP filter state */ + opus_int32 delayedGain_Q10[], /* I/O Gain delay buffer */ + const opus_int16 a_Q12[], /* I Short term prediction coefs */ + const opus_int16 b_Q14[], /* I Long term prediction coefs */ + const opus_int16 AR_shp_Q13[], /* I Noise shaping coefs */ + opus_int lag, /* I Pitch lag */ + opus_int32 HarmShapeFIRPacked_Q14, /* I */ + opus_int Tilt_Q14, /* I Spectral tilt */ + opus_int32 LF_shp_Q14, /* I */ + opus_int32 Gain_Q16, /* I */ + opus_int Lambda_Q10, /* I */ + opus_int offset_Q10, /* I */ + opus_int length, /* I Input length */ + opus_int subfr, /* I Subframe number */ + opus_int shapingLPCOrder, /* I Shaping LPC filter order */ + opus_int predictLPCOrder, /* I Prediction filter order */ + opus_int warping_Q16, /* I */ + opus_int nStatesDelayedDecision, /* I Number of states in decision tree */ + opus_int *smpl_buf_idx, /* I Index to newest samples in buffers */ + opus_int decisionDelay /* I */ +) +{ + opus_int i, j, k, Winner_ind, RDmin_ind, RDmax_ind, last_smple_idx; + opus_int32 Winner_rand_state; + opus_int32 LTP_pred_Q14, LPC_pred_Q14, n_AR_Q14, n_LTP_Q14; + opus_int32 n_LF_Q14, r_Q10, rr_Q10, rd1_Q10, 
rd2_Q10, RDmin_Q10, RDmax_Q10; + opus_int32 q1_Q0, q1_Q10, q2_Q10, exc_Q14, LPC_exc_Q14, xq_Q14, Gain_Q10; + opus_int32 tmp1, tmp2, sLF_AR_shp_Q14; + opus_int32 *pred_lag_ptr, *shp_lag_ptr, *psLPC_Q14; + VARDECL( NSQ_sample_pair, psSampleState ); + NSQ_del_dec_struct *psDD; + NSQ_sample_struct *psSS; + SAVE_STACK; + + silk_assert( nStatesDelayedDecision > 0 ); + ALLOC( psSampleState, nStatesDelayedDecision, NSQ_sample_pair ); + + shp_lag_ptr = &NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - lag + HARM_SHAPE_FIR_TAPS / 2 ]; + pred_lag_ptr = &sLTP_Q15[ NSQ->sLTP_buf_idx - lag + LTP_ORDER / 2 ]; + Gain_Q10 = silk_RSHIFT( Gain_Q16, 6 ); + + for( i = 0; i < length; i++ ) { + /* Perform common calculations used in all states */ + + /* Long-term prediction */ + if( signalType == TYPE_VOICED ) { + /* Unrolled loop */ + /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */ + LTP_pred_Q14 = 2; + LTP_pred_Q14 = silk_SMLAWB( LTP_pred_Q14, pred_lag_ptr[ 0 ], b_Q14[ 0 ] ); + LTP_pred_Q14 = silk_SMLAWB( LTP_pred_Q14, pred_lag_ptr[ -1 ], b_Q14[ 1 ] ); + LTP_pred_Q14 = silk_SMLAWB( LTP_pred_Q14, pred_lag_ptr[ -2 ], b_Q14[ 2 ] ); + LTP_pred_Q14 = silk_SMLAWB( LTP_pred_Q14, pred_lag_ptr[ -3 ], b_Q14[ 3 ] ); + LTP_pred_Q14 = silk_SMLAWB( LTP_pred_Q14, pred_lag_ptr[ -4 ], b_Q14[ 4 ] ); + LTP_pred_Q14 = silk_LSHIFT( LTP_pred_Q14, 1 ); /* Q13 -> Q14 */ + pred_lag_ptr++; + } else { + LTP_pred_Q14 = 0; + } + + /* Long-term shaping */ + if( lag > 0 ) { + /* Symmetric, packed FIR coefficients */ + n_LTP_Q14 = silk_SMULWB( silk_ADD32( shp_lag_ptr[ 0 ], shp_lag_ptr[ -2 ] ), HarmShapeFIRPacked_Q14 ); + n_LTP_Q14 = silk_SMLAWT( n_LTP_Q14, shp_lag_ptr[ -1 ], HarmShapeFIRPacked_Q14 ); + n_LTP_Q14 = silk_SUB_LSHIFT32( LTP_pred_Q14, n_LTP_Q14, 2 ); /* Q12 -> Q14 */ + shp_lag_ptr++; + } else { + n_LTP_Q14 = 0; + } + + for( k = 0; k < nStatesDelayedDecision; k++ ) { + /* Delayed decision state */ + psDD = &psDelDec[ k ]; + + /* Sample state */ + psSS = psSampleState[ k ]; + + /* 
Generate dither */ + psDD->Seed = silk_RAND( psDD->Seed ); + + /* Pointer used in short term prediction and shaping */ + psLPC_Q14 = &psDD->sLPC_Q14[ NSQ_LPC_BUF_LENGTH - 1 + i ]; + /* Short-term prediction */ + silk_assert( predictLPCOrder == 10 || predictLPCOrder == 16 ); + /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */ + LPC_pred_Q14 = silk_RSHIFT( predictLPCOrder, 1 ); + LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ 0 ], a_Q12[ 0 ] ); + LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -1 ], a_Q12[ 1 ] ); + LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -2 ], a_Q12[ 2 ] ); + LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -3 ], a_Q12[ 3 ] ); + LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -4 ], a_Q12[ 4 ] ); + LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -5 ], a_Q12[ 5 ] ); + LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -6 ], a_Q12[ 6 ] ); + LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -7 ], a_Q12[ 7 ] ); + LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -8 ], a_Q12[ 8 ] ); + LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -9 ], a_Q12[ 9 ] ); + if( predictLPCOrder == 16 ) { + LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -10 ], a_Q12[ 10 ] ); + LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -11 ], a_Q12[ 11 ] ); + LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -12 ], a_Q12[ 12 ] ); + LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -13 ], a_Q12[ 13 ] ); + LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -14 ], a_Q12[ 14 ] ); + LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -15 ], a_Q12[ 15 ] ); + } + LPC_pred_Q14 = silk_LSHIFT( LPC_pred_Q14, 4 ); /* Q10 -> Q14 */ + + /* Noise shape feedback */ + silk_assert( ( shapingLPCOrder & 1 ) == 0 ); /* check that order is even */ + /* Output of lowpass section */ + tmp2 = silk_SMLAWB( psLPC_Q14[ 0 ], psDD->sAR2_Q14[ 0 ], warping_Q16 ); + /* Output of allpass section */ + tmp1 = silk_SMLAWB( 
psDD->sAR2_Q14[ 0 ], psDD->sAR2_Q14[ 1 ] - tmp2, warping_Q16 ); + psDD->sAR2_Q14[ 0 ] = tmp2; + n_AR_Q14 = silk_RSHIFT( shapingLPCOrder, 1 ); + n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp2, AR_shp_Q13[ 0 ] ); + /* Loop over allpass sections */ + for( j = 2; j < shapingLPCOrder; j += 2 ) { + /* Output of allpass section */ + tmp2 = silk_SMLAWB( psDD->sAR2_Q14[ j - 1 ], psDD->sAR2_Q14[ j + 0 ] - tmp1, warping_Q16 ); + psDD->sAR2_Q14[ j - 1 ] = tmp1; + n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp1, AR_shp_Q13[ j - 1 ] ); + /* Output of allpass section */ + tmp1 = silk_SMLAWB( psDD->sAR2_Q14[ j + 0 ], psDD->sAR2_Q14[ j + 1 ] - tmp2, warping_Q16 ); + psDD->sAR2_Q14[ j + 0 ] = tmp2; + n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp2, AR_shp_Q13[ j ] ); + } + psDD->sAR2_Q14[ shapingLPCOrder - 1 ] = tmp1; + n_AR_Q14 = silk_SMLAWB( n_AR_Q14, tmp1, AR_shp_Q13[ shapingLPCOrder - 1 ] ); + + n_AR_Q14 = silk_LSHIFT( n_AR_Q14, 1 ); /* Q11 -> Q12 */ + n_AR_Q14 = silk_SMLAWB( n_AR_Q14, psDD->LF_AR_Q14, Tilt_Q14 ); /* Q12 */ + n_AR_Q14 = silk_LSHIFT( n_AR_Q14, 2 ); /* Q12 -> Q14 */ + + n_LF_Q14 = silk_SMULWB( psDD->Shape_Q14[ *smpl_buf_idx ], LF_shp_Q14 ); /* Q12 */ + n_LF_Q14 = silk_SMLAWT( n_LF_Q14, psDD->LF_AR_Q14, LF_shp_Q14 ); /* Q12 */ + n_LF_Q14 = silk_LSHIFT( n_LF_Q14, 2 ); /* Q12 -> Q14 */ + + /* Input minus prediction plus noise feedback */ + /* r = x[ i ] - LTP_pred - LPC_pred + n_AR + n_Tilt + n_LF + n_LTP */ + tmp1 = silk_ADD32( n_AR_Q14, n_LF_Q14 ); /* Q14 */ + tmp2 = silk_ADD32( n_LTP_Q14, LPC_pred_Q14 ); /* Q13 */ + tmp1 = silk_SUB32( tmp2, tmp1 ); /* Q13 */ + tmp1 = silk_RSHIFT_ROUND( tmp1, 4 ); /* Q10 */ + + r_Q10 = silk_SUB32( x_Q10[ i ], tmp1 ); /* residual error Q10 */ + + /* Flip sign depending on dither */ + if ( psDD->Seed < 0 ) { + r_Q10 = -r_Q10; + } + r_Q10 = silk_LIMIT_32( r_Q10, -(31 << 10), 30 << 10 ); + + /* Find two quantization level candidates and measure their rate-distortion */ + q1_Q10 = silk_SUB32( r_Q10, offset_Q10 ); + q1_Q0 = silk_RSHIFT( q1_Q10, 10 ); + if( q1_Q0 
> 0 ) { + q1_Q10 = silk_SUB32( silk_LSHIFT( q1_Q0, 10 ), QUANT_LEVEL_ADJUST_Q10 ); + q1_Q10 = silk_ADD32( q1_Q10, offset_Q10 ); + q2_Q10 = silk_ADD32( q1_Q10, 1024 ); + rd1_Q10 = silk_SMULBB( q1_Q10, Lambda_Q10 ); + rd2_Q10 = silk_SMULBB( q2_Q10, Lambda_Q10 ); + } else if( q1_Q0 == 0 ) { + q1_Q10 = offset_Q10; + q2_Q10 = silk_ADD32( q1_Q10, 1024 - QUANT_LEVEL_ADJUST_Q10 ); + rd1_Q10 = silk_SMULBB( q1_Q10, Lambda_Q10 ); + rd2_Q10 = silk_SMULBB( q2_Q10, Lambda_Q10 ); + } else if( q1_Q0 == -1 ) { + q2_Q10 = offset_Q10; + q1_Q10 = silk_SUB32( q2_Q10, 1024 - QUANT_LEVEL_ADJUST_Q10 ); + rd1_Q10 = silk_SMULBB( -q1_Q10, Lambda_Q10 ); + rd2_Q10 = silk_SMULBB( q2_Q10, Lambda_Q10 ); + } else { /* q1_Q0 < -1 */ + q1_Q10 = silk_ADD32( silk_LSHIFT( q1_Q0, 10 ), QUANT_LEVEL_ADJUST_Q10 ); + q1_Q10 = silk_ADD32( q1_Q10, offset_Q10 ); + q2_Q10 = silk_ADD32( q1_Q10, 1024 ); + rd1_Q10 = silk_SMULBB( -q1_Q10, Lambda_Q10 ); + rd2_Q10 = silk_SMULBB( -q2_Q10, Lambda_Q10 ); + } + rr_Q10 = silk_SUB32( r_Q10, q1_Q10 ); + rd1_Q10 = silk_RSHIFT( silk_SMLABB( rd1_Q10, rr_Q10, rr_Q10 ), 10 ); + rr_Q10 = silk_SUB32( r_Q10, q2_Q10 ); + rd2_Q10 = silk_RSHIFT( silk_SMLABB( rd2_Q10, rr_Q10, rr_Q10 ), 10 ); + + if( rd1_Q10 < rd2_Q10 ) { + psSS[ 0 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd1_Q10 ); + psSS[ 1 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd2_Q10 ); + psSS[ 0 ].Q_Q10 = q1_Q10; + psSS[ 1 ].Q_Q10 = q2_Q10; + } else { + psSS[ 0 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd2_Q10 ); + psSS[ 1 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd1_Q10 ); + psSS[ 0 ].Q_Q10 = q2_Q10; + psSS[ 1 ].Q_Q10 = q1_Q10; + } + + /* Update states for best quantization */ + + /* Quantized excitation */ + exc_Q14 = silk_LSHIFT32( psSS[ 0 ].Q_Q10, 4 ); + if ( psDD->Seed < 0 ) { + exc_Q14 = -exc_Q14; + } + + /* Add predictions */ + LPC_exc_Q14 = silk_ADD32( exc_Q14, LTP_pred_Q14 ); + xq_Q14 = silk_ADD32( LPC_exc_Q14, LPC_pred_Q14 ); + + /* Update states */ + sLF_AR_shp_Q14 = silk_SUB32( xq_Q14, n_AR_Q14 ); + psSS[ 0 ].sLTP_shp_Q14 = 
silk_SUB32( sLF_AR_shp_Q14, n_LF_Q14 ); + psSS[ 0 ].LF_AR_Q14 = sLF_AR_shp_Q14; + psSS[ 0 ].LPC_exc_Q14 = LPC_exc_Q14; + psSS[ 0 ].xq_Q14 = xq_Q14; + + /* Update states for second best quantization */ + + /* Quantized excitation */ + exc_Q14 = silk_LSHIFT32( psSS[ 1 ].Q_Q10, 4 ); + if ( psDD->Seed < 0 ) { + exc_Q14 = -exc_Q14; + } + + + /* Add predictions */ + LPC_exc_Q14 = silk_ADD32( exc_Q14, LTP_pred_Q14 ); + xq_Q14 = silk_ADD32( LPC_exc_Q14, LPC_pred_Q14 ); + + /* Update states */ + sLF_AR_shp_Q14 = silk_SUB32( xq_Q14, n_AR_Q14 ); + psSS[ 1 ].sLTP_shp_Q14 = silk_SUB32( sLF_AR_shp_Q14, n_LF_Q14 ); + psSS[ 1 ].LF_AR_Q14 = sLF_AR_shp_Q14; + psSS[ 1 ].LPC_exc_Q14 = LPC_exc_Q14; + psSS[ 1 ].xq_Q14 = xq_Q14; + } + + *smpl_buf_idx = ( *smpl_buf_idx - 1 ) & DECISION_DELAY_MASK; /* Index to newest samples */ + last_smple_idx = ( *smpl_buf_idx + decisionDelay ) & DECISION_DELAY_MASK; /* Index to decisionDelay old samples */ + + /* Find winner */ + RDmin_Q10 = psSampleState[ 0 ][ 0 ].RD_Q10; + Winner_ind = 0; + for( k = 1; k < nStatesDelayedDecision; k++ ) { + if( psSampleState[ k ][ 0 ].RD_Q10 < RDmin_Q10 ) { + RDmin_Q10 = psSampleState[ k ][ 0 ].RD_Q10; + Winner_ind = k; + } + } + + /* Increase RD values of expired states */ + Winner_rand_state = psDelDec[ Winner_ind ].RandState[ last_smple_idx ]; + for( k = 0; k < nStatesDelayedDecision; k++ ) { + if( psDelDec[ k ].RandState[ last_smple_idx ] != Winner_rand_state ) { + psSampleState[ k ][ 0 ].RD_Q10 = silk_ADD32( psSampleState[ k ][ 0 ].RD_Q10, silk_int32_MAX >> 4 ); + psSampleState[ k ][ 1 ].RD_Q10 = silk_ADD32( psSampleState[ k ][ 1 ].RD_Q10, silk_int32_MAX >> 4 ); + silk_assert( psSampleState[ k ][ 0 ].RD_Q10 >= 0 ); + } + } + + /* Find worst in first set and best in second set */ + RDmax_Q10 = psSampleState[ 0 ][ 0 ].RD_Q10; + RDmin_Q10 = psSampleState[ 0 ][ 1 ].RD_Q10; + RDmax_ind = 0; + RDmin_ind = 0; + for( k = 1; k < nStatesDelayedDecision; k++ ) { + /* find worst in first set */ + if( psSampleState[ k ][ 0 
].RD_Q10 > RDmax_Q10 ) { + RDmax_Q10 = psSampleState[ k ][ 0 ].RD_Q10; + RDmax_ind = k; + } + /* find best in second set */ + if( psSampleState[ k ][ 1 ].RD_Q10 < RDmin_Q10 ) { + RDmin_Q10 = psSampleState[ k ][ 1 ].RD_Q10; + RDmin_ind = k; + } + } + + /* Replace a state if best from second set outperforms worst in first set */ + if( RDmin_Q10 < RDmax_Q10 ) { + silk_memcpy( ( (opus_int32 *)&psDelDec[ RDmax_ind ] ) + i, + ( (opus_int32 *)&psDelDec[ RDmin_ind ] ) + i, sizeof( NSQ_del_dec_struct ) - i * sizeof( opus_int32) ); + silk_memcpy( &psSampleState[ RDmax_ind ][ 0 ], &psSampleState[ RDmin_ind ][ 1 ], sizeof( NSQ_sample_struct ) ); + } + + /* Write samples from winner to output and long-term filter states */ + psDD = &psDelDec[ Winner_ind ]; + if( subfr > 0 || i >= decisionDelay ) { + pulses[ i - decisionDelay ] = (opus_int8)silk_RSHIFT_ROUND( psDD->Q_Q10[ last_smple_idx ], 10 ); + xq[ i - decisionDelay ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( + silk_SMULWW( psDD->Xq_Q14[ last_smple_idx ], delayedGain_Q10[ last_smple_idx ] ), 8 ) ); + NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - decisionDelay ] = psDD->Shape_Q14[ last_smple_idx ]; + sLTP_Q15[ NSQ->sLTP_buf_idx - decisionDelay ] = psDD->Pred_Q15[ last_smple_idx ]; + } + NSQ->sLTP_shp_buf_idx++; + NSQ->sLTP_buf_idx++; + + /* Update states */ + for( k = 0; k < nStatesDelayedDecision; k++ ) { + psDD = &psDelDec[ k ]; + psSS = &psSampleState[ k ][ 0 ]; + psDD->LF_AR_Q14 = psSS->LF_AR_Q14; + psDD->sLPC_Q14[ NSQ_LPC_BUF_LENGTH + i ] = psSS->xq_Q14; + psDD->Xq_Q14[ *smpl_buf_idx ] = psSS->xq_Q14; + psDD->Q_Q10[ *smpl_buf_idx ] = psSS->Q_Q10; + psDD->Pred_Q15[ *smpl_buf_idx ] = silk_LSHIFT32( psSS->LPC_exc_Q14, 1 ); + psDD->Shape_Q14[ *smpl_buf_idx ] = psSS->sLTP_shp_Q14; + psDD->Seed = silk_ADD32_ovflw( psDD->Seed, silk_RSHIFT_ROUND( psSS->Q_Q10, 10 ) ); + psDD->RandState[ *smpl_buf_idx ] = psDD->Seed; + psDD->RD_Q10 = psSS->RD_Q10; + } + delayedGain_Q10[ *smpl_buf_idx ] = Gain_Q10; + } + /* Update LPC states */ + 
for( k = 0; k < nStatesDelayedDecision; k++ ) { + psDD = &psDelDec[ k ]; + silk_memcpy( psDD->sLPC_Q14, &psDD->sLPC_Q14[ length ], NSQ_LPC_BUF_LENGTH * sizeof( opus_int32 ) ); + } + RESTORE_STACK; +} + +static OPUS_INLINE void silk_nsq_del_dec_scale_states( + const silk_encoder_state *psEncC, /* I Encoder State */ + silk_nsq_state *NSQ, /* I/O NSQ state */ + NSQ_del_dec_struct psDelDec[], /* I/O Delayed decision states */ + const opus_int32 x_Q3[], /* I Input in Q3 */ + opus_int32 x_sc_Q10[], /* O Input scaled with 1/Gain in Q10 */ + const opus_int16 sLTP[], /* I Re-whitened LTP state in Q0 */ + opus_int32 sLTP_Q15[], /* O LTP state matching scaled input */ + opus_int subfr, /* I Subframe number */ + opus_int nStatesDelayedDecision, /* I Number of del dec states */ + const opus_int LTP_scale_Q14, /* I LTP state scaling */ + const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I */ + const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lag */ + const opus_int signal_type, /* I Signal type */ + const opus_int decisionDelay /* I Decision delay */ +) +{ + opus_int i, k, lag; + opus_int32 gain_adj_Q16, inv_gain_Q31, inv_gain_Q23; + NSQ_del_dec_struct *psDD; + + lag = pitchL[ subfr ]; + inv_gain_Q31 = silk_INVERSE32_varQ( silk_max( Gains_Q16[ subfr ], 1 ), 47 ); + silk_assert( inv_gain_Q31 != 0 ); + + /* Calculate gain adjustment factor */ + if( Gains_Q16[ subfr ] != NSQ->prev_gain_Q16 ) { + gain_adj_Q16 = silk_DIV32_varQ( NSQ->prev_gain_Q16, Gains_Q16[ subfr ], 16 ); + } else { + gain_adj_Q16 = (opus_int32)1 << 16; + } + + /* Scale input */ + inv_gain_Q23 = silk_RSHIFT_ROUND( inv_gain_Q31, 8 ); + for( i = 0; i < psEncC->subfr_length; i++ ) { + x_sc_Q10[ i ] = silk_SMULWW( x_Q3[ i ], inv_gain_Q23 ); + } + + /* Save inverse gain */ + NSQ->prev_gain_Q16 = Gains_Q16[ subfr ]; + + /* After rewhitening the LTP state is un-scaled, so scale with inv_gain_Q16 */ + if( NSQ->rewhite_flag ) { + if( subfr == 0 ) { + /* Do LTP downscaling */ + inv_gain_Q31 = silk_LSHIFT( silk_SMULWB( 
inv_gain_Q31, LTP_scale_Q14 ), 2 ); + } + for( i = NSQ->sLTP_buf_idx - lag - LTP_ORDER / 2; i < NSQ->sLTP_buf_idx; i++ ) { + silk_assert( i < MAX_FRAME_LENGTH ); + sLTP_Q15[ i ] = silk_SMULWB( inv_gain_Q31, sLTP[ i ] ); + } + } + + /* Adjust for changing gain */ + if( gain_adj_Q16 != (opus_int32)1 << 16 ) { + /* Scale long-term shaping state */ + for( i = NSQ->sLTP_shp_buf_idx - psEncC->ltp_mem_length; i < NSQ->sLTP_shp_buf_idx; i++ ) { + NSQ->sLTP_shp_Q14[ i ] = silk_SMULWW( gain_adj_Q16, NSQ->sLTP_shp_Q14[ i ] ); + } + + /* Scale long-term prediction state */ + if( signal_type == TYPE_VOICED && NSQ->rewhite_flag == 0 ) { + for( i = NSQ->sLTP_buf_idx - lag - LTP_ORDER / 2; i < NSQ->sLTP_buf_idx - decisionDelay; i++ ) { + sLTP_Q15[ i ] = silk_SMULWW( gain_adj_Q16, sLTP_Q15[ i ] ); + } + } + + for( k = 0; k < nStatesDelayedDecision; k++ ) { + psDD = &psDelDec[ k ]; + + /* Scale scalar states */ + psDD->LF_AR_Q14 = silk_SMULWW( gain_adj_Q16, psDD->LF_AR_Q14 ); + + /* Scale short-term prediction and shaping states */ + for( i = 0; i < NSQ_LPC_BUF_LENGTH; i++ ) { + psDD->sLPC_Q14[ i ] = silk_SMULWW( gain_adj_Q16, psDD->sLPC_Q14[ i ] ); + } + for( i = 0; i < MAX_SHAPE_LPC_ORDER; i++ ) { + psDD->sAR2_Q14[ i ] = silk_SMULWW( gain_adj_Q16, psDD->sAR2_Q14[ i ] ); + } + for( i = 0; i < DECISION_DELAY; i++ ) { + psDD->Pred_Q15[ i ] = silk_SMULWW( gain_adj_Q16, psDD->Pred_Q15[ i ] ); + psDD->Shape_Q14[ i ] = silk_SMULWW( gain_adj_Q16, psDD->Shape_Q14[ i ] ); + } + } + } +} diff --git a/drivers/opus/silk/PLC.c b/drivers/opus/silk/PLC.c new file mode 100644 index 0000000000..9fc11adda9 --- /dev/null +++ b/drivers/opus/silk/PLC.c @@ -0,0 +1,423 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. 
+Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "silk_main.h" +#include "stack_alloc.h" +#include "PLC.h" + +#define NB_ATT 2 +static const opus_int16 HARM_ATT_Q15[NB_ATT] = { 32440, 31130 }; /* 0.99, 0.95 */ +static const opus_int16 PLC_RAND_ATTENUATE_V_Q15[NB_ATT] = { 31130, 26214 }; /* 0.95, 0.8 */ +static const opus_int16 PLC_RAND_ATTENUATE_UV_Q15[NB_ATT] = { 32440, 29491 }; /* 0.99, 0.9 */ + +static OPUS_INLINE void silk_PLC_update( + silk_decoder_state *psDec, /* I/O Decoder state */ + silk_decoder_control *psDecCtrl /* I/O Decoder control */ +); + +static OPUS_INLINE void silk_PLC_conceal( + silk_decoder_state *psDec, /* I/O Decoder state */ + silk_decoder_control *psDecCtrl, /* I/O Decoder control */ + opus_int16 frame[] /* O LPC residual signal */ +); + + +void silk_PLC_Reset( + silk_decoder_state *psDec /* I/O Decoder state */ +) +{ + psDec->sPLC.pitchL_Q8 = silk_LSHIFT( psDec->frame_length, 8 - 1 ); + psDec->sPLC.prevGain_Q16[ 0 ] = SILK_FIX_CONST( 1, 16 ); + psDec->sPLC.prevGain_Q16[ 1 ] = SILK_FIX_CONST( 1, 16 ); + psDec->sPLC.subfr_length = 20; + psDec->sPLC.nb_subfr = 2; +} + +void silk_PLC( + silk_decoder_state *psDec, /* I/O Decoder state */ + silk_decoder_control *psDecCtrl, /* I/O Decoder control */ + opus_int16 frame[], /* I/O signal */ + opus_int lost /* I Loss flag */ +) +{ + /* PLC control function */ + if( psDec->fs_kHz != psDec->sPLC.fs_kHz ) { + silk_PLC_Reset( psDec ); + psDec->sPLC.fs_kHz = psDec->fs_kHz; + } + + if( lost ) { + /****************************/ + /* Generate Signal */ + /****************************/ + silk_PLC_conceal( psDec, psDecCtrl, frame ); + + psDec->lossCnt++; + } else { + /****************************/ + /* Update state */ + /****************************/ + silk_PLC_update( psDec, psDecCtrl ); + } +} + +/**************************************************/ +/* Update state of PLC */ 
+/**************************************************/ +static OPUS_INLINE void silk_PLC_update( + silk_decoder_state *psDec, /* I/O Decoder state */ + silk_decoder_control *psDecCtrl /* I/O Decoder control */ +) +{ + opus_int32 LTP_Gain_Q14, temp_LTP_Gain_Q14; + opus_int i, j; + silk_PLC_struct *psPLC; + + psPLC = &psDec->sPLC; + + /* Update parameters used in case of packet loss */ + psDec->prevSignalType = psDec->indices.signalType; + LTP_Gain_Q14 = 0; + if( psDec->indices.signalType == TYPE_VOICED ) { + /* Find the parameters for the last subframe which contains a pitch pulse */ + for( j = 0; j * psDec->subfr_length < psDecCtrl->pitchL[ psDec->nb_subfr - 1 ]; j++ ) { + if( j == psDec->nb_subfr ) { + break; + } + temp_LTP_Gain_Q14 = 0; + for( i = 0; i < LTP_ORDER; i++ ) { + temp_LTP_Gain_Q14 += psDecCtrl->LTPCoef_Q14[ ( psDec->nb_subfr - 1 - j ) * LTP_ORDER + i ]; + } + if( temp_LTP_Gain_Q14 > LTP_Gain_Q14 ) { + LTP_Gain_Q14 = temp_LTP_Gain_Q14; + silk_memcpy( psPLC->LTPCoef_Q14, + &psDecCtrl->LTPCoef_Q14[ silk_SMULBB( psDec->nb_subfr - 1 - j, LTP_ORDER ) ], + LTP_ORDER * sizeof( opus_int16 ) ); + + psPLC->pitchL_Q8 = silk_LSHIFT( psDecCtrl->pitchL[ psDec->nb_subfr - 1 - j ], 8 ); + } + } + + silk_memset( psPLC->LTPCoef_Q14, 0, LTP_ORDER * sizeof( opus_int16 ) ); + psPLC->LTPCoef_Q14[ LTP_ORDER / 2 ] = LTP_Gain_Q14; + + /* Limit LT coefs */ + if( LTP_Gain_Q14 < V_PITCH_GAIN_START_MIN_Q14 ) { + opus_int scale_Q10; + opus_int32 tmp; + + tmp = silk_LSHIFT( V_PITCH_GAIN_START_MIN_Q14, 10 ); + scale_Q10 = silk_DIV32( tmp, silk_max( LTP_Gain_Q14, 1 ) ); + for( i = 0; i < LTP_ORDER; i++ ) { + psPLC->LTPCoef_Q14[ i ] = silk_RSHIFT( silk_SMULBB( psPLC->LTPCoef_Q14[ i ], scale_Q10 ), 10 ); + } + } else if( LTP_Gain_Q14 > V_PITCH_GAIN_START_MAX_Q14 ) { + opus_int scale_Q14; + opus_int32 tmp; + + tmp = silk_LSHIFT( V_PITCH_GAIN_START_MAX_Q14, 14 ); + scale_Q14 = silk_DIV32( tmp, silk_max( LTP_Gain_Q14, 1 ) ); + for( i = 0; i < LTP_ORDER; i++ ) { + psPLC->LTPCoef_Q14[ i ] = 
silk_RSHIFT( silk_SMULBB( psPLC->LTPCoef_Q14[ i ], scale_Q14 ), 14 ); + } + } + } else { + psPLC->pitchL_Q8 = silk_LSHIFT( silk_SMULBB( psDec->fs_kHz, 18 ), 8 ); + silk_memset( psPLC->LTPCoef_Q14, 0, LTP_ORDER * sizeof( opus_int16 )); + } + + /* Save LPC coeficients */ + silk_memcpy( psPLC->prevLPC_Q12, psDecCtrl->PredCoef_Q12[ 1 ], psDec->LPC_order * sizeof( opus_int16 ) ); + psPLC->prevLTP_scale_Q14 = psDecCtrl->LTP_scale_Q14; + + /* Save last two gains */ + silk_memcpy( psPLC->prevGain_Q16, &psDecCtrl->Gains_Q16[ psDec->nb_subfr - 2 ], 2 * sizeof( opus_int32 ) ); + + psPLC->subfr_length = psDec->subfr_length; + psPLC->nb_subfr = psDec->nb_subfr; +} + +static OPUS_INLINE void silk_PLC_conceal( + silk_decoder_state *psDec, /* I/O Decoder state */ + silk_decoder_control *psDecCtrl, /* I/O Decoder control */ + opus_int16 frame[] /* O LPC residual signal */ +) +{ + opus_int i, j, k; + opus_int lag, idx, sLTP_buf_idx, shift1, shift2; + opus_int32 rand_seed, harm_Gain_Q15, rand_Gain_Q15, inv_gain_Q30; + opus_int32 energy1, energy2, *rand_ptr, *pred_lag_ptr; + opus_int32 LPC_pred_Q10, LTP_pred_Q12; + opus_int16 rand_scale_Q14; + opus_int16 *B_Q14, *exc_buf_ptr; + opus_int32 *sLPC_Q14_ptr; + VARDECL( opus_int16, exc_buf ); + opus_int16 A_Q12[ MAX_LPC_ORDER ]; + VARDECL( opus_int16, sLTP ); + VARDECL( opus_int32, sLTP_Q14 ); + silk_PLC_struct *psPLC = &psDec->sPLC; + opus_int32 prevGain_Q10[2]; + SAVE_STACK; + + ALLOC( exc_buf, 2*psPLC->subfr_length, opus_int16 ); + ALLOC( sLTP, psDec->ltp_mem_length, opus_int16 ); + ALLOC( sLTP_Q14, psDec->ltp_mem_length + psDec->frame_length, opus_int32 ); + + prevGain_Q10[0] = silk_RSHIFT( psPLC->prevGain_Q16[ 0 ], 6); + prevGain_Q10[1] = silk_RSHIFT( psPLC->prevGain_Q16[ 1 ], 6); + + if( psDec->first_frame_after_reset ) { + silk_memset( psPLC->prevLPC_Q12, 0, sizeof( psPLC->prevLPC_Q12 ) ); + } + + /* Find random noise component */ + /* Scale previous excitation signal */ + exc_buf_ptr = exc_buf; + for( k = 0; k < 2; k++ ) { + for( i 
= 0; i < psPLC->subfr_length; i++ ) { + exc_buf_ptr[ i ] = (opus_int16)silk_SAT16( silk_RSHIFT( + silk_SMULWW( psDec->exc_Q14[ i + ( k + psPLC->nb_subfr - 2 ) * psPLC->subfr_length ], prevGain_Q10[ k ] ), 8 ) ); + } + exc_buf_ptr += psPLC->subfr_length; + } + /* Find the subframe with lowest energy of the last two and use that as random noise generator */ + silk_sum_sqr_shift( &energy1, &shift1, exc_buf, psPLC->subfr_length ); + silk_sum_sqr_shift( &energy2, &shift2, &exc_buf[ psPLC->subfr_length ], psPLC->subfr_length ); + + if( silk_RSHIFT( energy1, shift2 ) < silk_RSHIFT( energy2, shift1 ) ) { + /* First sub-frame has lowest energy */ + rand_ptr = &psDec->exc_Q14[ silk_max_int( 0, ( psPLC->nb_subfr - 1 ) * psPLC->subfr_length - RAND_BUF_SIZE ) ]; + } else { + /* Second sub-frame has lowest energy */ + rand_ptr = &psDec->exc_Q14[ silk_max_int( 0, psPLC->nb_subfr * psPLC->subfr_length - RAND_BUF_SIZE ) ]; + } + + /* Set up Gain to random noise component */ + B_Q14 = psPLC->LTPCoef_Q14; + rand_scale_Q14 = psPLC->randScale_Q14; + + /* Set up attenuation gains */ + harm_Gain_Q15 = HARM_ATT_Q15[ silk_min_int( NB_ATT - 1, psDec->lossCnt ) ]; + if( psDec->prevSignalType == TYPE_VOICED ) { + rand_Gain_Q15 = PLC_RAND_ATTENUATE_V_Q15[ silk_min_int( NB_ATT - 1, psDec->lossCnt ) ]; + } else { + rand_Gain_Q15 = PLC_RAND_ATTENUATE_UV_Q15[ silk_min_int( NB_ATT - 1, psDec->lossCnt ) ]; + } + + /* LPC concealment. Apply BWE to previous LPC */ + silk_bwexpander( psPLC->prevLPC_Q12, psDec->LPC_order, SILK_FIX_CONST( BWE_COEF, 16 ) ); + + /* Preload LPC coeficients to array on stack. 
Gives small performance gain */ + silk_memcpy( A_Q12, psPLC->prevLPC_Q12, psDec->LPC_order * sizeof( opus_int16 ) ); + + /* First Lost frame */ + if( psDec->lossCnt == 0 ) { + rand_scale_Q14 = 1 << 14; + + /* Reduce random noise Gain for voiced frames */ + if( psDec->prevSignalType == TYPE_VOICED ) { + for( i = 0; i < LTP_ORDER; i++ ) { + rand_scale_Q14 -= B_Q14[ i ]; + } + rand_scale_Q14 = silk_max_16( 3277, rand_scale_Q14 ); /* 0.2 */ + rand_scale_Q14 = (opus_int16)silk_RSHIFT( silk_SMULBB( rand_scale_Q14, psPLC->prevLTP_scale_Q14 ), 14 ); + } else { + /* Reduce random noise for unvoiced frames with high LPC gain */ + opus_int32 invGain_Q30, down_scale_Q30; + + invGain_Q30 = silk_LPC_inverse_pred_gain( psPLC->prevLPC_Q12, psDec->LPC_order ); + + down_scale_Q30 = silk_min_32( silk_RSHIFT( (opus_int32)1 << 30, LOG2_INV_LPC_GAIN_HIGH_THRES ), invGain_Q30 ); + down_scale_Q30 = silk_max_32( silk_RSHIFT( (opus_int32)1 << 30, LOG2_INV_LPC_GAIN_LOW_THRES ), down_scale_Q30 ); + down_scale_Q30 = silk_LSHIFT( down_scale_Q30, LOG2_INV_LPC_GAIN_HIGH_THRES ); + + rand_Gain_Q15 = silk_RSHIFT( silk_SMULWB( down_scale_Q30, rand_Gain_Q15 ), 14 ); + } + } + + rand_seed = psPLC->rand_seed; + lag = silk_RSHIFT_ROUND( psPLC->pitchL_Q8, 8 ); + sLTP_buf_idx = psDec->ltp_mem_length; + + /* Rewhiten LTP state */ + idx = psDec->ltp_mem_length - lag - psDec->LPC_order - LTP_ORDER / 2; + silk_assert( idx > 0 ); + silk_LPC_analysis_filter( &sLTP[ idx ], &psDec->outBuf[ idx ], A_Q12, psDec->ltp_mem_length - idx, psDec->LPC_order ); + /* Scale LTP state */ + inv_gain_Q30 = silk_INVERSE32_varQ( psPLC->prevGain_Q16[ 1 ], 46 ); + inv_gain_Q30 = silk_min( inv_gain_Q30, silk_int32_MAX >> 1 ); + for( i = idx + psDec->LPC_order; i < psDec->ltp_mem_length; i++ ) { + sLTP_Q14[ i ] = silk_SMULWB( inv_gain_Q30, sLTP[ i ] ); + } + + /***************************/ + /* LTP synthesis filtering */ + /***************************/ + for( k = 0; k < psDec->nb_subfr; k++ ) { + /* Set up pointer */ + pred_lag_ptr = 
&sLTP_Q14[ sLTP_buf_idx - lag + LTP_ORDER / 2 ]; + for( i = 0; i < psDec->subfr_length; i++ ) { + /* Unrolled loop */ + /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */ + LTP_pred_Q12 = 2; + LTP_pred_Q12 = silk_SMLAWB( LTP_pred_Q12, pred_lag_ptr[ 0 ], B_Q14[ 0 ] ); + LTP_pred_Q12 = silk_SMLAWB( LTP_pred_Q12, pred_lag_ptr[ -1 ], B_Q14[ 1 ] ); + LTP_pred_Q12 = silk_SMLAWB( LTP_pred_Q12, pred_lag_ptr[ -2 ], B_Q14[ 2 ] ); + LTP_pred_Q12 = silk_SMLAWB( LTP_pred_Q12, pred_lag_ptr[ -3 ], B_Q14[ 3 ] ); + LTP_pred_Q12 = silk_SMLAWB( LTP_pred_Q12, pred_lag_ptr[ -4 ], B_Q14[ 4 ] ); + pred_lag_ptr++; + + /* Generate LPC excitation */ + rand_seed = silk_RAND( rand_seed ); + idx = silk_RSHIFT( rand_seed, 25 ) & RAND_BUF_MASK; + sLTP_Q14[ sLTP_buf_idx ] = silk_LSHIFT32( silk_SMLAWB( LTP_pred_Q12, rand_ptr[ idx ], rand_scale_Q14 ), 2 ); + sLTP_buf_idx++; + } + + /* Gradually reduce LTP gain */ + for( j = 0; j < LTP_ORDER; j++ ) { + B_Q14[ j ] = silk_RSHIFT( silk_SMULBB( harm_Gain_Q15, B_Q14[ j ] ), 15 ); + } + /* Gradually reduce excitation gain */ + rand_scale_Q14 = silk_RSHIFT( silk_SMULBB( rand_scale_Q14, rand_Gain_Q15 ), 15 ); + + /* Slowly increase pitch lag */ + psPLC->pitchL_Q8 = silk_SMLAWB( psPLC->pitchL_Q8, psPLC->pitchL_Q8, PITCH_DRIFT_FAC_Q16 ); + psPLC->pitchL_Q8 = silk_min_32( psPLC->pitchL_Q8, silk_LSHIFT( silk_SMULBB( MAX_PITCH_LAG_MS, psDec->fs_kHz ), 8 ) ); + lag = silk_RSHIFT_ROUND( psPLC->pitchL_Q8, 8 ); + } + + /***************************/ + /* LPC synthesis filtering */ + /***************************/ + sLPC_Q14_ptr = &sLTP_Q14[ psDec->ltp_mem_length - MAX_LPC_ORDER ]; + + /* Copy LPC state */ + silk_memcpy( sLPC_Q14_ptr, psDec->sLPC_Q14_buf, MAX_LPC_ORDER * sizeof( opus_int32 ) ); + + silk_assert( psDec->LPC_order >= 10 ); /* check that unrolling works */ + for( i = 0; i < psDec->frame_length; i++ ) { + /* partly unrolled */ + /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */ + LPC_pred_Q10 = silk_RSHIFT( 
psDec->LPC_order, 1 ); + LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i - 1 ], A_Q12[ 0 ] ); + LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i - 2 ], A_Q12[ 1 ] ); + LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i - 3 ], A_Q12[ 2 ] ); + LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i - 4 ], A_Q12[ 3 ] ); + LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i - 5 ], A_Q12[ 4 ] ); + LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i - 6 ], A_Q12[ 5 ] ); + LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i - 7 ], A_Q12[ 6 ] ); + LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i - 8 ], A_Q12[ 7 ] ); + LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i - 9 ], A_Q12[ 8 ] ); + LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i - 10 ], A_Q12[ 9 ] ); + for( j = 10; j < psDec->LPC_order; j++ ) { + LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14_ptr[ MAX_LPC_ORDER + i - j - 1 ], A_Q12[ j ] ); + } + + /* Add prediction to LPC excitation */ + sLPC_Q14_ptr[ MAX_LPC_ORDER + i ] = silk_ADD_LSHIFT32( sLPC_Q14_ptr[ MAX_LPC_ORDER + i ], LPC_pred_Q10, 4 ); + + /* Scale with Gain */ + frame[ i ] = (opus_int16)silk_SAT16( silk_SAT16( silk_RSHIFT_ROUND( silk_SMULWW( sLPC_Q14_ptr[ MAX_LPC_ORDER + i ], prevGain_Q10[ 1 ] ), 8 ) ) ); + } + + /* Save LPC state */ + silk_memcpy( psDec->sLPC_Q14_buf, &sLPC_Q14_ptr[ psDec->frame_length ], MAX_LPC_ORDER * sizeof( opus_int32 ) ); + + /**************************************/ + /* Update states */ + /**************************************/ + psPLC->rand_seed = rand_seed; + psPLC->randScale_Q14 = rand_scale_Q14; + for( i = 0; i < MAX_NB_SUBFR; i++ ) { + psDecCtrl->pitchL[ i ] = lag; + } + RESTORE_STACK; +} + +/* Glues concealed frames with new good received frames */ +void silk_PLC_glue_frames( + 
silk_decoder_state *psDec, /* I/O decoder state */ + opus_int16 frame[], /* I/O signal */ + opus_int length /* I length of signal */ +) +{ + opus_int i, energy_shift; + opus_int32 energy; + silk_PLC_struct *psPLC; + psPLC = &psDec->sPLC; + + if( psDec->lossCnt ) { + /* Calculate energy in concealed residual */ + silk_sum_sqr_shift( &psPLC->conc_energy, &psPLC->conc_energy_shift, frame, length ); + + psPLC->last_frame_lost = 1; + } else { + if( psDec->sPLC.last_frame_lost ) { + /* Calculate residual in decoded signal if last frame was lost */ + silk_sum_sqr_shift( &energy, &energy_shift, frame, length ); + + /* Normalize energies */ + if( energy_shift > psPLC->conc_energy_shift ) { + psPLC->conc_energy = silk_RSHIFT( psPLC->conc_energy, energy_shift - psPLC->conc_energy_shift ); + } else if( energy_shift < psPLC->conc_energy_shift ) { + energy = silk_RSHIFT( energy, psPLC->conc_energy_shift - energy_shift ); + } + + /* Fade in the energy difference */ + if( energy > psPLC->conc_energy ) { + opus_int32 frac_Q24, LZ; + opus_int32 gain_Q16, slope_Q16; + + LZ = silk_CLZ32( psPLC->conc_energy ); + LZ = LZ - 1; + psPLC->conc_energy = silk_LSHIFT( psPLC->conc_energy, LZ ); + energy = silk_RSHIFT( energy, silk_max_32( 24 - LZ, 0 ) ); + + frac_Q24 = silk_DIV32( psPLC->conc_energy, silk_max( energy, 1 ) ); + + gain_Q16 = silk_LSHIFT( silk_SQRT_APPROX( frac_Q24 ), 4 ); + slope_Q16 = silk_DIV32_16( ( (opus_int32)1 << 16 ) - gain_Q16, length ); + /* Make slope 4x steeper to avoid missing onsets after DTX */ + slope_Q16 = silk_LSHIFT( slope_Q16, 2 ); + + for( i = 0; i < length; i++ ) { + frame[ i ] = silk_SMULWB( gain_Q16, frame[ i ] ); + gain_Q16 += slope_Q16; + if( gain_Q16 > (opus_int32)1 << 16 ) { + break; + } + } + } + } + psPLC->last_frame_lost = 0; + } +} diff --git a/drivers/opus/silk/PLC.h b/drivers/opus/silk/PLC.h new file mode 100644 index 0000000000..f531cda950 --- /dev/null +++ b/drivers/opus/silk/PLC.h @@ -0,0 +1,61 @@ 
+/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+***********************************************************************/ + +#ifndef SILK_PLC_H +#define SILK_PLC_H + +#include "silk_main.h" + +#define BWE_COEF 0.99 +#define V_PITCH_GAIN_START_MIN_Q14 11469 /* 0.7 in Q14 */ +#define V_PITCH_GAIN_START_MAX_Q14 15565 /* 0.95 in Q14 */ +#define MAX_PITCH_LAG_MS 18 +#define RAND_BUF_SIZE 128 +#define RAND_BUF_MASK ( RAND_BUF_SIZE - 1 ) +#define LOG2_INV_LPC_GAIN_HIGH_THRES 3 /* 2^3 = 8 dB LPC gain */ +#define LOG2_INV_LPC_GAIN_LOW_THRES 8 /* 2^8 = 24 dB LPC gain */ +#define PITCH_DRIFT_FAC_Q16 655 /* 0.01 in Q16 */ + +void silk_PLC_Reset( + silk_decoder_state *psDec /* I/O Decoder state */ +); + +void silk_PLC( + silk_decoder_state *psDec, /* I/O Decoder state */ + silk_decoder_control *psDecCtrl, /* I/O Decoder control */ + opus_int16 frame[], /* I/O signal */ + opus_int lost /* I Loss flag */ +); + +void silk_PLC_glue_frames( + silk_decoder_state *psDec, /* I/O decoder state */ + opus_int16 frame[], /* I/O signal */ + opus_int length /* I length of signal */ +); + +#endif + diff --git a/drivers/opus/silk/SigProc_FIX.h b/drivers/opus/silk/SigProc_FIX.h new file mode 100644 index 0000000000..1b58057910 --- /dev/null +++ b/drivers/opus/silk/SigProc_FIX.h @@ -0,0 +1,594 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. 
+- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifndef SILK_SIGPROC_FIX_H +#define SILK_SIGPROC_FIX_H + +#ifdef __cplusplus +extern "C" +{ +#endif + +/*#define silk_MACRO_COUNT */ /* Used to enable WMOPS counting */ + +#define SILK_MAX_ORDER_LPC 16 /* max order of the LPC analysis in schur() and k2a() */ + +#include <string.h> /* for memset(), memcpy(), memmove() */ +#include "typedef.h" +#include "resampler_structs.h" +#include "macros.h" + + +/********************************************************************/ +/* SIGNAL PROCESSING FUNCTIONS */ +/********************************************************************/ + +/*! 
+ * Initialize/reset the resampler state for a given pair of input/output sampling rates +*/ +opus_int silk_resampler_init( + silk_resampler_state_struct *S, /* I/O Resampler state */ + opus_int32 Fs_Hz_in, /* I Input sampling rate (Hz) */ + opus_int32 Fs_Hz_out, /* I Output sampling rate (Hz) */ + opus_int forEnc /* I If 1: encoder; if 0: decoder */ +); + +/*! + * Resampler: convert from one sampling rate to another + */ +opus_int silk_resampler( + silk_resampler_state_struct *S, /* I/O Resampler state */ + opus_int16 out[], /* O Output signal */ + const opus_int16 in[], /* I Input signal */ + opus_int32 inLen /* I Number of input samples */ +); + +/*! +* Downsample 2x, mediocre quality +*/ +void silk_resampler_down2( + opus_int32 *S, /* I/O State vector [ 2 ] */ + opus_int16 *out, /* O Output signal [ len ] */ + const opus_int16 *in, /* I Input signal [ floor(len/2) ] */ + opus_int32 inLen /* I Number of input samples */ +); + +/*! + * Downsample by a factor 2/3, low quality +*/ +void silk_resampler_down2_3( + opus_int32 *S, /* I/O State vector [ 6 ] */ + opus_int16 *out, /* O Output signal [ floor(2*inLen/3) ] */ + const opus_int16 *in, /* I Input signal [ inLen ] */ + opus_int32 inLen /* I Number of input samples */ +); + +/*! + * second order ARMA filter; + * slower than biquad() but uses more precise coefficients + * can handle (slowly) varying coefficients + */ +void silk_biquad_alt( + const opus_int16 *in, /* I input signal */ + const opus_int32 *B_Q28, /* I MA coefficients [3] */ + const opus_int32 *A_Q28, /* I AR coefficients [2] */ + opus_int32 *S, /* I/O State vector [2] */ + opus_int16 *out, /* O output signal */ + const opus_int32 len, /* I signal length (must be even) */ + opus_int stride /* I Operate on interleaved signal if > 1 */ +); + +/* Variable order MA prediction error filter. 
*/ +void silk_LPC_analysis_filter( + opus_int16 *out, /* O Output signal */ + const opus_int16 *in, /* I Input signal */ + const opus_int16 *B, /* I MA prediction coefficients, Q12 [order] */ + const opus_int32 len, /* I Signal length */ + const opus_int32 d /* I Filter order */ +); + +/* Chirp (bandwidth expand) LP AR filter */ +void silk_bwexpander( + opus_int16 *ar, /* I/O AR filter to be expanded (without leading 1) */ + const opus_int d, /* I Length of ar */ + opus_int32 chirp_Q16 /* I Chirp factor (typically in the range 0 to 1) */ +); + +/* Chirp (bandwidth expand) LP AR filter */ +void silk_bwexpander_32( + opus_int32 *ar, /* I/O AR filter to be expanded (without leading 1) */ + const opus_int d, /* I Length of ar */ + opus_int32 chirp_Q16 /* I Chirp factor in Q16 */ +); + +/* Compute inverse of LPC prediction gain, and */ +/* test if LPC coefficients are stable (all poles within unit circle) */ +opus_int32 silk_LPC_inverse_pred_gain( /* O Returns inverse prediction gain in energy domain, Q30 */ + const opus_int16 *A_Q12, /* I Prediction coefficients, Q12 [order] */ + const opus_int order /* I Prediction order */ +); + +/* For input in Q24 domain */ +opus_int32 silk_LPC_inverse_pred_gain_Q24( /* O Returns inverse prediction gain in energy domain, Q30 */ + const opus_int32 *A_Q24, /* I Prediction coefficients [order] */ + const opus_int order /* I Prediction order */ +); + +/* Split signal in two decimated bands using first-order allpass filters */ +void silk_ana_filt_bank_1( + const opus_int16 *in, /* I Input signal [N] */ + opus_int32 *S, /* I/O State vector [2] */ + opus_int16 *outL, /* O Low band [N/2] */ + opus_int16 *outH, /* O High band [N/2] */ + const opus_int32 N /* I Number of input samples */ +); + +/********************************************************************/ +/* SCALAR FUNCTIONS */ +/********************************************************************/ + +/* Approximation of 128 * log2() (exact inverse of approx 2^() below) */ +/* 
Convert input to a log scale */ +opus_int32 silk_lin2log( + const opus_int32 inLin /* I input in linear scale */ +); + +/* Approximation of a sigmoid function */ +opus_int silk_sigm_Q15( + opus_int in_Q5 /* I */ +); + +/* Approximation of 2^() (exact inverse of approx log2() above) */ +/* Convert input to a linear scale */ +opus_int32 silk_log2lin( + const opus_int32 inLog_Q7 /* I input on log scale */ +); + +/* Compute number of bits to right shift the sum of squares of a vector */ +/* of int16s to make it fit in an int32 */ +void silk_sum_sqr_shift( + opus_int32 *energy, /* O Energy of x, after shifting to the right */ + opus_int *shift, /* O Number of bits right shift applied to energy */ + const opus_int16 *x, /* I Input vector */ + opus_int len /* I Length of input vector */ +); + +/* Calculates the reflection coefficients from the correlation sequence */ +/* Faster than schur64(), but much less accurate. */ +/* uses SMLAWB(), requiring armv5E and higher. */ +opus_int32 silk_schur( /* O Returns residual energy */ + opus_int16 *rc_Q15, /* O reflection coefficients [order] Q15 */ + const opus_int32 *c, /* I correlations [order+1] */ + const opus_int32 order /* I prediction order */ +); + +/* Calculates the reflection coefficients from the correlation sequence */ +/* Slower than schur(), but more accurate. 
*/
+/* Uses SMULL(), available on armv4                                     */
+opus_int32 silk_schur64(                            /* O    Returns residual energy                                     */
+    opus_int32                  rc_Q16[],           /* O    Reflection coefficients [order] Q16                         */
+    const opus_int32            c[],                /* I    Correlations [order+1]                                      */
+    opus_int32                  order               /* I    Prediction order                                            */
+);
+
+/* Step up function, converts reflection coefficients (Q15) to prediction coefficients */
+void silk_k2a(
+    opus_int32                  *A_Q24,             /* O    Prediction coefficients [order] Q24                         */
+    const opus_int16            *rc_Q15,            /* I    Reflection coefficients [order] Q15                         */
+    const opus_int32            order               /* I    Prediction order                                            */
+);
+
+/* Step up function, converts reflection coefficients (Q16) to prediction coefficients */
+void silk_k2a_Q16(
+    opus_int32                  *A_Q24,             /* O    Prediction coefficients [order] Q24                         */
+    const opus_int32            *rc_Q16,            /* I    Reflection coefficients [order] Q16                         */
+    const opus_int32            order               /* I    Prediction order                                            */
+);
+
+/* Apply sine window to signal vector.                                   */
+/* Window types:                                                         */
+/*    1 -> sine window from 0 to pi/2                                    */
+/*    2 -> sine window from pi/2 to pi                                   */
+/* Every other sample of the window is linearly interpolated, for speed  */
+void silk_apply_sine_window(
+    opus_int16                  px_win[],           /* O    Pointer to windowed signal                                  */
+    const opus_int16            px[],               /* I    Pointer to input signal                                     */
+    const opus_int              win_type,           /* I    Selects a window type (1 or 2, see above)                   */
+    const opus_int              length              /* I    Window length, multiple of 4                                */
+);
+
+/* Compute autocorrelation */
+void silk_autocorr(
+    opus_int32                  *results,           /* O    Result (length correlationCount)                            */
+    opus_int                    *scale,             /* O    Scaling of the correlation vector                           */
+    const opus_int16            *inputData,         /* I    Input data to correlate                                     */
+    const opus_int              inputDataSize,      /* I    Length of input                                             */
+    const opus_int              correlationCount,   /* I    Number of correlation taps to compute                       */
+    int                         arch                /* I    Run-time architecture                                       */
+);
+
+void silk_decode_pitch(
+    opus_int16                  lagIndex,           /* I    Lag index                                                   */
+    opus_int8                   contourIndex,       /* I    Pitch contour index (by-value argument, so an input)        */
+    opus_int                    pitch_lags[],       /* O    4 pitch values                                              */
+    const opus_int              Fs_kHz,             /* I    Sampling frequency (kHz)                                    */
+    const opus_int              nb_subfr            /* I    Number of sub frames                                        */
+);
+
+opus_int silk_pitch_analysis_core(                  /* O    Voicing estimate: 0 voiced, 1 unvoiced                      */
+    const opus_int16            *frame,             /* I    Signal of length PE_FRAME_LENGTH_MS*Fs_kHz                  */
+    opus_int                    *pitch_out,         /* O    4 pitch lag values                                          */
+    opus_int16                  *lagIndex,          /* O    Lag Index                                                   */
+    opus_int8                   *contourIndex,      /* O    Pitch contour Index                                         */
+    opus_int                    *LTPCorr_Q15,       /* I/O  Normalized correlation; input: value from previous frame    */
+    opus_int                    prevLag,            /* I    Last lag of previous frame; set to zero if unvoiced         */
+    const opus_int32            search_thres1_Q16,  /* I    First stage threshold for lag candidates 0 - 1              */
+    const opus_int              search_thres2_Q13,  /* I    Final threshold for lag candidates 0 - 1                    */
+    const opus_int              Fs_kHz,             /* I    Sample frequency (kHz)                                      */
+    const opus_int              complexity,         /* I    Complexity setting, 0-2, where 2 is highest                 */
+    const opus_int              nb_subfr,           /* I    Number of 5 ms subframes                                    */
+    int                         arch                /* I    Run-time architecture                                       */
+);
+
+/* Compute Normalized Line Spectral Frequencies (NLSFs) from whitening filter coefficients      */
+/* If not all roots are found, the a_Q16 coefficients are bandwidth expanded until convergence. 
*/
+void silk_A2NLSF(
+    opus_int16                  *NLSF,              /* O    Normalized Line Spectral Frequencies in Q15 (0..2^15-1) [d] */
+    opus_int32                  *a_Q16,             /* I/O  Monic whitening filter coefficients in Q16 [d]              */
+    const opus_int              d                   /* I    Filter order (must be even)                                 */
+);
+
+/* Compute whitening filter coefficients from normalized line spectral frequencies */
+void silk_NLSF2A(
+    opus_int16                  *a_Q12,             /* O    Monic whitening filter coefficients in Q12,  [ d ]          */
+    const opus_int16            *NLSF,              /* I    Normalized line spectral frequencies in Q15, [ d ]          */
+    const opus_int              d                   /* I    Filter order (should be even)                               */
+);
+
+void silk_insertion_sort_increasing(
+    opus_int32                  *a,                 /* I/O  Unsorted / Sorted vector                                    */
+    opus_int                    *idx,               /* O    Index vector for the sorted elements                        */
+    const opus_int              L,                  /* I    Vector length                                               */
+    const opus_int              K                   /* I    Number of correctly sorted positions                        */
+);
+
+void silk_insertion_sort_decreasing_int16(
+    opus_int16                  *a,                 /* I/O  Unsorted / Sorted vector                                    */
+    opus_int                    *idx,               /* O    Index vector for the sorted elements                        */
+    const opus_int              L,                  /* I    Vector length                                               */
+    const opus_int              K                   /* I    Number of correctly sorted positions                        */
+);
+
+void silk_insertion_sort_increasing_all_values_int16(
+    opus_int16                  *a,                 /* I/O  Unsorted / Sorted vector                                    */
+    const opus_int              L                   /* I    Vector length                                               */
+);
+
+/* NLSF stabilizer, for a single input data vector */
+void silk_NLSF_stabilize(
+    opus_int16                  *NLSF_Q15,          /* I/O  Unstable/stabilized normalized LSF vector in Q15 [L]        */
+    const opus_int16            *NDeltaMin_Q15,     /* I    Min distance vector, NDeltaMin_Q15[L] must be >= 1 [L+1]    */
+    const opus_int              L                   /* I    Number of NLSF parameters in the input vector               */
+);
+
+/* Laroia low complexity NLSF weights */
+void silk_NLSF_VQ_weights_laroia(
+    opus_int16                  *pNLSFW_Q_OUT,      /* O    Pointer to input vector weights [D]                         */
+    const opus_int16            *pNLSF_Q15,         /* I    Pointer to input vector         [D]                         */
+    const opus_int              D                   /* I    Input vector dimension (even)                               */
+);
+
+/* Compute reflection coefficients from input signal */
+void silk_burg_modified(
+    opus_int32                  *res_nrg,           /* O    Residual energy                                             */
+    opus_int                    *res_nrg_Q,         /* O    Residual energy Q value                                     */
+    opus_int32                  A_Q16[],            /* O    Prediction coefficients (length order)                      */
+    const opus_int16            x[],                /* I    Input signal, length: nb_subfr * ( D + subfr_length )       */
+    const opus_int32            minInvGain_Q30,     /* I    Inverse of max prediction gain                              */
+    const opus_int              subfr_length,       /* I    Input signal subframe length (incl. D preceding samples)    */
+    const opus_int              nb_subfr,           /* I    Number of subframes stacked in x                            */
+    const opus_int              D,                  /* I    Order                                                       */
+    int                         arch                /* I    Run-time architecture                                       */
+);
+
+/* Copy and multiply a vector by a constant */
+void silk_scale_copy_vector16(
+    opus_int16                  *data_out,          /* O    Scaled copy (writable destination)                          */
+    const opus_int16            *data_in,           /* I    Vector to copy (read-only source)                           */
+    opus_int32                  gain_Q16,           /* I    Gain in Q16                                                 */
+    const opus_int              dataSize            /* I    Length                                                      */
+);
+
+/* Some of the LTP-related functions require Q26 to work. */
+void silk_scale_vector32_Q26_lshift_18(
+    opus_int32                  *data1,             /* I/O  Q0/Q18                                                      */
+    opus_int32                  gain_Q26,           /* I    Q26                                                         */
+    opus_int                    dataSize            /* I    Length                                                      */
+);
+
+/********************************************************************/
+/*                        INLINE ARM MATH                           */
+/********************************************************************/
+
+/*    return sum( inVec1[i] * inVec2[i] ) */
+opus_int32 silk_inner_prod_aligned(
+    const opus_int16 *const     inVec1,             /*    I input vector 1                                              */
+    const opus_int16 *const     inVec2,             /*    I input vector 2                                              */
+    const opus_int              len                 /*    I vector lengths                                              */
+);
+
+opus_int32 silk_inner_prod_aligned_scale(
+    const opus_int16 *const     inVec1,             /*    I input vector 1                                              */
+    const opus_int16 *const     inVec2,             /*    I input vector 2                                              */
+    const opus_int              scale,              /*    I number of bits to shift                                     */
+    const opus_int              len                 /*    I vector lengths                                              */
+);
+
+opus_int64 silk_inner_prod16_aligned_64(
+    const opus_int16            *inVec1,            /*    I input vector 1                                              */
+    const opus_int16            *inVec2,            /*    I input vector 2                                              */
+    const opus_int              len                 /*    I vector lengths                                              */
+);
+
+/********************************************************************/
+/*                                MACROS                            */
+/********************************************************************/
+
+/* Rotate a32 right by 'rot' bits. Negative rot values result in rotating
+   left. Output is 32bit int.
+   The rotation count is first reduced modulo 32, so every value of 'rot'
+   is well defined: shifting a 32-bit value by 32 or more bits, or negating
+   the most negative 'rot', would be undefined behaviour in C. Results for
+   rot in -31..31 are unchanged.
+   Note: contemporary compilers recognize the C expression below and
+   compile it into a 'ror' instruction if available. No need for inline ASM! */
+static OPUS_INLINE opus_int32 silk_ROR32( opus_int32 a32, opus_int rot )
+{
+    const opus_uint32 x = (opus_uint32) a32;
+    /* The unsigned conversion wraps modulo 2^32 (a multiple of 32), so the
+       mask turns a left rotation by n into the equivalent right rotation
+       by 32 - n. */
+    const opus_uint32 r = (opus_uint32) rot & 31;
+
+    if( r == 0 ) {
+        /* Nothing to rotate; this also keeps (32 - r) below 32 further down */
+        return a32;
+    }
+    return (opus_int32) ((x >> r) | (x << (32 - r)));
+}
+
+/* Allocate opus_int16 aligned to 4-byte memory address */
+#if EMBEDDED_ARM
+#define silk_DWORD_ALIGN __attribute__((aligned(4)))
+#else
+#define silk_DWORD_ALIGN
+#endif
+
+/* Useful Macros that can be adjusted to other platforms */
+/* Note: the second argument of silk_memset is forwarded to memset() as the
+   fill value, despite being named 'src' */
+#define silk_memcpy(dest, src, size)        memcpy((dest), (src), (size))
+#define silk_memset(dest, src, size)        memset((dest), (src), (size))
+#define silk_memmove(dest, src, size)       memmove((dest), (src), (size))
+
+/* Fixed point macros */
+
+/* (a32 * b32) output has to be 32bit int */
+#define silk_MUL(a32, b32)                  ((a32) * (b32))
+
+/* (a32 * b32) output has to be 32bit uint */
+#define silk_MUL_uint(a32, b32)             silk_MUL(a32, b32)
+
+/* a32 + (b32 * c32) output has to be 32bit int */
+#define silk_MLA(a32, b32, c32)             silk_ADD32((a32),((b32) * (c32)))
+
+/* a32 + (b32 * c32) output has to be 32bit uint */
+#define silk_MLA_uint(a32, b32, c32)        silk_MLA(a32, b32, c32)
+
+/* ((a32 >> 16)  * (b32 >> 16)) output has to be 32bit int */
+#define silk_SMULTT(a32, b32)               (((a32) >> 16) * ((b32) >> 16))
+
+/* a32 + ((b32 >> 16)  * (c32 >> 16)) output has to be 32bit int */
+#define silk_SMLATT(a32, b32, c32)          silk_ADD32((a32),((b32) >> 16) * ((c32) >> 16))
+
+/* a64 + (b16 * c16), with the 32-bit product widened to 64 bits before the addition */
+#define silk_SMLALBB(a64, b16, c16)         silk_ADD64((a64),(opus_int64)((opus_int32)(b16) * (opus_int32)(c16)))
+
+/* (a32 * b32), evaluated as a 64-bit product */
+#define silk_SMULL(a32, b32)                ((opus_int64)(a32) * /*(opus_int64)*/(b32))
+
+/* Adds two signed 32-bit values in a way that can overflow, while not relying on undefined behaviour
+   (just standard two's complement implementation-specific behaviour) */
+#define silk_ADD32_ovflw(a, b)              ((opus_int32)((opus_uint32)(a) + (opus_uint32)(b)))
+/* Subtracts two signed 32-bit values in a way that can overflow, while not relying on undefined behaviour
+   (just standard two's complement implementation-specific behaviour) */
+#define silk_SUB32_ovflw(a, b)              ((opus_int32)((opus_uint32)(a) - (opus_uint32)(b)))
+
+/* Multiply-accumulate macros that allow overflow in the addition (i.e., no asserts in debug mode) */
+#define silk_MLA_ovflw(a32, b32, c32)       silk_ADD32_ovflw((a32), (opus_uint32)(b32) * (opus_uint32)(c32))
+#define silk_SMLABB_ovflw(a32, b32, c32)    (silk_ADD32_ovflw((a32) , ((opus_int32)((opus_int16)(b32))) * (opus_int32)((opus_int16)(c32))))
+
+#define silk_DIV32_16(a32, b16)             ((opus_int32)((a32) / (b16)))
+#define silk_DIV32(a32, b32)                ((opus_int32)((a32) / (b32)))
+
+/* These macros enable checking for overflow in silk_API_Debug.h */
+#define silk_ADD16(a, b)                    ((a) + (b))
+#define silk_ADD32(a, b)                    ((a) + (b))
+#define silk_ADD64(a, b)                    ((a) + (b))
+
+#define silk_SUB16(a, b)                    ((a) - (b))
+#define silk_SUB32(a, b)                    ((a) - (b))
+#define silk_SUB64(a, b)                    ((a) - (b))
+
+#define silk_SAT8(a)                        ((a) > silk_int8_MAX ? silk_int8_MAX  :       \
+                                            ((a) < silk_int8_MIN ? silk_int8_MIN  : (a)))
+#define silk_SAT16(a)                       ((a) > silk_int16_MAX ? silk_int16_MAX :      \
+                                            ((a) < silk_int16_MIN ? silk_int16_MIN : (a)))
+#define silk_SAT32(a)                       ((a) > silk_int32_MAX ? silk_int32_MAX :      \
+                                            ((a) < silk_int32_MIN ? silk_int32_MIN : (a)))
+
+#define silk_CHECK_FIT8(a)                  (a)
+#define silk_CHECK_FIT16(a)                 (a)
+#define silk_CHECK_FIT32(a)                 (a)
+
+#define silk_ADD_SAT16(a, b)                (opus_int16)silk_SAT16( silk_ADD32( (opus_int32)(a), (b) ) )
+#define silk_ADD_SAT64(a, b)                ((((a) + (b)) & 0x8000000000000000LL) == 0 ?                            \
+                                            ((((a) & (b)) & 0x8000000000000000LL) != 0 ? silk_int64_MIN : (a)+(b)) : \
+                                            ((((a) | (b)) & 0x8000000000000000LL) == 0 ? silk_int64_MAX : (a)+(b)) )
+
+#define silk_SUB_SAT16(a, b)                (opus_int16)silk_SAT16( silk_SUB32( (opus_int32)(a), (b) ) )
+#define silk_SUB_SAT64(a, b)                ((((a)-(b)) & 0x8000000000000000LL) == 0 ?                                               \
+                                            (( (a) & ((b)^0x8000000000000000LL) & 0x8000000000000000LL) ? silk_int64_MIN : (a)-(b)) : \
+                                            ((((a)^0x8000000000000000LL) & (b)  & 0x8000000000000000LL) ? silk_int64_MAX : (a)-(b)) )
+
+/* Saturation for positive input values */
+#define silk_POS_SAT32(a)                   ((a) > silk_int32_MAX ? silk_int32_MAX : (a))
+
+/* Add with saturation for positive input values: a set sign bit in the sum means overflow */
+#define silk_ADD_POS_SAT8(a, b)             ((((a)+(b)) & 0x80)                 ? silk_int8_MAX  : ((a)+(b)))
+#define silk_ADD_POS_SAT16(a, b)            ((((a)+(b)) & 0x8000)               ? silk_int16_MAX : ((a)+(b)))
+#define silk_ADD_POS_SAT32(a, b)            ((((a)+(b)) & 0x80000000)           ? silk_int32_MAX : ((a)+(b)))
+#define silk_ADD_POS_SAT64(a, b)            ((((a)+(b)) & 0x8000000000000000LL) ? silk_int64_MAX : ((a)+(b)))
+
+#define silk_LSHIFT8(a, shift)              ((opus_int8)((opus_uint8)(a)<<(shift)))         /* shift >= 0, shift < 8  */
+#define silk_LSHIFT16(a, shift)             ((opus_int16)((opus_uint16)(a)<<(shift)))       /* shift >= 0, shift < 16 */
+#define silk_LSHIFT32(a, shift)             ((opus_int32)((opus_uint32)(a)<<(shift)))       /* shift >= 0, shift < 32 */
+#define silk_LSHIFT64(a, shift)             ((opus_int64)((opus_uint64)(a)<<(shift)))       /* shift >= 0, shift < 64 */
+#define silk_LSHIFT(a, shift)               silk_LSHIFT32(a, shift)                         /* shift >= 0, shift < 32 */
+
+#define silk_RSHIFT8(a, shift)              ((a)>>(shift))                                  /* shift >= 0, shift < 8  */
+#define silk_RSHIFT16(a, shift)             ((a)>>(shift))                                  /* shift >= 0, shift < 16 */
+#define silk_RSHIFT32(a, shift)             ((a)>>(shift))                                  /* shift >= 0, shift < 32 */
+#define silk_RSHIFT64(a, shift)             ((a)>>(shift))                                  /* shift >= 0, shift < 64 */
+#define silk_RSHIFT(a, shift)               silk_RSHIFT32(a, shift)                         /* shift >= 0, shift < 32 */
+
+/* Saturates before shifting: a is first limited to [silk_int32_MIN >> shift, silk_int32_MAX >> shift] */
+#define silk_LSHIFT_SAT32(a, shift)         (silk_LSHIFT32( silk_LIMIT( (a), silk_RSHIFT32( silk_int32_MIN, (shift) ), \
+                                                    silk_RSHIFT32( silk_int32_MAX, (shift) ) ), (shift) ))
+
+#define silk_LSHIFT_ovflw(a, shift)         ((opus_int32)((opus_uint32)(a) << (shift)))     /* shift >= 0, allowed to overflow */
+#define silk_LSHIFT_uint(a, shift)          ((a) << (shift))                                /* shift >= 0 */
+#define silk_RSHIFT_uint(a, shift)          ((a) >> (shift))                                /* shift >= 0 */
+
+#define silk_ADD_LSHIFT(a, b, shift)        ((a) + silk_LSHIFT((b), (shift)))               /* shift >= 0 */
+#define silk_ADD_LSHIFT32(a, b, shift)      silk_ADD32((a), silk_LSHIFT32((b), (shift)))    /* shift >= 0 */
+#define silk_ADD_LSHIFT_uint(a, b, shift)   ((a) + silk_LSHIFT_uint((b), (shift)))          /* shift >= 0 */
+#define silk_ADD_RSHIFT(a, b, shift)        ((a) + silk_RSHIFT((b), (shift)))               /* shift >= 0 */
+#define silk_ADD_RSHIFT32(a, b, shift)      silk_ADD32((a), silk_RSHIFT32((b), (shift)))    /* shift >= 0 */
+#define silk_ADD_RSHIFT_uint(a, b, shift)   ((a) + silk_RSHIFT_uint((b), (shift)))          /* shift >= 0 */
+#define silk_SUB_LSHIFT32(a, b, shift)      silk_SUB32((a), silk_LSHIFT32((b), (shift)))    /* shift >= 0 */
+#define silk_SUB_RSHIFT32(a, b, shift)      silk_SUB32((a), silk_RSHIFT32((b), (shift)))    /* shift >= 0 */
+
+/* Right shift with rounding. Requires that shift > 0 */
+#define silk_RSHIFT_ROUND(a, shift)         ((shift) == 1 ? ((a) >> 1) + ((a) & 1) : (((a) >> ((shift) - 1)) + 1) >> 1)
+#define silk_RSHIFT_ROUND64(a, shift)       ((shift) == 1 ? ((a) >> 1) + ((a) & 1) : (((a) >> ((shift) - 1)) + 1) >> 1)
+
+/* Number of right shifts required to fit the multiplication */
+#define silk_NSHIFT_MUL_32_32(a, b)         ( -(31- (32-silk_CLZ32(silk_abs(a)) + (32-silk_CLZ32(silk_abs(b))))) )
+#define silk_NSHIFT_MUL_16_16(a, b)         ( -(15- (16-silk_CLZ16(silk_abs(a)) + (16-silk_CLZ16(silk_abs(b))))) )
+
+
+#define silk_min(a, b)                      (((a) < (b)) ? (a) : (b))
+#define silk_max(a, b)                      (((a) > (b)) ? (a) : (b))
+
+/* Macro to convert floating-point constants to fixed-point */
+#define SILK_FIX_CONST( C, Q )              ((opus_int32)((C) * ((opus_int64)1 << (Q)) + 0.5))
+
+/* silk_min() versions with typecast in the function call */
+static OPUS_INLINE opus_int silk_min_int(opus_int a, opus_int b)
+{
+    return (((a) < (b)) ? (a) : (b));
+}
+static OPUS_INLINE opus_int16 silk_min_16(opus_int16 a, opus_int16 b)
+{
+    return (((a) < (b)) ? (a) : (b));
+}
+static OPUS_INLINE opus_int32 silk_min_32(opus_int32 a, opus_int32 b)
+{
+    return (((a) < (b)) ? (a) : (b));
+}
+static OPUS_INLINE opus_int64 silk_min_64(opus_int64 a, opus_int64 b)
+{
+    return (((a) < (b)) ? (a) : (b));
+}
+
+/* silk_max() versions with typecast in the function call */
+static OPUS_INLINE opus_int silk_max_int(opus_int a, opus_int b)
+{
+    return (((a) > (b)) ? (a) : (b));
+}
+static OPUS_INLINE opus_int16 silk_max_16(opus_int16 a, opus_int16 b)
+{
+    return (((a) > (b)) ? (a) : (b));
+}
+static OPUS_INLINE opus_int32 silk_max_32(opus_int32 a, opus_int32 b)
+{
+    return (((a) > (b)) ? (a) : (b));
+}
+static OPUS_INLINE opus_int64 silk_max_64(opus_int64 a, opus_int64 b)
+{
+    return (((a) > (b)) ? (a) : (b));
+}
+/* Clamp a to the range spanned by limit1 and limit2; the two limits may be given in either order */
+#define silk_LIMIT( a, limit1, limit2)      ((limit1) > (limit2) ? ((a) > (limit1) ? (limit1) : ((a) < (limit2) ? (limit2) : (a))) \
+                                                                 : ((a) > (limit2) ? (limit2) : ((a) < (limit1) ? (limit1) : (a))))
+
+#define silk_LIMIT_int                      silk_LIMIT
+#define silk_LIMIT_16                       silk_LIMIT
+#define silk_LIMIT_32                       silk_LIMIT
+
+#define silk_abs(a)                         (((a) >  0)  ? (a) : -(a))            /* Be careful, silk_abs returns the wrong result when the input equals silk_intXX_MIN */
+#define silk_abs_int(a)                     (((a) ^ ((a) >> (8 * sizeof(a) - 1))) - ((a) >> (8 * sizeof(a) - 1)))
+#define silk_abs_int32(a)                   (((a) ^ ((a) >> 31)) - ((a) >> 31))
+#define silk_abs_int64(a)                   (((a) >  0)  ? (a) : -(a))
+
+#define silk_sign(a)                        ((a) > 0 ? 1 : ( (a) < 0 ? -1 : 0 ))  /* yields 1, -1 or 0 */
+
+/* PSEUDO-RANDOM GENERATOR                                                          */
+/* Make sure to store the result as the seed for the next call (also in between     */
+/* frames), otherwise result won't be random at all. When only using some of the    */
+/* bits, take the most significant bits by right-shifting.                          */
+#define silk_RAND(seed)                     (silk_MLA_ovflw(907633515, (seed), 196314165))
+
+/*  Add some multiplication functions that can be easily mapped to ARM. */
+
+/*    silk_SMMUL: Signed top word multiply.
+          ARMv6        2 instruction cycles.
+          ARMv3M+      3 instruction cycles. 
use SMULL and ignore LSB registers.(except xM)*/ +/*#define silk_SMMUL(a32, b32) (opus_int32)silk_RSHIFT(silk_SMLAL(silk_SMULWB((a32), (b32)), (a32), silk_RSHIFT_ROUND((b32), 16)), 16)*/ +/* the following seems faster on x86 */ +#define silk_SMMUL(a32, b32) (opus_int32)silk_RSHIFT64(silk_SMULL((a32), (b32)), 32) + +#include "Inlines.h" +#include "MacroCount.h" +#include "MacroDebug.h" + +#ifdef OPUS_ARM_INLINE_ASM +#include "arm/SigProc_FIX_armv4.h" +#endif + +#ifdef OPUS_ARM_INLINE_EDSP +#include "arm/SigProc_FIX_armv5e.h" +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* SILK_SIGPROC_FIX_H */ diff --git a/drivers/opus/silk/VAD.c b/drivers/opus/silk/VAD.c new file mode 100644 index 0000000000..3a5c566627 --- /dev/null +++ b/drivers/opus/silk/VAD.c @@ -0,0 +1,357 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "silk_main.h" +#include "stack_alloc.h" + +/* Silk VAD noise level estimation */ +static OPUS_INLINE void silk_VAD_GetNoiseLevels( + const opus_int32 pX[ VAD_N_BANDS ], /* I subband energies */ + silk_VAD_state *psSilk_VAD /* I/O Pointer to Silk VAD state */ +); + +/**********************************/ +/* Initialization of the Silk VAD */ +/**********************************/ +opus_int silk_VAD_Init( /* O Return value, 0 if success */ + silk_VAD_state *psSilk_VAD /* I/O Pointer to Silk VAD state */ +) +{ + opus_int b, ret = 0; + + /* reset state memory */ + silk_memset( psSilk_VAD, 0, sizeof( silk_VAD_state ) ); + + /* init noise levels */ + /* Initialize array with approx pink noise levels (psd proportional to inverse of frequency) */ + for( b = 0; b < VAD_N_BANDS; b++ ) { + psSilk_VAD->NoiseLevelBias[ b ] = silk_max_32( silk_DIV32_16( VAD_NOISE_LEVELS_BIAS, b + 1 ), 1 ); + } + + /* Initialize state */ + for( b = 0; b < VAD_N_BANDS; b++ ) { + psSilk_VAD->NL[ b ] = silk_MUL( 100, psSilk_VAD->NoiseLevelBias[ b ] ); + psSilk_VAD->inv_NL[ b ] = silk_DIV32( silk_int32_MAX, psSilk_VAD->NL[ b ] ); + } + psSilk_VAD->counter = 15; + + /* init smoothed energy-to-noise ratio*/ + for( b = 0; b < VAD_N_BANDS; b++ ) { + psSilk_VAD->NrgRatioSmth_Q8[ b ] = 100 * 256; /* 100 * 256 --> 20 dB SNR */ + } + + return( ret ); +} 
/* Weighting factors for tilt measure: one weight per band, applied to the
   per-band SNR when accumulating input_tilt in silk_VAD_GetSA_Q8() */
static const opus_int32 tiltWeights[ VAD_N_BANDS ] = { 30000, 6000, -12000, -12000 };

/***************************************/
/* Get the speech activity level in Q8 */
/***************************************/
/* Splits the input frame into VAD_N_BANDS sub-bands with three cascaded
   two-band filter banks, measures the energy per band, updates the noise
   level estimates and derives from the per-band SNRs:
     - psEncC->speech_activity_Q8
     - psEncC->input_tilt_Q15
     - psEncC->input_quality_bands_Q15[]
   The VAD state in psEncC->sVAD (filter bank states, HPstate, XnrgSubfr, noise
   levels, NrgRatioSmth_Q8) is updated as a side effect.
   The return value is ret, which is never changed from 0 in this function. */
opus_int silk_VAD_GetSA_Q8(                                     /* O    Return value, 0 if success                  */
    silk_encoder_state          *psEncC,                        /* I/O  Encoder state                               */
    const opus_int16            pIn[]                           /* I    PCM input                                   */
)
{
    opus_int   SA_Q15, pSNR_dB_Q7, input_tilt;
    opus_int   decimated_framelength1, decimated_framelength2;
    opus_int   decimated_framelength;
    opus_int   dec_subframe_length, dec_subframe_offset, SNR_Q7, i, b, s;
    opus_int32 sumSquared, smooth_coef_Q16;
    opus_int16 HPstateTmp;
    VARDECL( opus_int16, X );
    opus_int32 Xnrg[ VAD_N_BANDS ];
    opus_int32 NrgToNoiseRatio_Q8[ VAD_N_BANDS ];
    opus_int32 speech_nrg, x_tmp;
    opus_int   X_offset[ VAD_N_BANDS ];
    opus_int   ret = 0;
    silk_VAD_state *psSilk_VAD = &psEncC->sVAD;
    SAVE_STACK;

    /* Safety checks */
    silk_assert( VAD_N_BANDS == 4 );
    silk_assert( MAX_FRAME_LENGTH >= psEncC->frame_length );
    silk_assert( psEncC->frame_length <= 512 );
    /* frame_length must be a multiple of 8, since it is decimated three times by 2 */
    silk_assert( psEncC->frame_length == 8 * silk_RSHIFT( psEncC->frame_length, 3 ) );

    /***********************/
    /* Filter and Decimate */
    /***********************/
    decimated_framelength1 = silk_RSHIFT( psEncC->frame_length, 1 );
    decimated_framelength2 = silk_RSHIFT( psEncC->frame_length, 2 );
    decimated_framelength = silk_RSHIFT( psEncC->frame_length, 3 );
    /* Decimate into 4 bands:
       0       L      3L       L              3L                             5L
               -      --       -              --                             --
               8       8       2               4                              4

       [0-1 kHz| temp. |1-2 kHz|    2-4 kHz    |            4-8 kHz           |

       They're arranged to allow the minimal ( frame_length / 4 ) extra
       scratch space during the downsampling process */
    X_offset[ 0 ] = 0;
    X_offset[ 1 ] = decimated_framelength + decimated_framelength2;
    X_offset[ 2 ] = X_offset[ 1 ] + decimated_framelength;
    X_offset[ 3 ] = X_offset[ 2 ] + decimated_framelength2;
    ALLOC( X, X_offset[ 3 ] + decimated_framelength1, opus_int16 );

    /* 0-8 kHz to 0-4 kHz and 4-8 kHz */
    silk_ana_filt_bank_1( pIn, &psSilk_VAD->AnaState[ 0 ],
        X, &X[ X_offset[ 3 ] ], psEncC->frame_length );

    /* 0-4 kHz to 0-2 kHz and 2-4 kHz */
    /* (low band is written back over the start of X, i.e. filtered in place) */
    silk_ana_filt_bank_1( X, &psSilk_VAD->AnaState1[ 0 ],
        X, &X[ X_offset[ 2 ] ], decimated_framelength1 );

    /* 0-2 kHz to 0-1 kHz and 1-2 kHz */
    silk_ana_filt_bank_1( X, &psSilk_VAD->AnaState2[ 0 ],
        X, &X[ X_offset[ 1 ] ], decimated_framelength2 );

    /*********************************************/
    /* HP filter on lowest band (differentiator) */
    /*********************************************/
    /* Every sample is halved, then the halved previous sample is subtracted.
       The loop runs backwards so X[ i - 1 ] is halved before it is used and is
       still unfiltered when X[ i ] is computed. The halved last sample is kept
       as the filter state for the next frame. */
    X[ decimated_framelength - 1 ] = silk_RSHIFT( X[ decimated_framelength - 1 ], 1 );
    HPstateTmp = X[ decimated_framelength - 1 ];
    for( i = decimated_framelength - 1; i > 0; i-- ) {
        X[ i - 1 ]  = silk_RSHIFT( X[ i - 1 ], 1 );
        X[ i ]     -= X[ i - 1 ];
    }
    /* First sample uses the state saved from the previous frame */
    X[ 0 ] -= psSilk_VAD->HPstate;
    psSilk_VAD->HPstate = HPstateTmp;

    /*************************************/
    /* Calculate the energy in each band */
    /*************************************/
    for( b = 0; b < VAD_N_BANDS; b++ ) {
        /* Find the decimated framelength in the non-uniformly divided bands */
        /* (with VAD_N_BANDS == 4 the shifts are 3, 3, 2, 1 for b = 0..3) */
        decimated_framelength = silk_RSHIFT( psEncC->frame_length, silk_min_int( VAD_N_BANDS - b, VAD_N_BANDS - 1 ) );

        /* Split length into subframe lengths */
        dec_subframe_length = silk_RSHIFT( decimated_framelength, VAD_INTERNAL_SUBFRAMES_LOG2 );
        dec_subframe_offset = 0;

        /* Compute energy per sub-frame */
        /* initialize with summed energy of last subframe */
        Xnrg[ b ] = psSilk_VAD->XnrgSubfr[ b ];
        for( s = 0; s < VAD_INTERNAL_SUBFRAMES; s++ ) {
            sumSquared = 0;
            for( i = 0; i < dec_subframe_length; i++ ) {
                /* The energy will be less than dec_subframe_length * ( silk_int16_MIN / 8 ) ^ 2.            */
                /* Therefore we can accumulate with no risk of overflow (unless dec_subframe_length > 128)  */
                x_tmp = silk_RSHIFT(
                    X[ X_offset[ b ] + i + dec_subframe_offset ], 3 );
                sumSquared = silk_SMLABB( sumSquared, x_tmp, x_tmp );

                /* Safety check */
                silk_assert( sumSquared >= 0 );
            }

            /* Add/saturate summed energy of current subframe */
            if( s < VAD_INTERNAL_SUBFRAMES - 1 ) {
                Xnrg[ b ] = silk_ADD_POS_SAT32( Xnrg[ b ], sumSquared );
            } else {
                /* Look-ahead subframe */
                /* (only half of its energy is counted in this frame) */
                Xnrg[ b ] = silk_ADD_POS_SAT32( Xnrg[ b ], silk_RSHIFT( sumSquared, 1 ) );
            }

            dec_subframe_offset += dec_subframe_length;
        }
        /* Full energy of the last subframe seeds Xnrg[ b ] on the next call */
        psSilk_VAD->XnrgSubfr[ b ] = sumSquared;
    }

    /********************/
    /* Noise estimation */
    /********************/
    silk_VAD_GetNoiseLevels( &Xnrg[ 0 ], psSilk_VAD );

    /***********************************************/
    /* Signal-plus-noise to noise ratio estimation */
    /***********************************************/
    sumSquared = 0;
    input_tilt = 0;
    for( b = 0; b < VAD_N_BANDS; b++ ) {
        speech_nrg = Xnrg[ b ] - psSilk_VAD->NL[ b ];
        if( speech_nrg > 0 ) {
            /* Divide, with sufficient resolution */
            /* (scale up the numerator when its top 9 bits are clear, otherwise scale down the denominator) */
            if( ( Xnrg[ b ] & 0xFF800000 ) == 0 ) {
                NrgToNoiseRatio_Q8[ b ] = silk_DIV32( silk_LSHIFT( Xnrg[ b ], 8 ), psSilk_VAD->NL[ b ] + 1 );
            } else {
                NrgToNoiseRatio_Q8[ b ] = silk_DIV32( Xnrg[ b ], silk_RSHIFT( psSilk_VAD->NL[ b ], 8 ) + 1 );
            }

            /* Convert to log domain */
            SNR_Q7 = silk_lin2log( NrgToNoiseRatio_Q8[ b ] ) - 8 * 128;

            /* Sum-of-squares */
            sumSquared = silk_SMLABB( sumSquared, SNR_Q7, SNR_Q7 );          /* Q14 */

            /* Tilt measure */
            if( speech_nrg < ( (opus_int32)1 << 20 ) ) {
                /* Scale down SNR value for small subband speech energies */
                SNR_Q7 = silk_SMULWB( silk_LSHIFT( silk_SQRT_APPROX( speech_nrg ), 6 ), SNR_Q7 );
            }
            input_tilt = silk_SMLAWB( input_tilt, tiltWeights[ b ], SNR_Q7 );
        } else {
            /* No energy above the noise level: ratio of 1.0 in Q8, no SNR/tilt contribution */
            NrgToNoiseRatio_Q8[ b ] = 256;
        }
    }

    /* Mean-of-squares */
    sumSquared = silk_DIV32_16( sumSquared, VAD_N_BANDS ); /* Q14 */

    /* Root-mean-square approximation, scale to dBs, and write to output pointer */
    pSNR_dB_Q7 = (opus_int16)( 3 * silk_SQRT_APPROX( sumSquared ) ); /* Q7 */

    /*********************************/
    /* Speech Probability Estimation */
    /*********************************/
    SA_Q15 = silk_sigm_Q15( silk_SMULWB( VAD_SNR_FACTOR_Q16, pSNR_dB_Q7 ) - VAD_NEGATIVE_OFFSET_Q5 );

    /**************************/
    /* Frequency Tilt Measure */
    /**************************/
    psEncC->input_tilt_Q15 = silk_LSHIFT( silk_sigm_Q15( input_tilt ) - 16384, 1 );

    /**************************************************/
    /* Scale the sigmoid output based on power levels */
    /**************************************************/
    speech_nrg = 0;
    for( b = 0; b < VAD_N_BANDS; b++ ) {
        /* Accumulate signal-without-noise energies, higher frequency bands have more weight */
        speech_nrg += ( b + 1 ) * silk_RSHIFT( Xnrg[ b ] - psSilk_VAD->NL[ b ], 4 );
    }

    /* Power scaling */
    if( speech_nrg <= 0 ) {
        SA_Q15 = silk_RSHIFT( SA_Q15, 1 );
    } else if( speech_nrg < 32768 ) {
        /* NOTE(review): frame_length == 10 * fs_kHz presumably identifies a 10 ms frame - confirm against caller */
        if( psEncC->frame_length == 10 * psEncC->fs_kHz ) {
            speech_nrg = silk_LSHIFT_SAT32( speech_nrg, 16 );
        } else {
            speech_nrg = silk_LSHIFT_SAT32( speech_nrg, 15 );
        }

        /* square-root */
        speech_nrg = silk_SQRT_APPROX( speech_nrg );
        SA_Q15 = silk_SMULWB( 32768 + speech_nrg, SA_Q15 );
    }

    /* Copy the resulting speech activity in Q8 */
    /* (Q15 -> Q8 by shifting right 7, clamped to silk_uint8_MAX) */
    psEncC->speech_activity_Q8 = silk_min_int( silk_RSHIFT( SA_Q15, 7 ), silk_uint8_MAX );

    /***********************************/
    /* Energy Level and SNR estimation */
    /***********************************/
    /* Smoothing coefficient */
    smooth_coef_Q16 = silk_SMULWB( VAD_SNR_SMOOTH_COEF_Q18, silk_SMULWB( (opus_int32)SA_Q15, SA_Q15 ) );

    /* Halve the smoothing coefficient for frames of 10 * fs_kHz samples */
    if( psEncC->frame_length == 10 * psEncC->fs_kHz ) {
        smooth_coef_Q16 >>= 1;
    }

    for( b = 0; b < VAD_N_BANDS; b++ ) {
        /* compute smoothed energy-to-noise ratio per band */
        psSilk_VAD->NrgRatioSmth_Q8[ b ] = silk_SMLAWB( psSilk_VAD->NrgRatioSmth_Q8[ b ],
            NrgToNoiseRatio_Q8[ b ] - psSilk_VAD->NrgRatioSmth_Q8[ b ], smooth_coef_Q16 );

        /* signal to noise ratio in dB per band */
        SNR_Q7 = 3 * ( silk_lin2log( psSilk_VAD->NrgRatioSmth_Q8[b] ) - 8 * 128 );
        /* quality = sigmoid( 0.25 * ( SNR_dB - 16 ) ); */
        psEncC->input_quality_bands_Q15[ b ] = silk_sigm_Q15( silk_RSHIFT( SNR_Q7 - 16 * 128, 4 ) );
    }

    RESTORE_STACK;
    return( ret );
}

/**************************/
/* Noise level estimation */
/**************************/
/* Updates the per-band noise level NL[] and its inverse inv_NL[] from the new
   sub-band energies pX[]. The smoothing is performed on the inverse energies,
   and the result is inverted again and limited to 0x00FFFFFF. The frame
   counter is incremented once per call. */
static OPUS_INLINE void silk_VAD_GetNoiseLevels(
    const opus_int32            pX[ VAD_N_BANDS ],  /* I    subband energies                            */
    silk_VAD_state              *psSilk_VAD         /* I/O  Pointer to Silk VAD state                   */
)
{
    opus_int   k;
    opus_int32 nl, nrg, inv_nrg;
    opus_int   coef, min_coef;

    /* Initially faster smoothing */
    /* (the lower bound on the coefficient shrinks as the frame counter grows) */
    if( psSilk_VAD->counter < 1000 ) { /* 1000 = 20 sec */
        min_coef = silk_DIV32_16( silk_int16_MAX, silk_RSHIFT( psSilk_VAD->counter, 4 ) + 1 );
    } else {
        min_coef = 0;
    }

    for( k = 0; k < VAD_N_BANDS; k++ ) {
        /* Get old noise level estimate for current band */
        nl = psSilk_VAD->NL[ k ];
        silk_assert( nl >= 0 );

        /* Add bias */
        nrg = silk_ADD_POS_SAT32( pX[ k ], psSilk_VAD->NoiseLevelBias[ k ] );
        silk_assert( nrg > 0 );

        /* Invert energies */
        inv_nrg = silk_DIV32( silk_int32_MAX, nrg );
        silk_assert( inv_nrg >= 0 );

        /* Less update when subband energy is high */
        if( nrg > silk_LSHIFT( nl, 3 ) ) {
            /* Energy more than 8 times the noise level: one eighth of the base coefficient */
            coef = VAD_NOISE_LEVEL_SMOOTH_COEF_Q16 >> 3;
        } else if( nrg < nl ) {
            /* Energy below the noise level: full base coefficient */
            coef = VAD_NOISE_LEVEL_SMOOTH_COEF_Q16;
        } else {
            /* In between: coefficient scaled by inv_nrg * nl */
            coef = silk_SMULWB( silk_SMULWW( inv_nrg, nl ), VAD_NOISE_LEVEL_SMOOTH_COEF_Q16 << 1 );
        }

        /* Initially faster smoothing */
        coef = silk_max_int( coef, min_coef );

        /* Smooth inverse energies */
        psSilk_VAD->inv_NL[ k ] = silk_SMLAWB( psSilk_VAD->inv_NL[ k ], inv_nrg - psSilk_VAD->inv_NL[ k ], coef );
        silk_assert( psSilk_VAD->inv_NL[ k ] >= 0 );

        /* Compute noise level by inverting again */
        nl = silk_DIV32( silk_int32_MAX, psSilk_VAD->inv_NL[ k ] );
        silk_assert( nl >= 0 );

        /* Limit noise levels (guarantee 7 bits of head room) */
        nl = silk_min( nl, 0x00FFFFFF );

        /* Store as part of state */
        psSilk_VAD->NL[ k ] = nl;
    }

    /* Increment frame counter */
    psSilk_VAD->counter++;
}
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "silk_main.h" + +/* Entropy constrained matrix-weighted VQ, hard-coded to 5-element vectors, for a single input data vector */ +void silk_VQ_WMat_EC( + opus_int8 *ind, /* O index of best codebook vector */ + opus_int32 *rate_dist_Q14, /* O best weighted quant error + mu * rate */ + opus_int *gain_Q7, /* O sum of absolute LTP coefficients */ + const opus_int16 *in_Q14, /* I input vector to be quantized */ + const opus_int32 *W_Q18, /* I weighting matrix */ + const opus_int8 *cb_Q7, /* I codebook */ + const opus_uint8 *cb_gain_Q7, /* I codebook effective gain */ + const opus_uint8 *cl_Q5, /* I code length for each codebook vector */ + const opus_int mu_Q9, /* I tradeoff betw. 
weighted error and rate */ + const opus_int32 max_gain_Q7, /* I maximum sum of absolute LTP coefficients */ + opus_int L /* I number of vectors in codebook */ +) +{ + opus_int k, gain_tmp_Q7; + const opus_int8 *cb_row_Q7; + opus_int16 diff_Q14[ 5 ]; + opus_int32 sum1_Q14, sum2_Q16; + + /* Loop over codebook */ + *rate_dist_Q14 = silk_int32_MAX; + cb_row_Q7 = cb_Q7; + for( k = 0; k < L; k++ ) { + gain_tmp_Q7 = cb_gain_Q7[k]; + + diff_Q14[ 0 ] = in_Q14[ 0 ] - silk_LSHIFT( cb_row_Q7[ 0 ], 7 ); + diff_Q14[ 1 ] = in_Q14[ 1 ] - silk_LSHIFT( cb_row_Q7[ 1 ], 7 ); + diff_Q14[ 2 ] = in_Q14[ 2 ] - silk_LSHIFT( cb_row_Q7[ 2 ], 7 ); + diff_Q14[ 3 ] = in_Q14[ 3 ] - silk_LSHIFT( cb_row_Q7[ 3 ], 7 ); + diff_Q14[ 4 ] = in_Q14[ 4 ] - silk_LSHIFT( cb_row_Q7[ 4 ], 7 ); + + /* Weighted rate */ + sum1_Q14 = silk_SMULBB( mu_Q9, cl_Q5[ k ] ); + + /* Penalty for too large gain */ + sum1_Q14 = silk_ADD_LSHIFT32( sum1_Q14, silk_max( silk_SUB32( gain_tmp_Q7, max_gain_Q7 ), 0 ), 10 ); + + silk_assert( sum1_Q14 >= 0 ); + + /* first row of W_Q18 */ + sum2_Q16 = silk_SMULWB( W_Q18[ 1 ], diff_Q14[ 1 ] ); + sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 2 ], diff_Q14[ 2 ] ); + sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 3 ], diff_Q14[ 3 ] ); + sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 4 ], diff_Q14[ 4 ] ); + sum2_Q16 = silk_LSHIFT( sum2_Q16, 1 ); + sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 0 ], diff_Q14[ 0 ] ); + sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16, diff_Q14[ 0 ] ); + + /* second row of W_Q18 */ + sum2_Q16 = silk_SMULWB( W_Q18[ 7 ], diff_Q14[ 2 ] ); + sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 8 ], diff_Q14[ 3 ] ); + sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 9 ], diff_Q14[ 4 ] ); + sum2_Q16 = silk_LSHIFT( sum2_Q16, 1 ); + sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 6 ], diff_Q14[ 1 ] ); + sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16, diff_Q14[ 1 ] ); + + /* third row of W_Q18 */ + sum2_Q16 = silk_SMULWB( W_Q18[ 13 ], diff_Q14[ 3 ] ); + sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 14 ], diff_Q14[ 4 ] ); + sum2_Q16 
= silk_LSHIFT( sum2_Q16, 1 ); + sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 12 ], diff_Q14[ 2 ] ); + sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16, diff_Q14[ 2 ] ); + + /* fourth row of W_Q18 */ + sum2_Q16 = silk_SMULWB( W_Q18[ 19 ], diff_Q14[ 4 ] ); + sum2_Q16 = silk_LSHIFT( sum2_Q16, 1 ); + sum2_Q16 = silk_SMLAWB( sum2_Q16, W_Q18[ 18 ], diff_Q14[ 3 ] ); + sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16, diff_Q14[ 3 ] ); + + /* last row of W_Q18 */ + sum2_Q16 = silk_SMULWB( W_Q18[ 24 ], diff_Q14[ 4 ] ); + sum1_Q14 = silk_SMLAWB( sum1_Q14, sum2_Q16, diff_Q14[ 4 ] ); + + silk_assert( sum1_Q14 >= 0 ); + + /* find best */ + if( sum1_Q14 < *rate_dist_Q14 ) { + *rate_dist_Q14 = sum1_Q14; + *ind = (opus_int8)k; + *gain_Q7 = gain_tmp_Q7; + } + + /* Go to next cbk vector */ + cb_row_Q7 += LTP_ORDER; + } +} diff --git a/drivers/opus/silk/ana_filt_bank_1.c b/drivers/opus/silk/ana_filt_bank_1.c new file mode 100644 index 0000000000..387dcd87e7 --- /dev/null +++ b/drivers/opus/silk/ana_filt_bank_1.c @@ -0,0 +1,74 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. 
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "SigProc_FIX.h" + +/* Coefficients for 2-band filter bank based on first-order allpass filters */ +static opus_int16 A_fb1_20 = 5394 << 1; +static opus_int16 A_fb1_21 = -24290; /* (opus_int16)(20623 << 1) */ + +/* Split signal into two decimated bands using first-order allpass filters */ +void silk_ana_filt_bank_1( + const opus_int16 *in, /* I Input signal [N] */ + opus_int32 *S, /* I/O State vector [2] */ + opus_int16 *outL, /* O Low band [N/2] */ + opus_int16 *outH, /* O High band [N/2] */ + const opus_int32 N /* I Number of input samples */ +) +{ + opus_int k, N2 = silk_RSHIFT( N, 1 ); + opus_int32 in32, X, Y, out_1, out_2; + + /* Internal variables and state are in Q10 format */ + for( k = 0; k < N2; k++ ) { + /* Convert to Q10 */ + in32 = silk_LSHIFT( (opus_int32)in[ 2 * k ], 10 ); + + /* All-pass section for even input sample */ + Y = silk_SUB32( in32, S[ 0 ] ); + X = silk_SMLAWB( Y, Y, A_fb1_21 ); + out_1 = silk_ADD32( S[ 0 ], X ); + S[ 0 ] = silk_ADD32( in32, X ); + + /* Convert to Q10 */ + in32 = silk_LSHIFT( (opus_int32)in[ 2 * k + 1 ], 10 ); + + /* All-pass 
section for odd input sample, and add to output of previous section */ + Y = silk_SUB32( in32, S[ 1 ] ); + X = silk_SMULWB( Y, A_fb1_20 ); + out_2 = silk_ADD32( S[ 1 ], X ); + S[ 1 ] = silk_ADD32( in32, X ); + + /* Add/subtract, convert back to int16 and store to output */ + outL[ k ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( silk_ADD32( out_2, out_1 ), 11 ) ); + outH[ k ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( silk_SUB32( out_2, out_1 ), 11 ) ); + } +} diff --git a/drivers/opus/silk/arm/SigProc_FIX_armv4.h b/drivers/opus/silk/arm/SigProc_FIX_armv4.h new file mode 100644 index 0000000000..ff62b1e5d6 --- /dev/null +++ b/drivers/opus/silk/arm/SigProc_FIX_armv4.h @@ -0,0 +1,47 @@ +/*********************************************************************** +Copyright (C) 2013 Xiph.Org Foundation and contributors +Copyright (c) 2013 Parrot +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
#ifndef SILK_SIGPROC_FIX_ARMv4_H
#define SILK_SIGPROC_FIX_ARMv4_H

/* Replace the existing silk_MLA definition with a version that issues the ARM
   "mla" instruction directly. */
#undef silk_MLA
/* Multiply-accumulate via inline assembly. The operands are handed to "mla"
   in the order (b, c, a), i.e. the two factors first and the accumulator last.
   The "=&r" early-clobber output constraint keeps the result register distinct
   from every input register. */
static OPUS_INLINE opus_int32 silk_MLA_armv4(opus_int32 a, opus_int32 b,
 opus_int32 c)
{
  opus_int32 res;
  __asm__(
      "#silk_MLA\n\t"            /* assembler comment tagging the generated code */
      "mla %0, %1, %2, %3\n\t"
      : "=&r"(res)
      : "r"(b), "r"(c), "r"(a)
  );
  return res;
}
#define silk_MLA(a, b, c) (silk_MLA_armv4(a, b, c))

#endif
#ifndef SILK_SIGPROC_FIX_ARMv5E_H
#define SILK_SIGPROC_FIX_ARMv5E_H

/* Replace the existing silk_SMULTT definition with one that issues the ARM
   "smultt" instruction directly. */
#undef silk_SMULTT
/* Inline-assembly wrapper around "smultt" applied to a and b.
   The "%" modifier on the first input tells the compiler that this operand
   and the following one may be swapped. */
static OPUS_INLINE opus_int32 silk_SMULTT_armv5e(opus_int32 a, opus_int32 b)
{
  opus_int32 res;
  __asm__(
      "#silk_SMULTT\n\t"         /* assembler comment tagging the generated code */
      "smultt %0, %1, %2\n\t"
      : "=r"(res)
      : "%r"(a), "r"(b)
  );
  return res;
}
#define silk_SMULTT(a, b) (silk_SMULTT_armv5e(a, b))

/* Replace the existing silk_SMLATT definition with one that issues the ARM
   "smlatt" instruction directly. */
#undef silk_SMLATT
/* Inline-assembly wrapper around "smlatt". The operands are passed in the
   order (b, c, a): the two factors first (marked as swappable with "%"), the
   accumulator a last. */
static OPUS_INLINE opus_int32 silk_SMLATT_armv5e(opus_int32 a, opus_int32 b,
 opus_int32 c)
{
  opus_int32 res;
  __asm__(
      "#silk_SMLATT\n\t"         /* assembler comment tagging the generated code */
      "smlatt %0, %1, %2, %3\n\t"
      : "=r"(res)
      : "%r"(b), "r"(c), "r"(a)
  );
  return res;
}
#define silk_SMLATT(a, b, c) (silk_SMLATT_armv5e(a, b, c))

#endif
+/*********************************************************************** +Copyright (C) 2013 Xiph.Org Foundation and contributors. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+***********************************************************************/ + +#ifndef SILK_MACROS_ARMv4_H +#define SILK_MACROS_ARMv4_H + +/* (a32 * (opus_int32)((opus_int16)(b32))) >> 16 output have to be 32bit int */ +#undef silk_SMULWB +static OPUS_INLINE opus_int32 silk_SMULWB_armv4(opus_int32 a, opus_int16 b) +{ + unsigned rd_lo; + int rd_hi; + __asm__( + "#silk_SMULWB\n\t" + "smull %0, %1, %2, %3\n\t" + : "=&r"(rd_lo), "=&r"(rd_hi) + : "%r"(a), "r"(b<<16) + ); + return rd_hi; +} +#define silk_SMULWB(a, b) (silk_SMULWB_armv4(a, b)) + +/* a32 + (b32 * (opus_int32)((opus_int16)(c32))) >> 16 output have to be 32bit int */ +#undef silk_SMLAWB +#define silk_SMLAWB(a, b, c) ((a) + silk_SMULWB(b, c)) + +/* (a32 * (b32 >> 16)) >> 16 */ +#undef silk_SMULWT +static OPUS_INLINE opus_int32 silk_SMULWT_armv4(opus_int32 a, opus_int32 b) +{ + unsigned rd_lo; + int rd_hi; + __asm__( + "#silk_SMULWT\n\t" + "smull %0, %1, %2, %3\n\t" + : "=&r"(rd_lo), "=&r"(rd_hi) + : "%r"(a), "r"(b&~0xFFFF) + ); + return rd_hi; +} +#define silk_SMULWT(a, b) (silk_SMULWT_armv4(a, b)) + +/* a32 + (b32 * (c32 >> 16)) >> 16 */ +#undef silk_SMLAWT +#define silk_SMLAWT(a, b, c) ((a) + silk_SMULWT(b, c)) + +/* (a32 * b32) >> 16 */ +#undef silk_SMULWW +static OPUS_INLINE opus_int32 silk_SMULWW_armv4(opus_int32 a, opus_int32 b) +{ + unsigned rd_lo; + int rd_hi; + __asm__( + "#silk_SMULWW\n\t" + "smull %0, %1, %2, %3\n\t" + : "=&r"(rd_lo), "=&r"(rd_hi) + : "%r"(a), "r"(b) + ); + return (rd_hi<<16)+(rd_lo>>16); +} +#define silk_SMULWW(a, b) (silk_SMULWW_armv4(a, b)) + +#undef silk_SMLAWW +static OPUS_INLINE opus_int32 silk_SMLAWW_armv4(opus_int32 a, opus_int32 b, + opus_int32 c) +{ + unsigned rd_lo; + int rd_hi; + __asm__( + "#silk_SMLAWW\n\t" + "smull %0, %1, %2, %3\n\t" + : "=&r"(rd_lo), "=&r"(rd_hi) + : "%r"(b), "r"(c) + ); + return a+(rd_hi<<16)+(rd_lo>>16); +} +#define silk_SMLAWW(a, b, c) (silk_SMLAWW_armv4(a, b, c)) + +#endif /* SILK_MACROS_ARMv4_H */ diff --git 
a/drivers/opus/silk/arm/macros_armv5e.h b/drivers/opus/silk/arm/macros_armv5e.h new file mode 100644 index 0000000000..aad4117e46 --- /dev/null +++ b/drivers/opus/silk/arm/macros_armv5e.h @@ -0,0 +1,213 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Copyright (c) 2013 Parrot +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+***********************************************************************/ + +#ifndef SILK_MACROS_ARMv5E_H +#define SILK_MACROS_ARMv5E_H + +/* (a32 * (opus_int32)((opus_int16)(b32))) >> 16 output have to be 32bit int */ +#undef silk_SMULWB +static OPUS_INLINE opus_int32 silk_SMULWB_armv5e(opus_int32 a, opus_int16 b) +{ + int res; + __asm__( + "#silk_SMULWB\n\t" + "smulwb %0, %1, %2\n\t" + : "=r"(res) + : "r"(a), "r"(b) + ); + return res; +} +#define silk_SMULWB(a, b) (silk_SMULWB_armv5e(a, b)) + +/* a32 + (b32 * (opus_int32)((opus_int16)(c32))) >> 16 output have to be 32bit int */ +#undef silk_SMLAWB +static OPUS_INLINE opus_int32 silk_SMLAWB_armv5e(opus_int32 a, opus_int32 b, + opus_int16 c) +{ + int res; + __asm__( + "#silk_SMLAWB\n\t" + "smlawb %0, %1, %2, %3\n\t" + : "=r"(res) + : "r"(b), "r"(c), "r"(a) + ); + return res; +} +#define silk_SMLAWB(a, b, c) (silk_SMLAWB_armv5e(a, b, c)) + +/* (a32 * (b32 >> 16)) >> 16 */ +#undef silk_SMULWT +static OPUS_INLINE opus_int32 silk_SMULWT_armv5e(opus_int32 a, opus_int32 b) +{ + int res; + __asm__( + "#silk_SMULWT\n\t" + "smulwt %0, %1, %2\n\t" + : "=r"(res) + : "r"(a), "r"(b) + ); + return res; +} +#define silk_SMULWT(a, b) (silk_SMULWT_armv5e(a, b)) + +/* a32 + (b32 * (c32 >> 16)) >> 16 */ +#undef silk_SMLAWT +static OPUS_INLINE opus_int32 silk_SMLAWT_armv5e(opus_int32 a, opus_int32 b, + opus_int32 c) +{ + int res; + __asm__( + "#silk_SMLAWT\n\t" + "smlawt %0, %1, %2, %3\n\t" + : "=r"(res) + : "r"(b), "r"(c), "r"(a) + ); + return res; +} +#define silk_SMLAWT(a, b, c) (silk_SMLAWT_armv5e(a, b, c)) + +/* (opus_int32)((opus_int16)(a32))) * (opus_int32)((opus_int16)(b32)) output have to be 32bit int */ +#undef silk_SMULBB +static OPUS_INLINE opus_int32 silk_SMULBB_armv5e(opus_int32 a, opus_int32 b) +{ + int res; + __asm__( + "#silk_SMULBB\n\t" + "smulbb %0, %1, %2\n\t" + : "=r"(res) + : "%r"(a), "r"(b) + ); + return res; +} +#define silk_SMULBB(a, b) (silk_SMULBB_armv5e(a, b)) + +/* a32 + (opus_int32)((opus_int16)(b32)) * 
(opus_int32)((opus_int16)(c32)) output have to be 32bit int */ +#undef silk_SMLABB +static OPUS_INLINE opus_int32 silk_SMLABB_armv5e(opus_int32 a, opus_int32 b, + opus_int32 c) +{ + int res; + __asm__( + "#silk_SMLABB\n\t" + "smlabb %0, %1, %2, %3\n\t" + : "=r"(res) + : "%r"(b), "r"(c), "r"(a) + ); + return res; +} +#define silk_SMLABB(a, b, c) (silk_SMLABB_armv5e(a, b, c)) + +/* (opus_int32)((opus_int16)(a32)) * (b32 >> 16) */ +#undef silk_SMULBT +static OPUS_INLINE opus_int32 silk_SMULBT_armv5e(opus_int32 a, opus_int32 b) +{ + int res; + __asm__( + "#silk_SMULBT\n\t" + "smulbt %0, %1, %2\n\t" + : "=r"(res) + : "r"(a), "r"(b) + ); + return res; +} +#define silk_SMULBT(a, b) (silk_SMULBT_armv5e(a, b)) + +/* a32 + (opus_int32)((opus_int16)(b32)) * (c32 >> 16) */ +#undef silk_SMLABT +static OPUS_INLINE opus_int32 silk_SMLABT_armv5e(opus_int32 a, opus_int32 b, + opus_int32 c) +{ + int res; + __asm__( + "#silk_SMLABT\n\t" + "smlabt %0, %1, %2, %3\n\t" + : "=r"(res) + : "r"(b), "r"(c), "r"(a) + ); + return res; +} +#define silk_SMLABT(a, b, c) (silk_SMLABT_armv5e(a, b, c)) + +/* add/subtract with output saturated */ +#undef silk_ADD_SAT32 +static OPUS_INLINE opus_int32 silk_ADD_SAT32_armv5e(opus_int32 a, opus_int32 b) +{ + int res; + __asm__( + "#silk_ADD_SAT32\n\t" + "qadd %0, %1, %2\n\t" + : "=r"(res) + : "%r"(a), "r"(b) + ); + return res; +} +#define silk_ADD_SAT32(a, b) (silk_ADD_SAT32_armv5e(a, b)) + +#undef silk_SUB_SAT32 +static OPUS_INLINE opus_int32 silk_SUB_SAT32_armv5e(opus_int32 a, opus_int32 b) +{ + int res; + __asm__( + "#silk_SUB_SAT32\n\t" + "qsub %0, %1, %2\n\t" + : "=r"(res) + : "r"(a), "r"(b) + ); + return res; +} +#define silk_SUB_SAT32(a, b) (silk_SUB_SAT32_armv5e(a, b)) + +/* count leading zeros of a 16-bit value: in16 is moved to the upper half-word and bit 15 is set, so the count never exceeds 16 */ +#undef silk_CLZ16 +static OPUS_INLINE opus_int32 silk_CLZ16_armv5(opus_int16 in16) +{ + int res; + __asm__( + "#silk_CLZ16\n\t" + "clz %0, %1;\n" + : "=r"(res) + : "r"((opus_int32)((unsigned)in16<<16)|0x8000) /* shift as unsigned: left-shifting a negative signed value is undefined in ISO C */ + ); + return res; +} +#define silk_CLZ16(in16) (silk_CLZ16_armv5(in16)) + +/* count leading zeros of a 32-bit value */ +#undef silk_CLZ32 
+static OPUS_INLINE opus_int32 silk_CLZ32_armv5(opus_int32 in32) +{ + int res; + __asm__( + "#silk_CLZ32\n\t" + "clz %0, %1\n\t" + : "=r"(res) + : "r"(in32) + ); + return res; +} +#define silk_CLZ32(in32) (silk_CLZ32_armv5(in32)) + +#endif /* SILK_MACROS_ARMv5E_H */ diff --git a/drivers/opus/silk/biquad_alt.c b/drivers/opus/silk/biquad_alt.c new file mode 100644 index 0000000000..5f1d6d25c3 --- /dev/null +++ b/drivers/opus/silk/biquad_alt.c @@ -0,0 +1,78 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +/* * + * silk_biquad_alt.c * + * * + * Second order ARMA filter * + * Can handle slowly varying filter coefficients * + * */ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "SigProc_FIX.h" + +/* Second order ARMA filter, alternative implementation; reads in[ k * stride ] and writes out[ k * stride ] for k = 0..len-1, updating the state S[] in place */ +void silk_biquad_alt( + const opus_int16 *in, /* I input signal */ + const opus_int32 *B_Q28, /* I MA coefficients [3] */ + const opus_int32 *A_Q28, /* I AR coefficients [2] */ + opus_int32 *S, /* I/O State vector [2] */ + opus_int16 *out, /* O output signal */ + const opus_int32 len, /* I signal length (must be even) */ + opus_int stride /* I Operate on interleaved signal if > 1 */ +) +{ + /* DIRECT FORM II TRANSPOSED (uses 2 element state vector) */ + opus_int k; + opus_int32 inval, A0_U_Q28, A0_L_Q28, A1_U_Q28, A1_L_Q28, out32_Q14; + + /* Negate A_Q28 values and split each in two parts: the low 14 bits and the remaining upper bits */ + A0_L_Q28 = ( -A_Q28[ 0 ] ) & 0x00003FFF; /* lower part */ + A0_U_Q28 = silk_RSHIFT( -A_Q28[ 0 ], 14 ); /* upper part */ + A1_L_Q28 = ( -A_Q28[ 1 ] ) & 0x00003FFF; /* lower part */ + A1_U_Q28 = silk_RSHIFT( -A_Q28[ 1 ], 14 ); /* upper part */ + + for( k = 0; k < len; k++ ) { + /* S[ 0 ], S[ 1 ]: Q12 */ + inval = in[ k * stride ]; + out32_Q14 = silk_LSHIFT( silk_SMLAWB( S[ 0 ], B_Q28[ 0 ], inval ), 2 ); + + S[ 0 ] = S[1] + silk_RSHIFT_ROUND( silk_SMULWB( out32_Q14, A0_L_Q28 ), 14 ); + S[ 0 ] = silk_SMLAWB( S[ 0 ], 
out32_Q14, A0_U_Q28 ); + S[ 0 ] = silk_SMLAWB( S[ 0 ], B_Q28[ 1 ], inval); + + S[ 1 ] = silk_RSHIFT_ROUND( silk_SMULWB( out32_Q14, A1_L_Q28 ), 14 ); + S[ 1 ] = silk_SMLAWB( S[ 1 ], out32_Q14, A1_U_Q28 ); + S[ 1 ] = silk_SMLAWB( S[ 1 ], B_Q28[ 2 ], inval ); + + /* Scale back to Q0 (adding (1<<14) - 1 before the 14-bit right shift) and saturate to 16 bits */ + out[ k * stride ] = (opus_int16)silk_SAT16( silk_RSHIFT( out32_Q14 + (1<<14) - 1, 14 ) ); + } +} diff --git a/drivers/opus/silk/bwexpander.c b/drivers/opus/silk/bwexpander.c new file mode 100644 index 0000000000..d757483872 --- /dev/null +++ b/drivers/opus/silk/bwexpander.c @@ -0,0 +1,51 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "SigProc_FIX.h" + +/* Chirp (bandwidth expand) LP AR filter; scales ar[] in place by a running chirp factor that is updated after each of the first d - 1 coefficients */ +void silk_bwexpander( + opus_int16 *ar, /* I/O AR filter to be expanded (without leading 1) */ + const opus_int d, /* I Length of ar */ + opus_int32 chirp_Q16 /* I Chirp factor (typically in the range 0 to 1) */ +) +{ + opus_int i; + opus_int32 chirp_minus_one_Q16 = chirp_Q16 - 65536; + + /* NB: Don't use silk_SMULWB in place of silk_RSHIFT_ROUND( silk_MUL(), 16 ) below. */ + /* Bias in silk_SMULWB can lead to unstable filters */ + for( i = 0; i < d - 1; i++ ) { + ar[ i ] = (opus_int16)silk_RSHIFT_ROUND( silk_MUL( chirp_Q16, ar[ i ] ), 16 ); + chirp_Q16 += silk_RSHIFT_ROUND( silk_MUL( chirp_Q16, chirp_minus_one_Q16 ), 16 ); + } + ar[ d - 1 ] = (opus_int16)silk_RSHIFT_ROUND( silk_MUL( chirp_Q16, ar[ d - 1 ] ), 16 ); +} diff --git a/drivers/opus/silk/bwexpander_32.c b/drivers/opus/silk/bwexpander_32.c new file mode 100644 index 0000000000..8a60767c0d --- /dev/null +++ b/drivers/opus/silk/bwexpander_32.c @@ -0,0 +1,50 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. 
+Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "SigProc_FIX.h" + +/* Chirp (bandwidth expand) LP AR filter, 32-bit coefficients; scales ar[] in place with silk_SMULWW using a running chirp factor */ +void silk_bwexpander_32( + opus_int32 *ar, /* I/O AR filter to be expanded (without leading 1) */ + const opus_int d, /* I Length of ar */ + opus_int32 chirp_Q16 /* I Chirp factor in Q16 */ +) +{ + opus_int i; + opus_int32 chirp_minus_one_Q16 = chirp_Q16 - 65536; + + for( i = 0; i < d - 1; i++ ) { + ar[ i ] = silk_SMULWW( chirp_Q16, ar[ i ] ); + chirp_Q16 += silk_RSHIFT_ROUND( silk_MUL( chirp_Q16, chirp_minus_one_Q16 ), 16 ); + } + ar[ d - 1 ] = silk_SMULWW( chirp_Q16, ar[ d - 1 ] ); +} + diff --git a/drivers/opus/silk/check_control_input.c b/drivers/opus/silk/check_control_input.c new file mode 100644 index 0000000000..0e02fff22d --- /dev/null +++ b/drivers/opus/silk/check_control_input.c @@ -0,0 +1,106 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. 
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "silk_main.h" +#include "control.h" +#include "errors.h" + +/* Check encoder control struct; returns SILK_NO_ERROR when every check passes, otherwise the error code of the first failing check (each failure also hits silk_assert( 0 )) */ +opus_int check_control_input( + silk_EncControlStruct *encControl /* I Control structure */ +) +{ + silk_assert( encControl != NULL ); + + if( ( ( encControl->API_sampleRate != 8000 ) && + ( encControl->API_sampleRate != 12000 ) && + ( encControl->API_sampleRate != 16000 ) && + ( encControl->API_sampleRate != 24000 ) && + ( encControl->API_sampleRate != 32000 ) && + ( encControl->API_sampleRate != 44100 ) && + ( encControl->API_sampleRate != 48000 ) ) || + ( ( encControl->desiredInternalSampleRate != 8000 ) && + ( encControl->desiredInternalSampleRate != 12000 ) && + ( encControl->desiredInternalSampleRate != 16000 ) ) || + ( ( encControl->maxInternalSampleRate != 8000 ) && + ( encControl->maxInternalSampleRate != 12000 ) && + ( encControl->maxInternalSampleRate != 16000 ) ) || + ( ( encControl->minInternalSampleRate != 8000 ) && + ( encControl->minInternalSampleRate != 12000 ) && + ( encControl->minInternalSampleRate != 16000 ) ) || + ( encControl->minInternalSampleRate > 
encControl->desiredInternalSampleRate ) || + ( encControl->maxInternalSampleRate < encControl->desiredInternalSampleRate ) || + ( encControl->minInternalSampleRate > encControl->maxInternalSampleRate ) ) { + silk_assert( 0 ); + return SILK_ENC_FS_NOT_SUPPORTED; + } + if( encControl->payloadSize_ms != 10 && + encControl->payloadSize_ms != 20 && + encControl->payloadSize_ms != 40 && + encControl->payloadSize_ms != 60 ) { + silk_assert( 0 ); + return SILK_ENC_PACKET_SIZE_NOT_SUPPORTED; + } + if( encControl->packetLossPercentage < 0 || encControl->packetLossPercentage > 100 ) { + silk_assert( 0 ); + return SILK_ENC_INVALID_LOSS_RATE; + } + if( encControl->useDTX < 0 || encControl->useDTX > 1 ) { + silk_assert( 0 ); + return SILK_ENC_INVALID_DTX_SETTING; + } + if( encControl->useCBR < 0 || encControl->useCBR > 1 ) { + silk_assert( 0 ); + return SILK_ENC_INVALID_CBR_SETTING; + } + if( encControl->useInBandFEC < 0 || encControl->useInBandFEC > 1 ) { + silk_assert( 0 ); + return SILK_ENC_INVALID_INBAND_FEC_SETTING; + } + if( encControl->nChannelsAPI < 1 || encControl->nChannelsAPI > ENCODER_NUM_CHANNELS ) { + silk_assert( 0 ); + return SILK_ENC_INVALID_NUMBER_OF_CHANNELS_ERROR; + } + if( encControl->nChannelsInternal < 1 || encControl->nChannelsInternal > ENCODER_NUM_CHANNELS ) { + silk_assert( 0 ); + return SILK_ENC_INVALID_NUMBER_OF_CHANNELS_ERROR; + } + if( encControl->nChannelsInternal > encControl->nChannelsAPI ) { + silk_assert( 0 ); + return SILK_ENC_INVALID_NUMBER_OF_CHANNELS_ERROR; + } + if( encControl->complexity < 0 || encControl->complexity > 10 ) { + silk_assert( 0 ); + return SILK_ENC_INVALID_COMPLEXITY_SETTING; + } + + return SILK_NO_ERROR; +} diff --git a/drivers/opus/silk/code_signs.c b/drivers/opus/silk/code_signs.c new file mode 100644 index 0000000000..8bcc6ecde1 --- /dev/null +++ b/drivers/opus/silk/code_signs.c @@ -0,0 +1,115 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. 
All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "silk_main.h" + +/*#define silk_enc_map(a) ((a) > 0 ? 1 : 0)*/ +/*#define silk_dec_map(a) ((a) > 0 ? 
1 : -1)*/ +/* shifting avoids if-statement */ +#define silk_enc_map(a) ( silk_RSHIFT( (a), 15 ) + 1 ) +#define silk_dec_map(a) ( silk_LSHIFT( (a), 1 ) - 1 ) + +/* Encodes signs of excitation: for each shell block with a positive pulse sum, codes one sign symbol per nonzero pulse */ +void silk_encode_signs( + ec_enc *psRangeEnc, /* I/O Compressor data structure */ + const opus_int8 pulses[], /* I pulse signal */ + opus_int length, /* I length of input */ + const opus_int signalType, /* I Signal type */ + const opus_int quantOffsetType, /* I Quantization offset type */ + const opus_int sum_pulses[ MAX_NB_SHELL_BLOCKS ] /* I Sum of absolute pulses per block */ +) +{ + opus_int i, j, p; + opus_uint8 icdf[ 2 ]; + const opus_int8 *q_ptr; + const opus_uint8 *icdf_ptr; + + icdf[ 1 ] = 0; + q_ptr = pulses; + i = silk_SMULBB( 7, silk_ADD_LSHIFT( quantOffsetType, signalType, 1 ) ); + icdf_ptr = &silk_sign_iCDF[ i ]; + length = silk_RSHIFT( length + SHELL_CODEC_FRAME_LENGTH/2, LOG2_SHELL_CODEC_FRAME_LENGTH ); + for( i = 0; i < length; i++ ) { + p = sum_pulses[ i ]; + if( p > 0 ) { + icdf[ 0 ] = icdf_ptr[ silk_min( p & 0x1F, 6 ) ]; + for( j = 0; j < SHELL_CODEC_FRAME_LENGTH; j++ ) { + if( q_ptr[ j ] != 0 ) { + ec_enc_icdf( psRangeEnc, silk_enc_map( q_ptr[ j ]), icdf, 8 ); + } + } + } + q_ptr += SHELL_CODEC_FRAME_LENGTH; + } +} + +/* Decodes signs of excitation: for each shell block with a positive pulse sum, reads one sign symbol per pulse greater than zero and applies it in place */ +void silk_decode_signs( + ec_dec *psRangeDec, /* I/O Compressor data structure */ + opus_int pulses[], /* I/O pulse signal */ + opus_int length, /* I length of input */ + const opus_int signalType, /* I Signal type */ + const opus_int quantOffsetType, /* I Quantization offset type */ + const opus_int sum_pulses[ MAX_NB_SHELL_BLOCKS ] /* I Sum of absolute pulses per block */ +) +{ + opus_int i, j, p; + opus_uint8 icdf[ 2 ]; + opus_int *q_ptr; + const opus_uint8 *icdf_ptr; + + icdf[ 1 ] = 0; + q_ptr = pulses; + i = silk_SMULBB( 7, silk_ADD_LSHIFT( quantOffsetType, signalType, 1 ) ); + icdf_ptr = &silk_sign_iCDF[ i ]; + length = silk_RSHIFT( length + SHELL_CODEC_FRAME_LENGTH/2, LOG2_SHELL_CODEC_FRAME_LENGTH 
); + for( i = 0; i < length; i++ ) { + p = sum_pulses[ i ]; + if( p > 0 ) { + icdf[ 0 ] = icdf_ptr[ silk_min( p & 0x1F, 6 ) ]; + for( j = 0; j < SHELL_CODEC_FRAME_LENGTH; j++ ) { + if( q_ptr[ j ] > 0 ) { + /* attach sign */ +#if 0 + /* conditional implementation */ + if( ec_dec_icdf( psRangeDec, icdf, 8 ) == 0 ) { + q_ptr[ j ] = -q_ptr[ j ]; + } +#else + /* implementation with shift, subtraction, multiplication: silk_dec_map turns decoded symbol 0 into -1 and 1 into +1 */ + q_ptr[ j ] *= silk_dec_map( ec_dec_icdf( psRangeDec, icdf, 8 ) ); +#endif + } + } + } + q_ptr += SHELL_CODEC_FRAME_LENGTH; + } +} diff --git a/drivers/opus/silk/control.h b/drivers/opus/silk/control.h new file mode 100644 index 0000000000..747e5426a0 --- /dev/null +++ b/drivers/opus/silk/control.h @@ -0,0 +1,142 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifndef SILK_CONTROL_H +#define SILK_CONTROL_H + +#include "typedef.h" + +#ifdef __cplusplus +extern "C" +{ +#endif + +/* Decoder API flags */ +#define FLAG_DECODE_NORMAL 0 +#define FLAG_PACKET_LOST 1 +#define FLAG_DECODE_LBRR 2 + +/***********************************************/ +/* Structure for controlling encoder operation */ +/***********************************************/ +typedef struct { + /* I: Number of channels; 1/2 */ + opus_int32 nChannelsAPI; + + /* I: Number of channels; 1/2 */ + opus_int32 nChannelsInternal; + + /* I: Input signal sampling rate in Hertz; 8000/12000/16000/24000/32000/44100/48000 */ + opus_int32 API_sampleRate; + + /* I: Maximum internal sampling rate in Hertz; 8000/12000/16000 */ + opus_int32 maxInternalSampleRate; + + /* I: Minimum internal sampling rate in Hertz; 8000/12000/16000 */ + opus_int32 minInternalSampleRate; + + /* I: Soft request for internal sampling rate in Hertz; 8000/12000/16000 */ + opus_int32 desiredInternalSampleRate; + + /* I: Packet duration in milliseconds; 10/20/40/60 */ + opus_int payloadSize_ms; + + /* I: Bitrate during active speech in bits/second; internally limited */ + opus_int32 bitRate; + + /* I: Uplink packet loss in percent (0-100) */ + opus_int packetLossPercentage; + + /* I: Complexity mode; 0 is lowest, 10 is highest complexity */ + opus_int complexity; + + /* I: Flag to enable in-band Forward 
Error Correction (FEC); 0/1 */ + opus_int useInBandFEC; + + /* I: Flag to enable discontinuous transmission (DTX); 0/1 */ + opus_int useDTX; + + /* I: Flag to use constant bitrate */ + opus_int useCBR; + + /* I: Maximum number of bits allowed for the frame */ + opus_int maxBits; + + /* I: Causes a smooth downmix to mono */ + opus_int toMono; + + /* I: Opus encoder is allowing us to switch bandwidth */ + opus_int opusCanSwitch; + + /* I: Make frames as independent as possible (but still use LPC) */ + opus_int reducedDependency; + + /* O: Internal sampling rate used, in Hertz; 8000/12000/16000 */ + opus_int32 internalSampleRate; + + /* O: Flag that bandwidth switching is allowed (because low voice activity) */ + opus_int allowBandwidthSwitch; + + /* O: Flag that SILK runs in WB mode without variable LP filter (use for switching between WB/SWB/FB) */ + opus_int inWBmodeWithoutVariableLP; + + /* O: Stereo width */ + opus_int stereoWidth_Q14; + + /* O: Tells the Opus encoder we're ready to switch */ + opus_int switchReady; + +} silk_EncControlStruct; + +/**************************************************************************/ +/* Structure for controlling decoder operation and reading decoder status */ +/**************************************************************************/ +typedef struct { + /* I: Number of channels; 1/2 */ + opus_int32 nChannelsAPI; + + /* I: Number of channels; 1/2 */ + opus_int32 nChannelsInternal; + + /* I: Output signal sampling rate in Hertz; 8000/12000/16000/24000/32000/44100/48000 */ + opus_int32 API_sampleRate; + + /* I: Internal sampling rate used, in Hertz; 8000/12000/16000 */ + opus_int32 internalSampleRate; + + /* I: Packet duration in milliseconds; 10/20/40/60 */ + opus_int payloadSize_ms; + + /* O: Pitch lag of previous frame (0 if unvoiced), measured in samples at 48 kHz */ + opus_int prevPitchLag; +} silk_DecControlStruct; + +#ifdef __cplusplus +} +#endif + +#endif /* SILK_CONTROL_H */ diff --git a/drivers/opus/silk/control_SNR.c 
b/drivers/opus/silk/control_SNR.c new file mode 100644 index 0000000000..ae6351798b --- /dev/null +++ b/drivers/opus/silk/control_SNR.c @@ -0,0 +1,81 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "silk_main.h" +#include "tuning_parameters.h" + +/* Control SNR of residual quantizer; updates psEncC->TargetRate_bps and psEncC->SNR_dB_Q7 when the limited target rate changes, and always returns SILK_NO_ERROR */ +opus_int silk_control_SNR( + silk_encoder_state *psEncC, /* I/O Pointer to Silk encoder state */ + opus_int32 TargetRate_bps /* I Target max bitrate (bps) */ +) +{ + opus_int k, ret = SILK_NO_ERROR; + opus_int32 frac_Q6; + const opus_int32 *rateTable; + + /* Set bitrate/coding quality */ + TargetRate_bps = silk_LIMIT( TargetRate_bps, MIN_TARGET_RATE_BPS, MAX_TARGET_RATE_BPS ); + if( TargetRate_bps != psEncC->TargetRate_bps ) { + psEncC->TargetRate_bps = TargetRate_bps; + + /* If new TargetRate_bps, translate to SNR_dB value */ + if( psEncC->fs_kHz == 8 ) { + rateTable = silk_TargetRate_table_NB; + } else if( psEncC->fs_kHz == 12 ) { + rateTable = silk_TargetRate_table_MB; + } else { + rateTable = silk_TargetRate_table_WB; + } + + /* Reduce bitrate for 10 ms modes in these calculations */ + if( psEncC->nb_subfr == 2 ) { + TargetRate_bps -= REDUCE_BITRATE_10_MS_BPS; + } + + /* Find bitrate interval in table and interpolate between the adjacent silk_SNR_table_Q1 entries */ + for( k = 1; k < TARGET_RATE_TAB_SZ; k++ ) { + if( TargetRate_bps <= rateTable[ k ] ) { + frac_Q6 = silk_DIV32( silk_LSHIFT( TargetRate_bps - rateTable[ k - 1 ], 6 ), + rateTable[ k ] - rateTable[ k - 1 ] ); + psEncC->SNR_dB_Q7 = silk_LSHIFT( silk_SNR_table_Q1[ k - 1 ], 6 ) + silk_MUL( frac_Q6, silk_SNR_table_Q1[ k ] - silk_SNR_table_Q1[ k - 1 ] ); + break; + } + } + + /* Reduce coding quality whenever LBRR is enabled, to free up some bits */ + if( psEncC->LBRR_enabled ) { + psEncC->SNR_dB_Q7 = silk_SMLABB( psEncC->SNR_dB_Q7, 12 - psEncC->LBRR_GainIncreases, SILK_FIX_CONST( -0.25, 7 ) ); + } + } + + return ret; +} diff --git a/drivers/opus/silk/control_audio_bandwidth.c b/drivers/opus/silk/control_audio_bandwidth.c new file mode 100644 index 0000000000..6f060bbd29 --- /dev/null +++ 
b/drivers/opus/silk/control_audio_bandwidth.c @@ -0,0 +1,126 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "silk_main.h"
+#include "tuning_parameters.h"
+
+/* Control internal sampling rate.
+ *
+ * Returns the internal sampling rate, in kHz, that the encoder should use
+ * next. psEncC->fs_kHz itself is only read here; the caller applies the
+ * returned value. Three cases are handled:
+ *   1. fs_kHz == 0 (encoder just initialized): start at
+ *      min( desiredInternal_fs_Hz, API_fs_Hz ).
+ *   2. The current rate is above the API rate or the allowed maximum, or
+ *      below the allowed minimum: use the API rate limited to
+ *      [minInternal_fs_Hz, maxInternal_fs_Hz].
+ *   3. Otherwise run the switching state machine, which may change
+ *      psEncC->sLP.mode, psEncC->sLP.transition_frame_no and
+ *      psEncC->sLP.In_LP_State, and may set encControl->switchReady and
+ *      shrink encControl->maxBits.
+ */
+opus_int silk_control_audio_bandwidth(
+    silk_encoder_state          *psEncC,            /* I/O  Pointer to Silk encoder state               */
+    silk_EncControlStruct       *encControl         /* I/O  Control structure (switchReady and maxBits may be written) */
+)
+{
+    opus_int   fs_kHz;
+    opus_int32 fs_Hz;
+
+    fs_kHz = psEncC->fs_kHz;
+    fs_Hz = silk_SMULBB( fs_kHz, 1000 );
+    if( fs_Hz == 0 ) {
+        /* Encoder has just been initialized */
+        fs_Hz  = silk_min( psEncC->desiredInternal_fs_Hz, psEncC->API_fs_Hz );
+        fs_kHz = silk_DIV32_16( fs_Hz, 1000 );
+    } else if( fs_Hz > psEncC->API_fs_Hz || fs_Hz > psEncC->maxInternal_fs_Hz || fs_Hz < psEncC->minInternal_fs_Hz ) {
+        /* Make sure internal rate is not higher than external rate or maximum allowed, or lower than minimum allowed */
+        fs_Hz  = psEncC->API_fs_Hz;
+        fs_Hz  = silk_min( fs_Hz, psEncC->maxInternal_fs_Hz );
+        fs_Hz  = silk_max( fs_Hz, psEncC->minInternal_fs_Hz );
+        fs_kHz = silk_DIV32_16( fs_Hz, 1000 );
+    } else {
+        /* State machine for the internal sampling rate switching */
+        if( psEncC->sLP.transition_frame_no >= TRANSITION_FRAMES ) {
+            /* Stop transition phase */
+            psEncC->sLP.mode = 0;
+        }
+        if( psEncC->allow_bandwidth_switch || encControl->opusCanSwitch ) {
+            /* Check if we should switch down */
+            if( silk_SMULBB( psEncC->fs_kHz, 1000 ) > psEncC->desiredInternal_fs_Hz )
+            {
+                /* Switch down */
+                if( psEncC->sLP.mode == 0 ) {
+                    /* New transition */
+                    psEncC->sLP.transition_frame_no = TRANSITION_FRAMES;
+
+                    /* Reset transition filter state */
+                    silk_memset( psEncC->sLP.In_LP_State, 0, sizeof( psEncC->sLP.In_LP_State ) );
+                }
+                if( encControl->opusCanSwitch ) {
+                    /* Stop transition phase */
+                    psEncC->sLP.mode = 0;
+
+                    /* Switch to a lower sample frequency: one step down (16 -> 12, anything else -> 8) */
+                    fs_kHz = psEncC->fs_kHz == 16 ? 12 : 8;
+                } else {
+                    if( psEncC->sLP.transition_frame_no <= 0 ) {
+                        encControl->switchReady = 1;
+                        /* Make room for redundancy */
+                        encControl->maxBits -= encControl->maxBits * 5 / ( encControl->payloadSize_ms + 5 );
+                    } else {
+                        /* Direction: down (at double speed) */
+                        psEncC->sLP.mode = -2;
+                    }
+                }
+            }
+            else
+            /* Check if we should switch up (this `if` is the body of the `else` above) */
+            if( silk_SMULBB( psEncC->fs_kHz, 1000 ) < psEncC->desiredInternal_fs_Hz )
+            {
+                /* Switch up */
+                if( encControl->opusCanSwitch ) {
+                    /* Switch to a higher sample frequency: one step up (8 -> 12, anything else -> 16) */
+                    fs_kHz = psEncC->fs_kHz == 8 ? 12 : 16;
+
+                    /* New transition */
+                    psEncC->sLP.transition_frame_no = 0;
+
+                    /* Reset transition filter state */
+                    silk_memset( psEncC->sLP.In_LP_State, 0, sizeof( psEncC->sLP.In_LP_State ) );
+
+                    /* Direction: up */
+                    psEncC->sLP.mode = 1;
+                } else {
+                    if( psEncC->sLP.mode == 0 ) {
+                        encControl->switchReady = 1;
+                        /* Make room for redundancy */
+                        encControl->maxBits -= encControl->maxBits * 5 / ( encControl->payloadSize_ms + 5 );
+                    } else {
+                        /* Direction: up */
+                        psEncC->sLP.mode = 1;
+                    }
+                }
+            } else {
+                /* Current rate already equals the desired rate: turn a pending downward transition into an upward one */
+                if (psEncC->sLP.mode<0)
+                    psEncC->sLP.mode = 1;
+            }
+        }
+    }
+
+    return fs_kHz;
+}
diff --git a/drivers/opus/silk/control_codec.c b/drivers/opus/silk/control_codec.c
new file mode 100644
index 0000000000..2d7b10e9b7
--- /dev/null
+++ b/drivers/opus/silk/control_codec.c
@@ -0,0 +1,422 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+#ifdef OPUS_FIXED_POINT
+#include "main_FIX.h"
+#define silk_encoder_state_Fxx      silk_encoder_state_FIX
+#else
+#include "main_FLP.h"
+#define silk_encoder_state_Fxx      silk_encoder_state_FLP
+#endif
+#include "stack_alloc.h"
+#include "tuning_parameters.h"
+#include "pitch_est_defines.h"
+
+static opus_int silk_setup_resamplers(
+    silk_encoder_state_Fxx          *psEnc,             /* I/O                      */
+    opus_int                        fs_kHz              /* I                        */
+);
+
+static opus_int silk_setup_fs(
+    silk_encoder_state_Fxx          *psEnc,             /* I/O                      */
+    opus_int                        fs_kHz,             /* I                        */
+    opus_int                        PacketSize_ms       /* I                        */
+);
+
+static opus_int silk_setup_complexity(
+    silk_encoder_state              *psEncC,            /* I/O                      */
+    opus_int                        Complexity          /* I                        */
+);
+
+static OPUS_INLINE opus_int silk_setup_LBRR(
+    silk_encoder_state              *psEncC,            /* I/O                      */
+    const opus_int32                TargetRate_bps      /* I                        */
+);
+
+
+/* Control encoder.
+ *
+ * Copies the caller's settings from encControl into psEnc->sCmn and then
+ * reconfigures the encoder through the static helpers in this file.
+ * If the encoder was already controlled since the last payload and this is
+ * not a prefill call, only the resamplers are refreshed (and only when the
+ * API sampling rate changed while fs_kHz > 0) before returning early.
+ * The return value is the sum of the codes returned by the helpers that ran
+ * (0 when none of them reported a problem).
+ */
+opus_int silk_control_encoder(
+    silk_encoder_state_Fxx          *psEnc,                                 /* I/O  Pointer to Silk encoder state                                               */
+    silk_EncControlStruct           *encControl,                            /* I    Control structure                                                           */
+    const opus_int32                TargetRate_bps,                         /* I    Target max bitrate (bps)                                                    */
+    const opus_int                  allow_bw_switch,                        /* I    Flag to allow switching audio bandwidth                                     */
+    const opus_int                  channelNb,                              /* I    Channel number                                                              */
+    const opus_int                  force_fs_kHz                            /* I    If non-zero, replaces the rate chosen by silk_control_audio_bandwidth()     */
+)
+{
+    opus_int   fs_kHz, ret = 0;
+
+    psEnc->sCmn.useDTX                 = encControl->useDTX;
+    psEnc->sCmn.useCBR                 = encControl->useCBR;
+    psEnc->sCmn.API_fs_Hz              = encControl->API_sampleRate;
+    psEnc->sCmn.maxInternal_fs_Hz      = encControl->maxInternalSampleRate;
+    psEnc->sCmn.minInternal_fs_Hz      = encControl->minInternalSampleRate;
+    psEnc->sCmn.desiredInternal_fs_Hz  = encControl->desiredInternalSampleRate;
+    psEnc->sCmn.useInBandFEC           = encControl->useInBandFEC;
+    psEnc->sCmn.nChannelsAPI           = encControl->nChannelsAPI;
+    psEnc->sCmn.nChannelsInternal      = encControl->nChannelsInternal;
+    psEnc->sCmn.allow_bandwidth_switch = allow_bw_switch;
+    psEnc->sCmn.channelNb              = channelNb;
+
+    if( psEnc->sCmn.controlled_since_last_payload != 0 && psEnc->sCmn.prefillFlag == 0 ) {
+        if( psEnc->sCmn.API_fs_Hz != psEnc->sCmn.prev_API_fs_Hz && psEnc->sCmn.fs_kHz > 0 ) {
+            /* Change in API sampling rate in the middle of encoding a packet */
+            ret += silk_setup_resamplers( psEnc, psEnc->sCmn.fs_kHz );
+        }
+        return ret;
+    }
+
+    /* Beyond this point we know that there are no previously coded frames in the payload buffer */
+
+    /********************************************/
+    /* Determine internal sampling rate         */
+    /********************************************/
+    fs_kHz = silk_control_audio_bandwidth( &psEnc->sCmn, encControl );
+    if( force_fs_kHz ) {
+        fs_kHz = force_fs_kHz;
+    }
+    /********************************************/
+    /* Prepare resampler and buffered data      */
+    /********************************************/
+    ret += silk_setup_resamplers( psEnc, fs_kHz );
+
+    /********************************************/
+    /* Set internal sampling frequency          */
+    /********************************************/
+    ret += silk_setup_fs( psEnc, fs_kHz, encControl->payloadSize_ms );
+
+    /********************************************/
+    /* Set encoding complexity                  */
+    /********************************************/
+    ret += silk_setup_complexity( &psEnc->sCmn, encControl->complexity  );
+
+    /********************************************/
+    /* Set packet loss rate measured by farend  */
+    /********************************************/
+    psEnc->sCmn.PacketLoss_perc = encControl->packetLossPercentage;
+
+    /********************************************/
+    /* Set LBRR usage                           */
+    /********************************************/
+    ret += silk_setup_LBRR( &psEnc->sCmn, TargetRate_bps );
+
+    psEnc->sCmn.controlled_since_last_payload = 1;
+
+    return ret;
+}
+/* (Re)initialize psEnc->sCmn.resampler_state for resampling from API_fs_Hz to
+ * fs_kHz. Work is done only when the internal rate or the API rate changed.
+ * On first use (sCmn.fs_kHz == 0) the resampler is simply initialized.
+ * Otherwise the buffered x_buf samples are resampled to the API rate with a
+ * temporary resampler and then pushed through the freshly initialized
+ * resampler, so that its state matches the buffered data.
+ * prev_API_fs_Hz is updated on every call. Returns the accumulated return
+ * codes of the resampler calls.
+ */
+static opus_int silk_setup_resamplers(
+    silk_encoder_state_Fxx          *psEnc,             /* I/O  Encoder state                         */
+    opus_int                        fs_kHz              /* I    New internal sampling rate (kHz)      */
+)
+{
+    opus_int   ret = SILK_NO_ERROR;
+    SAVE_STACK;
+
+    if( psEnc->sCmn.fs_kHz != fs_kHz || psEnc->sCmn.prev_API_fs_Hz != psEnc->sCmn.API_fs_Hz )
+    {
+        if( psEnc->sCmn.fs_kHz == 0 ) {
+            /* Initialize the resampler for enc_API.c preparing resampling from API_fs_Hz to fs_kHz */
+            ret += silk_resampler_init( &psEnc->sCmn.resampler_state, psEnc->sCmn.API_fs_Hz, fs_kHz * 1000, 1 );
+        } else {
+            VARDECL( opus_int16, x_buf_API_fs_Hz );
+            VARDECL( silk_resampler_state_struct, temp_resampler_state );
+#ifdef OPUS_FIXED_POINT
+            opus_int16 *x_bufFIX = psEnc->x_buf;
+#else
+            VARDECL( opus_int16, x_bufFIX );
+            opus_int32 new_buf_samples;
+#endif
+            opus_int32 api_buf_samples;
+            opus_int32 old_buf_samples;
+            opus_int32 buf_length_ms;
+
+            buf_length_ms = silk_LSHIFT( psEnc->sCmn.nb_subfr * 5, 1 ) + LA_SHAPE_MS;
+            old_buf_samples = buf_length_ms * psEnc->sCmn.fs_kHz;
+
+#ifndef OPUS_FIXED_POINT
+            new_buf_samples = buf_length_ms * fs_kHz;
+            ALLOC( x_bufFIX, silk_max( old_buf_samples, new_buf_samples ),
+                   opus_int16 );
+            silk_float2short_array( x_bufFIX, psEnc->x_buf, old_buf_samples );
+#endif
+
+            /* Initialize resampler for temporary resampling of x_buf data to API_fs_Hz */
+            ALLOC( temp_resampler_state, 1, silk_resampler_state_struct );
+            ret += silk_resampler_init( temp_resampler_state, silk_SMULBB( psEnc->sCmn.fs_kHz, 1000 ), psEnc->sCmn.API_fs_Hz, 0 );
+
+            /* Calculate number of samples to temporarily upsample */
+            api_buf_samples = buf_length_ms * silk_DIV32_16( psEnc->sCmn.API_fs_Hz, 1000 );
+
+            /* Temporary resampling of x_buf data to API_fs_Hz */
+            ALLOC( x_buf_API_fs_Hz, api_buf_samples, opus_int16 );
+            ret += silk_resampler( temp_resampler_state, x_buf_API_fs_Hz, x_bufFIX, old_buf_samples );
+
+            /* Initialize the resampler for enc_API.c preparing resampling from API_fs_Hz to fs_kHz */
+            ret += silk_resampler_init( &psEnc->sCmn.resampler_state, psEnc->sCmn.API_fs_Hz, silk_SMULBB( fs_kHz, 1000 ), 1 );
+
+            /* Correct resampler state by resampling buffered data from API_fs_Hz to fs_kHz */
+            ret += silk_resampler( &psEnc->sCmn.resampler_state, x_bufFIX, x_buf_API_fs_Hz, api_buf_samples );
+
+#ifndef OPUS_FIXED_POINT
+            silk_short2float_array( psEnc->x_buf, x_bufFIX, new_buf_samples);
+#endif
+        }
+    }
+
+    psEnc->sCmn.prev_API_fs_Hz = psEnc->sCmn.API_fs_Hz;
+
+    RESTORE_STACK;
+    return ret;
+}
+/* Apply the packet size and the internal sampling rate to the encoder state.
+ * A PacketSize_ms other than 10, 20, 40 or 60 sets the return value to
+ * SILK_ENC_PACKET_SIZE_NOT_SUPPORTED, but the function still continues and
+ * configures the state. When fs_kHz differs from the current rate, part of
+ * the state is reset and all rate-dependent lengths and tables are derived
+ * again. Both changes zero sCmn.TargetRate_bps to force a new SNR computation.
+ */
+static opus_int silk_setup_fs(
+    silk_encoder_state_Fxx          *psEnc,             /* I/O  Encoder state                         */
+    opus_int                        fs_kHz,             /* I    Internal sampling rate (kHz); asserted to be 8, 12 or 16 */
+    opus_int                        PacketSize_ms       /* I    Packet size (ms)                      */
+)
+{
+    opus_int ret = SILK_NO_ERROR;
+
+    /* Set packet size */
+    if( PacketSize_ms != psEnc->sCmn.PacketSize_ms ) {
+        if( ( PacketSize_ms !=  10 ) &&
+            ( PacketSize_ms !=  20 ) &&
+            ( PacketSize_ms !=  40 ) &&
+            ( PacketSize_ms !=  60 ) ) {
+            ret = SILK_ENC_PACKET_SIZE_NOT_SUPPORTED;
+        }
+        if( PacketSize_ms <= 10 ) {
+            psEnc->sCmn.nFramesPerPacket = 1;
+            psEnc->sCmn.nb_subfr = PacketSize_ms == 10 ? 2 : 1;
+            psEnc->sCmn.frame_length = silk_SMULBB( PacketSize_ms, fs_kHz );
+            psEnc->sCmn.pitch_LPC_win_length = silk_SMULBB( FIND_PITCH_LPC_WIN_MS_2_SF, fs_kHz );
+            /* NOTE: this tests the rate stored in the state, i.e. the value from before the update further down */
+            if( psEnc->sCmn.fs_kHz == 8 ) {
+                psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_10_ms_NB_iCDF;
+            } else {
+                psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_10_ms_iCDF;
+            }
+        } else {
+            psEnc->sCmn.nFramesPerPacket = silk_DIV32_16( PacketSize_ms, MAX_FRAME_LENGTH_MS );
+            psEnc->sCmn.nb_subfr = MAX_NB_SUBFR;
+            psEnc->sCmn.frame_length = silk_SMULBB( 20, fs_kHz );
+            psEnc->sCmn.pitch_LPC_win_length = silk_SMULBB( FIND_PITCH_LPC_WIN_MS, fs_kHz );
+            if( psEnc->sCmn.fs_kHz == 8 ) {
+                psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_NB_iCDF;
+            } else {
+                psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_iCDF;
+            }
+        }
+        psEnc->sCmn.PacketSize_ms  = PacketSize_ms;
+        psEnc->sCmn.TargetRate_bps = 0;         /* trigger new SNR computation */
+    }
+
+    /* Set internal sampling frequency */
+    silk_assert( fs_kHz == 8 || fs_kHz == 12 || fs_kHz == 16 );
+    silk_assert( psEnc->sCmn.nb_subfr == 2 || psEnc->sCmn.nb_subfr == 4 );
+    if( psEnc->sCmn.fs_kHz != fs_kHz ) {
+        /* reset part of the state */
+        silk_memset( &psEnc->sShape,               0, sizeof( psEnc->sShape ) );
+        silk_memset( &psEnc->sPrefilt,             0, sizeof( psEnc->sPrefilt ) );
+        silk_memset( &psEnc->sCmn.sNSQ,            0, sizeof( psEnc->sCmn.sNSQ ) );
+        silk_memset( psEnc->sCmn.prev_NLSFq_Q15,   0, sizeof( psEnc->sCmn.prev_NLSFq_Q15 ) );
+        silk_memset( &psEnc->sCmn.sLP.In_LP_State, 0, sizeof( psEnc->sCmn.sLP.In_LP_State ) );
+        psEnc->sCmn.inputBufIx                  = 0;
+        psEnc->sCmn.nFramesEncoded              = 0;
+        psEnc->sCmn.TargetRate_bps              = 0;     /* trigger new SNR computation */
+
+        /* Initialize non-zero parameters */
+        psEnc->sCmn.prevLag                     = 100;
+        psEnc->sCmn.first_frame_after_reset     = 1;
+        psEnc->sPrefilt.lagPrev                 = 100;
+        psEnc->sShape.LastGainIndex             = 10;
+        psEnc->sCmn.sNSQ.lagPrev                = 100;
+        psEnc->sCmn.sNSQ.prev_gain_Q16          = 65536;
+        psEnc->sCmn.prevSignalType              = TYPE_NO_VOICE_ACTIVITY;
+
+        psEnc->sCmn.fs_kHz = fs_kHz;
+        if( psEnc->sCmn.fs_kHz == 8 ) {
+            if( psEnc->sCmn.nb_subfr == MAX_NB_SUBFR ) {
+                psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_NB_iCDF;
+            } else {
+                psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_10_ms_NB_iCDF;
+            }
+        } else {
+            if( psEnc->sCmn.nb_subfr == MAX_NB_SUBFR ) {
+                psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_iCDF;
+            } else {
+                psEnc->sCmn.pitch_contour_iCDF = silk_pitch_contour_10_ms_iCDF;
+            }
+        }
+        if( psEnc->sCmn.fs_kHz == 8 || psEnc->sCmn.fs_kHz == 12 ) {
+            psEnc->sCmn.predictLPCOrder = MIN_LPC_ORDER;
+            psEnc->sCmn.psNLSF_CB  = &silk_NLSF_CB_NB_MB;
+        } else {
+            psEnc->sCmn.predictLPCOrder = MAX_LPC_ORDER;
+            psEnc->sCmn.psNLSF_CB  = &silk_NLSF_CB_WB;
+        }
+        psEnc->sCmn.subfr_length   = SUB_FRAME_LENGTH_MS * fs_kHz;
+        psEnc->sCmn.frame_length   = silk_SMULBB( psEnc->sCmn.subfr_length, psEnc->sCmn.nb_subfr );
+        psEnc->sCmn.ltp_mem_length = silk_SMULBB( LTP_MEM_LENGTH_MS, fs_kHz );
+        psEnc->sCmn.la_pitch       = silk_SMULBB( LA_PITCH_MS, fs_kHz );
+        psEnc->sCmn.max_pitch_lag  = silk_SMULBB( 18, fs_kHz );
+        if( psEnc->sCmn.nb_subfr == MAX_NB_SUBFR ) {
+            psEnc->sCmn.pitch_LPC_win_length = silk_SMULBB( FIND_PITCH_LPC_WIN_MS, fs_kHz );
+        } else {
+            psEnc->sCmn.pitch_LPC_win_length = silk_SMULBB( FIND_PITCH_LPC_WIN_MS_2_SF, fs_kHz );
+        }
+        if( psEnc->sCmn.fs_kHz == 16 ) {
+            psEnc->sCmn.mu_LTP_Q9 = SILK_FIX_CONST( MU_LTP_QUANT_WB, 9 );
+            psEnc->sCmn.pitch_lag_low_bits_iCDF = silk_uniform8_iCDF;
+        } else if( psEnc->sCmn.fs_kHz == 12 ) {
+            psEnc->sCmn.mu_LTP_Q9 = SILK_FIX_CONST( MU_LTP_QUANT_MB, 9 );
+            psEnc->sCmn.pitch_lag_low_bits_iCDF = silk_uniform6_iCDF;
+        } else {
+            psEnc->sCmn.mu_LTP_Q9 = SILK_FIX_CONST( MU_LTP_QUANT_NB, 9 );
+            psEnc->sCmn.pitch_lag_low_bits_iCDF = silk_uniform4_iCDF;
+        }
+    }
+
+    /* Check that settings are valid */
+    silk_assert( ( psEnc->sCmn.subfr_length * psEnc->sCmn.nb_subfr ) == psEnc->sCmn.frame_length );
+
+    return ret;
+}
+/* Translate the complexity setting (asserted to be 0..10) into analysis
+ * parameters. Five tiers are distinguished: < 2, < 4, < 6, < 8 and the rest.
+ * la_shape, warping_Q16 and shapeWinLength are derived from psEncC->fs_kHz,
+ * and the pitch estimation LPC order is capped at predictLPCOrder.
+ * Always returns 0.
+ */
+static opus_int silk_setup_complexity(
+    silk_encoder_state              *psEncC,            /* I/O  Encoder state                         */
+    opus_int                        Complexity          /* I    Complexity setting                    */
+)
+{
+    opus_int ret = 0;
+
+    /* Set encoding complexity */
+    silk_assert( Complexity >= 0 && Complexity <= 10 );
+    if( Complexity < 2 ) {
+        psEncC->pitchEstimationComplexity       = SILK_PE_MIN_COMPLEX;
+        psEncC->pitchEstimationThreshold_Q16    = SILK_FIX_CONST( 0.8, 16 );
+        psEncC->pitchEstimationLPCOrder         = 6;
+        psEncC->shapingLPCOrder                 = 8;
+        psEncC->la_shape                        = 3 * psEncC->fs_kHz;
+        psEncC->nStatesDelayedDecision          = 1;
+        psEncC->useInterpolatedNLSFs            = 0;
+        psEncC->LTPQuantLowComplexity           = 1;
+        psEncC->NLSF_MSVQ_Survivors             = 2;
+        psEncC->warping_Q16                     = 0;
+    } else if( Complexity < 4 ) {
+        psEncC->pitchEstimationComplexity       = SILK_PE_MID_COMPLEX;
+        psEncC->pitchEstimationThreshold_Q16    = SILK_FIX_CONST( 0.76, 16 );
+        psEncC->pitchEstimationLPCOrder         = 8;
+        psEncC->shapingLPCOrder                 = 10;
+        psEncC->la_shape                        = 5 * psEncC->fs_kHz;
+        psEncC->nStatesDelayedDecision          = 1;
+        psEncC->useInterpolatedNLSFs            = 0;
+        psEncC->LTPQuantLowComplexity           = 0;
+        psEncC->NLSF_MSVQ_Survivors             = 4;
+        psEncC->warping_Q16                     = 0;
+    } else if( Complexity < 6 ) {
+        psEncC->pitchEstimationComplexity       = SILK_PE_MID_COMPLEX;
+        psEncC->pitchEstimationThreshold_Q16    = SILK_FIX_CONST( 0.74, 16 );
+        psEncC->pitchEstimationLPCOrder         = 10;
+        psEncC->shapingLPCOrder                 = 12;
+        psEncC->la_shape                        = 5 * psEncC->fs_kHz;
+        psEncC->nStatesDelayedDecision          = 2;
+        psEncC->useInterpolatedNLSFs            = 1;
+        psEncC->LTPQuantLowComplexity           = 0;
+        psEncC->NLSF_MSVQ_Survivors             = 8;
+        psEncC->warping_Q16                     = psEncC->fs_kHz * SILK_FIX_CONST( WARPING_MULTIPLIER, 16 );
+    } else if( Complexity < 8 ) {
+        psEncC->pitchEstimationComplexity       = SILK_PE_MID_COMPLEX;
+        psEncC->pitchEstimationThreshold_Q16    = SILK_FIX_CONST( 0.72, 16 );
+        psEncC->pitchEstimationLPCOrder         = 12;
+        psEncC->shapingLPCOrder                 = 14;
+        psEncC->la_shape                        = 5 * psEncC->fs_kHz;
+        psEncC->nStatesDelayedDecision          = 3;
+        psEncC->useInterpolatedNLSFs            = 1;
+        psEncC->LTPQuantLowComplexity           = 0;
+        psEncC->NLSF_MSVQ_Survivors             = 16;
+        psEncC->warping_Q16                     = psEncC->fs_kHz * SILK_FIX_CONST( WARPING_MULTIPLIER, 16 );
+    } else {
+        psEncC->pitchEstimationComplexity       = SILK_PE_MAX_COMPLEX;
+        psEncC->pitchEstimationThreshold_Q16    = SILK_FIX_CONST( 0.7, 16 );
+        psEncC->pitchEstimationLPCOrder         = 16;
+        psEncC->shapingLPCOrder                 = 16;
+        psEncC->la_shape                        = 5 * psEncC->fs_kHz;
+        psEncC->nStatesDelayedDecision          = MAX_DEL_DEC_STATES;
+        psEncC->useInterpolatedNLSFs            = 1;
+        psEncC->LTPQuantLowComplexity           = 0;
+        psEncC->NLSF_MSVQ_Survivors             = 32;
+        psEncC->warping_Q16                     = psEncC->fs_kHz * SILK_FIX_CONST( WARPING_MULTIPLIER, 16 );
+    }
+
+    /* Do not allow higher pitch estimation LPC order than predict LPC order */
+    psEncC->pitchEstimationLPCOrder = silk_min_int( psEncC->pitchEstimationLPCOrder, psEncC->predictLPCOrder );
+    psEncC->shapeWinLength          = SUB_FRAME_LENGTH_MS * psEncC->fs_kHz + 2 * psEncC->la_shape;
+    psEncC->Complexity              = Complexity;
+
+    silk_assert( psEncC->pitchEstimationLPCOrder <= MAX_FIND_PITCH_LPC_ORDER );
+    silk_assert( psEncC->shapingLPCOrder         <= MAX_SHAPE_LPC_ORDER      );
+    silk_assert( psEncC->nStatesDelayedDecision  <= MAX_DEL_DEC_STATES       );
+    silk_assert( psEncC->warping_Q16             <= 32767                    );
+    silk_assert( psEncC->la_shape                <= LA_SHAPE_MAX             );
+    silk_assert( psEncC->shapeWinLength          <= SHAPE_LPC_WIN_MAX        );
+    silk_assert( psEncC->NLSF_MSVQ_Survivors     <= NLSF_VQ_MAX_SURVIVORS    );
+
+    return ret;
+}
+/* Decide whether LBRR is used. LBRR_enabled is first cleared and is set again
+ * only when in-band FEC is requested, the packet loss percentage is above
+ * zero and TargetRate_bps exceeds a threshold that depends on the internal
+ * sampling rate and on the loss percentage (the loss is capped at 25 in that
+ * computation). In that case LBRR_GainIncreases is also set, with a lower
+ * bound of 2. Always returns SILK_NO_ERROR.
+ */
+static OPUS_INLINE opus_int silk_setup_LBRR(
+    silk_encoder_state          *psEncC,            /* I/O  Encoder state                         */
+    const opus_int32            TargetRate_bps      /* I    Target max bitrate (bps)              */
+)
+{
+    opus_int   ret = SILK_NO_ERROR;
+    opus_int32 LBRR_rate_thres_bps;
+
+    psEncC->LBRR_enabled = 0;
+    if( psEncC->useInBandFEC && psEncC->PacketLoss_perc > 0 ) {
+        if( psEncC->fs_kHz == 8 ) {
+            LBRR_rate_thres_bps = LBRR_NB_MIN_RATE_BPS;
+        } else if( psEncC->fs_kHz == 12 ) {
+            LBRR_rate_thres_bps = LBRR_MB_MIN_RATE_BPS;
+        } else {
+            LBRR_rate_thres_bps = LBRR_WB_MIN_RATE_BPS;
+        }
+        LBRR_rate_thres_bps = silk_SMULWB( silk_MUL( LBRR_rate_thres_bps, 125 - silk_min( psEncC->PacketLoss_perc, 25 ) ), SILK_FIX_CONST( 0.01, 16 ) );
+
+        if( TargetRate_bps > LBRR_rate_thres_bps ) {
+            /* Set gain increase for coding LBRR excitation */
+            psEncC->LBRR_enabled = 1;
+            psEncC->LBRR_GainIncreases = silk_max_int( 7 - silk_SMULWB( (opus_int32)psEncC->PacketLoss_perc, SILK_FIX_CONST( 0.4, 16 ) ), 2 );
+        }
+    }
+
+    return ret;
+}
diff --git a/drivers/opus/silk/debug.c b/drivers/opus/silk/debug.c
new file mode 100644
index 0000000000..2230813fae
--- /dev/null
+++ b/drivers/opus/silk/debug.c
@@ -0,0 +1,170 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "debug.h" +#include "SigProc_FIX.h" + +#if SILK_TIC_TOC + +#ifdef _WIN32 + +#if (defined(_WIN32) || defined(_WINCE)) +#include <windows.h> /* timer */ +#else /* Linux or Mac*/ +#include <sys/time.h> +#endif + +unsigned long silk_GetHighResolutionTime(void) /* O time in usec*/ +{ + /* Returns a time counter in microsec */ + /* the resolution is platform dependent */ + /* but is typically 1.62 us resolution */ + LARGE_INTEGER lpPerformanceCount; + LARGE_INTEGER lpFrequency; + QueryPerformanceCounter(&lpPerformanceCount); + QueryPerformanceFrequency(&lpFrequency); + return (unsigned long)((1000000*(lpPerformanceCount.QuadPart)) / lpFrequency.QuadPart); +} +#else /* Linux or Mac*/ +unsigned long GetHighResolutionTime(void) /* O time in usec*/ +{ + struct timeval tv; + gettimeofday(&tv, 0); + return((tv.tv_sec*1000000)+(tv.tv_usec)); +} +#endif + +int silk_Timer_nTimers = 0; +int silk_Timer_depth_ctr = 0; +char silk_Timer_tags[silk_NUM_TIMERS_MAX][silk_NUM_TIMERS_MAX_TAG_LEN]; +#ifdef WIN32 +LARGE_INTEGER silk_Timer_start[silk_NUM_TIMERS_MAX]; +#else +unsigned long silk_Timer_start[silk_NUM_TIMERS_MAX]; +#endif +unsigned int silk_Timer_cnt[silk_NUM_TIMERS_MAX]; +opus_int64 silk_Timer_min[silk_NUM_TIMERS_MAX]; +opus_int64 silk_Timer_sum[silk_NUM_TIMERS_MAX]; +opus_int64 silk_Timer_max[silk_NUM_TIMERS_MAX]; 
+opus_int64 silk_Timer_depth[silk_NUM_TIMERS_MAX]; + +#ifdef WIN32 +void silk_TimerSave(char *file_name) +{ + if( silk_Timer_nTimers > 0 ) + { + int k; + FILE *fp; + LARGE_INTEGER lpFrequency; + LARGE_INTEGER lpPerformanceCount1, lpPerformanceCount2; + int del = 0x7FFFFFFF; + double avg, sum_avg; + /* estimate overhead of calling performance counters */ + for( k = 0; k < 1000; k++ ) { + QueryPerformanceCounter(&lpPerformanceCount1); + QueryPerformanceCounter(&lpPerformanceCount2); + lpPerformanceCount2.QuadPart -= lpPerformanceCount1.QuadPart; + if( (int)lpPerformanceCount2.LowPart < del ) + del = lpPerformanceCount2.LowPart; + } + QueryPerformanceFrequency(&lpFrequency); + /* print results to file */ + sum_avg = 0.0f; + for( k = 0; k < silk_Timer_nTimers; k++ ) { + if (silk_Timer_depth[k] == 0) { + sum_avg += (1e6 * silk_Timer_sum[k] / silk_Timer_cnt[k] - del) / lpFrequency.QuadPart * silk_Timer_cnt[k]; + } + } + fp = fopen(file_name, "w"); + fprintf(fp, " min avg %% max count\n"); + for( k = 0; k < silk_Timer_nTimers; k++ ) { + if (silk_Timer_depth[k] == 0) { + fprintf(fp, "%-28s", silk_Timer_tags[k]); + } else if (silk_Timer_depth[k] == 1) { + fprintf(fp, " %-27s", silk_Timer_tags[k]); + } else if (silk_Timer_depth[k] == 2) { + fprintf(fp, " %-26s", silk_Timer_tags[k]); + } else if (silk_Timer_depth[k] == 3) { + fprintf(fp, " %-25s", silk_Timer_tags[k]); + } else { + fprintf(fp, " %-24s", silk_Timer_tags[k]); + } + avg = (1e6 * silk_Timer_sum[k] / silk_Timer_cnt[k] - del) / lpFrequency.QuadPart; + fprintf(fp, "%8.2f", (1e6 * (silk_max_64(silk_Timer_min[k] - del, 0))) / lpFrequency.QuadPart); + fprintf(fp, "%12.2f %6.2f", avg, 100.0 * avg / sum_avg * silk_Timer_cnt[k]); + fprintf(fp, "%12.2f", (1e6 * (silk_max_64(silk_Timer_max[k] - del, 0))) / lpFrequency.QuadPart); + fprintf(fp, "%10d\n", silk_Timer_cnt[k]); + } + fprintf(fp, " microseconds\n"); + fclose(fp); + } +} +#else +void silk_TimerSave(char *file_name) +{ + if( silk_Timer_nTimers > 0 ) + { + int k; + 
FILE *fp; + /* print results to file */ + fp = fopen(file_name, "w"); + fprintf(fp, " min avg max count\n"); + for( k = 0; k < silk_Timer_nTimers; k++ ) + { + if (silk_Timer_depth[k] == 0) { + fprintf(fp, "%-28s", silk_Timer_tags[k]); + } else if (silk_Timer_depth[k] == 1) { + fprintf(fp, " %-27s", silk_Timer_tags[k]); + } else if (silk_Timer_depth[k] == 2) { + fprintf(fp, " %-26s", silk_Timer_tags[k]); + } else if (silk_Timer_depth[k] == 3) { + fprintf(fp, " %-25s", silk_Timer_tags[k]); + } else { + fprintf(fp, " %-24s", silk_Timer_tags[k]); + } + fprintf(fp, "%d ", silk_Timer_min[k]); + fprintf(fp, "%f ", (double)silk_Timer_sum[k] / (double)silk_Timer_cnt[k]); + fprintf(fp, "%d ", silk_Timer_max[k]); + fprintf(fp, "%10d\n", silk_Timer_cnt[k]); + } + fprintf(fp, " microseconds\n"); + fclose(fp); + } +} +#endif + +#endif /* SILK_TIC_TOC */ + +#if SILK_DEBUG +FILE *silk_debug_store_fp[ silk_NUM_STORES_MAX ]; +int silk_debug_store_count = 0; +#endif /* SILK_DEBUG */ + diff --git a/drivers/opus/silk/debug.h b/drivers/opus/silk/debug.h new file mode 100644 index 0000000000..efb6d3e99e --- /dev/null +++ b/drivers/opus/silk/debug.h @@ -0,0 +1,279 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. 
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_DEBUG_H
+#define SILK_DEBUG_H
+
+#include "typedef.h"
+#include <stdio.h>      /* file writing */
+#include <string.h>     /* strcpy, strcmp */
+
+#ifdef  __cplusplus
+extern "C"
+{
+#endif
+
+unsigned long GetHighResolutionTime(void); /* O  time in usec*/
+
+/* make SILK_DEBUG dependent on compiler's _DEBUG (only when _WIN32 is defined; otherwise it is always 0) */
+#if defined _WIN32
+    #ifdef _DEBUG
+        #define SILK_DEBUG  1
+    #else
+        #define SILK_DEBUG  0
+    #endif
+
+    /* overrule the above (disabled: change `#if 0` to force SILK_DEBUG to 1) */
+    #if 0
+    /*  #define NO_ASSERTS*/
+    #undef  SILK_DEBUG
+    #define SILK_DEBUG  1
+    #endif
+#else
+    #define SILK_DEBUG  0
+#endif
+
+/* Flag for using timers. It is 0 here, so TIC(), TOC() and silk_TimerSave() expand to nothing (see the #else branch below) */
+#define SILK_TIC_TOC    0
+
+
+#if SILK_TIC_TOC
+
+#if (defined(_WIN32) || defined(_WINCE))
+#include <windows.h>    /* timer */
+#else   /* Linux or Mac*/
+#include <sys/time.h>
+#endif
+
+/*********************************/
+/* timer functions for profiling */
+/*********************************/
+/* example:                                                         */
+/*                                                                  */
+/* TIC(LPC)                                                         */
+/* do_LPC(in_vec, order, acoef);    // do LPC analysis              */
+/* TOC(LPC)                                                         */
+/*                                                                  */
+/* and call the following just before exiting (from main)           */
+/*                                                                  */
+/* silk_TimerSave("silk_TimingData.txt");                           */
+/*                                                                  */
+/* results are now in silk_TimingData.txt                           */
+
+void silk_TimerSave(char *file_name);
+
+/* max number of timers (in different locations) */
+#define silk_NUM_TIMERS_MAX                  50
+/* max length of name tags in TIC(..), TOC(..) */
+#define silk_NUM_TIMERS_MAX_TAG_LEN          30
+
+extern int           silk_Timer_nTimers;
+extern int           silk_Timer_depth_ctr;
+extern char          silk_Timer_tags[silk_NUM_TIMERS_MAX][silk_NUM_TIMERS_MAX_TAG_LEN];
+#ifdef _WIN32
+extern LARGE_INTEGER silk_Timer_start[silk_NUM_TIMERS_MAX];
+#else
+extern unsigned long silk_Timer_start[silk_NUM_TIMERS_MAX];
+#endif
+extern unsigned int  silk_Timer_cnt[silk_NUM_TIMERS_MAX];
+extern opus_int64    silk_Timer_sum[silk_NUM_TIMERS_MAX];
+extern opus_int64    silk_Timer_max[silk_NUM_TIMERS_MAX];
+extern opus_int64    silk_Timer_min[silk_NUM_TIMERS_MAX];
+extern opus_int64    silk_Timer_depth[silk_NUM_TIMERS_MAX];
+
+/* WARNING: TIC()/TOC can measure only up to 0.1 seconds at a time */
+/* TIC(TAG_NAME) starts the timer named by the stringized tag. On its first    */
+/* execution each TIC site looks the tag up in silk_Timer_tags and, if it is   */
+/* not found, registers a new slot (recording the current nesting depth); the  */
+/* slot index is then cached in a block-local static.                          */
+/* NOTE(review): registration does not check silk_Timer_nTimers against        */
+/* silk_NUM_TIMERS_MAX, and strcpy() does not check the tag length against     */
+/* silk_NUM_TIMERS_MAX_TAG_LEN.                                                */
+#ifdef _WIN32
+#define TIC(TAG_NAME) {                                     \
+    static int init = 0;                                    \
+    static int ID = -1;                                     \
+    if( init == 0 )                                         \
+    {                                                       \
+        int k;                                              \
+        init = 1;                                           \
+        for( k = 0; k < silk_Timer_nTimers; k++ ) {         \
+            if( strcmp(silk_Timer_tags[k], #TAG_NAME) == 0 ) { \
+                ID = k;                                     \
+                break;                                      \
+            }                                               \
+        }                                                   \
+        if (ID == -1) {                                     \
+            ID = silk_Timer_nTimers;                        \
+            silk_Timer_nTimers++;                           \
+            silk_Timer_depth[ID] = silk_Timer_depth_ctr;    \
+            strcpy(silk_Timer_tags[ID], #TAG_NAME);         \
+            silk_Timer_cnt[ID] = 0;                         \
+            silk_Timer_sum[ID] = 0;                         \
+            silk_Timer_min[ID] = 0xFFFFFFFF;                \
+            silk_Timer_max[ID] = 0;                         \
+        }                                                   \
+    }                                                       \
+    silk_Timer_depth_ctr++;                                 \
+    QueryPerformanceCounter(&silk_Timer_start[ID]);         \
+}
+#else
+#define TIC(TAG_NAME) {                                     \
+    static int init = 0;                                    \
+    static int ID = -1;                                     \
+    if( init == 0 )                                         \
+    {                                                       \
+        int k;                                              \
+        init = 1;                                           \
+        for( k = 0; k < silk_Timer_nTimers; k++ ) {         \
+            if( strcmp(silk_Timer_tags[k], #TAG_NAME) == 0 ) { \
+                ID = k;                                     \
+                break;                                      \
+            }                                               \
+        }                                                   \
+        if (ID == -1) {                                     \
+            ID = silk_Timer_nTimers;                        \
+            silk_Timer_nTimers++;                           \
+            silk_Timer_depth[ID] = silk_Timer_depth_ctr;    \
+            strcpy(silk_Timer_tags[ID], #TAG_NAME);         \
+            silk_Timer_cnt[ID] = 0;                         \
+            silk_Timer_sum[ID] = 0;                         \
+            silk_Timer_min[ID] = 0xFFFFFFFF;                \
+            silk_Timer_max[ID] = 0;                         \
+        }                                                   \
+    }                                                       \
+    silk_Timer_depth_ctr++;                                 \
+    silk_Timer_start[ID] = GetHighResolutionTime();         \
+}
+#endif
+/* TOC(TAG_NAME) stops the timer named by the stringized tag. The elapsed      */
+/* value is added to the count/sum/min/max of the slot only when it is below   */
+/* 100000000; the nesting depth counter is decremented in every case.          */
+/* NOTE(review): ID starts at 0 here, so a tag that was never registered by a  */
+/* TIC() is accounted to slot 0. On the non-Windows path endTime is unsigned,  */
+/* which makes the `endTime >= 0` test always true.                            */
+#ifdef _WIN32
+#define TOC(TAG_NAME) {                                             \
+    LARGE_INTEGER lpPerformanceCount;                               \
+    static int init = 0;                                            \
+    static int ID = 0;                                              \
+    if( init == 0 )                                                 \
+    {                                                               \
+        int k;                                                      \
+        init = 1;                                                   \
+        for( k = 0; k < silk_Timer_nTimers; k++ ) {                 \
+            if( strcmp(silk_Timer_tags[k], #TAG_NAME) == 0 ) {      \
+                ID = k;                                             \
+                break;                                              \
+            }                                                       \
+        }                                                           \
+    }                                                               \
+    QueryPerformanceCounter(&lpPerformanceCount);                   \
+    lpPerformanceCount.QuadPart -= silk_Timer_start[ID].QuadPart;   \
+    if((lpPerformanceCount.QuadPart < 100000000) &&                 \
+        (lpPerformanceCount.QuadPart >= 0)) {                       \
+        silk_Timer_cnt[ID]++;                                       \
+        silk_Timer_sum[ID] += lpPerformanceCount.QuadPart;          \
+        if( lpPerformanceCount.QuadPart > silk_Timer_max[ID] )      \
+            silk_Timer_max[ID] = lpPerformanceCount.QuadPart;       \
+        if( lpPerformanceCount.QuadPart < silk_Timer_min[ID] )      \
+            silk_Timer_min[ID] = lpPerformanceCount.QuadPart;       \
+    }                                                               \
+    silk_Timer_depth_ctr--;                                         \
+}
+#else
+#define TOC(TAG_NAME) {                                             \
+    unsigned long endTime;                                          \
+    static int init = 0;                                            \
+    static int ID = 0;                                              \
+    if( init == 0 )                                                 \
+    {                                                               \
+        int k;                                                      \
+        init = 1;                                                   \
+        for( k = 0; k < silk_Timer_nTimers; k++ ) {                 \
+            if( strcmp(silk_Timer_tags[k], #TAG_NAME) == 0 ) {      \
+                ID = k;                                             \
+                break;                                              \
+            }                                                       \
+        }                                                           \
+    }                                                               \
+    endTime = GetHighResolutionTime();                              \
+    endTime -= silk_Timer_start[ID];                                \
+    if((endTime < 100000000) &&                                     \
+        (endTime >= 0)) {                                           \
+        silk_Timer_cnt[ID]++;                                       \
+        silk_Timer_sum[ID] += endTime;                              \
+        if( endTime > silk_Timer_max[ID] )                          \
+            silk_Timer_max[ID] = endTime;                           \
+        if( endTime < silk_Timer_min[ID] )                          \
+            silk_Timer_min[ID] = endTime;                           \
+    }                                                               \
+    silk_Timer_depth_ctr--;                                         \
+}
+#endif
+
+#else /* SILK_TIC_TOC */
+
+/* define macros as empty strings */
+#define TIC(TAG_NAME)
+#define TOC(TAG_NAME)
+#define silk_TimerSave(FILE_NAME)
+
+#endif /* SILK_TIC_TOC */
+
+
+#if SILK_DEBUG
+/************************************/
+/* write data to file for debugging */
+/************************************/
+/* Example: DEBUG_STORE_DATA(testfile.pcm, &RIN[0], 160*sizeof(opus_int16)); */
+
+#define silk_NUM_STORES_MAX                                  100
+extern FILE *silk_debug_store_fp[ silk_NUM_STORES_MAX ];
+extern int silk_debug_store_count;
+
+/* Faster way of storing the data: the file, named by the stringized        */
+/* FILE_NAME argument, is opened once on the first execution of each call   */
+/* site and its handle is kept in silk_debug_store_fp for later writes.     */
+/* NOTE(review): the local `static FILE **fp` is declared but never used,   */
+/* and neither the fopen() result nor silk_NUM_STORES_MAX is checked.       */
+#define DEBUG_STORE_DATA( FILE_NAME, DATA_PTR, N_BYTES ) {          \
+    static opus_int init = 0, cnt = 0;                              \
+    static FILE **fp;                                               \
+    if (init == 0) {                                                \
+        init = 1;                                                   \
+        cnt = silk_debug_store_count++;                             \
+        silk_debug_store_fp[ cnt ] = fopen(#FILE_NAME, "wb");       \
+    }                                                               \
+    fwrite((DATA_PTR), (N_BYTES), 1, silk_debug_store_fp[ cnt ]);   \
+}
+
+/* Call this at the end of main() */
+#define SILK_DEBUG_STORE_CLOSE_FILES {                              \
+    opus_int i;                                                     \
+    for( i = 0; i < silk_debug_store_count; i++ ) {                 \
+        fclose( silk_debug_store_fp[ i ] );                         \
+    }                                                               \
+}
+
+#else /* SILK_DEBUG */
+
+/* define macros as empty strings */
+#define DEBUG_STORE_DATA(FILE_NAME, DATA_PTR, N_BYTES)
+#define SILK_DEBUG_STORE_CLOSE_FILES
+
+#endif /* SILK_DEBUG */
+
+#ifdef  __cplusplus
+}
+#endif
+
+#endif /* SILK_DEBUG_H */
diff --git a/drivers/opus/silk/dec_API.c b/drivers/opus/silk/dec_API.c
new file mode 100644
index 0000000000..cd72115a20
--- /dev/null
+++ b/drivers/opus/silk/dec_API.c
@@ -0,0 +1,397 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif +#include "API.h" +#include "silk_main.h" +#include "stack_alloc.h" + +/************************/ +/* Decoder Super Struct */ +/************************/ +typedef struct { + silk_decoder_state channel_state[ DECODER_NUM_CHANNELS ]; + stereo_dec_state sStereo; + opus_int nChannelsAPI; + opus_int nChannelsInternal; + opus_int prev_decode_only_middle; +} silk_decoder; + +/*********************/ +/* Decoder functions */ +/*********************/ + +opus_int silk_Get_Decoder_Size( /* O Returns error code */ + opus_int *decSizeBytes /* O Number of bytes in SILK decoder state */ +) +{ + opus_int ret = SILK_NO_ERROR; + + *decSizeBytes = sizeof( silk_decoder ); + + return ret; +} + +/* Reset decoder state */ +opus_int silk_InitDecoder( /* O Returns error code */ + void *decState /* I/O State */ +) +{ + opus_int n, ret = SILK_NO_ERROR; + silk_decoder_state *channel_state = 
((silk_decoder *)decState)->channel_state; + + for( n = 0; n < DECODER_NUM_CHANNELS; n++ ) { + ret = silk_init_decoder( &channel_state[ n ] ); + } + silk_memset(&((silk_decoder *)decState)->sStereo, 0, sizeof(((silk_decoder *)decState)->sStereo)); + /* Not strictly needed, but it's cleaner that way */ + ((silk_decoder *)decState)->prev_decode_only_middle = 0; + + return ret; +} + +/* Decode a frame */ +opus_int silk_Decode( /* O Returns error code */ + void* decState, /* I/O State */ + silk_DecControlStruct* decControl, /* I/O Control Structure */ + opus_int lostFlag, /* I 0: no loss, 1 loss, 2 decode fec */ + opus_int newPacketFlag, /* I Indicates first decoder call for this packet */ + ec_dec *psRangeDec, /* I/O Compressor data structure */ + opus_int16 *samplesOut, /* O Decoded output speech vector */ + opus_int32 *nSamplesOut /* O Number of samples decoded */ +) +{ + opus_int i, n, decode_only_middle = 0, ret = SILK_NO_ERROR; + opus_int32 nSamplesOutDec, LBRR_symbol; + opus_int16 *samplesOut1_tmp[ 2 ]; + VARDECL( opus_int16, samplesOut1_tmp_storage ); + VARDECL( opus_int16, samplesOut2_tmp ); + opus_int32 MS_pred_Q13[ 2 ] = { 0 }; + opus_int16 *resample_out_ptr; + silk_decoder *psDec = ( silk_decoder * )decState; + silk_decoder_state *channel_state = psDec->channel_state; + opus_int has_side; + opus_int stereo_to_mono; + SAVE_STACK; + + silk_assert( decControl->nChannelsInternal == 1 || decControl->nChannelsInternal == 2 ); + + /**********************************/ + /* Test if first frame in payload */ + /**********************************/ + if( newPacketFlag ) { + for( n = 0; n < decControl->nChannelsInternal; n++ ) { + channel_state[ n ].nFramesDecoded = 0; /* Used to count frames in packet */ + } + } + + /* If Mono -> Stereo transition in bitstream: init state of second channel */ + if( decControl->nChannelsInternal > psDec->nChannelsInternal ) { + ret += silk_init_decoder( &channel_state[ 1 ] ); + } + + stereo_to_mono = decControl->nChannelsInternal == 1 
&& psDec->nChannelsInternal == 2 && + ( decControl->internalSampleRate == 1000*channel_state[ 0 ].fs_kHz ); + + if( channel_state[ 0 ].nFramesDecoded == 0 ) { + for( n = 0; n < decControl->nChannelsInternal; n++ ) { + opus_int fs_kHz_dec; + if( decControl->payloadSize_ms == 0 ) { + /* Assuming packet loss, use 10 ms */ + channel_state[ n ].nFramesPerPacket = 1; + channel_state[ n ].nb_subfr = 2; + } else if( decControl->payloadSize_ms == 10 ) { + channel_state[ n ].nFramesPerPacket = 1; + channel_state[ n ].nb_subfr = 2; + } else if( decControl->payloadSize_ms == 20 ) { + channel_state[ n ].nFramesPerPacket = 1; + channel_state[ n ].nb_subfr = 4; + } else if( decControl->payloadSize_ms == 40 ) { + channel_state[ n ].nFramesPerPacket = 2; + channel_state[ n ].nb_subfr = 4; + } else if( decControl->payloadSize_ms == 60 ) { + channel_state[ n ].nFramesPerPacket = 3; + channel_state[ n ].nb_subfr = 4; + } else { + silk_assert( 0 ); + RESTORE_STACK; + return SILK_DEC_INVALID_FRAME_SIZE; + } + fs_kHz_dec = ( decControl->internalSampleRate >> 10 ) + 1; + if( fs_kHz_dec != 8 && fs_kHz_dec != 12 && fs_kHz_dec != 16 ) { + silk_assert( 0 ); + RESTORE_STACK; + return SILK_DEC_INVALID_SAMPLING_FREQUENCY; + } + ret += silk_decoder_set_fs( &channel_state[ n ], fs_kHz_dec, decControl->API_sampleRate ); + } + } + + if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 && ( psDec->nChannelsAPI == 1 || psDec->nChannelsInternal == 1 ) ) { + silk_memset( psDec->sStereo.pred_prev_Q13, 0, sizeof( psDec->sStereo.pred_prev_Q13 ) ); + silk_memset( psDec->sStereo.sSide, 0, sizeof( psDec->sStereo.sSide ) ); + silk_memcpy( &channel_state[ 1 ].resampler_state, &channel_state[ 0 ].resampler_state, sizeof( silk_resampler_state_struct ) ); + } + psDec->nChannelsAPI = decControl->nChannelsAPI; + psDec->nChannelsInternal = decControl->nChannelsInternal; + + if( decControl->API_sampleRate > (opus_int32)MAX_API_FS_KHZ * 1000 || decControl->API_sampleRate < 8000 ) { + ret = 
SILK_DEC_INVALID_SAMPLING_FREQUENCY; + RESTORE_STACK; + return( ret ); + } + + if( lostFlag != FLAG_PACKET_LOST && channel_state[ 0 ].nFramesDecoded == 0 ) { + /* First decoder call for this payload */ + /* Decode VAD flags and LBRR flag */ + for( n = 0; n < decControl->nChannelsInternal; n++ ) { + for( i = 0; i < channel_state[ n ].nFramesPerPacket; i++ ) { + channel_state[ n ].VAD_flags[ i ] = ec_dec_bit_logp(psRangeDec, 1); + } + channel_state[ n ].LBRR_flag = ec_dec_bit_logp(psRangeDec, 1); + } + /* Decode LBRR flags */ + for( n = 0; n < decControl->nChannelsInternal; n++ ) { + silk_memset( channel_state[ n ].LBRR_flags, 0, sizeof( channel_state[ n ].LBRR_flags ) ); + if( channel_state[ n ].LBRR_flag ) { + if( channel_state[ n ].nFramesPerPacket == 1 ) { + channel_state[ n ].LBRR_flags[ 0 ] = 1; + } else { + LBRR_symbol = ec_dec_icdf( psRangeDec, silk_LBRR_flags_iCDF_ptr[ channel_state[ n ].nFramesPerPacket - 2 ], 8 ) + 1; + for( i = 0; i < channel_state[ n ].nFramesPerPacket; i++ ) { + channel_state[ n ].LBRR_flags[ i ] = silk_RSHIFT( LBRR_symbol, i ) & 1; + } + } + } + } + + if( lostFlag == FLAG_DECODE_NORMAL ) { + /* Regular decoding: skip all LBRR data */ + for( i = 0; i < channel_state[ 0 ].nFramesPerPacket; i++ ) { + for( n = 0; n < decControl->nChannelsInternal; n++ ) { + if( channel_state[ n ].LBRR_flags[ i ] ) { + opus_int pulses[ MAX_FRAME_LENGTH ]; + opus_int condCoding; + + if( decControl->nChannelsInternal == 2 && n == 0 ) { + silk_stereo_decode_pred( psRangeDec, MS_pred_Q13 ); + if( channel_state[ 1 ].LBRR_flags[ i ] == 0 ) { + silk_stereo_decode_mid_only( psRangeDec, &decode_only_middle ); + } + } + /* Use conditional coding if previous frame available */ + if( i > 0 && channel_state[ n ].LBRR_flags[ i - 1 ] ) { + condCoding = CODE_CONDITIONALLY; + } else { + condCoding = CODE_INDEPENDENTLY; + } + silk_decode_indices( &channel_state[ n ], psRangeDec, i, 1, condCoding ); + silk_decode_pulses( psRangeDec, pulses, channel_state[ n 
].indices.signalType, + channel_state[ n ].indices.quantOffsetType, channel_state[ n ].frame_length ); + } + } + } + } + } + + /* Get MS predictor index */ + if( decControl->nChannelsInternal == 2 ) { + if( lostFlag == FLAG_DECODE_NORMAL || + ( lostFlag == FLAG_DECODE_LBRR && channel_state[ 0 ].LBRR_flags[ channel_state[ 0 ].nFramesDecoded ] == 1 ) ) + { + silk_stereo_decode_pred( psRangeDec, MS_pred_Q13 ); + /* For LBRR data, decode mid-only flag only if side-channel's LBRR flag is false */ + if( ( lostFlag == FLAG_DECODE_NORMAL && channel_state[ 1 ].VAD_flags[ channel_state[ 0 ].nFramesDecoded ] == 0 ) || + ( lostFlag == FLAG_DECODE_LBRR && channel_state[ 1 ].LBRR_flags[ channel_state[ 0 ].nFramesDecoded ] == 0 ) ) + { + silk_stereo_decode_mid_only( psRangeDec, &decode_only_middle ); + } else { + decode_only_middle = 0; + } + } else { + for( n = 0; n < 2; n++ ) { + MS_pred_Q13[ n ] = psDec->sStereo.pred_prev_Q13[ n ]; + } + } + } + + /* Reset side channel decoder prediction memory for first frame with side coding */ + if( decControl->nChannelsInternal == 2 && decode_only_middle == 0 && psDec->prev_decode_only_middle == 1 ) { + silk_memset( psDec->channel_state[ 1 ].outBuf, 0, sizeof(psDec->channel_state[ 1 ].outBuf) ); + silk_memset( psDec->channel_state[ 1 ].sLPC_Q14_buf, 0, sizeof(psDec->channel_state[ 1 ].sLPC_Q14_buf) ); + psDec->channel_state[ 1 ].lagPrev = 100; + psDec->channel_state[ 1 ].LastGainIndex = 10; + psDec->channel_state[ 1 ].prevSignalType = TYPE_NO_VOICE_ACTIVITY; + psDec->channel_state[ 1 ].first_frame_after_reset = 1; + } + + ALLOC( samplesOut1_tmp_storage, + decControl->nChannelsInternal*( + channel_state[ 0 ].frame_length + 2 ), + opus_int16 ); + samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage; + samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage + + channel_state[ 0 ].frame_length + 2; + + if( lostFlag == FLAG_DECODE_NORMAL ) { + has_side = !decode_only_middle; + } else { + has_side = !psDec->prev_decode_only_middle + || 
(decControl->nChannelsInternal == 2 && lostFlag == FLAG_DECODE_LBRR && channel_state[1].LBRR_flags[ channel_state[1].nFramesDecoded ] == 1 ); + } + /* Call decoder for one frame */ + for( n = 0; n < decControl->nChannelsInternal; n++ ) { + if( n == 0 || has_side ) { + opus_int FrameIndex; + opus_int condCoding; + + FrameIndex = channel_state[ 0 ].nFramesDecoded - n; + /* Use independent coding if no previous frame available */ + if( FrameIndex <= 0 ) { + condCoding = CODE_INDEPENDENTLY; + } else if( lostFlag == FLAG_DECODE_LBRR ) { + condCoding = channel_state[ n ].LBRR_flags[ FrameIndex - 1 ] ? CODE_CONDITIONALLY : CODE_INDEPENDENTLY; + } else if( n > 0 && psDec->prev_decode_only_middle ) { + /* If we skipped a side frame in this packet, we don't + need LTP scaling; the LTP state is well-defined. */ + condCoding = CODE_INDEPENDENTLY_NO_LTP_SCALING; + } else { + condCoding = CODE_CONDITIONALLY; + } + ret += silk_decode_frame( &channel_state[ n ], psRangeDec, &samplesOut1_tmp[ n ][ 2 ], &nSamplesOutDec, lostFlag, condCoding); + } else { + silk_memset( &samplesOut1_tmp[ n ][ 2 ], 0, nSamplesOutDec * sizeof( opus_int16 ) ); + } + channel_state[ n ].nFramesDecoded++; + } + + if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 ) { + /* Convert Mid/Side to Left/Right */ + silk_stereo_MS_to_LR( &psDec->sStereo, samplesOut1_tmp[ 0 ], samplesOut1_tmp[ 1 ], MS_pred_Q13, channel_state[ 0 ].fs_kHz, nSamplesOutDec ); + } else { + /* Buffering */ + silk_memcpy( samplesOut1_tmp[ 0 ], psDec->sStereo.sMid, 2 * sizeof( opus_int16 ) ); + silk_memcpy( psDec->sStereo.sMid, &samplesOut1_tmp[ 0 ][ nSamplesOutDec ], 2 * sizeof( opus_int16 ) ); + } + + /* Number of output samples */ + *nSamplesOut = silk_DIV32( nSamplesOutDec * decControl->API_sampleRate, silk_SMULBB( channel_state[ 0 ].fs_kHz, 1000 ) ); + + /* Set up pointers to temp buffers */ + ALLOC( samplesOut2_tmp, + decControl->nChannelsAPI == 2 ? 
*nSamplesOut : ALLOC_NONE, opus_int16 ); + if( decControl->nChannelsAPI == 2 ) { + resample_out_ptr = samplesOut2_tmp; + } else { + resample_out_ptr = samplesOut; + } + + for( n = 0; n < silk_min( decControl->nChannelsAPI, decControl->nChannelsInternal ); n++ ) { + + /* Resample decoded signal to API_sampleRate */ + ret += silk_resampler( &channel_state[ n ].resampler_state, resample_out_ptr, &samplesOut1_tmp[ n ][ 1 ], nSamplesOutDec ); + + /* Interleave if stereo output and stereo stream */ + if( decControl->nChannelsAPI == 2 ) { + for( i = 0; i < *nSamplesOut; i++ ) { + samplesOut[ n + 2 * i ] = resample_out_ptr[ i ]; + } + } + } + + /* Create two channel output from mono stream */ + if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 1 ) { + if ( stereo_to_mono ){ + /* Resample right channel for newly collapsed stereo just in case + we weren't doing collapsing when switching to mono */ + ret += silk_resampler( &channel_state[ 1 ].resampler_state, resample_out_ptr, &samplesOut1_tmp[ 0 ][ 1 ], nSamplesOutDec ); + + for( i = 0; i < *nSamplesOut; i++ ) { + samplesOut[ 1 + 2 * i ] = resample_out_ptr[ i ]; + } + } else { + for( i = 0; i < *nSamplesOut; i++ ) { + samplesOut[ 1 + 2 * i ] = samplesOut[ 0 + 2 * i ]; + } + } + } + + /* Export pitch lag, measured at 48 kHz sampling rate */ + if( channel_state[ 0 ].prevSignalType == TYPE_VOICED ) { + int mult_tab[ 3 ] = { 6, 4, 3 }; + decControl->prevPitchLag = channel_state[ 0 ].lagPrev * mult_tab[ ( channel_state[ 0 ].fs_kHz - 8 ) >> 2 ]; + } else { + decControl->prevPitchLag = 0; + } + + if( lostFlag == FLAG_PACKET_LOST ) { + /* On packet loss, remove the gain clamping to prevent having the energy "bounce back" + if we lose packets when the energy is going down */ + for ( i = 0; i < psDec->nChannelsInternal; i++ ) + psDec->channel_state[ i ].LastGainIndex = 10; + } else { + psDec->prev_decode_only_middle = decode_only_middle; + } + RESTORE_STACK; + return ret; +} + +#if 0 +/* Getting table of contents 
for a packet */ +opus_int silk_get_TOC( + const opus_uint8 *payload, /* I Payload data */ + const opus_int nBytesIn, /* I Number of input bytes */ + const opus_int nFramesPerPayload, /* I Number of SILK frames per payload */ + silk_TOC_struct *Silk_TOC /* O Type of content */ +) +{ + opus_int i, flags, ret = SILK_NO_ERROR; + + if( nBytesIn < 1 ) { + return -1; + } + if( nFramesPerPayload < 0 || nFramesPerPayload > 3 ) { + return -1; + } + + silk_memset( Silk_TOC, 0, sizeof( *Silk_TOC ) ); + + /* For stereo, extract the flags for the mid channel */ + flags = silk_RSHIFT( payload[ 0 ], 7 - nFramesPerPayload ) & ( silk_LSHIFT( 1, nFramesPerPayload + 1 ) - 1 ); + + Silk_TOC->inbandFECFlag = flags & 1; + for( i = nFramesPerPayload - 1; i >= 0 ; i-- ) { + flags = silk_RSHIFT( flags, 1 ); + Silk_TOC->VADFlags[ i ] = flags & 1; + Silk_TOC->VADFlag |= flags & 1; + } + + return ret; +} +#endif diff --git a/drivers/opus/silk/decode_core.c b/drivers/opus/silk/decode_core.c new file mode 100644 index 0000000000..8f801ea7ad --- /dev/null +++ b/drivers/opus/silk/decode_core.c @@ -0,0 +1,238 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. 
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "silk_main.h" +#include "stack_alloc.h" + +/**********************************************************/ +/* Core decoder. 
Performs inverse NSQ operation LTP + LPC */ +/**********************************************************/ +void silk_decode_core( + silk_decoder_state *psDec, /* I/O Decoder state */ + silk_decoder_control *psDecCtrl, /* I Decoder control */ + opus_int16 xq[], /* O Decoded speech */ + const opus_int pulses[ MAX_FRAME_LENGTH ] /* I Pulse signal */ +) +{ + opus_int i, k, lag = 0, start_idx, sLTP_buf_idx, NLSF_interpolation_flag, signalType; + opus_int16 *A_Q12, *B_Q14, *pxq, A_Q12_tmp[ MAX_LPC_ORDER ]; + VARDECL( opus_int16, sLTP ); + VARDECL( opus_int32, sLTP_Q15 ); + opus_int32 LTP_pred_Q13, LPC_pred_Q10, Gain_Q10, inv_gain_Q31, gain_adj_Q16, rand_seed, offset_Q10; + opus_int32 *pred_lag_ptr, *pexc_Q14, *pres_Q14; + VARDECL( opus_int32, res_Q14 ); + VARDECL( opus_int32, sLPC_Q14 ); + SAVE_STACK; + + silk_assert( psDec->prev_gain_Q16 != 0 ); + + ALLOC( sLTP, psDec->ltp_mem_length, opus_int16 ); + ALLOC( sLTP_Q15, psDec->ltp_mem_length + psDec->frame_length, opus_int32 ); + ALLOC( res_Q14, psDec->subfr_length, opus_int32 ); + ALLOC( sLPC_Q14, psDec->subfr_length + MAX_LPC_ORDER, opus_int32 ); + + offset_Q10 = silk_Quantization_Offsets_Q10[ psDec->indices.signalType >> 1 ][ psDec->indices.quantOffsetType ]; + + if( psDec->indices.NLSFInterpCoef_Q2 < 1 << 2 ) { + NLSF_interpolation_flag = 1; + } else { + NLSF_interpolation_flag = 0; + } + + /* Decode excitation */ + rand_seed = psDec->indices.Seed; + for( i = 0; i < psDec->frame_length; i++ ) { + rand_seed = silk_RAND( rand_seed ); + psDec->exc_Q14[ i ] = silk_LSHIFT( (opus_int32)pulses[ i ], 14 ); + if( psDec->exc_Q14[ i ] > 0 ) { + psDec->exc_Q14[ i ] -= QUANT_LEVEL_ADJUST_Q10 << 4; + } else + if( psDec->exc_Q14[ i ] < 0 ) { + psDec->exc_Q14[ i ] += QUANT_LEVEL_ADJUST_Q10 << 4; + } + psDec->exc_Q14[ i ] += offset_Q10 << 4; + if( rand_seed < 0 ) { + psDec->exc_Q14[ i ] = -psDec->exc_Q14[ i ]; + } + + rand_seed = silk_ADD32_ovflw( rand_seed, pulses[ i ] ); + } + + /* Copy LPC state */ + silk_memcpy( sLPC_Q14, 
psDec->sLPC_Q14_buf, MAX_LPC_ORDER * sizeof( opus_int32 ) ); + + pexc_Q14 = psDec->exc_Q14; + pxq = xq; + sLTP_buf_idx = psDec->ltp_mem_length; + /* Loop over subframes */ + for( k = 0; k < psDec->nb_subfr; k++ ) { + pres_Q14 = res_Q14; + A_Q12 = psDecCtrl->PredCoef_Q12[ k >> 1 ]; + + /* Preload LPC coeficients to array on stack. Gives small performance gain */ + silk_memcpy( A_Q12_tmp, A_Q12, psDec->LPC_order * sizeof( opus_int16 ) ); + B_Q14 = &psDecCtrl->LTPCoef_Q14[ k * LTP_ORDER ]; + signalType = psDec->indices.signalType; + + Gain_Q10 = silk_RSHIFT( psDecCtrl->Gains_Q16[ k ], 6 ); + inv_gain_Q31 = silk_INVERSE32_varQ( psDecCtrl->Gains_Q16[ k ], 47 ); + + /* Calculate gain adjustment factor */ + if( psDecCtrl->Gains_Q16[ k ] != psDec->prev_gain_Q16 ) { + gain_adj_Q16 = silk_DIV32_varQ( psDec->prev_gain_Q16, psDecCtrl->Gains_Q16[ k ], 16 ); + + /* Scale short term state */ + for( i = 0; i < MAX_LPC_ORDER; i++ ) { + sLPC_Q14[ i ] = silk_SMULWW( gain_adj_Q16, sLPC_Q14[ i ] ); + } + } else { + gain_adj_Q16 = (opus_int32)1 << 16; + } + + /* Save inv_gain */ + silk_assert( inv_gain_Q31 != 0 ); + psDec->prev_gain_Q16 = psDecCtrl->Gains_Q16[ k ]; + + /* Avoid abrupt transition from voiced PLC to unvoiced normal decoding */ + if( psDec->lossCnt && psDec->prevSignalType == TYPE_VOICED && + psDec->indices.signalType != TYPE_VOICED && k < MAX_NB_SUBFR/2 ) { + + silk_memset( B_Q14, 0, LTP_ORDER * sizeof( opus_int16 ) ); + B_Q14[ LTP_ORDER/2 ] = SILK_FIX_CONST( 0.25, 14 ); + + signalType = TYPE_VOICED; + psDecCtrl->pitchL[ k ] = psDec->lagPrev; + } + + if( signalType == TYPE_VOICED ) { + /* Voiced */ + lag = psDecCtrl->pitchL[ k ]; + + /* Re-whitening */ + if( k == 0 || ( k == 2 && NLSF_interpolation_flag ) ) { + /* Rewhiten with new A coefs */ + start_idx = psDec->ltp_mem_length - lag - psDec->LPC_order - LTP_ORDER / 2; + silk_assert( start_idx > 0 ); + + if( k == 2 ) { + silk_memcpy( &psDec->outBuf[ psDec->ltp_mem_length ], xq, 2 * psDec->subfr_length * sizeof( opus_int16 
) ); + } + + silk_LPC_analysis_filter( &sLTP[ start_idx ], &psDec->outBuf[ start_idx + k * psDec->subfr_length ], + A_Q12, psDec->ltp_mem_length - start_idx, psDec->LPC_order ); + + /* After rewhitening the LTP state is unscaled */ + if( k == 0 ) { + /* Do LTP downscaling to reduce inter-packet dependency */ + inv_gain_Q31 = silk_LSHIFT( silk_SMULWB( inv_gain_Q31, psDecCtrl->LTP_scale_Q14 ), 2 ); + } + for( i = 0; i < lag + LTP_ORDER/2; i++ ) { + sLTP_Q15[ sLTP_buf_idx - i - 1 ] = silk_SMULWB( inv_gain_Q31, sLTP[ psDec->ltp_mem_length - i - 1 ] ); + } + } else { + /* Update LTP state when Gain changes */ + if( gain_adj_Q16 != (opus_int32)1 << 16 ) { + for( i = 0; i < lag + LTP_ORDER/2; i++ ) { + sLTP_Q15[ sLTP_buf_idx - i - 1 ] = silk_SMULWW( gain_adj_Q16, sLTP_Q15[ sLTP_buf_idx - i - 1 ] ); + } + } + } + } + + /* Long-term prediction */ + if( signalType == TYPE_VOICED ) { + /* Set up pointer */ + pred_lag_ptr = &sLTP_Q15[ sLTP_buf_idx - lag + LTP_ORDER / 2 ]; + for( i = 0; i < psDec->subfr_length; i++ ) { + /* Unrolled loop */ + /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */ + LTP_pred_Q13 = 2; + LTP_pred_Q13 = silk_SMLAWB( LTP_pred_Q13, pred_lag_ptr[ 0 ], B_Q14[ 0 ] ); + LTP_pred_Q13 = silk_SMLAWB( LTP_pred_Q13, pred_lag_ptr[ -1 ], B_Q14[ 1 ] ); + LTP_pred_Q13 = silk_SMLAWB( LTP_pred_Q13, pred_lag_ptr[ -2 ], B_Q14[ 2 ] ); + LTP_pred_Q13 = silk_SMLAWB( LTP_pred_Q13, pred_lag_ptr[ -3 ], B_Q14[ 3 ] ); + LTP_pred_Q13 = silk_SMLAWB( LTP_pred_Q13, pred_lag_ptr[ -4 ], B_Q14[ 4 ] ); + pred_lag_ptr++; + + /* Generate LPC excitation */ + pres_Q14[ i ] = silk_ADD_LSHIFT32( pexc_Q14[ i ], LTP_pred_Q13, 1 ); + + /* Update states */ + sLTP_Q15[ sLTP_buf_idx ] = silk_LSHIFT( pres_Q14[ i ], 1 ); + sLTP_buf_idx++; + } + } else { + pres_Q14 = pexc_Q14; + } + + for( i = 0; i < psDec->subfr_length; i++ ) { + /* Short-term prediction */ + silk_assert( psDec->LPC_order == 10 || psDec->LPC_order == 16 ); + /* Avoids introducing a bias because silk_SMLAWB() 
always rounds to -inf */ + LPC_pred_Q10 = silk_RSHIFT( psDec->LPC_order, 1 ); + LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 1 ], A_Q12_tmp[ 0 ] ); + LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 2 ], A_Q12_tmp[ 1 ] ); + LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 3 ], A_Q12_tmp[ 2 ] ); + LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 4 ], A_Q12_tmp[ 3 ] ); + LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 5 ], A_Q12_tmp[ 4 ] ); + LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 6 ], A_Q12_tmp[ 5 ] ); + LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 7 ], A_Q12_tmp[ 6 ] ); + LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 8 ], A_Q12_tmp[ 7 ] ); + LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 9 ], A_Q12_tmp[ 8 ] ); + LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 10 ], A_Q12_tmp[ 9 ] ); + if( psDec->LPC_order == 16 ) { + LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 11 ], A_Q12_tmp[ 10 ] ); + LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 12 ], A_Q12_tmp[ 11 ] ); + LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 13 ], A_Q12_tmp[ 12 ] ); + LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 14 ], A_Q12_tmp[ 13 ] ); + LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 15 ], A_Q12_tmp[ 14 ] ); + LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, sLPC_Q14[ MAX_LPC_ORDER + i - 16 ], A_Q12_tmp[ 15 ] ); + } + + /* Add prediction to LPC excitation */ + sLPC_Q14[ MAX_LPC_ORDER + i ] = silk_ADD_LSHIFT32( pres_Q14[ i ], LPC_pred_Q10, 4 ); + + /* Scale with gain */ + pxq[ i ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( silk_SMULWW( sLPC_Q14[ MAX_LPC_ORDER + i ], Gain_Q10 ), 8 ) ); + } + + /* DEBUG_STORE_DATA( dec.pcm, pxq, 
psDec->subfr_length * sizeof( opus_int16 ) ) */ + + /* Update LPC filter state */ + silk_memcpy( sLPC_Q14, &sLPC_Q14[ psDec->subfr_length ], MAX_LPC_ORDER * sizeof( opus_int32 ) ); + pexc_Q14 += psDec->subfr_length; + pxq += psDec->subfr_length; + } + + /* Save LPC state */ + silk_memcpy( psDec->sLPC_Q14_buf, sLPC_Q14, MAX_LPC_ORDER * sizeof( opus_int32 ) ); + RESTORE_STACK; +} diff --git a/drivers/opus/silk/decode_frame.c b/drivers/opus/silk/decode_frame.c new file mode 100644 index 0000000000..38500227c2 --- /dev/null +++ b/drivers/opus/silk/decode_frame.c @@ -0,0 +1,128 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "silk_main.h" +#include "stack_alloc.h" +#include "PLC.h" + +/****************/ +/* Decode frame */ +/****************/ +opus_int silk_decode_frame( + silk_decoder_state *psDec, /* I/O Pointer to Silk decoder state */ + ec_dec *psRangeDec, /* I/O Compressor data structure */ + opus_int16 pOut[], /* O Pointer to output speech frame */ + opus_int32 *pN, /* O Pointer to size of output frame */ + opus_int lostFlag, /* I 0: no loss, 1 loss, 2 decode fec */ + opus_int condCoding /* I The type of conditional coding to use */ +) +{ + VARDECL( silk_decoder_control, psDecCtrl ); + opus_int L, mv_len, ret = 0; + VARDECL( opus_int, pulses ); + SAVE_STACK; + + L = psDec->frame_length; + ALLOC( psDecCtrl, 1, silk_decoder_control ); + ALLOC( pulses, (L + SHELL_CODEC_FRAME_LENGTH - 1) & + ~(SHELL_CODEC_FRAME_LENGTH - 1), opus_int ); + psDecCtrl->LTP_scale_Q14 = 0; + + /* Safety checks */ + silk_assert( L > 0 && L <= MAX_FRAME_LENGTH ); + + if( lostFlag == FLAG_DECODE_NORMAL || + ( lostFlag == FLAG_DECODE_LBRR && psDec->LBRR_flags[ psDec->nFramesDecoded ] == 1 ) ) + { + /*********************************************/ + /* Decode quantization indices of side info */ + /*********************************************/ + silk_decode_indices( psDec, psRangeDec, psDec->nFramesDecoded, lostFlag, condCoding ); + + 
/*********************************************/ + /* Decode quantization indices of excitation */ + /*********************************************/ + silk_decode_pulses( psRangeDec, pulses, psDec->indices.signalType, + psDec->indices.quantOffsetType, psDec->frame_length ); + + /********************************************/ + /* Decode parameters and pulse signal */ + /********************************************/ + silk_decode_parameters( psDec, psDecCtrl, condCoding ); + + /********************************************************/ + /* Run inverse NSQ */ + /********************************************************/ + silk_decode_core( psDec, psDecCtrl, pOut, pulses ); + + /********************************************************/ + /* Update PLC state */ + /********************************************************/ + silk_PLC( psDec, psDecCtrl, pOut, 0 ); + + psDec->lossCnt = 0; + psDec->prevSignalType = psDec->indices.signalType; + silk_assert( psDec->prevSignalType >= 0 && psDec->prevSignalType <= 2 ); + + /* A frame has been decoded without errors */ + psDec->first_frame_after_reset = 0; + } else { + /* Handle packet loss by extrapolation */ + silk_PLC( psDec, psDecCtrl, pOut, 1 ); + } + + /*************************/ + /* Update output buffer. 
*/ + /*************************/ + silk_assert( psDec->ltp_mem_length >= psDec->frame_length ); + mv_len = psDec->ltp_mem_length - psDec->frame_length; + silk_memmove( psDec->outBuf, &psDec->outBuf[ psDec->frame_length ], mv_len * sizeof(opus_int16) ); + silk_memcpy( &psDec->outBuf[ mv_len ], pOut, psDec->frame_length * sizeof( opus_int16 ) ); + + /****************************************************************/ + /* Ensure smooth connection of extrapolated and good frames */ + /****************************************************************/ + silk_PLC_glue_frames( psDec, pOut, L ); + + /************************************************/ + /* Comfort noise generation / estimation */ + /************************************************/ + silk_CNG( psDec, psDecCtrl, pOut, L ); + + /* Update some decoder state variables */ + psDec->lagPrev = psDecCtrl->pitchL[ psDec->nb_subfr - 1 ]; + + /* Set output frame length */ + *pN = L; + + RESTORE_STACK; + return ret; +} diff --git a/drivers/opus/silk/decode_indices.c b/drivers/opus/silk/decode_indices.c new file mode 100644 index 0000000000..c2aaad2606 --- /dev/null +++ b/drivers/opus/silk/decode_indices.c @@ -0,0 +1,151 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. 
+- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "silk_main.h" + +/* Decode side-information parameters from payload */ +void silk_decode_indices( + silk_decoder_state *psDec, /* I/O State */ + ec_dec *psRangeDec, /* I/O Compressor data structure */ + opus_int FrameIndex, /* I Frame number */ + opus_int decode_LBRR, /* I Flag indicating LBRR data is being decoded */ + opus_int condCoding /* I The type of conditional coding to use */ +) +{ + opus_int i, k, Ix; + opus_int decode_absolute_lagIndex, delta_lagIndex; + opus_int16 ec_ix[ MAX_LPC_ORDER ]; + opus_uint8 pred_Q8[ MAX_LPC_ORDER ]; + + /*******************************************/ + /* Decode signal type and quantizer offset */ + /*******************************************/ + if( decode_LBRR || psDec->VAD_flags[ FrameIndex ] ) { + Ix = ec_dec_icdf( psRangeDec, silk_type_offset_VAD_iCDF, 8 ) + 2; + } else { + Ix = ec_dec_icdf( psRangeDec, 
silk_type_offset_no_VAD_iCDF, 8 ); + } + psDec->indices.signalType = (opus_int8)silk_RSHIFT( Ix, 1 ); + psDec->indices.quantOffsetType = (opus_int8)( Ix & 1 ); + + /****************/ + /* Decode gains */ + /****************/ + /* First subframe */ + if( condCoding == CODE_CONDITIONALLY ) { + /* Conditional coding */ + psDec->indices.GainsIndices[ 0 ] = (opus_int8)ec_dec_icdf( psRangeDec, silk_delta_gain_iCDF, 8 ); + } else { + /* Independent coding, in two stages: MSB bits followed by 3 LSBs */ + psDec->indices.GainsIndices[ 0 ] = (opus_int8)silk_LSHIFT( ec_dec_icdf( psRangeDec, silk_gain_iCDF[ psDec->indices.signalType ], 8 ), 3 ); + psDec->indices.GainsIndices[ 0 ] += (opus_int8)ec_dec_icdf( psRangeDec, silk_uniform8_iCDF, 8 ); + } + + /* Remaining subframes */ + for( i = 1; i < psDec->nb_subfr; i++ ) { + psDec->indices.GainsIndices[ i ] = (opus_int8)ec_dec_icdf( psRangeDec, silk_delta_gain_iCDF, 8 ); + } + + /**********************/ + /* Decode LSF Indices */ + /**********************/ + psDec->indices.NLSFIndices[ 0 ] = (opus_int8)ec_dec_icdf( psRangeDec, &psDec->psNLSF_CB->CB1_iCDF[ ( psDec->indices.signalType >> 1 ) * psDec->psNLSF_CB->nVectors ], 8 ); + silk_NLSF_unpack( ec_ix, pred_Q8, psDec->psNLSF_CB, psDec->indices.NLSFIndices[ 0 ] ); + silk_assert( psDec->psNLSF_CB->order == psDec->LPC_order ); + for( i = 0; i < psDec->psNLSF_CB->order; i++ ) { + Ix = ec_dec_icdf( psRangeDec, &psDec->psNLSF_CB->ec_iCDF[ ec_ix[ i ] ], 8 ); + if( Ix == 0 ) { + Ix -= ec_dec_icdf( psRangeDec, silk_NLSF_EXT_iCDF, 8 ); + } else if( Ix == 2 * NLSF_QUANT_MAX_AMPLITUDE ) { + Ix += ec_dec_icdf( psRangeDec, silk_NLSF_EXT_iCDF, 8 ); + } + psDec->indices.NLSFIndices[ i+1 ] = (opus_int8)( Ix - NLSF_QUANT_MAX_AMPLITUDE ); + } + + /* Decode LSF interpolation factor */ + if( psDec->nb_subfr == MAX_NB_SUBFR ) { + psDec->indices.NLSFInterpCoef_Q2 = (opus_int8)ec_dec_icdf( psRangeDec, silk_NLSF_interpolation_factor_iCDF, 8 ); + } else { + psDec->indices.NLSFInterpCoef_Q2 = 4; + } + + if( 
psDec->indices.signalType == TYPE_VOICED ) + { + /*********************/ + /* Decode pitch lags */ + /*********************/ + /* Get lag index */ + decode_absolute_lagIndex = 1; + if( condCoding == CODE_CONDITIONALLY && psDec->ec_prevSignalType == TYPE_VOICED ) { + /* Decode Delta index */ + delta_lagIndex = (opus_int16)ec_dec_icdf( psRangeDec, silk_pitch_delta_iCDF, 8 ); + if( delta_lagIndex > 0 ) { + delta_lagIndex = delta_lagIndex - 9; + psDec->indices.lagIndex = (opus_int16)( psDec->ec_prevLagIndex + delta_lagIndex ); + decode_absolute_lagIndex = 0; + } + } + if( decode_absolute_lagIndex ) { + /* Absolute decoding */ + psDec->indices.lagIndex = (opus_int16)ec_dec_icdf( psRangeDec, silk_pitch_lag_iCDF, 8 ) * silk_RSHIFT( psDec->fs_kHz, 1 ); + psDec->indices.lagIndex += (opus_int16)ec_dec_icdf( psRangeDec, psDec->pitch_lag_low_bits_iCDF, 8 ); + } + psDec->ec_prevLagIndex = psDec->indices.lagIndex; + + /* Get countour index */ + psDec->indices.contourIndex = (opus_int8)ec_dec_icdf( psRangeDec, psDec->pitch_contour_iCDF, 8 ); + + /********************/ + /* Decode LTP gains */ + /********************/ + /* Decode PERIndex value */ + psDec->indices.PERIndex = (opus_int8)ec_dec_icdf( psRangeDec, silk_LTP_per_index_iCDF, 8 ); + + for( k = 0; k < psDec->nb_subfr; k++ ) { + psDec->indices.LTPIndex[ k ] = (opus_int8)ec_dec_icdf( psRangeDec, silk_LTP_gain_iCDF_ptrs[ psDec->indices.PERIndex ], 8 ); + } + + /**********************/ + /* Decode LTP scaling */ + /**********************/ + if( condCoding == CODE_INDEPENDENTLY ) { + psDec->indices.LTP_scaleIndex = (opus_int8)ec_dec_icdf( psRangeDec, silk_LTPscale_iCDF, 8 ); + } else { + psDec->indices.LTP_scaleIndex = 0; + } + } + psDec->ec_prevSignalType = psDec->indices.signalType; + + /***************/ + /* Decode seed */ + /***************/ + psDec->indices.Seed = (opus_int8)ec_dec_icdf( psRangeDec, silk_uniform4_iCDF, 8 ); +} diff --git a/drivers/opus/silk/decode_parameters.c b/drivers/opus/silk/decode_parameters.c new 
file mode 100644 index 0000000000..72df4fcdb2 --- /dev/null +++ b/drivers/opus/silk/decode_parameters.c @@ -0,0 +1,115 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "silk_main.h" + +/* Decode parameters from payload */ +void silk_decode_parameters( + silk_decoder_state *psDec, /* I/O State */ + silk_decoder_control *psDecCtrl, /* I/O Decoder control */ + opus_int condCoding /* I The type of conditional coding to use */ +) +{ + opus_int i, k, Ix; + opus_int16 pNLSF_Q15[ MAX_LPC_ORDER ], pNLSF0_Q15[ MAX_LPC_ORDER ]; + const opus_int8 *cbk_ptr_Q7; + + /* Dequant Gains */ + silk_gains_dequant( psDecCtrl->Gains_Q16, psDec->indices.GainsIndices, + &psDec->LastGainIndex, condCoding == CODE_CONDITIONALLY, psDec->nb_subfr ); + + /****************/ + /* Decode NLSFs */ + /****************/ + silk_NLSF_decode( pNLSF_Q15, psDec->indices.NLSFIndices, psDec->psNLSF_CB ); + + /* Convert NLSF parameters to AR prediction filter coefficients */ + silk_NLSF2A( psDecCtrl->PredCoef_Q12[ 1 ], pNLSF_Q15, psDec->LPC_order ); + + /* If just reset, e.g., because internal Fs changed, do not allow interpolation */ + /* improves the case of packet loss in the first frame after a switch */ + if( psDec->first_frame_after_reset == 1 ) { + psDec->indices.NLSFInterpCoef_Q2 = 4; + } + + if( psDec->indices.NLSFInterpCoef_Q2 < 4 ) { + /* Calculation of the interpolated NLSF0 vector from the interpolation factor, */ + /* the previous NLSF1, and the current NLSF1 */ + for( i = 0; i < psDec->LPC_order; i++ ) { + pNLSF0_Q15[ i ] = psDec->prevNLSF_Q15[ i ] + silk_RSHIFT( silk_MUL( psDec->indices.NLSFInterpCoef_Q2, + pNLSF_Q15[ i ] - psDec->prevNLSF_Q15[ i ] ), 2 ); + } + + /* Convert NLSF parameters to AR prediction filter coefficients */ + silk_NLSF2A( psDecCtrl->PredCoef_Q12[ 0 ], pNLSF0_Q15, psDec->LPC_order ); + } else { + /* Copy LPC coefficients for first half from second half */ + silk_memcpy( psDecCtrl->PredCoef_Q12[ 0 ], psDecCtrl->PredCoef_Q12[ 1 ], psDec->LPC_order * sizeof( opus_int16 ) ); + } + + silk_memcpy( 
psDec->prevNLSF_Q15, pNLSF_Q15, psDec->LPC_order * sizeof( opus_int16 ) ); + + /* After a packet loss do BWE of LPC coefs */ + if( psDec->lossCnt ) { + silk_bwexpander( psDecCtrl->PredCoef_Q12[ 0 ], psDec->LPC_order, BWE_AFTER_LOSS_Q16 ); + silk_bwexpander( psDecCtrl->PredCoef_Q12[ 1 ], psDec->LPC_order, BWE_AFTER_LOSS_Q16 ); + } + + if( psDec->indices.signalType == TYPE_VOICED ) { + /*********************/ + /* Decode pitch lags */ + /*********************/ + + /* Decode pitch values */ + silk_decode_pitch( psDec->indices.lagIndex, psDec->indices.contourIndex, psDecCtrl->pitchL, psDec->fs_kHz, psDec->nb_subfr ); + + /* Decode Codebook Index */ + cbk_ptr_Q7 = silk_LTP_vq_ptrs_Q7[ psDec->indices.PERIndex ]; /* set pointer to start of codebook */ + + for( k = 0; k < psDec->nb_subfr; k++ ) { + Ix = psDec->indices.LTPIndex[ k ]; + for( i = 0; i < LTP_ORDER; i++ ) { + psDecCtrl->LTPCoef_Q14[ k * LTP_ORDER + i ] = silk_LSHIFT( cbk_ptr_Q7[ Ix * LTP_ORDER + i ], 7 ); + } + } + + /**********************/ + /* Decode LTP scaling */ + /**********************/ + Ix = psDec->indices.LTP_scaleIndex; + psDecCtrl->LTP_scale_Q14 = silk_LTPScales_table_Q14[ Ix ]; + } else { + silk_memset( psDecCtrl->pitchL, 0, psDec->nb_subfr * sizeof( opus_int ) ); + silk_memset( psDecCtrl->LTPCoef_Q14, 0, LTP_ORDER * psDec->nb_subfr * sizeof( opus_int16 ) ); + psDec->indices.PERIndex = 0; + psDecCtrl->LTP_scale_Q14 = 0; + } +} diff --git a/drivers/opus/silk/decode_pitch.c b/drivers/opus/silk/decode_pitch.c new file mode 100644 index 0000000000..3e1dd2d35b --- /dev/null +++ b/drivers/opus/silk/decode_pitch.c @@ -0,0 +1,77 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. 
+Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +/*********************************************************** +* Pitch analyser function +********************************************************** */ +#include "SigProc_FIX.h" +#include "pitch_est_defines.h" + +void silk_decode_pitch( + opus_int16 lagIndex, /* I */ + opus_int8 contourIndex, /* O */ + opus_int pitch_lags[], /* O 4 pitch values */ + const opus_int Fs_kHz, /* I sampling frequency (kHz) */ + const opus_int nb_subfr /* I number of sub frames */ +) +{ + opus_int lag, k, min_lag, max_lag, cbk_size; + const opus_int8 *Lag_CB_ptr; + + if( Fs_kHz == 8 ) { + if( nb_subfr == PE_MAX_NB_SUBFR ) { + Lag_CB_ptr = &silk_CB_lags_stage2[ 0 ][ 0 ]; + cbk_size = PE_NB_CBKS_STAGE2_EXT; + } else { + silk_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1 ); + Lag_CB_ptr = &silk_CB_lags_stage2_10_ms[ 0 ][ 0 ]; + cbk_size = PE_NB_CBKS_STAGE2_10MS; + } + } else { + if( nb_subfr == PE_MAX_NB_SUBFR ) { + Lag_CB_ptr = &silk_CB_lags_stage3[ 0 ][ 0 ]; + cbk_size = PE_NB_CBKS_STAGE3_MAX; + } else { + silk_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1 ); + Lag_CB_ptr = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ]; + cbk_size = PE_NB_CBKS_STAGE3_10MS; + } + } + + min_lag = silk_SMULBB( PE_MIN_LAG_MS, Fs_kHz ); + max_lag = silk_SMULBB( PE_MAX_LAG_MS, Fs_kHz ); + lag = min_lag + lagIndex; + + for( k = 0; k < nb_subfr; k++ ) { + pitch_lags[ k ] = lag + matrix_ptr( Lag_CB_ptr, k, contourIndex, cbk_size ); + pitch_lags[ k ] = silk_LIMIT( pitch_lags[ k ], min_lag, max_lag ); + } +} diff --git a/drivers/opus/silk/decode_pulses.c b/drivers/opus/silk/decode_pulses.c new file mode 100644 index 0000000000..13772f8a57 --- /dev/null +++ b/drivers/opus/silk/decode_pulses.c @@ -0,0 +1,115 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. 
+Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "silk_main.h" + +/*********************************************/ +/* Decode quantization indices of excitation */ +/*********************************************/ +void silk_decode_pulses( + ec_dec *psRangeDec, /* I/O Compressor data structure */ + opus_int pulses[], /* O Excitation signal */ + const opus_int signalType, /* I Sigtype */ + const opus_int quantOffsetType, /* I quantOffsetType */ + const opus_int frame_length /* I Frame length */ +) +{ + opus_int i, j, k, iter, abs_q, nLS, RateLevelIndex; + opus_int sum_pulses[ MAX_NB_SHELL_BLOCKS ], nLshifts[ MAX_NB_SHELL_BLOCKS ]; + opus_int *pulses_ptr; + const opus_uint8 *cdf_ptr; + + /*********************/ + /* Decode rate level */ + /*********************/ + RateLevelIndex = ec_dec_icdf( psRangeDec, silk_rate_levels_iCDF[ signalType >> 1 ], 8 ); + + /* Calculate number of shell blocks */ + silk_assert( 1 << LOG2_SHELL_CODEC_FRAME_LENGTH == SHELL_CODEC_FRAME_LENGTH ); + iter = silk_RSHIFT( frame_length, LOG2_SHELL_CODEC_FRAME_LENGTH ); + if( iter * SHELL_CODEC_FRAME_LENGTH < frame_length ) { + silk_assert( frame_length == 12 * 10 ); /* Make sure only happens for 10 ms @ 12 kHz */ + iter++; + } + + /***************************************************/ + /* Sum-Weighted-Pulses Decoding */ + /***************************************************/ + cdf_ptr = silk_pulses_per_block_iCDF[ RateLevelIndex ]; + for( i = 0; i < iter; i++ ) { + nLshifts[ i ] = 0; + sum_pulses[ i ] = ec_dec_icdf( psRangeDec, cdf_ptr, 8 ); + + /* LSB indication */ + while( sum_pulses[ i ] == MAX_PULSES + 1 ) { + nLshifts[ i ]++; + /* When we've already got 10 LSBs, we shift the table to not allow (MAX_PULSES + 1) */ + sum_pulses[ i ] = ec_dec_icdf( psRangeDec, + silk_pulses_per_block_iCDF[ N_RATE_LEVELS - 1] + ( nLshifts[ i ] == 10 ), 8 ); + } + } + + 
/***************************************************/ + /* Shell decoding */ + /***************************************************/ + for( i = 0; i < iter; i++ ) { + if( sum_pulses[ i ] > 0 ) { + silk_shell_decoder( &pulses[ silk_SMULBB( i, SHELL_CODEC_FRAME_LENGTH ) ], psRangeDec, sum_pulses[ i ] ); + } else { + silk_memset( &pulses[ silk_SMULBB( i, SHELL_CODEC_FRAME_LENGTH ) ], 0, SHELL_CODEC_FRAME_LENGTH * sizeof( opus_int ) ); + } + } + + /***************************************************/ + /* LSB Decoding */ + /***************************************************/ + for( i = 0; i < iter; i++ ) { + if( nLshifts[ i ] > 0 ) { + nLS = nLshifts[ i ]; + pulses_ptr = &pulses[ silk_SMULBB( i, SHELL_CODEC_FRAME_LENGTH ) ]; + for( k = 0; k < SHELL_CODEC_FRAME_LENGTH; k++ ) { + abs_q = pulses_ptr[ k ]; + for( j = 0; j < nLS; j++ ) { + abs_q = silk_LSHIFT( abs_q, 1 ); + abs_q += ec_dec_icdf( psRangeDec, silk_lsb_iCDF, 8 ); + } + pulses_ptr[ k ] = abs_q; + } + /* Mark the number of pulses non-zero for sign decoding. */ + sum_pulses[ i ] |= nLS << 5; + } + } + + /****************************************/ + /* Decode and add signs to pulse signal */ + /****************************************/ + silk_decode_signs( psRangeDec, pulses, frame_length, signalType, quantOffsetType, sum_pulses ); +} diff --git a/drivers/opus/silk/decoder_set_fs.c b/drivers/opus/silk/decoder_set_fs.c new file mode 100644 index 0000000000..6d2de56647 --- /dev/null +++ b/drivers/opus/silk/decoder_set_fs.c @@ -0,0 +1,108 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. 
+- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "silk_main.h" + +/* Set decoder sampling rate */ +opus_int silk_decoder_set_fs( + silk_decoder_state *psDec, /* I/O Decoder state pointer */ + opus_int fs_kHz, /* I Sampling frequency (kHz) */ + opus_int32 fs_API_Hz /* I API Sampling frequency (Hz) */ +) +{ + opus_int frame_length, ret = 0; + + silk_assert( fs_kHz == 8 || fs_kHz == 12 || fs_kHz == 16 ); + silk_assert( psDec->nb_subfr == MAX_NB_SUBFR || psDec->nb_subfr == MAX_NB_SUBFR/2 ); + + /* New (sub)frame length */ + psDec->subfr_length = silk_SMULBB( SUB_FRAME_LENGTH_MS, fs_kHz ); + frame_length = silk_SMULBB( psDec->nb_subfr, psDec->subfr_length ); + + /* Initialize resampler when switching internal or external sampling frequency */ + if( psDec->fs_kHz != fs_kHz || psDec->fs_API_hz != fs_API_Hz ) { + /* Initialize the resampler for dec_API.c preparing resampling from fs_kHz to API_fs_Hz */ + ret += silk_resampler_init( &psDec->resampler_state, silk_SMULBB( fs_kHz, 1000 ), fs_API_Hz, 0 ); + + psDec->fs_API_hz = fs_API_Hz; + } + + if( psDec->fs_kHz != fs_kHz || frame_length != psDec->frame_length ) { + if( fs_kHz == 8 ) { + if( psDec->nb_subfr == MAX_NB_SUBFR ) { + psDec->pitch_contour_iCDF = silk_pitch_contour_NB_iCDF; + } else { + psDec->pitch_contour_iCDF = silk_pitch_contour_10_ms_NB_iCDF; + } + } else { + if( psDec->nb_subfr == MAX_NB_SUBFR ) { + psDec->pitch_contour_iCDF = silk_pitch_contour_iCDF; + } else { + psDec->pitch_contour_iCDF = silk_pitch_contour_10_ms_iCDF; + } + } + if( psDec->fs_kHz != fs_kHz ) { + psDec->ltp_mem_length = silk_SMULBB( LTP_MEM_LENGTH_MS, fs_kHz ); + if( fs_kHz == 8 || fs_kHz == 12 ) { + psDec->LPC_order = MIN_LPC_ORDER; + psDec->psNLSF_CB = &silk_NLSF_CB_NB_MB; + } else { + psDec->LPC_order = MAX_LPC_ORDER; + psDec->psNLSF_CB = &silk_NLSF_CB_WB; + } + if( fs_kHz == 16 ) { + psDec->pitch_lag_low_bits_iCDF = silk_uniform8_iCDF; + } 
else if( fs_kHz == 12 ) { + psDec->pitch_lag_low_bits_iCDF = silk_uniform6_iCDF; + } else if( fs_kHz == 8 ) { + psDec->pitch_lag_low_bits_iCDF = silk_uniform4_iCDF; + } else { + /* unsupported sampling rate */ + silk_assert( 0 ); + } + psDec->first_frame_after_reset = 1; + psDec->lagPrev = 100; + psDec->LastGainIndex = 10; + psDec->prevSignalType = TYPE_NO_VOICE_ACTIVITY; + silk_memset( psDec->outBuf, 0, sizeof(psDec->outBuf)); + silk_memset( psDec->sLPC_Q14_buf, 0, sizeof(psDec->sLPC_Q14_buf) ); + } + + psDec->fs_kHz = fs_kHz; + psDec->frame_length = frame_length; + } + + /* Check that settings are valid */ + silk_assert( psDec->frame_length > 0 && psDec->frame_length <= MAX_FRAME_LENGTH ); + + return ret; +} + diff --git a/drivers/opus/silk/define.h b/drivers/opus/silk/define.h new file mode 100644 index 0000000000..c47aca9f58 --- /dev/null +++ b/drivers/opus/silk/define.h @@ -0,0 +1,235 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifndef SILK_DEFINE_H +#define SILK_DEFINE_H + +#include "errors.h" +#include "typedef.h" + +#ifdef __cplusplus +extern "C" +{ +#endif + +/* Max number of encoder channels (1/2) */ +#define ENCODER_NUM_CHANNELS 2 +/* Number of decoder channels (1/2) */ +#define DECODER_NUM_CHANNELS 2 + +#define MAX_FRAMES_PER_PACKET 3 + +/* Limits on bitrate */ +#define MIN_TARGET_RATE_BPS 5000 +#define MAX_TARGET_RATE_BPS 80000 +#define TARGET_RATE_TAB_SZ 8 + +/* LBRR thresholds */ +#define LBRR_NB_MIN_RATE_BPS 12000 +#define LBRR_MB_MIN_RATE_BPS 14000 +#define LBRR_WB_MIN_RATE_BPS 16000 + +/* DTX settings */ +#define NB_SPEECH_FRAMES_BEFORE_DTX 10 /* eq 200 ms */ +#define MAX_CONSECUTIVE_DTX 20 /* eq 400 ms */ + +/* Maximum sampling frequency */ +#define MAX_FS_KHZ 16 +#define MAX_API_FS_KHZ 48 + +/* Signal types */ +#define TYPE_NO_VOICE_ACTIVITY 0 +#define TYPE_UNVOICED 1 +#define TYPE_VOICED 2 + +/* Conditional coding types */ +#define CODE_INDEPENDENTLY 0 +#define CODE_INDEPENDENTLY_NO_LTP_SCALING 1 +#define CODE_CONDITIONALLY 2 + +/* Settings for stereo processing */ +#define STEREO_QUANT_TAB_SIZE 16 +#define STEREO_QUANT_SUB_STEPS 5 +#define STEREO_INTERP_LEN_MS 8 /* must be even */ +#define STEREO_RATIO_SMOOTH_COEF 0.01 /* smoothing coef for signal norms and stereo width */ + +/* Range of pitch lag estimates */ +#define PITCH_EST_MIN_LAG_MS 2 /* 2 ms -> 500 Hz */ +#define 
PITCH_EST_MAX_LAG_MS 18 /* 18 ms -> 56 Hz */ + +/* Maximum number of subframes */ +#define MAX_NB_SUBFR 4 + +/* Number of samples per frame */ +#define LTP_MEM_LENGTH_MS 20 +#define SUB_FRAME_LENGTH_MS 5 +#define MAX_SUB_FRAME_LENGTH ( SUB_FRAME_LENGTH_MS * MAX_FS_KHZ ) +#define MAX_FRAME_LENGTH_MS ( SUB_FRAME_LENGTH_MS * MAX_NB_SUBFR ) +#define MAX_FRAME_LENGTH ( MAX_FRAME_LENGTH_MS * MAX_FS_KHZ ) + +/* Milliseconds of lookahead for pitch analysis */ +#define LA_PITCH_MS 2 +#define LA_PITCH_MAX ( LA_PITCH_MS * MAX_FS_KHZ ) + +/* Order of LPC used in find pitch */ +#define MAX_FIND_PITCH_LPC_ORDER 16 + +/* Length of LPC window used in find pitch */ +#define FIND_PITCH_LPC_WIN_MS ( 20 + (LA_PITCH_MS << 1) ) +#define FIND_PITCH_LPC_WIN_MS_2_SF ( 10 + (LA_PITCH_MS << 1) ) +#define FIND_PITCH_LPC_WIN_MAX ( FIND_PITCH_LPC_WIN_MS * MAX_FS_KHZ ) + +/* Milliseconds of lookahead for noise shape analysis */ +#define LA_SHAPE_MS 5 +#define LA_SHAPE_MAX ( LA_SHAPE_MS * MAX_FS_KHZ ) + +/* Maximum length of LPC window used in noise shape analysis */ +#define SHAPE_LPC_WIN_MAX ( 15 * MAX_FS_KHZ ) + +/* dB level of lowest gain quantization level */ +#define MIN_QGAIN_DB 2 +/* dB level of highest gain quantization level */ +#define MAX_QGAIN_DB 88 +/* Number of gain quantization levels */ +#define N_LEVELS_QGAIN 64 +/* Max increase in gain quantization index */ +#define MAX_DELTA_GAIN_QUANT 36 +/* Max decrease in gain quantization index */ +#define MIN_DELTA_GAIN_QUANT -4 + +/* Quantization offsets (multiples of 4) */ +#define OFFSET_VL_Q10 32 +#define OFFSET_VH_Q10 100 +#define OFFSET_UVL_Q10 100 +#define OFFSET_UVH_Q10 240 + +#define QUANT_LEVEL_ADJUST_Q10 80 + +/* Maximum numbers of iterations used to stabilize an LPC vector */ +#define MAX_LPC_STABILIZE_ITERATIONS 16 +#define MAX_PREDICTION_POWER_GAIN 1e4f +#define MAX_PREDICTION_POWER_GAIN_AFTER_RESET 1e2f + +#define MAX_LPC_ORDER 16 +#define MIN_LPC_ORDER 10 + +/* Find Pred Coef defines */ +#define LTP_ORDER 5 + +/* LTP 
quantization settings */ +#define NB_LTP_CBKS 3 + +/* Flag to use harmonic noise shaping */ +#define USE_HARM_SHAPING 1 + +/* Max LPC order of noise shaping filters */ +#define MAX_SHAPE_LPC_ORDER 16 + +#define HARM_SHAPE_FIR_TAPS 3 + +/* Maximum number of delayed decision states */ +#define MAX_DEL_DEC_STATES 4 + +#define LTP_BUF_LENGTH 512 +#define LTP_MASK ( LTP_BUF_LENGTH - 1 ) + +#define DECISION_DELAY 32 +#define DECISION_DELAY_MASK ( DECISION_DELAY - 1 ) + +/* Number of subframes for excitation entropy coding */ +#define SHELL_CODEC_FRAME_LENGTH 16 +#define LOG2_SHELL_CODEC_FRAME_LENGTH 4 +#define MAX_NB_SHELL_BLOCKS ( MAX_FRAME_LENGTH / SHELL_CODEC_FRAME_LENGTH ) + +/* Number of rate levels, for entropy coding of excitation */ +#define N_RATE_LEVELS 10 + +/* Maximum sum of pulses per shell coding frame */ +#define MAX_PULSES 16 + +#define MAX_MATRIX_SIZE MAX_LPC_ORDER /* Max of LPC Order and LTP order */ + +#if( MAX_LPC_ORDER > DECISION_DELAY ) +# define NSQ_LPC_BUF_LENGTH MAX_LPC_ORDER +#else +# define NSQ_LPC_BUF_LENGTH DECISION_DELAY +#endif + +/***************************/ +/* Voice activity detector */ +/***************************/ +#define VAD_N_BANDS 4 + +#define VAD_INTERNAL_SUBFRAMES_LOG2 2 +#define VAD_INTERNAL_SUBFRAMES ( 1 << VAD_INTERNAL_SUBFRAMES_LOG2 ) + +#define VAD_NOISE_LEVEL_SMOOTH_COEF_Q16 1024 /* Must be < 4096 */ +#define VAD_NOISE_LEVELS_BIAS 50 + +/* Sigmoid settings */ +#define VAD_NEGATIVE_OFFSET_Q5 128 /* sigmoid is 0 at -128 */ +#define VAD_SNR_FACTOR_Q16 45000 + +/* smoothing for SNR measurement */ +#define VAD_SNR_SMOOTH_COEF_Q18 4096 + +/* Size of the piecewise linear cosine approximation table for the LSFs */ +#define LSF_COS_TAB_SZ_FIX 128 + +/******************/ +/* NLSF quantizer */ +/******************/ +#define NLSF_W_Q 2 +#define NLSF_VQ_MAX_VECTORS 32 +#define NLSF_VQ_MAX_SURVIVORS 32 +#define NLSF_QUANT_MAX_AMPLITUDE 4 +#define NLSF_QUANT_MAX_AMPLITUDE_EXT 10 +#define NLSF_QUANT_LEVEL_ADJ 0.1 +#define 
NLSF_QUANT_DEL_DEC_STATES_LOG2 2 +#define NLSF_QUANT_DEL_DEC_STATES ( 1 << NLSF_QUANT_DEL_DEC_STATES_LOG2 ) + +/* Transition filtering for mode switching */ +#define TRANSITION_TIME_MS 5120 /* 5120 = 64 * FRAME_LENGTH_MS * ( TRANSITION_INT_NUM - 1 ) = 64*(20*4)*/ +#define TRANSITION_NB 3 /* Hardcoded in tables */ +#define TRANSITION_NA 2 /* Hardcoded in tables */ +#define TRANSITION_INT_NUM 5 /* Hardcoded in tables */ +#define TRANSITION_FRAMES ( TRANSITION_TIME_MS / MAX_FRAME_LENGTH_MS ) +#define TRANSITION_INT_STEPS ( TRANSITION_FRAMES / ( TRANSITION_INT_NUM - 1 ) ) + +/* BWE factors to apply after packet loss */ +#define BWE_AFTER_LOSS_Q16 63570 + +/* Defines for CN generation */ +#define CNG_BUF_MASK_MAX 255 /* 2^floor(log2(MAX_FRAME_LENGTH))-1 */ +#define CNG_GAIN_SMTH_Q16 4634 /* 0.25^(1/4) */ +#define CNG_NLSF_SMTH_Q16 16348 /* 0.25 */ + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/drivers/opus/silk/enc_API.c b/drivers/opus/silk/enc_API.c new file mode 100644 index 0000000000..66a9bb67de --- /dev/null +++ b/drivers/opus/silk/enc_API.c @@ -0,0 +1,556 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. 
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif +#include "define.h" +#include "API.h" +#include "control.h" +#include "typedef.h" +#include "stack_alloc.h" +#include "structs.h" +#include "tuning_parameters.h" +#ifdef OPUS_FIXED_POINT +#include "main_FIX.h" +#else +#include "main_FLP.h" +#endif + +/***************************************/ +/* Read control structure from encoder */ +/***************************************/ +static opus_int silk_QueryEncoder( /* O Returns error code */ + const void *encState, /* I State */ + silk_EncControlStruct *encStatus /* O Encoder Status */ +); + +/****************************************/ +/* Encoder functions */ +/****************************************/ + +opus_int silk_Get_Encoder_Size( /* O Returns error code */ + opus_int *encSizeBytes /* O Number of bytes in SILK encoder state */ +) +{ + opus_int ret = SILK_NO_ERROR; + + *encSizeBytes = sizeof( silk_encoder ); + + return ret; +} + +/*************************/ +/* Init or Reset encoder */ +/*************************/ +opus_int silk_InitEncoder( /* O Returns error code */ + void *encState, /* I/O State */ + int arch, /* I Run-time 
architecture */ + silk_EncControlStruct *encStatus /* O Encoder Status */ +) +{ + silk_encoder *psEnc; + opus_int n, ret = SILK_NO_ERROR; + + psEnc = (silk_encoder *)encState; + + /* Reset encoder */ + silk_memset( psEnc, 0, sizeof( silk_encoder ) ); + for( n = 0; n < ENCODER_NUM_CHANNELS; n++ ) { + if( ret += silk_init_encoder( &psEnc->state_Fxx[ n ], arch ) ) { + silk_assert( 0 ); + } + } + + psEnc->nChannelsAPI = 1; + psEnc->nChannelsInternal = 1; + + /* Read control structure */ + if( ret += silk_QueryEncoder( encState, encStatus ) ) { + silk_assert( 0 ); + } + + return ret; +} + +/***************************************/ +/* Read control structure from encoder */ +/***************************************/ +static opus_int silk_QueryEncoder( /* O Returns error code */ + const void *encState, /* I State */ + silk_EncControlStruct *encStatus /* O Encoder Status */ +) +{ + opus_int ret = SILK_NO_ERROR; + silk_encoder_state_Fxx *state_Fxx; + silk_encoder *psEnc = (silk_encoder *)encState; + + state_Fxx = psEnc->state_Fxx; + + encStatus->nChannelsAPI = psEnc->nChannelsAPI; + encStatus->nChannelsInternal = psEnc->nChannelsInternal; + encStatus->API_sampleRate = state_Fxx[ 0 ].sCmn.API_fs_Hz; + encStatus->maxInternalSampleRate = state_Fxx[ 0 ].sCmn.maxInternal_fs_Hz; + encStatus->minInternalSampleRate = state_Fxx[ 0 ].sCmn.minInternal_fs_Hz; + encStatus->desiredInternalSampleRate = state_Fxx[ 0 ].sCmn.desiredInternal_fs_Hz; + encStatus->payloadSize_ms = state_Fxx[ 0 ].sCmn.PacketSize_ms; + encStatus->bitRate = state_Fxx[ 0 ].sCmn.TargetRate_bps; + encStatus->packetLossPercentage = state_Fxx[ 0 ].sCmn.PacketLoss_perc; + encStatus->complexity = state_Fxx[ 0 ].sCmn.Complexity; + encStatus->useInBandFEC = state_Fxx[ 0 ].sCmn.useInBandFEC; + encStatus->useDTX = state_Fxx[ 0 ].sCmn.useDTX; + encStatus->useCBR = state_Fxx[ 0 ].sCmn.useCBR; + encStatus->internalSampleRate = silk_SMULBB( state_Fxx[ 0 ].sCmn.fs_kHz, 1000 ); + encStatus->allowBandwidthSwitch = state_Fxx[ 0 
].sCmn.allow_bandwidth_switch; + encStatus->inWBmodeWithoutVariableLP = state_Fxx[ 0 ].sCmn.fs_kHz == 16 && state_Fxx[ 0 ].sCmn.sLP.mode == 0; + + return ret; +} + + +/**************************/ +/* Encode frame with Silk */ +/**************************/ +/* Note: if prefillFlag is set, the input must contain 10 ms of audio, irrespective of what */ +/* encControl->payloadSize_ms is set to */ +opus_int silk_Encode( /* O Returns error code */ + void *encState, /* I/O State */ + silk_EncControlStruct *encControl, /* I Control status */ + const opus_int16 *samplesIn, /* I Speech sample input vector */ + opus_int nSamplesIn, /* I Number of samples in input vector */ + ec_enc *psRangeEnc, /* I/O Compressor data structure */ + opus_int32 *nBytesOut, /* I/O Number of bytes in payload (input: Max bytes) */ + const opus_int prefillFlag /* I Flag to indicate prefilling buffers no coding */ +) +{ + opus_int n, i, nBits, flags, tmp_payloadSize_ms = 0, tmp_complexity = 0, ret = 0; + opus_int nSamplesToBuffer, nSamplesToBufferMax, nBlocksOf10ms; + opus_int nSamplesFromInput = 0, nSamplesFromInputMax; + opus_int speech_act_thr_for_switch_Q8; + opus_int32 TargetRate_bps, MStargetRates_bps[ 2 ], channelRate_bps, LBRR_symbol, sum; + silk_encoder *psEnc = ( silk_encoder * )encState; + VARDECL( opus_int16, buf ); + opus_int transition, curr_block, tot_blocks; + SAVE_STACK; + + if (encControl->reducedDependency) + { + psEnc->state_Fxx[0].sCmn.first_frame_after_reset = 1; + psEnc->state_Fxx[1].sCmn.first_frame_after_reset = 1; + } + psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded = psEnc->state_Fxx[ 1 ].sCmn.nFramesEncoded = 0; + + /* Check values in encoder control structure */ + if( ( ret = check_control_input( encControl ) != 0 ) ) { + silk_assert( 0 ); + RESTORE_STACK; + return ret; + } + + encControl->switchReady = 0; + + if( encControl->nChannelsInternal > psEnc->nChannelsInternal ) { + /* Mono -> Stereo transition: init state of second channel and stereo state */ + ret += 
silk_init_encoder( &psEnc->state_Fxx[ 1 ], psEnc->state_Fxx[ 0 ].sCmn.arch ); + silk_memset( psEnc->sStereo.pred_prev_Q13, 0, sizeof( psEnc->sStereo.pred_prev_Q13 ) ); + silk_memset( psEnc->sStereo.sSide, 0, sizeof( psEnc->sStereo.sSide ) ); + psEnc->sStereo.mid_side_amp_Q0[ 0 ] = 0; + psEnc->sStereo.mid_side_amp_Q0[ 1 ] = 1; + psEnc->sStereo.mid_side_amp_Q0[ 2 ] = 0; + psEnc->sStereo.mid_side_amp_Q0[ 3 ] = 1; + psEnc->sStereo.width_prev_Q14 = 0; + psEnc->sStereo.smth_width_Q14 = SILK_FIX_CONST( 1, 14 ); + if( psEnc->nChannelsAPI == 2 ) { + silk_memcpy( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state, &psEnc->state_Fxx[ 0 ].sCmn.resampler_state, sizeof( silk_resampler_state_struct ) ); + silk_memcpy( &psEnc->state_Fxx[ 1 ].sCmn.In_HP_State, &psEnc->state_Fxx[ 0 ].sCmn.In_HP_State, sizeof( psEnc->state_Fxx[ 1 ].sCmn.In_HP_State ) ); + } + } + + transition = (encControl->payloadSize_ms != psEnc->state_Fxx[ 0 ].sCmn.PacketSize_ms) || (psEnc->nChannelsInternal != encControl->nChannelsInternal); + + psEnc->nChannelsAPI = encControl->nChannelsAPI; + psEnc->nChannelsInternal = encControl->nChannelsInternal; + + nBlocksOf10ms = silk_DIV32( 100 * nSamplesIn, encControl->API_sampleRate ); + tot_blocks = ( nBlocksOf10ms > 1 ) ? 
nBlocksOf10ms >> 1 : 1; + curr_block = 0; + if( prefillFlag ) { + /* Only accept input length of 10 ms */ + if( nBlocksOf10ms != 1 ) { + silk_assert( 0 ); + RESTORE_STACK; + return SILK_ENC_INPUT_INVALID_NO_OF_SAMPLES; + } + /* Reset Encoder */ + for( n = 0; n < encControl->nChannelsInternal; n++ ) { + ret = silk_init_encoder( &psEnc->state_Fxx[ n ], psEnc->state_Fxx[ n ].sCmn.arch ); + silk_assert( !ret ); + } + tmp_payloadSize_ms = encControl->payloadSize_ms; + encControl->payloadSize_ms = 10; + tmp_complexity = encControl->complexity; + encControl->complexity = 0; + for( n = 0; n < encControl->nChannelsInternal; n++ ) { + psEnc->state_Fxx[ n ].sCmn.controlled_since_last_payload = 0; + psEnc->state_Fxx[ n ].sCmn.prefillFlag = 1; + } + } else { + /* Only accept input lengths that are a multiple of 10 ms */ + if( nBlocksOf10ms * encControl->API_sampleRate != 100 * nSamplesIn || nSamplesIn < 0 ) { + silk_assert( 0 ); + RESTORE_STACK; + return SILK_ENC_INPUT_INVALID_NO_OF_SAMPLES; + } + /* Make sure no more than one packet can be produced */ + if( 1000 * (opus_int32)nSamplesIn > encControl->payloadSize_ms * encControl->API_sampleRate ) { + silk_assert( 0 ); + RESTORE_STACK; + return SILK_ENC_INPUT_INVALID_NO_OF_SAMPLES; + } + } + + TargetRate_bps = silk_RSHIFT32( encControl->bitRate, encControl->nChannelsInternal - 1 ); + for( n = 0; n < encControl->nChannelsInternal; n++ ) { + /* Force the side channel to the same rate as the mid */ + opus_int force_fs_kHz = (n==1) ? 
psEnc->state_Fxx[0].sCmn.fs_kHz : 0; + if( ( ret = silk_control_encoder( &psEnc->state_Fxx[ n ], encControl, TargetRate_bps, psEnc->allowBandwidthSwitch, n, force_fs_kHz ) ) != 0 ) { + silk_assert( 0 ); + RESTORE_STACK; + return ret; + } + if( psEnc->state_Fxx[n].sCmn.first_frame_after_reset || transition ) { + for( i = 0; i < psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket; i++ ) { + psEnc->state_Fxx[ n ].sCmn.LBRR_flags[ i ] = 0; + } + } + psEnc->state_Fxx[ n ].sCmn.inDTX = psEnc->state_Fxx[ n ].sCmn.useDTX; + } + silk_assert( encControl->nChannelsInternal == 1 || psEnc->state_Fxx[ 0 ].sCmn.fs_kHz == psEnc->state_Fxx[ 1 ].sCmn.fs_kHz ); + + /* Input buffering/resampling and encoding */ + nSamplesToBufferMax = + 10 * nBlocksOf10ms * psEnc->state_Fxx[ 0 ].sCmn.fs_kHz; + nSamplesFromInputMax = + silk_DIV32_16( nSamplesToBufferMax * + psEnc->state_Fxx[ 0 ].sCmn.API_fs_Hz, + psEnc->state_Fxx[ 0 ].sCmn.fs_kHz * 1000 ); + ALLOC( buf, nSamplesFromInputMax, opus_int16 ); + while( 1 ) { + nSamplesToBuffer = psEnc->state_Fxx[ 0 ].sCmn.frame_length - psEnc->state_Fxx[ 0 ].sCmn.inputBufIx; + nSamplesToBuffer = silk_min( nSamplesToBuffer, nSamplesToBufferMax ); + nSamplesFromInput = silk_DIV32_16( nSamplesToBuffer * psEnc->state_Fxx[ 0 ].sCmn.API_fs_Hz, psEnc->state_Fxx[ 0 ].sCmn.fs_kHz * 1000 ); + /* Resample and write to buffer */ + if( encControl->nChannelsAPI == 2 && encControl->nChannelsInternal == 2 ) { + opus_int id = psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded; + for( n = 0; n < nSamplesFromInput; n++ ) { + buf[ n ] = samplesIn[ 2 * n ]; + } + /* Making sure to start both resamplers from the same state when switching from mono to stereo */ + if( psEnc->nPrevChannelsInternal == 1 && id==0 ) { + silk_memcpy( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state, &psEnc->state_Fxx[ 0 ].sCmn.resampler_state, sizeof(psEnc->state_Fxx[ 1 ].sCmn.resampler_state)); + } + + ret += silk_resampler( &psEnc->state_Fxx[ 0 ].sCmn.resampler_state, + &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ 
psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput ); + psEnc->state_Fxx[ 0 ].sCmn.inputBufIx += nSamplesToBuffer; + + nSamplesToBuffer = psEnc->state_Fxx[ 1 ].sCmn.frame_length - psEnc->state_Fxx[ 1 ].sCmn.inputBufIx; + nSamplesToBuffer = silk_min( nSamplesToBuffer, 10 * nBlocksOf10ms * psEnc->state_Fxx[ 1 ].sCmn.fs_kHz ); + for( n = 0; n < nSamplesFromInput; n++ ) { + buf[ n ] = samplesIn[ 2 * n + 1 ]; + } + ret += silk_resampler( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state, + &psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ psEnc->state_Fxx[ 1 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput ); + + psEnc->state_Fxx[ 1 ].sCmn.inputBufIx += nSamplesToBuffer; + } else if( encControl->nChannelsAPI == 2 && encControl->nChannelsInternal == 1 ) { + /* Combine left and right channels before resampling */ + for( n = 0; n < nSamplesFromInput; n++ ) { + sum = samplesIn[ 2 * n ] + samplesIn[ 2 * n + 1 ]; + buf[ n ] = (opus_int16)silk_RSHIFT_ROUND( sum, 1 ); + } + ret += silk_resampler( &psEnc->state_Fxx[ 0 ].sCmn.resampler_state, + &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput ); + /* On the first mono frame, average the results for the two resampler states */ + if( psEnc->nPrevChannelsInternal == 2 && psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded == 0 ) { + ret += silk_resampler( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state, + &psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ psEnc->state_Fxx[ 1 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput ); + for( n = 0; n < psEnc->state_Fxx[ 0 ].sCmn.frame_length; n++ ) { + psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx+n+2 ] = + silk_RSHIFT(psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx+n+2 ] + + psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ psEnc->state_Fxx[ 1 ].sCmn.inputBufIx+n+2 ], 1); + } + } + psEnc->state_Fxx[ 0 ].sCmn.inputBufIx += nSamplesToBuffer; + } else { + silk_assert( encControl->nChannelsAPI == 1 && 
encControl->nChannelsInternal == 1 ); + silk_memcpy(buf, samplesIn, nSamplesFromInput*sizeof(opus_int16)); + ret += silk_resampler( &psEnc->state_Fxx[ 0 ].sCmn.resampler_state, + &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput ); + psEnc->state_Fxx[ 0 ].sCmn.inputBufIx += nSamplesToBuffer; + } + + samplesIn += nSamplesFromInput * encControl->nChannelsAPI; + nSamplesIn -= nSamplesFromInput; + + /* Default */ + psEnc->allowBandwidthSwitch = 0; + + /* Silk encoder */ + if( psEnc->state_Fxx[ 0 ].sCmn.inputBufIx >= psEnc->state_Fxx[ 0 ].sCmn.frame_length ) { + /* Enough data in input buffer, so encode */ + silk_assert( psEnc->state_Fxx[ 0 ].sCmn.inputBufIx == psEnc->state_Fxx[ 0 ].sCmn.frame_length ); + silk_assert( encControl->nChannelsInternal == 1 || psEnc->state_Fxx[ 1 ].sCmn.inputBufIx == psEnc->state_Fxx[ 1 ].sCmn.frame_length ); + + /* Deal with LBRR data */ + if( psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded == 0 && !prefillFlag ) { + /* Create space at start of payload for VAD and FEC flags */ + opus_uint8 iCDF[ 2 ] = { 0, 0 }; + iCDF[ 0 ] = 256 - silk_RSHIFT( 256, ( psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket + 1 ) * encControl->nChannelsInternal ); + ec_enc_icdf( psRangeEnc, 0, iCDF, 8 ); + + /* Encode any LBRR data from previous packet */ + /* Encode LBRR flags */ + for( n = 0; n < encControl->nChannelsInternal; n++ ) { + LBRR_symbol = 0; + for( i = 0; i < psEnc->state_Fxx[ n ].sCmn.nFramesPerPacket; i++ ) { + LBRR_symbol |= silk_LSHIFT( psEnc->state_Fxx[ n ].sCmn.LBRR_flags[ i ], i ); + } + psEnc->state_Fxx[ n ].sCmn.LBRR_flag = LBRR_symbol > 0 ? 
1 : 0; + if( LBRR_symbol && psEnc->state_Fxx[ n ].sCmn.nFramesPerPacket > 1 ) { + ec_enc_icdf( psRangeEnc, LBRR_symbol - 1, silk_LBRR_flags_iCDF_ptr[ psEnc->state_Fxx[ n ].sCmn.nFramesPerPacket - 2 ], 8 ); + } + } + + /* Code LBRR indices and excitation signals */ + for( i = 0; i < psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket; i++ ) { + for( n = 0; n < encControl->nChannelsInternal; n++ ) { + if( psEnc->state_Fxx[ n ].sCmn.LBRR_flags[ i ] ) { + opus_int condCoding; + + if( encControl->nChannelsInternal == 2 && n == 0 ) { + silk_stereo_encode_pred( psRangeEnc, psEnc->sStereo.predIx[ i ] ); + /* For LBRR data there's no need to code the mid-only flag if the side-channel LBRR flag is set */ + if( psEnc->state_Fxx[ 1 ].sCmn.LBRR_flags[ i ] == 0 ) { + silk_stereo_encode_mid_only( psRangeEnc, psEnc->sStereo.mid_only_flags[ i ] ); + } + } + /* Use conditional coding if previous frame available */ + if( i > 0 && psEnc->state_Fxx[ n ].sCmn.LBRR_flags[ i - 1 ] ) { + condCoding = CODE_CONDITIONALLY; + } else { + condCoding = CODE_INDEPENDENTLY; + } + silk_encode_indices( &psEnc->state_Fxx[ n ].sCmn, psRangeEnc, i, 1, condCoding ); + silk_encode_pulses( psRangeEnc, psEnc->state_Fxx[ n ].sCmn.indices_LBRR[i].signalType, psEnc->state_Fxx[ n ].sCmn.indices_LBRR[i].quantOffsetType, + psEnc->state_Fxx[ n ].sCmn.pulses_LBRR[ i ], psEnc->state_Fxx[ n ].sCmn.frame_length ); + } + } + } + + /* Reset LBRR flags */ + for( n = 0; n < encControl->nChannelsInternal; n++ ) { + silk_memset( psEnc->state_Fxx[ n ].sCmn.LBRR_flags, 0, sizeof( psEnc->state_Fxx[ n ].sCmn.LBRR_flags ) ); + } + } + + silk_HP_variable_cutoff( psEnc->state_Fxx ); + + /* Total target bits for packet */ + nBits = silk_DIV32_16( silk_MUL( encControl->bitRate, encControl->payloadSize_ms ), 1000 ); + /* Subtract half of the bits already used */ + if( !prefillFlag ) { + nBits -= ec_tell( psRangeEnc ) >> 1; + } + /* Divide by number of uncoded frames left in packet */ + nBits = silk_DIV32_16( nBits, psEnc->state_Fxx[ 0 
].sCmn.nFramesPerPacket - psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ); + /* Convert to bits/second */ + if( encControl->payloadSize_ms == 10 ) { + TargetRate_bps = silk_SMULBB( nBits, 100 ); + } else { + TargetRate_bps = silk_SMULBB( nBits, 50 ); + } + /* Subtract fraction of bits in excess of target in previous packets */ + TargetRate_bps -= silk_DIV32_16( silk_MUL( psEnc->nBitsExceeded, 1000 ), BITRESERVOIR_DECAY_TIME_MS ); + /* Never exceed input bitrate */ + TargetRate_bps = silk_LIMIT( TargetRate_bps, encControl->bitRate, 5000 ); + + /* Convert Left/Right to Mid/Side */ + if( encControl->nChannelsInternal == 2 ) { + silk_stereo_LR_to_MS( &psEnc->sStereo, &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ 2 ], &psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ 2 ], + psEnc->sStereo.predIx[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ], &psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ], + MStargetRates_bps, TargetRate_bps, psEnc->state_Fxx[ 0 ].sCmn.speech_activity_Q8, encControl->toMono, + psEnc->state_Fxx[ 0 ].sCmn.fs_kHz, psEnc->state_Fxx[ 0 ].sCmn.frame_length ); + if( psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] == 0 ) { + /* Reset side channel encoder memory for first frame with side coding */ + if( psEnc->prev_decode_only_middle == 1 ) { + silk_memset( &psEnc->state_Fxx[ 1 ].sShape, 0, sizeof( psEnc->state_Fxx[ 1 ].sShape ) ); + silk_memset( &psEnc->state_Fxx[ 1 ].sPrefilt, 0, sizeof( psEnc->state_Fxx[ 1 ].sPrefilt ) ); + silk_memset( &psEnc->state_Fxx[ 1 ].sCmn.sNSQ, 0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.sNSQ ) ); + silk_memset( psEnc->state_Fxx[ 1 ].sCmn.prev_NLSFq_Q15, 0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.prev_NLSFq_Q15 ) ); + silk_memset( &psEnc->state_Fxx[ 1 ].sCmn.sLP.In_LP_State, 0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.sLP.In_LP_State ) ); + psEnc->state_Fxx[ 1 ].sCmn.prevLag = 100; + psEnc->state_Fxx[ 1 ].sCmn.sNSQ.lagPrev = 100; + psEnc->state_Fxx[ 1 ].sShape.LastGainIndex = 10; + psEnc->state_Fxx[ 1 
].sCmn.prevSignalType = TYPE_NO_VOICE_ACTIVITY; + psEnc->state_Fxx[ 1 ].sCmn.sNSQ.prev_gain_Q16 = 65536; + psEnc->state_Fxx[ 1 ].sCmn.first_frame_after_reset = 1; + } + silk_encode_do_VAD_Fxx( &psEnc->state_Fxx[ 1 ] ); + } else { + psEnc->state_Fxx[ 1 ].sCmn.VAD_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] = 0; + } + if( !prefillFlag ) { + silk_stereo_encode_pred( psRangeEnc, psEnc->sStereo.predIx[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] ); + if( psEnc->state_Fxx[ 1 ].sCmn.VAD_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] == 0 ) { + silk_stereo_encode_mid_only( psRangeEnc, psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] ); + } + } + } else { + /* Buffering */ + silk_memcpy( psEnc->state_Fxx[ 0 ].sCmn.inputBuf, psEnc->sStereo.sMid, 2 * sizeof( opus_int16 ) ); + silk_memcpy( psEnc->sStereo.sMid, &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.frame_length ], 2 * sizeof( opus_int16 ) ); + } + silk_encode_do_VAD_Fxx( &psEnc->state_Fxx[ 0 ] ); + + /* Encode */ + for( n = 0; n < encControl->nChannelsInternal; n++ ) { + opus_int maxBits, useCBR; + + /* Handling rate constraints */ + maxBits = encControl->maxBits; + if( tot_blocks == 2 && curr_block == 0 ) { + maxBits = maxBits * 3 / 5; + } else if( tot_blocks == 3 ) { + if( curr_block == 0 ) { + maxBits = maxBits * 2 / 5; + } else if( curr_block == 1 ) { + maxBits = maxBits * 3 / 4; + } + } + useCBR = encControl->useCBR && curr_block == tot_blocks - 1; + + if( encControl->nChannelsInternal == 1 ) { + channelRate_bps = TargetRate_bps; + } else { + channelRate_bps = MStargetRates_bps[ n ]; + if( n == 0 && MStargetRates_bps[ 1 ] > 0 ) { + useCBR = 0; + /* Give mid up to 1/2 of the max bits for that frame */ + maxBits -= encControl->maxBits / ( tot_blocks * 2 ); + } + } + + if( channelRate_bps > 0 ) { + opus_int condCoding; + + silk_control_SNR( &psEnc->state_Fxx[ n ].sCmn, channelRate_bps ); + + /* Use independent coding if no previous frame available */ + if( 
psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded - n <= 0 ) { + condCoding = CODE_INDEPENDENTLY; + } else if( n > 0 && psEnc->prev_decode_only_middle ) { + /* If we skipped a side frame in this packet, we don't + need LTP scaling; the LTP state is well-defined. */ + condCoding = CODE_INDEPENDENTLY_NO_LTP_SCALING; + } else { + condCoding = CODE_CONDITIONALLY; + } + if( ( ret = silk_encode_frame_Fxx( &psEnc->state_Fxx[ n ], nBytesOut, psRangeEnc, condCoding, maxBits, useCBR ) ) != 0 ) { + silk_assert( 0 ); + } + } + psEnc->state_Fxx[ n ].sCmn.controlled_since_last_payload = 0; + psEnc->state_Fxx[ n ].sCmn.inputBufIx = 0; + psEnc->state_Fxx[ n ].sCmn.nFramesEncoded++; + } + psEnc->prev_decode_only_middle = psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded - 1 ]; + + /* Insert VAD and FEC flags at beginning of bitstream */ + if( *nBytesOut > 0 && psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded == psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket) { + flags = 0; + for( n = 0; n < encControl->nChannelsInternal; n++ ) { + for( i = 0; i < psEnc->state_Fxx[ n ].sCmn.nFramesPerPacket; i++ ) { + flags = silk_LSHIFT( flags, 1 ); + flags |= psEnc->state_Fxx[ n ].sCmn.VAD_flags[ i ]; + } + flags = silk_LSHIFT( flags, 1 ); + flags |= psEnc->state_Fxx[ n ].sCmn.LBRR_flag; + } + if( !prefillFlag ) { + ec_enc_patch_initial_bits( psRangeEnc, flags, ( psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket + 1 ) * encControl->nChannelsInternal ); + } + + /* Return zero bytes if all channels DTXed */ + if( psEnc->state_Fxx[ 0 ].sCmn.inDTX && ( encControl->nChannelsInternal == 1 || psEnc->state_Fxx[ 1 ].sCmn.inDTX ) ) { + *nBytesOut = 0; + } + + psEnc->nBitsExceeded += *nBytesOut * 8; + psEnc->nBitsExceeded -= silk_DIV32_16( silk_MUL( encControl->bitRate, encControl->payloadSize_ms ), 1000 ); + psEnc->nBitsExceeded = silk_LIMIT( psEnc->nBitsExceeded, 0, 10000 ); + + /* Update flag indicating if bandwidth switching is allowed */ + speech_act_thr_for_switch_Q8 = silk_SMLAWB( SILK_FIX_CONST( 
SPEECH_ACTIVITY_DTX_THRES, 8 ), + SILK_FIX_CONST( ( 1 - SPEECH_ACTIVITY_DTX_THRES ) / MAX_BANDWIDTH_SWITCH_DELAY_MS, 16 + 8 ), psEnc->timeSinceSwitchAllowed_ms ); + if( psEnc->state_Fxx[ 0 ].sCmn.speech_activity_Q8 < speech_act_thr_for_switch_Q8 ) { + psEnc->allowBandwidthSwitch = 1; + psEnc->timeSinceSwitchAllowed_ms = 0; + } else { + psEnc->allowBandwidthSwitch = 0; + psEnc->timeSinceSwitchAllowed_ms += encControl->payloadSize_ms; + } + } + + if( nSamplesIn == 0 ) { + break; + } + } else { + break; + } + curr_block++; + } + + psEnc->nPrevChannelsInternal = encControl->nChannelsInternal; + + encControl->allowBandwidthSwitch = psEnc->allowBandwidthSwitch; + encControl->inWBmodeWithoutVariableLP = psEnc->state_Fxx[ 0 ].sCmn.fs_kHz == 16 && psEnc->state_Fxx[ 0 ].sCmn.sLP.mode == 0; + encControl->internalSampleRate = silk_SMULBB( psEnc->state_Fxx[ 0 ].sCmn.fs_kHz, 1000 ); + encControl->stereoWidth_Q14 = encControl->toMono ? 0 : psEnc->sStereo.smth_width_Q14; + if( prefillFlag ) { + encControl->payloadSize_ms = tmp_payloadSize_ms; + encControl->complexity = tmp_complexity; + for( n = 0; n < encControl->nChannelsInternal; n++ ) { + psEnc->state_Fxx[ n ].sCmn.controlled_since_last_payload = 0; + psEnc->state_Fxx[ n ].sCmn.prefillFlag = 0; + } + } + + RESTORE_STACK; + return ret; +} + diff --git a/drivers/opus/silk/encode_indices.c b/drivers/opus/silk/encode_indices.c new file mode 100644 index 0000000000..c6679b34f6 --- /dev/null +++ b/drivers/opus/silk/encode_indices.c @@ -0,0 +1,181 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. 
+- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "silk_main.h" + +/* Encode side-information parameters to payload */ +void silk_encode_indices( + silk_encoder_state *psEncC, /* I/O Encoder state */ + ec_enc *psRangeEnc, /* I/O Compressor data structure */ + opus_int FrameIndex, /* I Frame number */ + opus_int encode_LBRR, /* I Flag indicating LBRR data is being encoded */ + opus_int condCoding /* I The type of conditional coding to use */ +) +{ + opus_int i, k, typeOffset; + opus_int encode_absolute_lagIndex, delta_lagIndex; + opus_int16 ec_ix[ MAX_LPC_ORDER ]; + opus_uint8 pred_Q8[ MAX_LPC_ORDER ]; + const SideInfoIndices *psIndices; + + if( encode_LBRR ) { + psIndices = &psEncC->indices_LBRR[ FrameIndex ]; + } else { + psIndices = &psEncC->indices; + } + + /*******************************************/ + /* Encode signal type and quantizer offset */ + /*******************************************/ + typeOffset = 2 * psIndices->signalType + psIndices->quantOffsetType; + silk_assert( typeOffset >= 0 && typeOffset < 6 ); + silk_assert( encode_LBRR == 0 || typeOffset >= 2 ); + if( encode_LBRR || typeOffset >= 2 ) { + ec_enc_icdf( psRangeEnc, typeOffset - 2, silk_type_offset_VAD_iCDF, 8 ); + } else { + ec_enc_icdf( psRangeEnc, typeOffset, silk_type_offset_no_VAD_iCDF, 8 ); + } + + /****************/ + /* Encode gains */ + /****************/ + /* first subframe */ + if( condCoding == CODE_CONDITIONALLY ) { + /* conditional coding */ + silk_assert( psIndices->GainsIndices[ 0 ] >= 0 && psIndices->GainsIndices[ 0 ] < MAX_DELTA_GAIN_QUANT - MIN_DELTA_GAIN_QUANT + 1 ); + ec_enc_icdf( psRangeEnc, psIndices->GainsIndices[ 0 ], silk_delta_gain_iCDF, 8 ); + } else { + /* independent coding, in two stages: MSB bits followed by 3 LSBs */ + silk_assert( psIndices->GainsIndices[ 0 ] >= 0 && psIndices->GainsIndices[ 0 ] < N_LEVELS_QGAIN ); + ec_enc_icdf( psRangeEnc, silk_RSHIFT( 
psIndices->GainsIndices[ 0 ], 3 ), silk_gain_iCDF[ psIndices->signalType ], 8 ); + ec_enc_icdf( psRangeEnc, psIndices->GainsIndices[ 0 ] & 7, silk_uniform8_iCDF, 8 ); + } + + /* remaining subframes */ + for( i = 1; i < psEncC->nb_subfr; i++ ) { + silk_assert( psIndices->GainsIndices[ i ] >= 0 && psIndices->GainsIndices[ i ] < MAX_DELTA_GAIN_QUANT - MIN_DELTA_GAIN_QUANT + 1 ); + ec_enc_icdf( psRangeEnc, psIndices->GainsIndices[ i ], silk_delta_gain_iCDF, 8 ); + } + + /****************/ + /* Encode NLSFs */ + /****************/ + ec_enc_icdf( psRangeEnc, psIndices->NLSFIndices[ 0 ], &psEncC->psNLSF_CB->CB1_iCDF[ ( psIndices->signalType >> 1 ) * psEncC->psNLSF_CB->nVectors ], 8 ); + silk_NLSF_unpack( ec_ix, pred_Q8, psEncC->psNLSF_CB, psIndices->NLSFIndices[ 0 ] ); + silk_assert( psEncC->psNLSF_CB->order == psEncC->predictLPCOrder ); + for( i = 0; i < psEncC->psNLSF_CB->order; i++ ) { + if( psIndices->NLSFIndices[ i+1 ] >= NLSF_QUANT_MAX_AMPLITUDE ) { + ec_enc_icdf( psRangeEnc, 2 * NLSF_QUANT_MAX_AMPLITUDE, &psEncC->psNLSF_CB->ec_iCDF[ ec_ix[ i ] ], 8 ); + ec_enc_icdf( psRangeEnc, psIndices->NLSFIndices[ i+1 ] - NLSF_QUANT_MAX_AMPLITUDE, silk_NLSF_EXT_iCDF, 8 ); + } else if( psIndices->NLSFIndices[ i+1 ] <= -NLSF_QUANT_MAX_AMPLITUDE ) { + ec_enc_icdf( psRangeEnc, 0, &psEncC->psNLSF_CB->ec_iCDF[ ec_ix[ i ] ], 8 ); + ec_enc_icdf( psRangeEnc, -psIndices->NLSFIndices[ i+1 ] - NLSF_QUANT_MAX_AMPLITUDE, silk_NLSF_EXT_iCDF, 8 ); + } else { + ec_enc_icdf( psRangeEnc, psIndices->NLSFIndices[ i+1 ] + NLSF_QUANT_MAX_AMPLITUDE, &psEncC->psNLSF_CB->ec_iCDF[ ec_ix[ i ] ], 8 ); + } + } + + /* Encode NLSF interpolation factor */ + if( psEncC->nb_subfr == MAX_NB_SUBFR ) { + silk_assert( psIndices->NLSFInterpCoef_Q2 >= 0 && psIndices->NLSFInterpCoef_Q2 < 5 ); + ec_enc_icdf( psRangeEnc, psIndices->NLSFInterpCoef_Q2, silk_NLSF_interpolation_factor_iCDF, 8 ); + } + + if( psIndices->signalType == TYPE_VOICED ) + { + /*********************/ + /* Encode pitch lags */ + 
/*********************/ + /* lag index */ + encode_absolute_lagIndex = 1; + if( condCoding == CODE_CONDITIONALLY && psEncC->ec_prevSignalType == TYPE_VOICED ) { + /* Delta Encoding */ + delta_lagIndex = psIndices->lagIndex - psEncC->ec_prevLagIndex; + if( delta_lagIndex < -8 || delta_lagIndex > 11 ) { + delta_lagIndex = 0; + } else { + delta_lagIndex = delta_lagIndex + 9; + encode_absolute_lagIndex = 0; /* Only use delta */ + } + silk_assert( delta_lagIndex >= 0 && delta_lagIndex < 21 ); + ec_enc_icdf( psRangeEnc, delta_lagIndex, silk_pitch_delta_iCDF, 8 ); + } + if( encode_absolute_lagIndex ) { + /* Absolute encoding */ + opus_int32 pitch_high_bits, pitch_low_bits; + pitch_high_bits = silk_DIV32_16( psIndices->lagIndex, silk_RSHIFT( psEncC->fs_kHz, 1 ) ); + pitch_low_bits = psIndices->lagIndex - silk_SMULBB( pitch_high_bits, silk_RSHIFT( psEncC->fs_kHz, 1 ) ); + silk_assert( pitch_low_bits < psEncC->fs_kHz / 2 ); + silk_assert( pitch_high_bits < 32 ); + ec_enc_icdf( psRangeEnc, pitch_high_bits, silk_pitch_lag_iCDF, 8 ); + ec_enc_icdf( psRangeEnc, pitch_low_bits, psEncC->pitch_lag_low_bits_iCDF, 8 ); + } + psEncC->ec_prevLagIndex = psIndices->lagIndex; + + /* Countour index */ + silk_assert( psIndices->contourIndex >= 0 ); + silk_assert( ( psIndices->contourIndex < 34 && psEncC->fs_kHz > 8 && psEncC->nb_subfr == 4 ) || + ( psIndices->contourIndex < 11 && psEncC->fs_kHz == 8 && psEncC->nb_subfr == 4 ) || + ( psIndices->contourIndex < 12 && psEncC->fs_kHz > 8 && psEncC->nb_subfr == 2 ) || + ( psIndices->contourIndex < 3 && psEncC->fs_kHz == 8 && psEncC->nb_subfr == 2 ) ); + ec_enc_icdf( psRangeEnc, psIndices->contourIndex, psEncC->pitch_contour_iCDF, 8 ); + + /********************/ + /* Encode LTP gains */ + /********************/ + /* PERIndex value */ + silk_assert( psIndices->PERIndex >= 0 && psIndices->PERIndex < 3 ); + ec_enc_icdf( psRangeEnc, psIndices->PERIndex, silk_LTP_per_index_iCDF, 8 ); + + /* Codebook Indices */ + for( k = 0; k < psEncC->nb_subfr; k++ ) 
{ + silk_assert( psIndices->LTPIndex[ k ] >= 0 && psIndices->LTPIndex[ k ] < ( 8 << psIndices->PERIndex ) ); + ec_enc_icdf( psRangeEnc, psIndices->LTPIndex[ k ], silk_LTP_gain_iCDF_ptrs[ psIndices->PERIndex ], 8 ); + } + + /**********************/ + /* Encode LTP scaling */ + /**********************/ + if( condCoding == CODE_INDEPENDENTLY ) { + silk_assert( psIndices->LTP_scaleIndex >= 0 && psIndices->LTP_scaleIndex < 3 ); + ec_enc_icdf( psRangeEnc, psIndices->LTP_scaleIndex, silk_LTPscale_iCDF, 8 ); + } + silk_assert( !condCoding || psIndices->LTP_scaleIndex == 0 ); + } + + psEncC->ec_prevSignalType = psIndices->signalType; + + /***************/ + /* Encode seed */ + /***************/ + silk_assert( psIndices->Seed >= 0 && psIndices->Seed < 4 ); + ec_enc_icdf( psRangeEnc, psIndices->Seed, silk_uniform4_iCDF, 8 ); +} diff --git a/drivers/opus/silk/encode_pulses.c b/drivers/opus/silk/encode_pulses.c new file mode 100644 index 0000000000..d148b9d1e6 --- /dev/null +++ b/drivers/opus/silk/encode_pulses.c @@ -0,0 +1,206 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. 
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "silk_main.h" +#include "stack_alloc.h" + +/*********************************************/ +/* Encode quantization indices of excitation */ +/*********************************************/ + +static OPUS_INLINE opus_int combine_and_check( /* return ok */ + opus_int *pulses_comb, /* O */ + const opus_int *pulses_in, /* I */ + opus_int max_pulses, /* I max value for sum of pulses */ + opus_int len /* I number of output values */ +) +{ + opus_int k, sum; + + for( k = 0; k < len; k++ ) { + sum = pulses_in[ 2 * k ] + pulses_in[ 2 * k + 1 ]; + if( sum > max_pulses ) { + return 1; + } + pulses_comb[ k ] = sum; + } + + return 0; +} + +/* Encode quantization indices of excitation */ +void silk_encode_pulses( + ec_enc *psRangeEnc, /* I/O compressor data structure */ + const opus_int signalType, /* I Signal type */ + const opus_int quantOffsetType, /* I quantOffsetType */ + opus_int8 pulses[], /* I quantization indices */ + const opus_int frame_length /* I Frame length */ +) +{ + opus_int i, k, j, iter, bit, nLS, scale_down, RateLevelIndex = 0; + opus_int32 abs_q, minSumBits_Q5, 
sumBits_Q5; + VARDECL( opus_int, abs_pulses ); + VARDECL( opus_int, sum_pulses ); + VARDECL( opus_int, nRshifts ); + opus_int pulses_comb[ 8 ]; + opus_int *abs_pulses_ptr; + const opus_int8 *pulses_ptr; + const opus_uint8 *cdf_ptr; + const opus_uint8 *nBits_ptr; + SAVE_STACK; + + silk_memset( pulses_comb, 0, 8 * sizeof( opus_int ) ); /* Fixing Valgrind reported problem*/ + + /****************************/ + /* Prepare for shell coding */ + /****************************/ + /* Calculate number of shell blocks */ + silk_assert( 1 << LOG2_SHELL_CODEC_FRAME_LENGTH == SHELL_CODEC_FRAME_LENGTH ); + iter = silk_RSHIFT( frame_length, LOG2_SHELL_CODEC_FRAME_LENGTH ); + if( iter * SHELL_CODEC_FRAME_LENGTH < frame_length ) { + silk_assert( frame_length == 12 * 10 ); /* Make sure only happens for 10 ms @ 12 kHz */ + iter++; + silk_memset( &pulses[ frame_length ], 0, SHELL_CODEC_FRAME_LENGTH * sizeof(opus_int8)); + } + + /* Take the absolute value of the pulses */ + ALLOC( abs_pulses, iter * SHELL_CODEC_FRAME_LENGTH, opus_int ); + silk_assert( !( SHELL_CODEC_FRAME_LENGTH & 3 ) ); + for( i = 0; i < iter * SHELL_CODEC_FRAME_LENGTH; i+=4 ) { + abs_pulses[i+0] = ( opus_int )silk_abs( pulses[ i + 0 ] ); + abs_pulses[i+1] = ( opus_int )silk_abs( pulses[ i + 1 ] ); + abs_pulses[i+2] = ( opus_int )silk_abs( pulses[ i + 2 ] ); + abs_pulses[i+3] = ( opus_int )silk_abs( pulses[ i + 3 ] ); + } + + /* Calc sum pulses per shell code frame */ + ALLOC( sum_pulses, iter, opus_int ); + ALLOC( nRshifts, iter, opus_int ); + abs_pulses_ptr = abs_pulses; + for( i = 0; i < iter; i++ ) { + nRshifts[ i ] = 0; + + while( 1 ) { + /* 1+1 -> 2 */ + scale_down = combine_and_check( pulses_comb, abs_pulses_ptr, silk_max_pulses_table[ 0 ], 8 ); + /* 2+2 -> 4 */ + scale_down += combine_and_check( pulses_comb, pulses_comb, silk_max_pulses_table[ 1 ], 4 ); + /* 4+4 -> 8 */ + scale_down += combine_and_check( pulses_comb, pulses_comb, silk_max_pulses_table[ 2 ], 2 ); + /* 8+8 -> 16 */ + scale_down += 
combine_and_check( &sum_pulses[ i ], pulses_comb, silk_max_pulses_table[ 3 ], 1 ); + + if( scale_down ) { + /* We need to downscale the quantization signal */ + nRshifts[ i ]++; + for( k = 0; k < SHELL_CODEC_FRAME_LENGTH; k++ ) { + abs_pulses_ptr[ k ] = silk_RSHIFT( abs_pulses_ptr[ k ], 1 ); + } + } else { + /* Jump out of while(1) loop and go to next shell coding frame */ + break; + } + } + abs_pulses_ptr += SHELL_CODEC_FRAME_LENGTH; + } + + /**************/ + /* Rate level */ + /**************/ + /* find rate level that leads to fewest bits for coding of pulses per block info */ + minSumBits_Q5 = silk_int32_MAX; + for( k = 0; k < N_RATE_LEVELS - 1; k++ ) { + nBits_ptr = silk_pulses_per_block_BITS_Q5[ k ]; + sumBits_Q5 = silk_rate_levels_BITS_Q5[ signalType >> 1 ][ k ]; + for( i = 0; i < iter; i++ ) { + if( nRshifts[ i ] > 0 ) { + sumBits_Q5 += nBits_ptr[ MAX_PULSES + 1 ]; + } else { + sumBits_Q5 += nBits_ptr[ sum_pulses[ i ] ]; + } + } + if( sumBits_Q5 < minSumBits_Q5 ) { + minSumBits_Q5 = sumBits_Q5; + RateLevelIndex = k; + } + } + ec_enc_icdf( psRangeEnc, RateLevelIndex, silk_rate_levels_iCDF[ signalType >> 1 ], 8 ); + + /***************************************************/ + /* Sum-Weighted-Pulses Encoding */ + /***************************************************/ + cdf_ptr = silk_pulses_per_block_iCDF[ RateLevelIndex ]; + for( i = 0; i < iter; i++ ) { + if( nRshifts[ i ] == 0 ) { + ec_enc_icdf( psRangeEnc, sum_pulses[ i ], cdf_ptr, 8 ); + } else { + ec_enc_icdf( psRangeEnc, MAX_PULSES + 1, cdf_ptr, 8 ); + for( k = 0; k < nRshifts[ i ] - 1; k++ ) { + ec_enc_icdf( psRangeEnc, MAX_PULSES + 1, silk_pulses_per_block_iCDF[ N_RATE_LEVELS - 1 ], 8 ); + } + ec_enc_icdf( psRangeEnc, sum_pulses[ i ], silk_pulses_per_block_iCDF[ N_RATE_LEVELS - 1 ], 8 ); + } + } + + /******************/ + /* Shell Encoding */ + /******************/ + for( i = 0; i < iter; i++ ) { + if( sum_pulses[ i ] > 0 ) { + silk_shell_encoder( psRangeEnc, &abs_pulses[ i * SHELL_CODEC_FRAME_LENGTH ] 
); + } + } + + /****************/ + /* LSB Encoding */ + /****************/ + for( i = 0; i < iter; i++ ) { + if( nRshifts[ i ] > 0 ) { + pulses_ptr = &pulses[ i * SHELL_CODEC_FRAME_LENGTH ]; + nLS = nRshifts[ i ] - 1; + for( k = 0; k < SHELL_CODEC_FRAME_LENGTH; k++ ) { + abs_q = (opus_int8)silk_abs( pulses_ptr[ k ] ); + for( j = nLS; j > 0; j-- ) { + bit = silk_RSHIFT( abs_q, j ) & 1; + ec_enc_icdf( psRangeEnc, bit, silk_lsb_iCDF, 8 ); + } + bit = abs_q & 1; + ec_enc_icdf( psRangeEnc, bit, silk_lsb_iCDF, 8 ); + } + } + } + + /****************/ + /* Encode signs */ + /****************/ + silk_encode_signs( psRangeEnc, pulses, frame_length, signalType, quantOffsetType, sum_pulses ); + RESTORE_STACK; +} diff --git a/drivers/opus/silk/errors.h b/drivers/opus/silk/errors.h new file mode 100644 index 0000000000..45070800f2 --- /dev/null +++ b/drivers/opus/silk/errors.h @@ -0,0 +1,98 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
#ifndef SILK_ERRORS_H
#define SILK_ERRORS_H

#ifdef __cplusplus
extern "C"
{
#endif

/*
 * SILK error codes.
 *
 * Zero means success; encoder errors are in the -1xx range and decoder
 * errors in the -2xx range.  Negative values are parenthesized so that each
 * macro expands to a single primary expression wherever it is used.
 */

/******************/
/* Error messages */
/******************/
#define SILK_NO_ERROR                                0

/**************************/
/* Encoder error messages */
/**************************/

/* Input length is not a multiple of 10 ms, or length is longer than the packet length */
#define SILK_ENC_INPUT_INVALID_NO_OF_SAMPLES        (-101)

/* Sampling frequency not 8000, 12000 or 16000 Hertz */
#define SILK_ENC_FS_NOT_SUPPORTED                   (-102)

/* Packet size not 10, 20, 40, or 60 ms */
#define SILK_ENC_PACKET_SIZE_NOT_SUPPORTED          (-103)

/* Allocated payload buffer too short */
#define SILK_ENC_PAYLOAD_BUF_TOO_SHORT              (-104)

/* Loss rate not between 0 and 100 percent */
#define SILK_ENC_INVALID_LOSS_RATE                  (-105)

/* Complexity setting not valid, use 0...10 */
#define SILK_ENC_INVALID_COMPLEXITY_SETTING         (-106)

/* Inband FEC setting not valid, use 0 or 1 */
#define SILK_ENC_INVALID_INBAND_FEC_SETTING         (-107)

/* DTX setting not valid, use 0 or 1 */
#define SILK_ENC_INVALID_DTX_SETTING                (-108)

/* CBR setting not valid, use 0 or 1 */
#define SILK_ENC_INVALID_CBR_SETTING                (-109)

/* Internal encoder error */
#define SILK_ENC_INTERNAL_ERROR                     (-110)

/* Number of channels not valid (going by the macro name; the accepted values are not stated here) */
#define SILK_ENC_INVALID_NUMBER_OF_CHANNELS_ERROR   (-111)

/**************************/
/* Decoder error messages */
/**************************/

/* Output sampling frequency lower than internal decoded sampling frequency */
#define SILK_DEC_INVALID_SAMPLING_FREQUENCY         (-200)

/* Payload size exceeded the maximum allowed 1024 bytes */
#define SILK_DEC_PAYLOAD_TOO_LARGE                  (-201)

/* Payload has bit errors */
#define SILK_DEC_PAYLOAD_ERROR                      (-202)

/* Frame size not valid (going by the macro name; the accepted sizes are not stated here) */
#define SILK_DEC_INVALID_FRAME_SIZE                 (-203)

#ifdef __cplusplus
}
#endif

#endif
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "main_FIX.h" + +void silk_LTP_analysis_filter_FIX( + opus_int16 *LTP_res, /* O LTP residual signal of length MAX_NB_SUBFR * ( pre_length + subfr_length ) */ + const opus_int16 *x, /* I Pointer to input signal with at least max( pitchL ) preceding samples */ + const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ],/* I LTP_ORDER LTP coefficients for each MAX_NB_SUBFR subframe */ + const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lag, one for each subframe */ + const opus_int32 invGains_Q16[ MAX_NB_SUBFR ], /* I Inverse quantization gains, one for each subframe */ + const opus_int subfr_length, /* I Length of each subframe */ + const opus_int nb_subfr, /* I Number of subframes */ + const opus_int pre_length /* I Length of the preceding samples starting at &x[0] for each subframe */ +) +{ + const opus_int16 *x_ptr, *x_lag_ptr; + opus_int16 Btmp_Q14[ LTP_ORDER ]; + opus_int16 *LTP_res_ptr; + opus_int k, i, j; + opus_int32 LTP_est; + + x_ptr = x; + LTP_res_ptr = LTP_res; + for( k = 0; k < nb_subfr; k++ ) { + + x_lag_ptr = x_ptr - pitchL[ k ]; + for( i = 0; i < LTP_ORDER; i++ ) { + Btmp_Q14[ i ] = LTPCoef_Q14[ k * LTP_ORDER + i ]; + } + + /* LTP analysis FIR filter */ + for( i = 0; i < subfr_length + pre_length; i++ ) { + LTP_res_ptr[ i ] = x_ptr[ i ]; + + /* Long-term prediction */ + LTP_est = silk_SMULBB( 
x_lag_ptr[ LTP_ORDER / 2 ], Btmp_Q14[ 0 ] ); + for( j = 1; j < LTP_ORDER; j++ ) { + LTP_est = silk_SMLABB_ovflw( LTP_est, x_lag_ptr[ LTP_ORDER / 2 - j ], Btmp_Q14[ j ] ); + } + LTP_est = silk_RSHIFT_ROUND( LTP_est, 14 ); /* round and -> Q0*/ + + /* Subtract long-term prediction */ + LTP_res_ptr[ i ] = (opus_int16)silk_SAT16( (opus_int32)x_ptr[ i ] - LTP_est ); + + /* Scale residual */ + LTP_res_ptr[ i ] = silk_SMULWB( invGains_Q16[ k ], LTP_res_ptr[ i ] ); + + x_lag_ptr++; + } + + /* Update pointers */ + LTP_res_ptr += subfr_length + pre_length; + x_ptr += subfr_length; + } +} + diff --git a/drivers/opus/silk/fixed/LTP_scale_ctrl_FIX.c b/drivers/opus/silk/fixed/LTP_scale_ctrl_FIX.c new file mode 100644 index 0000000000..ab6923c5c9 --- /dev/null +++ b/drivers/opus/silk/fixed/LTP_scale_ctrl_FIX.c @@ -0,0 +1,53 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "main_FIX.h" + +/* Calculation of LTP state scaling */ +void silk_LTP_scale_ctrl_FIX( + silk_encoder_state_FIX *psEnc, /* I/O encoder state */ + silk_encoder_control_FIX *psEncCtrl, /* I/O encoder control */ + opus_int condCoding /* I The type of conditional coding to use */ +) +{ + opus_int round_loss; + + if( condCoding == CODE_INDEPENDENTLY ) { + /* Only scale if first frame in packet */ + round_loss = psEnc->sCmn.PacketLoss_perc + psEnc->sCmn.nFramesPerPacket; + psEnc->sCmn.indices.LTP_scaleIndex = (opus_int8)silk_LIMIT( + silk_SMULWB( silk_SMULBB( round_loss, psEncCtrl->LTPredCodGain_Q7 ), SILK_FIX_CONST( 0.1, 9 ) ), 0, 2 ); + } else { + /* Default is minimum scaling */ + psEnc->sCmn.indices.LTP_scaleIndex = 0; + } + psEncCtrl->LTP_scale_Q14 = silk_LTPScales_table_Q14[ psEnc->sCmn.indices.LTP_scaleIndex ]; +} diff --git a/drivers/opus/silk/fixed/apply_sine_window_FIX.c b/drivers/opus/silk/fixed/apply_sine_window_FIX.c new file mode 100644 index 0000000000..0998b49eca --- /dev/null +++ b/drivers/opus/silk/fixed/apply_sine_window_FIX.c @@ -0,0 +1,101 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. 
+Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "SigProc_FIX.h" + +/* Apply sine window to signal vector. */ +/* Window types: */ +/* 1 -> sine window from 0 to pi/2 */ +/* 2 -> sine window from pi/2 to pi */ +/* Every other sample is linearly interpolated, for speed. */ +/* Window length must be between 16 and 120 (incl) and a multiple of 4. 
*/ + +/* Matlab code for table: + for k=16:9*4:16+2*9*4, fprintf(' %7.d,', -round(65536*pi ./ (k:4:k+8*4))); fprintf('\n'); end +*/ +static const opus_int16 freq_table_Q16[ 27 ] = { + 12111, 9804, 8235, 7100, 6239, 5565, 5022, 4575, 4202, + 3885, 3612, 3375, 3167, 2984, 2820, 2674, 2542, 2422, + 2313, 2214, 2123, 2038, 1961, 1889, 1822, 1760, 1702, +}; + +void silk_apply_sine_window( + opus_int16 px_win[], /* O Pointer to windowed signal */ + const opus_int16 px[], /* I Pointer to input signal */ + const opus_int win_type, /* I Selects a window type */ + const opus_int length /* I Window length, multiple of 4 */ +) +{ + opus_int k, f_Q16, c_Q16; + opus_int32 S0_Q16, S1_Q16; + + silk_assert( win_type == 1 || win_type == 2 ); + + /* Length must be in a range from 16 to 120 and a multiple of 4 */ + silk_assert( length >= 16 && length <= 120 ); + silk_assert( ( length & 3 ) == 0 ); + + /* Frequency */ + k = ( length >> 2 ) - 4; + silk_assert( k >= 0 && k <= 26 ); + f_Q16 = (opus_int)freq_table_Q16[ k ]; + + /* Factor used for cosine approximation */ + c_Q16 = silk_SMULWB( (opus_int32)f_Q16, -f_Q16 ); + silk_assert( c_Q16 >= -32768 ); + + /* initialize state */ + if( win_type == 1 ) { + /* start from 0 */ + S0_Q16 = 0; + /* approximation of sin(f) */ + S1_Q16 = f_Q16 + silk_RSHIFT( length, 3 ); + } else { + /* start from 1 */ + S0_Q16 = ( (opus_int32)1 << 16 ); + /* approximation of cos(f) */ + S1_Q16 = ( (opus_int32)1 << 16 ) + silk_RSHIFT( c_Q16, 1 ) + silk_RSHIFT( length, 4 ); + } + + /* Uses the recursive equation: sin(n*f) = 2 * cos(f) * sin((n-1)*f) - sin((n-2)*f) */ + /* 4 samples at a time */ + for( k = 0; k < length; k += 4 ) { + px_win[ k ] = (opus_int16)silk_SMULWB( silk_RSHIFT( S0_Q16 + S1_Q16, 1 ), px[ k ] ); + px_win[ k + 1 ] = (opus_int16)silk_SMULWB( S1_Q16, px[ k + 1] ); + S0_Q16 = silk_SMULWB( S1_Q16, c_Q16 ) + silk_LSHIFT( S1_Q16, 1 ) - S0_Q16 + 1; + S0_Q16 = silk_min( S0_Q16, ( (opus_int32)1 << 16 ) ); + + px_win[ k + 2 ] = (opus_int16)silk_SMULWB( 
silk_RSHIFT( S0_Q16 + S1_Q16, 1 ), px[ k + 2] ); + px_win[ k + 3 ] = (opus_int16)silk_SMULWB( S0_Q16, px[ k + 3 ] ); + S1_Q16 = silk_SMULWB( S0_Q16, c_Q16 ) + silk_LSHIFT( S0_Q16, 1 ) - S1_Q16; + S1_Q16 = silk_min( S1_Q16, ( (opus_int32)1 << 16 ) ); + } +} diff --git a/drivers/opus/silk/fixed/autocorr_FIX.c b/drivers/opus/silk/fixed/autocorr_FIX.c new file mode 100644 index 0000000000..438b42f85b --- /dev/null +++ b/drivers/opus/silk/fixed/autocorr_FIX.c @@ -0,0 +1,48 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "SigProc_FIX.h" +#include "celt_lpc.h" + +/* Compute autocorrelation */ +void silk_autocorr( + opus_int32 *results, /* O Result (length correlationCount) */ + opus_int *scale, /* O Scaling of the correlation vector */ + const opus_int16 *inputData, /* I Input data to correlate */ + const opus_int inputDataSize, /* I Length of input */ + const opus_int correlationCount, /* I Number of correlation taps to compute */ + int arch /* I Run-time architecture */ +) +{ + opus_int corrCount; + corrCount = silk_min_int( inputDataSize, correlationCount ); + *scale = _celt_autocorr(inputData, results, NULL, 0, corrCount-1, inputDataSize, arch); +} diff --git a/drivers/opus/silk/fixed/burg_modified_FIX.c b/drivers/opus/silk/fixed/burg_modified_FIX.c new file mode 100644 index 0000000000..ce2a560e6d --- /dev/null +++ b/drivers/opus/silk/fixed/burg_modified_FIX.c @@ -0,0 +1,279 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. 
+- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "SigProc_FIX.h" +#include "define.h" +#include "tuning_parameters.h" +#include "pitch.h" + +#define MAX_FRAME_SIZE 384 /* subfr_length * nb_subfr = ( 0.005 * 16000 + 16 ) * 4 = 384 */ + +#define QA 25 +#define N_BITS_HEAD_ROOM 2 +#define MIN_RSHIFTS -16 +#define MAX_RSHIFTS (32 - QA) + +/* Burg-style LPC analysis over nb_subfr stacked subframes of x. Derives one reflection coefficient per order n < D, writes the resulting prediction coefficients to A_Q16 (Q16, sign-flipped relative to the internal Af_QA) and the residual energy to *res_nrg with Q value *res_nrg_Q = -rshifts. If the updated inverse prediction gain is at or below minInvGain_Q30, the reflection coefficient is adjusted to hit that limit, the remaining coefficients are zeroed and the recursion stops. arch is only forwarded to celt_pitch_xcorr(). */ +void silk_burg_modified( + opus_int32 *res_nrg, /* O Residual energy */ + opus_int *res_nrg_Q, /* O Residual energy Q value */ + opus_int32 A_Q16[], /* O Prediction coefficients (length order) */ + const opus_int16 x[], /* I Input signal, length: nb_subfr * ( D + subfr_length ) */ + const opus_int32 minInvGain_Q30, /* I Inverse of max prediction gain */ + const opus_int subfr_length, /* I Input signal subframe length (incl. D preceding samples) */ + const opus_int nb_subfr, /* I Number of subframes stacked in x */ + const opus_int D, /* I Order */ + int arch /* I Run-time architecture */ +) +{ + opus_int k, n, s, lz, rshifts, rshifts_extra, reached_max_gain; + opus_int32 C0, num, nrg, rc_Q31, invGain_Q30, Atmp_QA, Atmp1, tmp1, tmp2, x1, x2; + const opus_int16 *x_ptr; + opus_int32 C_first_row[ SILK_MAX_ORDER_LPC ]; + opus_int32 C_last_row[ SILK_MAX_ORDER_LPC ]; + opus_int32 Af_QA[ SILK_MAX_ORDER_LPC ]; + opus_int32 CAf[ SILK_MAX_ORDER_LPC + 1 ]; + opus_int32 CAb[ SILK_MAX_ORDER_LPC + 1 ]; + opus_int32 xcorr[ SILK_MAX_ORDER_LPC ]; + + silk_assert( subfr_length * nb_subfr <= MAX_FRAME_SIZE ); + + /* Compute autocorrelations, added over subframes */ + silk_sum_sqr_shift( &C0, &rshifts, x, nb_subfr * subfr_length ); + if( rshifts > MAX_RSHIFTS ) { + C0 = silk_LSHIFT32( C0, rshifts - MAX_RSHIFTS ); + silk_assert( C0 > 0 ); + rshifts = MAX_RSHIFTS; + } else { + lz = silk_CLZ32( C0 ) - 1; + rshifts_extra = N_BITS_HEAD_ROOM - lz; + if( rshifts_extra > 0 ) { + rshifts_extra = silk_min( 
rshifts_extra, MAX_RSHIFTS - rshifts ); + C0 = silk_RSHIFT32( C0, rshifts_extra ); + } else { + rshifts_extra = silk_max( rshifts_extra, MIN_RSHIFTS - rshifts ); + C0 = silk_LSHIFT32( C0, -rshifts_extra ); + } + rshifts += rshifts_extra; + } + CAb[ 0 ] = CAf[ 0 ] = C0 + silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ) + 1; /* Q(-rshifts) */ + silk_memset( C_first_row, 0, SILK_MAX_ORDER_LPC * sizeof( opus_int32 ) ); + if( rshifts > 0 ) { + for( s = 0; s < nb_subfr; s++ ) { + x_ptr = x + s * subfr_length; + for( n = 1; n < D + 1; n++ ) { + C_first_row[ n - 1 ] += (opus_int32)silk_RSHIFT64( + silk_inner_prod16_aligned_64( x_ptr, x_ptr + n, subfr_length - n ), rshifts ); + } + } + } else { + for( s = 0; s < nb_subfr; s++ ) { + int i; + opus_int32 d; + x_ptr = x + s * subfr_length; + celt_pitch_xcorr(x_ptr, x_ptr + 1, xcorr, subfr_length - D, D, arch ); + for( n = 1; n < D + 1; n++ ) { + for ( i = n + subfr_length - D, d = 0; i < subfr_length; i++ ) + d = MAC16_16( d, x_ptr[ i ], x_ptr[ i - n ] ); + xcorr[ n - 1 ] += d; + } + for( n = 1; n < D + 1; n++ ) { + C_first_row[ n - 1 ] += silk_LSHIFT32( xcorr[ n - 1 ], -rshifts ); + } + } + } + silk_memcpy( C_last_row, C_first_row, SILK_MAX_ORDER_LPC * sizeof( opus_int32 ) ); + + /* Initialize (same value as assigned before the autocorrelation loops; C0 is not modified in between) */ + CAb[ 0 ] = CAf[ 0 ] = C0 + silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ) + 1; /* Q(-rshifts) */ + + invGain_Q30 = (opus_int32)1 << 30; + reached_max_gain = 0; + for( n = 0; n < D; n++ ) { + /* Update first row of correlation matrix (without first element) */ + /* Update last row of correlation matrix (without last element, stored in reversed order) */ + /* Update C * Af */ + /* Update C * flipud(Af) (stored in reversed order) */ + if( rshifts > -2 ) { + for( s = 0; s < nb_subfr; s++ ) { + x_ptr = x + s * subfr_length; + x1 = -silk_LSHIFT32( (opus_int32)x_ptr[ n ], 16 - rshifts ); /* Q(16-rshifts) */ + x2 = -silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], 16 - rshifts ); /* Q(16-rshifts) */ + tmp1 = 
silk_LSHIFT32( (opus_int32)x_ptr[ n ], QA - 16 ); /* Q(QA-16) */ + tmp2 = silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], QA - 16 ); /* Q(QA-16) */ + for( k = 0; k < n; k++ ) { + C_first_row[ k ] = silk_SMLAWB( C_first_row[ k ], x1, x_ptr[ n - k - 1 ] ); /* Q( -rshifts ) */ + C_last_row[ k ] = silk_SMLAWB( C_last_row[ k ], x2, x_ptr[ subfr_length - n + k ] ); /* Q( -rshifts ) */ + Atmp_QA = Af_QA[ k ]; + tmp1 = silk_SMLAWB( tmp1, Atmp_QA, x_ptr[ n - k - 1 ] ); /* Q(QA-16) */ + tmp2 = silk_SMLAWB( tmp2, Atmp_QA, x_ptr[ subfr_length - n + k ] ); /* Q(QA-16) */ + } + tmp1 = silk_LSHIFT32( -tmp1, 32 - QA - rshifts ); /* Q(16-rshifts) */ + tmp2 = silk_LSHIFT32( -tmp2, 32 - QA - rshifts ); /* Q(16-rshifts) */ + for( k = 0; k <= n; k++ ) { + CAf[ k ] = silk_SMLAWB( CAf[ k ], tmp1, x_ptr[ n - k ] ); /* Q( -rshifts ) */ + CAb[ k ] = silk_SMLAWB( CAb[ k ], tmp2, x_ptr[ subfr_length - n + k - 1 ] ); /* Q( -rshifts ) */ + } + } + } else { + for( s = 0; s < nb_subfr; s++ ) { + x_ptr = x + s * subfr_length; + x1 = -silk_LSHIFT32( (opus_int32)x_ptr[ n ], -rshifts ); /* Q( -rshifts ) */ + x2 = -silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], -rshifts ); /* Q( -rshifts ) */ + tmp1 = silk_LSHIFT32( (opus_int32)x_ptr[ n ], 17 ); /* Q17 */ + tmp2 = silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], 17 ); /* Q17 */ + for( k = 0; k < n; k++ ) { + C_first_row[ k ] = silk_MLA( C_first_row[ k ], x1, x_ptr[ n - k - 1 ] ); /* Q( -rshifts ) */ + C_last_row[ k ] = silk_MLA( C_last_row[ k ], x2, x_ptr[ subfr_length - n + k ] ); /* Q( -rshifts ) */ + Atmp1 = silk_RSHIFT_ROUND( Af_QA[ k ], QA - 17 ); /* Q17 */ + tmp1 = silk_MLA( tmp1, x_ptr[ n - k - 1 ], Atmp1 ); /* Q17 */ + tmp2 = silk_MLA( tmp2, x_ptr[ subfr_length - n + k ], Atmp1 ); /* Q17 */ + } + tmp1 = -tmp1; /* Q17 */ + tmp2 = -tmp2; /* Q17 */ + for( k = 0; k <= n; k++ ) { + CAf[ k ] = silk_SMLAWW( CAf[ k ], tmp1, + silk_LSHIFT32( (opus_int32)x_ptr[ n - k ], -rshifts - 1 ) ); /* Q( -rshifts ) */ + CAb[ k ] = 
silk_SMLAWW( CAb[ k ], tmp2, + silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n + k - 1 ], -rshifts - 1 ) ); /* Q( -rshifts ) */ + } + } + } + + /* Calculate numerator and denominator for the next order reflection (parcor) coefficient */ + tmp1 = C_first_row[ n ]; /* Q( -rshifts ) */ + tmp2 = C_last_row[ n ]; /* Q( -rshifts ) */ + num = 0; /* Q( -rshifts ) */ + nrg = silk_ADD32( CAb[ 0 ], CAf[ 0 ] ); /* Q( 1-rshifts ) */ + for( k = 0; k < n; k++ ) { + Atmp_QA = Af_QA[ k ]; + lz = silk_CLZ32( silk_abs( Atmp_QA ) ) - 1; + lz = silk_min( 32 - QA, lz ); + Atmp1 = silk_LSHIFT32( Atmp_QA, lz ); /* Q( QA + lz ) */ + + tmp1 = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( C_last_row[ n - k - 1 ], Atmp1 ), 32 - QA - lz ); /* Q( -rshifts ) */ + tmp2 = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( C_first_row[ n - k - 1 ], Atmp1 ), 32 - QA - lz ); /* Q( -rshifts ) */ + num = silk_ADD_LSHIFT32( num, silk_SMMUL( CAb[ n - k ], Atmp1 ), 32 - QA - lz ); /* Q( -rshifts ) */ + nrg = silk_ADD_LSHIFT32( nrg, silk_SMMUL( silk_ADD32( CAb[ k + 1 ], CAf[ k + 1 ] ), + Atmp1 ), 32 - QA - lz ); /* Q( 1-rshifts ) */ + } + CAf[ n + 1 ] = tmp1; /* Q( -rshifts ) */ + CAb[ n + 1 ] = tmp2; /* Q( -rshifts ) */ + num = silk_ADD32( num, tmp2 ); /* Q( -rshifts ) */ + num = silk_LSHIFT32( -num, 1 ); /* Q( 1-rshifts ) */ + + /* Calculate the next order reflection (parcor) coefficient */ + if( silk_abs( num ) < nrg ) { + rc_Q31 = silk_DIV32_varQ( num, nrg, 31 ); + } else { + rc_Q31 = ( num > 0 ) ? 
silk_int32_MAX : silk_int32_MIN; + } + + /* Update inverse prediction gain */ + tmp1 = ( (opus_int32)1 << 30 ) - silk_SMMUL( rc_Q31, rc_Q31 ); + tmp1 = silk_LSHIFT( silk_SMMUL( invGain_Q30, tmp1 ), 2 ); + if( tmp1 <= minInvGain_Q30 ) { + /* Max prediction gain exceeded; set reflection coefficient such that max prediction gain is exactly hit */ + tmp2 = ( (opus_int32)1 << 30 ) - silk_DIV32_varQ( minInvGain_Q30, invGain_Q30, 30 ); /* Q30 */ + rc_Q31 = silk_SQRT_APPROX( tmp2 ); /* Q15 */ + /* Newton-Raphson iteration */ + rc_Q31 = silk_RSHIFT32( rc_Q31 + silk_DIV32( tmp2, rc_Q31 ), 1 ); /* Q15 */ + rc_Q31 = silk_LSHIFT32( rc_Q31, 16 ); /* Q31 */ + if( num < 0 ) { + /* Ensure adjusted reflection coefficients has the original sign */ + rc_Q31 = -rc_Q31; + } + invGain_Q30 = minInvGain_Q30; + reached_max_gain = 1; + } else { + invGain_Q30 = tmp1; + } + + /* Update the AR coefficients */ + for( k = 0; k < (n + 1) >> 1; k++ ) { + tmp1 = Af_QA[ k ]; /* QA */ + tmp2 = Af_QA[ n - k - 1 ]; /* QA */ + Af_QA[ k ] = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( tmp2, rc_Q31 ), 1 ); /* QA */ + Af_QA[ n - k - 1 ] = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( tmp1, rc_Q31 ), 1 ); /* QA */ + } + Af_QA[ n ] = silk_RSHIFT32( rc_Q31, 31 - QA ); /* QA */ + + if( reached_max_gain ) { + /* Reached max prediction gain; set remaining coefficients to zero and exit loop */ + for( k = n + 1; k < D; k++ ) { + Af_QA[ k ] = 0; + } + break; + } + + /* Update C * Af and C * Ab */ + for( k = 0; k <= n + 1; k++ ) { + tmp1 = CAf[ k ]; /* Q( -rshifts ) */ + tmp2 = CAb[ n - k + 1 ]; /* Q( -rshifts ) */ + CAf[ k ] = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( tmp2, rc_Q31 ), 1 ); /* Q( -rshifts ) */ + CAb[ n - k + 1 ] = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( tmp1, rc_Q31 ), 1 ); /* Q( -rshifts ) */ + } + } + + if( reached_max_gain ) { + for( k = 0; k < D; k++ ) { + /* Scale coefficients */ + A_Q16[ k ] = -silk_RSHIFT_ROUND( Af_QA[ k ], QA - 16 ); + } + /* Subtract energy of preceding samples from C0 */ + if( rshifts > 0 ) { + for( 
s = 0; s < nb_subfr; s++ ) { + x_ptr = x + s * subfr_length; + C0 -= (opus_int32)silk_RSHIFT64( silk_inner_prod16_aligned_64( x_ptr, x_ptr, D ), rshifts ); + } + } else { + for( s = 0; s < nb_subfr; s++ ) { + x_ptr = x + s * subfr_length; + C0 -= silk_LSHIFT32( silk_inner_prod_aligned( x_ptr, x_ptr, D ), -rshifts ); + } + } + /* Approximate residual energy */ + *res_nrg = silk_LSHIFT( silk_SMMUL( invGain_Q30, C0 ), 2 ); + *res_nrg_Q = -rshifts; + } else { + /* Return residual energy */ + nrg = CAf[ 0 ]; /* Q( -rshifts ) */ + tmp1 = (opus_int32)1 << 16; /* Q16 */ + for( k = 0; k < D; k++ ) { + Atmp1 = silk_RSHIFT_ROUND( Af_QA[ k ], QA - 16 ); /* Q16 */ + nrg = silk_SMLAWW( nrg, CAf[ k + 1 ], Atmp1 ); /* Q( -rshifts ) */ + tmp1 = silk_SMLAWW( tmp1, Atmp1, Atmp1 ); /* Q16 */ + A_Q16[ k ] = -Atmp1; + } + *res_nrg = silk_SMLAWW( nrg, silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ), -tmp1 );/* Q( -rshifts ) */ + *res_nrg_Q = -rshifts; + } +} diff --git a/drivers/opus/silk/fixed/corrMatrix_FIX.c b/drivers/opus/silk/fixed/corrMatrix_FIX.c new file mode 100644 index 0000000000..28543fc204 --- /dev/null +++ b/drivers/opus/silk/fixed/corrMatrix_FIX.c @@ -0,0 +1,156 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. 
+- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +/********************************************************************** + * Correlation Matrix Computations for LS estimate. 
+ **********************************************************************/ + +#include "main_FIX.h" + +/* Calculates correlation vector X'*t: for each lag in [0, order), Xt[ lag ] is the inner product over L samples of x starting at index ( order - 1 - lag ) with t. When rshifts > 0 every product is right-shifted by rshifts before being accumulated; otherwise rshifts is asserted to be 0 and silk_inner_prod_aligned() is used. */ +void silk_corrVector_FIX( + const opus_int16 *x, /* I x vector [L + order - 1] used to form data matrix X */ + const opus_int16 *t, /* I Target vector [L] */ + const opus_int L, /* I Length of vectors */ + const opus_int order, /* I Max lag for correlation */ + opus_int32 *Xt, /* O Pointer to X'*t correlation vector [order] */ + const opus_int rshifts /* I Right shifts of correlations */ +) +{ + opus_int lag, i; + const opus_int16 *ptr1, *ptr2; + opus_int32 inner_prod; + + ptr1 = &x[ order - 1 ]; /* Points to first sample of column 0 of X: X[:,0] */ + ptr2 = t; + /* Calculate X'*t */ + if( rshifts > 0 ) { + /* Right shifting used */ + for( lag = 0; lag < order; lag++ ) { + inner_prod = 0; + for( i = 0; i < L; i++ ) { + inner_prod += silk_RSHIFT32( silk_SMULBB( ptr1[ i ], ptr2[i] ), rshifts ); + } + Xt[ lag ] = inner_prod; /* X[:,lag]'*t */ + ptr1--; /* Go to next column of X */ + } + } else { + silk_assert( rshifts == 0 ); + for( lag = 0; lag < order; lag++ ) { + Xt[ lag ] = silk_inner_prod_aligned( ptr1, ptr2, L ); /* X[:,lag]'*t */ + ptr1--; /* Go to next column of X */ + } + } +} + +/* Calculates correlation matrix X'*X: XX is filled as a symmetric order x order matrix of inner products between the length-L columns of X, where column j starts at x[ order - 1 - j ]. The right shift applied to the products is derived from the signal energy and head_room, but is never smaller than the value of *rshifts on entry; the shift actually used is written back to *rshifts. */ +void silk_corrMatrix_FIX( + const opus_int16 *x, /* I x vector [L + order - 1] used to form data matrix X */ + const opus_int L, /* I Length of vectors */ + const opus_int order, /* I Max lag for correlation */ + const opus_int head_room, /* I Desired headroom */ + opus_int32 *XX, /* O Pointer to X'*X correlation matrix [ order x order ] */ + opus_int *rshifts /* I/O Right shifts of correlations */ +) +{ + opus_int i, j, lag, rshifts_local, head_room_rshifts; + opus_int32 energy; + const opus_int16 *ptr1, *ptr2; + + /* Calculate energy to find shift used to fit in 32 bits */ + silk_sum_sqr_shift( &energy, &rshifts_local, x, L + order - 1 ); + /* Add shifts to get the desired head room */ + 
head_room_rshifts = silk_max( head_room - silk_CLZ32( energy ), 0 ); + + energy = silk_RSHIFT32( energy, head_room_rshifts ); + rshifts_local += head_room_rshifts; + + /* Calculate energy of first column (0) of X: X[:,0]'*X[:,0] */ + /* Remove contribution of first order - 1 samples */ + for( i = 0; i < order - 1; i++ ) { + energy -= silk_RSHIFT32( silk_SMULBB( x[ i ], x[ i ] ), rshifts_local ); + } + if( rshifts_local < *rshifts ) { + /* Adjust energy */ + energy = silk_RSHIFT32( energy, *rshifts - rshifts_local ); + rshifts_local = *rshifts; + } + + /* Calculate energy of remaining columns of X: X[:,j]'*X[:,j] */ + /* Fill out the diagonal of the correlation matrix */ + matrix_ptr( XX, 0, 0, order ) = energy; + ptr1 = &x[ order - 1 ]; /* First sample of column 0 of X */ + for( j = 1; j < order; j++ ) { + energy = silk_SUB32( energy, silk_RSHIFT32( silk_SMULBB( ptr1[ L - j ], ptr1[ L - j ] ), rshifts_local ) ); + energy = silk_ADD32( energy, silk_RSHIFT32( silk_SMULBB( ptr1[ -j ], ptr1[ -j ] ), rshifts_local ) ); + matrix_ptr( XX, j, j, order ) = energy; + } + + ptr2 = &x[ order - 2 ]; /* First sample of column 1 of X */ + /* Calculate the remaining elements of the correlation matrix */ + if( rshifts_local > 0 ) { + /* Right shifting used */ + for( lag = 1; lag < order; lag++ ) { + /* Inner product of column 0 and column lag: X[:,0]'*X[:,lag] */ + energy = 0; + for( i = 0; i < L; i++ ) { + energy += silk_RSHIFT32( silk_SMULBB( ptr1[ i ], ptr2[i] ), rshifts_local ); + } + /* Calculate remaining off diagonal: X[:,j]'*X[:,j + lag] */ + matrix_ptr( XX, lag, 0, order ) = energy; + matrix_ptr( XX, 0, lag, order ) = energy; + for( j = 1; j < ( order - lag ); j++ ) { + energy = silk_SUB32( energy, silk_RSHIFT32( silk_SMULBB( ptr1[ L - j ], ptr2[ L - j ] ), rshifts_local ) ); + energy = silk_ADD32( energy, silk_RSHIFT32( silk_SMULBB( ptr1[ -j ], ptr2[ -j ] ), rshifts_local ) ); + matrix_ptr( XX, lag + j, j, order ) = energy; + matrix_ptr( XX, j, lag + j, order ) = energy; 
+ } + ptr2--; /* Update pointer to first sample of next column (lag) in X */ + } + } else { + for( lag = 1; lag < order; lag++ ) { + /* Inner product of column 0 and column lag: X[:,0]'*X[:,lag] */ + energy = silk_inner_prod_aligned( ptr1, ptr2, L ); + matrix_ptr( XX, lag, 0, order ) = energy; + matrix_ptr( XX, 0, lag, order ) = energy; + /* Calculate remaining off diagonal: X[:,j]'*X[:,j + lag] */ + for( j = 1; j < ( order - lag ); j++ ) { + energy = silk_SUB32( energy, silk_SMULBB( ptr1[ L - j ], ptr2[ L - j ] ) ); + energy = silk_SMLABB( energy, ptr1[ -j ], ptr2[ -j ] ); + matrix_ptr( XX, lag + j, j, order ) = energy; + matrix_ptr( XX, j, lag + j, order ) = energy; + } + ptr2--;/* Update pointer to first sample of next column (lag) in X */ + } + } + *rshifts = rshifts_local; +} + diff --git a/drivers/opus/silk/fixed/encode_frame_FIX.c b/drivers/opus/silk/fixed/encode_frame_FIX.c new file mode 100644 index 0000000000..2d80ca3583 --- /dev/null +++ b/drivers/opus/silk/fixed/encode_frame_FIX.c @@ -0,0 +1,385 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. 
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "main_FIX.h" +#include "stack_alloc.h" +#include "tuning_parameters.h" + +/* Low Bitrate Redundancy (LBRR) encoding. 
Reuse all parameters but encode with lower bitrate */ +static OPUS_INLINE void silk_LBRR_encode_FIX( + silk_encoder_state_FIX *psEnc, /* I/O Pointer to Silk FIX encoder state */ + silk_encoder_control_FIX *psEncCtrl, /* I/O Pointer to Silk FIX encoder control struct */ + const opus_int32 xfw_Q3[], /* I Input signal */ + opus_int condCoding /* I The type of conditional coding used so far for this frame */ +); + +void silk_encode_do_VAD_FIX( + silk_encoder_state_FIX *psEnc /* I/O Pointer to Silk FIX encoder state */ +) +{ + /****************************/ + /* Voice Activity Detection: silk_VAD_GetSA_Q8() is run on inputBuf + 1; the speech_activity_Q8 field tested below is presumably updated by it (callee not visible here) */ + /****************************/ + silk_VAD_GetSA_Q8( &psEnc->sCmn, psEnc->sCmn.inputBuf + 1 ); + + /**************************************************/ + /* Convert speech activity into VAD and DTX flags: below the DTX threshold the frame is marked TYPE_NO_VOICE_ACTIVITY with VAD flag 0 and noSpeechCounter is incremented; otherwise the counter is reset and the frame is marked TYPE_UNVOICED with VAD flag 1 */ + /**************************************************/ + if( psEnc->sCmn.speech_activity_Q8 < SILK_FIX_CONST( SPEECH_ACTIVITY_DTX_THRES, 8 ) ) { + psEnc->sCmn.indices.signalType = TYPE_NO_VOICE_ACTIVITY; + psEnc->sCmn.noSpeechCounter++; + if( psEnc->sCmn.noSpeechCounter < NB_SPEECH_FRAMES_BEFORE_DTX ) { + psEnc->sCmn.inDTX = 0; + } else if( psEnc->sCmn.noSpeechCounter > MAX_CONSECUTIVE_DTX + NB_SPEECH_FRAMES_BEFORE_DTX ) { + psEnc->sCmn.noSpeechCounter = NB_SPEECH_FRAMES_BEFORE_DTX; + psEnc->sCmn.inDTX = 0; + } + psEnc->sCmn.VAD_flags[ psEnc->sCmn.nFramesEncoded ] = 0; + } else { + psEnc->sCmn.noSpeechCounter = 0; + psEnc->sCmn.inDTX = 0; + psEnc->sCmn.indices.signalType = TYPE_UNVOICED; + psEnc->sCmn.VAD_flags[ psEnc->sCmn.nFramesEncoded ] = 1; + } +} + +/****************/ +/* Encode frame: runs pitch, noise-shape, prediction and gain analysis, then repeats noise-shaping quantization and entropy coding with adjusted gains (the loop ends at the latest when iter reaches maxIter) to steer the bit count towards maxBits. With prefillFlag set only the input buffers are updated and *pnBytesOut is 0. Returns ret, which is initialised to 0 and not modified in this function. */ +/****************/ +opus_int silk_encode_frame_FIX( + silk_encoder_state_FIX *psEnc, /* I/O Pointer to Silk FIX encoder state */ + opus_int32 *pnBytesOut, /* O Pointer to number of payload bytes; */ + ec_enc *psRangeEnc, /* I/O compressor data structure */ + opus_int condCoding, /* I The type of conditional coding to use */ + opus_int maxBits, /* I If > 0: maximum number of output bits */ + 
opus_int useCBR /* I Flag to force constant-bitrate operation */ +) +{ + silk_encoder_control_FIX sEncCtrl; + opus_int i, iter, maxIter, found_upper, found_lower, ret = 0; + opus_int16 *x_frame; + ec_enc sRangeEnc_copy, sRangeEnc_copy2; + silk_nsq_state sNSQ_copy, sNSQ_copy2; + opus_int32 seed_copy, nBits, nBits_lower, nBits_upper, gainMult_lower, gainMult_upper; + opus_int32 gainsID, gainsID_lower, gainsID_upper; + opus_int16 gainMult_Q8; + opus_int16 ec_prevLagIndex_copy; + opus_int ec_prevSignalType_copy; + opus_int8 LastGainIndex_copy2; + SAVE_STACK; + + /* This is totally unnecessary but many compilers (including gcc) are too dumb to realise it */ + LastGainIndex_copy2 = nBits_lower = nBits_upper = gainMult_lower = gainMult_upper = 0; + + psEnc->sCmn.indices.Seed = psEnc->sCmn.frameCounter++ & 3; + + /**************************************************************/ + /* Set up Input Pointers, and insert frame in input buffer */ + /*************************************************************/ + /* start of frame to encode */ + x_frame = psEnc->x_buf + psEnc->sCmn.ltp_mem_length; + + /***************************************/ + /* Ensure smooth bandwidth transitions */ + /***************************************/ + silk_LP_variable_cutoff( &psEnc->sCmn.sLP, psEnc->sCmn.inputBuf + 1, psEnc->sCmn.frame_length ); + + /*******************************************/ + /* Copy new frame to front of input buffer */ + /*******************************************/ + silk_memcpy( x_frame + LA_SHAPE_MS * psEnc->sCmn.fs_kHz, psEnc->sCmn.inputBuf + 1, psEnc->sCmn.frame_length * sizeof( opus_int16 ) ); + + if( !psEnc->sCmn.prefillFlag ) { + VARDECL( opus_int32, xfw_Q3 ); + VARDECL( opus_int16, res_pitch ); + VARDECL( opus_uint8, ec_buf_copy ); + opus_int16 *res_pitch_frame; + + ALLOC( res_pitch, + psEnc->sCmn.la_pitch + psEnc->sCmn.frame_length + + psEnc->sCmn.ltp_mem_length, opus_int16 ); + /* start of pitch LPC residual frame */ + res_pitch_frame = res_pitch + 
psEnc->sCmn.ltp_mem_length; + + /*****************************************/ + /* Find pitch lags, initial LPC analysis */ + /*****************************************/ + silk_find_pitch_lags_FIX( psEnc, &sEncCtrl, res_pitch, x_frame, psEnc->sCmn.arch ); + + /************************/ + /* Noise shape analysis */ + /************************/ + silk_noise_shape_analysis_FIX( psEnc, &sEncCtrl, res_pitch_frame, x_frame, psEnc->sCmn.arch ); + + /***************************************************/ + /* Find linear prediction coefficients (LPC + LTP) */ + /***************************************************/ + silk_find_pred_coefs_FIX( psEnc, &sEncCtrl, res_pitch, x_frame, condCoding ); + + /****************************************/ + /* Process gains */ + /****************************************/ + silk_process_gains_FIX( psEnc, &sEncCtrl, condCoding ); + + /*****************************************/ + /* Prefiltering for noise shaper */ + /*****************************************/ + ALLOC( xfw_Q3, psEnc->sCmn.frame_length, opus_int32 ); + silk_prefilter_FIX( psEnc, &sEncCtrl, xfw_Q3, x_frame ); + + /****************************************/ + /* Low Bitrate Redundant Encoding */ + /****************************************/ + silk_LBRR_encode_FIX( psEnc, &sEncCtrl, xfw_Q3, condCoding ); + + /* Loop over quantizer and entropy coding to control bitrate */ + maxIter = 6; + gainMult_Q8 = SILK_FIX_CONST( 1, 8 ); + found_lower = 0; + found_upper = 0; + gainsID = silk_gains_ID( psEnc->sCmn.indices.GainsIndices, psEnc->sCmn.nb_subfr ); + gainsID_lower = -1; + gainsID_upper = -1; + /* Copy part of the input state */ + silk_memcpy( &sRangeEnc_copy, psRangeEnc, sizeof( ec_enc ) ); + silk_memcpy( &sNSQ_copy, &psEnc->sCmn.sNSQ, sizeof( silk_nsq_state ) ); + seed_copy = psEnc->sCmn.indices.Seed; + ec_prevLagIndex_copy = psEnc->sCmn.ec_prevLagIndex; + ec_prevSignalType_copy = psEnc->sCmn.ec_prevSignalType; + ALLOC( ec_buf_copy, 1275, opus_uint8 ); + for( iter = 0; ; iter++ ) { + if( 
gainsID == gainsID_lower ) { + nBits = nBits_lower; + } else if( gainsID == gainsID_upper ) { + nBits = nBits_upper; + } else { + /* Restore part of the input state */ + if( iter > 0 ) { + silk_memcpy( psRangeEnc, &sRangeEnc_copy, sizeof( ec_enc ) ); + silk_memcpy( &psEnc->sCmn.sNSQ, &sNSQ_copy, sizeof( silk_nsq_state ) ); + psEnc->sCmn.indices.Seed = seed_copy; + psEnc->sCmn.ec_prevLagIndex = ec_prevLagIndex_copy; + psEnc->sCmn.ec_prevSignalType = ec_prevSignalType_copy; + } + + /*****************************************/ + /* Noise shaping quantization */ + /*****************************************/ + if( psEnc->sCmn.nStatesDelayedDecision > 1 || psEnc->sCmn.warping_Q16 > 0 ) { + silk_NSQ_del_dec( &psEnc->sCmn, &psEnc->sCmn.sNSQ, &psEnc->sCmn.indices, xfw_Q3, psEnc->sCmn.pulses, + sEncCtrl.PredCoef_Q12[ 0 ], sEncCtrl.LTPCoef_Q14, sEncCtrl.AR2_Q13, sEncCtrl.HarmShapeGain_Q14, + sEncCtrl.Tilt_Q14, sEncCtrl.LF_shp_Q14, sEncCtrl.Gains_Q16, sEncCtrl.pitchL, sEncCtrl.Lambda_Q10, sEncCtrl.LTP_scale_Q14 ); + } else { + silk_NSQ( &psEnc->sCmn, &psEnc->sCmn.sNSQ, &psEnc->sCmn.indices, xfw_Q3, psEnc->sCmn.pulses, + sEncCtrl.PredCoef_Q12[ 0 ], sEncCtrl.LTPCoef_Q14, sEncCtrl.AR2_Q13, sEncCtrl.HarmShapeGain_Q14, + sEncCtrl.Tilt_Q14, sEncCtrl.LF_shp_Q14, sEncCtrl.Gains_Q16, sEncCtrl.pitchL, sEncCtrl.Lambda_Q10, sEncCtrl.LTP_scale_Q14 ); + } + + /****************************************/ + /* Encode Parameters */ + /****************************************/ + silk_encode_indices( &psEnc->sCmn, psRangeEnc, psEnc->sCmn.nFramesEncoded, 0, condCoding ); + + /****************************************/ + /* Encode Excitation Signal */ + /****************************************/ + silk_encode_pulses( psRangeEnc, psEnc->sCmn.indices.signalType, psEnc->sCmn.indices.quantOffsetType, + psEnc->sCmn.pulses, psEnc->sCmn.frame_length ); + + nBits = ec_tell( psRangeEnc ); + + if( useCBR == 0 && iter == 0 && nBits <= maxBits ) { + break; + } + } + + if( iter == maxIter ) { + if( found_lower && 
( gainsID == gainsID_lower || nBits > maxBits ) ) { + /* Restore output state from earlier iteration that did meet the bitrate budget */ + silk_memcpy( psRangeEnc, &sRangeEnc_copy2, sizeof( ec_enc ) ); + silk_assert( sRangeEnc_copy2.offs <= 1275 ); + silk_memcpy( psRangeEnc->buf, ec_buf_copy, sRangeEnc_copy2.offs ); + silk_memcpy( &psEnc->sCmn.sNSQ, &sNSQ_copy2, sizeof( silk_nsq_state ) ); + psEnc->sShape.LastGainIndex = LastGainIndex_copy2; + } + break; + } + + if( nBits > maxBits ) { + if( found_lower == 0 && iter >= 2 ) { + /* Adjust the quantizer's rate/distortion tradeoff and discard previous "upper" results */ + sEncCtrl.Lambda_Q10 = silk_ADD_RSHIFT32( sEncCtrl.Lambda_Q10, sEncCtrl.Lambda_Q10, 1 ); + found_upper = 0; + gainsID_upper = -1; + } else { + found_upper = 1; + nBits_upper = nBits; + gainMult_upper = gainMult_Q8; + gainsID_upper = gainsID; + } + } else if( nBits < maxBits - 5 ) { + found_lower = 1; + nBits_lower = nBits; + gainMult_lower = gainMult_Q8; + if( gainsID != gainsID_lower ) { + gainsID_lower = gainsID; + /* Copy part of the output state */ + silk_memcpy( &sRangeEnc_copy2, psRangeEnc, sizeof( ec_enc ) ); + silk_assert( psRangeEnc->offs <= 1275 ); + silk_memcpy( ec_buf_copy, psRangeEnc->buf, psRangeEnc->offs ); + silk_memcpy( &sNSQ_copy2, &psEnc->sCmn.sNSQ, sizeof( silk_nsq_state ) ); + LastGainIndex_copy2 = psEnc->sShape.LastGainIndex; + } + } else { + /* Within 5 bits of budget: close enough */ + break; + } + + if( ( found_lower & found_upper ) == 0 ) { + /* Adjust gain according to high-rate rate/distortion curve */ + opus_int32 gain_factor_Q16; + gain_factor_Q16 = silk_log2lin( silk_LSHIFT( nBits - maxBits, 7 ) / psEnc->sCmn.frame_length + SILK_FIX_CONST( 16, 7 ) ); + gain_factor_Q16 = silk_min_32( gain_factor_Q16, SILK_FIX_CONST( 2, 16 ) ); + if( nBits > maxBits ) { + gain_factor_Q16 = silk_max_32( gain_factor_Q16, SILK_FIX_CONST( 1.3, 16 ) ); + } + gainMult_Q8 = silk_SMULWB( gain_factor_Q16, gainMult_Q8 ); + } else { + /* Adjust gain 
by interpolating */ + gainMult_Q8 = gainMult_lower + silk_DIV32_16( silk_MUL( gainMult_upper - gainMult_lower, maxBits - nBits_lower ), nBits_upper - nBits_lower ); + /* New gain multiplier must be between 25% and 75% of old range (note that gainMult_upper < gainMult_lower) */ + if( gainMult_Q8 > silk_ADD_RSHIFT32( gainMult_lower, gainMult_upper - gainMult_lower, 2 ) ) { + gainMult_Q8 = silk_ADD_RSHIFT32( gainMult_lower, gainMult_upper - gainMult_lower, 2 ); + } else + if( gainMult_Q8 < silk_SUB_RSHIFT32( gainMult_upper, gainMult_upper - gainMult_lower, 2 ) ) { + gainMult_Q8 = silk_SUB_RSHIFT32( gainMult_upper, gainMult_upper - gainMult_lower, 2 ); + } + } + + for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) { + sEncCtrl.Gains_Q16[ i ] = silk_LSHIFT_SAT32( silk_SMULWB( sEncCtrl.GainsUnq_Q16[ i ], gainMult_Q8 ), 8 ); + } + + /* Quantize gains */ + psEnc->sShape.LastGainIndex = sEncCtrl.lastGainIndexPrev; + silk_gains_quant( psEnc->sCmn.indices.GainsIndices, sEncCtrl.Gains_Q16, + &psEnc->sShape.LastGainIndex, condCoding == CODE_CONDITIONALLY, psEnc->sCmn.nb_subfr ); + + /* Unique identifier of gains vector */ + gainsID = silk_gains_ID( psEnc->sCmn.indices.GainsIndices, psEnc->sCmn.nb_subfr ); + } + } + + /* Update input buffer */ + silk_memmove( psEnc->x_buf, &psEnc->x_buf[ psEnc->sCmn.frame_length ], + ( psEnc->sCmn.ltp_mem_length + LA_SHAPE_MS * psEnc->sCmn.fs_kHz ) * sizeof( opus_int16 ) ); + + /* Exit without entropy coding */ + if( psEnc->sCmn.prefillFlag ) { + /* No payload */ + *pnBytesOut = 0; + RESTORE_STACK; + return ret; + } + + /* Parameters needed for next frame */ + psEnc->sCmn.prevLag = sEncCtrl.pitchL[ psEnc->sCmn.nb_subfr - 1 ]; + psEnc->sCmn.prevSignalType = psEnc->sCmn.indices.signalType; + + /****************************************/ + /* Finalize payload */ + /****************************************/ + psEnc->sCmn.first_frame_after_reset = 0; + /* Payload size */ + *pnBytesOut = silk_RSHIFT( ec_tell( psRangeEnc ) + 7, 3 ); + + RESTORE_STACK; + return 
ret; +} + +/* Low-Bitrate Redundancy (LBRR) encoding. Reuse all parameters but encode excitation at lower bitrate. Runs only when LBRR_enabled is set and speech_activity_Q8 exceeds the LBRR threshold; quantizes with a local copy of the NSQ state and a copy of the side-info indices stored in indices_LBRR, and restores psEncCtrl->Gains_Q16 before returning. */ +static OPUS_INLINE void silk_LBRR_encode_FIX( + silk_encoder_state_FIX *psEnc, /* I/O Pointer to Silk FIX encoder state */ + silk_encoder_control_FIX *psEncCtrl, /* I/O Pointer to Silk FIX encoder control struct */ + const opus_int32 xfw_Q3[], /* I Input signal */ + opus_int condCoding /* I The type of conditional coding used so far for this frame */ +) +{ + opus_int32 TempGains_Q16[ MAX_NB_SUBFR ]; + SideInfoIndices *psIndices_LBRR = &psEnc->sCmn.indices_LBRR[ psEnc->sCmn.nFramesEncoded ]; + silk_nsq_state sNSQ_LBRR; + + /*******************************************/ + /* Control use of inband LBRR */ + /*******************************************/ + if( psEnc->sCmn.LBRR_enabled && psEnc->sCmn.speech_activity_Q8 > SILK_FIX_CONST( LBRR_SPEECH_ACTIVITY_THRES, 8 ) ) { + psEnc->sCmn.LBRR_flags[ psEnc->sCmn.nFramesEncoded ] = 1; + + /* Copy noise shaping quantizer state and quantization indices from regular encoding */ + silk_memcpy( &sNSQ_LBRR, &psEnc->sCmn.sNSQ, sizeof( silk_nsq_state ) ); + silk_memcpy( psIndices_LBRR, &psEnc->sCmn.indices, sizeof( SideInfoIndices ) ); + + /* Save original gains */ + silk_memcpy( TempGains_Q16, psEncCtrl->Gains_Q16, psEnc->sCmn.nb_subfr * sizeof( opus_int32 ) ); + + if( psEnc->sCmn.nFramesEncoded == 0 || psEnc->sCmn.LBRR_flags[ psEnc->sCmn.nFramesEncoded - 1 ] == 0 ) { + /* First frame in packet or previous frame not LBRR coded */ + psEnc->sCmn.LBRRprevLastGainIndex = psEnc->sShape.LastGainIndex; + + /* Increase Gains to get target LBRR rate */ + psIndices_LBRR->GainsIndices[ 0 ] = psIndices_LBRR->GainsIndices[ 0 ] + psEnc->sCmn.LBRR_GainIncreases; + psIndices_LBRR->GainsIndices[ 0 ] = silk_min_int( psIndices_LBRR->GainsIndices[ 0 ], N_LEVELS_QGAIN - 1 ); + } + + /* Decode to get gains in sync with decoder */ + /* Overwrite unquantized gains with quantized gains */ + silk_gains_dequant( psEncCtrl->Gains_Q16, 
psIndices_LBRR->GainsIndices, + &psEnc->sCmn.LBRRprevLastGainIndex, condCoding == CODE_CONDITIONALLY, psEnc->sCmn.nb_subfr ); + + /*****************************************/ + /* Noise shaping quantization */ + /*****************************************/ + if( psEnc->sCmn.nStatesDelayedDecision > 1 || psEnc->sCmn.warping_Q16 > 0 ) { + silk_NSQ_del_dec( &psEnc->sCmn, &sNSQ_LBRR, psIndices_LBRR, xfw_Q3, + psEnc->sCmn.pulses_LBRR[ psEnc->sCmn.nFramesEncoded ], psEncCtrl->PredCoef_Q12[ 0 ], psEncCtrl->LTPCoef_Q14, + psEncCtrl->AR2_Q13, psEncCtrl->HarmShapeGain_Q14, psEncCtrl->Tilt_Q14, psEncCtrl->LF_shp_Q14, + psEncCtrl->Gains_Q16, psEncCtrl->pitchL, psEncCtrl->Lambda_Q10, psEncCtrl->LTP_scale_Q14 ); + } else { + silk_NSQ( &psEnc->sCmn, &sNSQ_LBRR, psIndices_LBRR, xfw_Q3, + psEnc->sCmn.pulses_LBRR[ psEnc->sCmn.nFramesEncoded ], psEncCtrl->PredCoef_Q12[ 0 ], psEncCtrl->LTPCoef_Q14, + psEncCtrl->AR2_Q13, psEncCtrl->HarmShapeGain_Q14, psEncCtrl->Tilt_Q14, psEncCtrl->LF_shp_Q14, + psEncCtrl->Gains_Q16, psEncCtrl->pitchL, psEncCtrl->Lambda_Q10, psEncCtrl->LTP_scale_Q14 ); + } + + /* Restore original gains */ + silk_memcpy( psEncCtrl->Gains_Q16, TempGains_Q16, psEnc->sCmn.nb_subfr * sizeof( opus_int32 ) ); + } +} diff --git a/drivers/opus/silk/fixed/find_LPC_FIX.c b/drivers/opus/silk/fixed/find_LPC_FIX.c new file mode 100644 index 0000000000..a46cdb7515 --- /dev/null +++ b/drivers/opus/silk/fixed/find_LPC_FIX.c @@ -0,0 +1,151 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. 
+- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "main_FIX.h" +#include "stack_alloc.h" +#include "tuning_parameters.h" + +/* Finds LPC vector from correlations, and converts to NLSF */ +void silk_find_LPC_FIX( + silk_encoder_state *psEncC, /* I/O Encoder state */ + opus_int16 NLSF_Q15[], /* O NLSFs */ + const opus_int16 x[], /* I Input signal */ + const opus_int32 minInvGain_Q30 /* I Inverse of max prediction gain */ +) +{ + opus_int k, subfr_length; + opus_int32 a_Q16[ MAX_LPC_ORDER ]; + opus_int isInterpLower, shift; + opus_int32 res_nrg0, res_nrg1; + opus_int rshift0, rshift1; + + /* Used only for LSF interpolation */ + opus_int32 a_tmp_Q16[ MAX_LPC_ORDER ], res_nrg_interp, res_nrg, res_tmp_nrg; + opus_int res_nrg_interp_Q, res_nrg_Q, res_tmp_nrg_Q; + opus_int16 a_tmp_Q12[ MAX_LPC_ORDER ]; + opus_int16 NLSF0_Q15[ MAX_LPC_ORDER ]; + SAVE_STACK; + + subfr_length = psEncC->subfr_length + psEncC->predictLPCOrder; + + /* Default: no interpolation */ + psEncC->indices.NLSFInterpCoef_Q2 = 4; + + /* Burg AR analysis for the full frame */ + silk_burg_modified( &res_nrg, &res_nrg_Q, a_Q16, x, minInvGain_Q30, subfr_length, psEncC->nb_subfr, psEncC->predictLPCOrder, psEncC->arch ); + + if( psEncC->useInterpolatedNLSFs && !psEncC->first_frame_after_reset && psEncC->nb_subfr == MAX_NB_SUBFR ) { + VARDECL( opus_int16, LPC_res ); + + /* Optimal solution for last 10 ms */ + silk_burg_modified( &res_tmp_nrg, &res_tmp_nrg_Q, a_tmp_Q16, x + 2 * subfr_length, minInvGain_Q30, subfr_length, 2, psEncC->predictLPCOrder, psEncC->arch ); + + /* subtract residual energy here, as that's easier than adding it to the */ + /* residual energy of the first 10 ms in each iteration of the search below */ + shift = res_tmp_nrg_Q - res_nrg_Q; + if( shift >= 0 ) { + if( shift < 32 ) { + res_nrg = res_nrg - silk_RSHIFT( res_tmp_nrg, shift ); + } + } else { + silk_assert( shift > -32 ); + res_nrg = 
silk_RSHIFT( res_nrg, -shift ) - res_tmp_nrg; + res_nrg_Q = res_tmp_nrg_Q; + } + + /* Convert to NLSFs */ + silk_A2NLSF( NLSF_Q15, a_tmp_Q16, psEncC->predictLPCOrder ); + + ALLOC( LPC_res, 2 * subfr_length, opus_int16 ); + + /* Search over interpolation indices to find the one with lowest residual energy */ + for( k = 3; k >= 0; k-- ) { + /* Interpolate NLSFs for first half */ + silk_interpolate( NLSF0_Q15, psEncC->prev_NLSFq_Q15, NLSF_Q15, k, psEncC->predictLPCOrder ); + + /* Convert to LPC for residual energy evaluation */ + silk_NLSF2A( a_tmp_Q12, NLSF0_Q15, psEncC->predictLPCOrder ); + + /* Calculate residual energy with NLSF interpolation */ + silk_LPC_analysis_filter( LPC_res, x, a_tmp_Q12, 2 * subfr_length, psEncC->predictLPCOrder ); + + silk_sum_sqr_shift( &res_nrg0, &rshift0, LPC_res + psEncC->predictLPCOrder, subfr_length - psEncC->predictLPCOrder ); + silk_sum_sqr_shift( &res_nrg1, &rshift1, LPC_res + psEncC->predictLPCOrder + subfr_length, subfr_length - psEncC->predictLPCOrder ); + + /* Add subframe energies from first half frame */ + shift = rshift0 - rshift1; + if( shift >= 0 ) { + res_nrg1 = silk_RSHIFT( res_nrg1, shift ); + res_nrg_interp_Q = -rshift0; + } else { + res_nrg0 = silk_RSHIFT( res_nrg0, -shift ); + res_nrg_interp_Q = -rshift1; + } + res_nrg_interp = silk_ADD32( res_nrg0, res_nrg1 ); + + /* Compare with first half energy without NLSF interpolation, or best interpolated value so far */ + shift = res_nrg_interp_Q - res_nrg_Q; + if( shift >= 0 ) { + if( silk_RSHIFT( res_nrg_interp, shift ) < res_nrg ) { + isInterpLower = silk_TRUE; + } else { + isInterpLower = silk_FALSE; + } + } else { + if( -shift < 32 ) { + if( res_nrg_interp < silk_RSHIFT( res_nrg, -shift ) ) { + isInterpLower = silk_TRUE; + } else { + isInterpLower = silk_FALSE; + } + } else { + isInterpLower = silk_FALSE; + } + } + + /* Determine whether current interpolated NLSFs are best so far */ + if( isInterpLower == silk_TRUE ) { + /* Interpolation has lower residual energy */ + 
res_nrg = res_nrg_interp; + res_nrg_Q = res_nrg_interp_Q; + psEncC->indices.NLSFInterpCoef_Q2 = (opus_int8)k; + } + } + } + + if( psEncC->indices.NLSFInterpCoef_Q2 == 4 ) { + /* NLSF interpolation is currently inactive, calculate NLSFs from full frame AR coefficients */ + silk_A2NLSF( NLSF_Q15, a_Q16, psEncC->predictLPCOrder ); + } + + silk_assert( psEncC->indices.NLSFInterpCoef_Q2 == 4 || ( psEncC->useInterpolatedNLSFs && !psEncC->first_frame_after_reset && psEncC->nb_subfr == MAX_NB_SUBFR ) ); + RESTORE_STACK; +} diff --git a/drivers/opus/silk/fixed/find_LTP_FIX.c b/drivers/opus/silk/fixed/find_LTP_FIX.c new file mode 100644 index 0000000000..a1d152eee4 --- /dev/null +++ b/drivers/opus/silk/fixed/find_LTP_FIX.c @@ -0,0 +1,244 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "main_FIX.h" +#include "tuning_parameters.h" + +/* Head room for correlations */ +#define LTP_CORRS_HEAD_ROOM 2 + +void silk_fit_LTP( + opus_int32 LTP_coefs_Q16[ LTP_ORDER ], + opus_int16 LTP_coefs_Q14[ LTP_ORDER ] +); + +void silk_find_LTP_FIX( + opus_int16 b_Q14[ MAX_NB_SUBFR * LTP_ORDER ], /* O LTP coefs */ + opus_int32 WLTP[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* O Weight for LTP quantization */ + opus_int *LTPredCodGain_Q7, /* O LTP coding gain */ + const opus_int16 r_lpc[], /* I residual signal after LPC signal + state for first 10 ms */ + const opus_int lag[ MAX_NB_SUBFR ], /* I LTP lags */ + const opus_int32 Wght_Q15[ MAX_NB_SUBFR ], /* I weights */ + const opus_int subfr_length, /* I subframe length */ + const opus_int nb_subfr, /* I number of subframes */ + const opus_int mem_offset, /* I number of samples in LTP memory */ + opus_int corr_rshifts[ MAX_NB_SUBFR ] /* O right shifts applied to correlations */ +) +{ + opus_int i, k, lshift; + const opus_int16 *r_ptr, *lag_ptr; + opus_int16 *b_Q14_ptr; + + opus_int32 regu; + opus_int32 *WLTP_ptr; + opus_int32 b_Q16[ LTP_ORDER ], delta_b_Q14[ LTP_ORDER ], d_Q14[ MAX_NB_SUBFR ], nrg[ MAX_NB_SUBFR ], g_Q26; + opus_int32 w[ MAX_NB_SUBFR ], WLTP_max, max_abs_d_Q14, max_w_bits; + + opus_int32 temp32, denom32; + opus_int extra_shifts; + opus_int rr_shifts, 
maxRshifts, maxRshifts_wxtra, LZs; + opus_int32 LPC_res_nrg, LPC_LTP_res_nrg, div_Q16; + opus_int32 Rr[ LTP_ORDER ], rr[ MAX_NB_SUBFR ]; + opus_int32 wd, m_Q12; + + b_Q14_ptr = b_Q14; + WLTP_ptr = WLTP; + r_ptr = &r_lpc[ mem_offset ]; + for( k = 0; k < nb_subfr; k++ ) { + lag_ptr = r_ptr - ( lag[ k ] + LTP_ORDER / 2 ); + + silk_sum_sqr_shift( &rr[ k ], &rr_shifts, r_ptr, subfr_length ); /* rr[ k ] in Q( -rr_shifts ) */ + + /* Assure headroom */ + LZs = silk_CLZ32( rr[k] ); + if( LZs < LTP_CORRS_HEAD_ROOM ) { + rr[ k ] = silk_RSHIFT_ROUND( rr[ k ], LTP_CORRS_HEAD_ROOM - LZs ); + rr_shifts += ( LTP_CORRS_HEAD_ROOM - LZs ); + } + corr_rshifts[ k ] = rr_shifts; + silk_corrMatrix_FIX( lag_ptr, subfr_length, LTP_ORDER, LTP_CORRS_HEAD_ROOM, WLTP_ptr, &corr_rshifts[ k ] ); /* WLTP_fix_ptr in Q( -corr_rshifts[ k ] ) */ + + /* The correlation vector always has lower max abs value than rr and/or RR so head room is assured */ + silk_corrVector_FIX( lag_ptr, r_ptr, subfr_length, LTP_ORDER, Rr, corr_rshifts[ k ] ); /* Rr_fix_ptr in Q( -corr_rshifts[ k ] ) */ + if( corr_rshifts[ k ] > rr_shifts ) { + rr[ k ] = silk_RSHIFT( rr[ k ], corr_rshifts[ k ] - rr_shifts ); /* rr[ k ] in Q( -corr_rshifts[ k ] ) */ + } + silk_assert( rr[ k ] >= 0 ); + + regu = 1; + regu = silk_SMLAWB( regu, rr[ k ], SILK_FIX_CONST( LTP_DAMPING/3, 16 ) ); + regu = silk_SMLAWB( regu, matrix_ptr( WLTP_ptr, 0, 0, LTP_ORDER ), SILK_FIX_CONST( LTP_DAMPING/3, 16 ) ); + regu = silk_SMLAWB( regu, matrix_ptr( WLTP_ptr, LTP_ORDER-1, LTP_ORDER-1, LTP_ORDER ), SILK_FIX_CONST( LTP_DAMPING/3, 16 ) ); + silk_regularize_correlations_FIX( WLTP_ptr, &rr[k], regu, LTP_ORDER ); + + silk_solve_LDL_FIX( WLTP_ptr, LTP_ORDER, Rr, b_Q16 ); /* WLTP_fix_ptr and Rr_fix_ptr both in Q(-corr_rshifts[k]) */ + + /* Limit and store in Q14 */ + silk_fit_LTP( b_Q16, b_Q14_ptr ); + + /* Calculate residual energy */ + nrg[ k ] = silk_residual_energy16_covar_FIX( b_Q14_ptr, WLTP_ptr, Rr, rr[ k ], LTP_ORDER, 14 ); /* nrg_fix in Q( -corr_rshifts[ k 
] ) */ + + /* temp = Wght[ k ] / ( nrg[ k ] * Wght[ k ] + 0.01f * subfr_length ); */ + extra_shifts = silk_min_int( corr_rshifts[ k ], LTP_CORRS_HEAD_ROOM ); + denom32 = silk_LSHIFT_SAT32( silk_SMULWB( nrg[ k ], Wght_Q15[ k ] ), 1 + extra_shifts ) + /* Q( -corr_rshifts[ k ] + extra_shifts ) */ + silk_RSHIFT( silk_SMULWB( (opus_int32)subfr_length, 655 ), corr_rshifts[ k ] - extra_shifts ); /* Q( -corr_rshifts[ k ] + extra_shifts ) */ + denom32 = silk_max( denom32, 1 ); + silk_assert( ((opus_int64)Wght_Q15[ k ] << 16 ) < silk_int32_MAX ); /* Wght always < 0.5 in Q0 */ + temp32 = silk_DIV32( silk_LSHIFT( (opus_int32)Wght_Q15[ k ], 16 ), denom32 ); /* Q( 15 + 16 + corr_rshifts[k] - extra_shifts ) */ + temp32 = silk_RSHIFT( temp32, 31 + corr_rshifts[ k ] - extra_shifts - 26 ); /* Q26 */ + + /* Limit temp such that the below scaling never wraps around */ + WLTP_max = 0; + for( i = 0; i < LTP_ORDER * LTP_ORDER; i++ ) { + WLTP_max = silk_max( WLTP_ptr[ i ], WLTP_max ); + } + lshift = silk_CLZ32( WLTP_max ) - 1 - 3; /* keep 3 bits free for vq_nearest_neighbor_fix */ + silk_assert( 26 - 18 + lshift >= 0 ); + if( 26 - 18 + lshift < 31 ) { + temp32 = silk_min_32( temp32, silk_LSHIFT( (opus_int32)1, 26 - 18 + lshift ) ); + } + + silk_scale_vector32_Q26_lshift_18( WLTP_ptr, temp32, LTP_ORDER * LTP_ORDER ); /* WLTP_ptr in Q( 18 - corr_rshifts[ k ] ) */ + + w[ k ] = matrix_ptr( WLTP_ptr, LTP_ORDER/2, LTP_ORDER/2, LTP_ORDER ); /* w in Q( 18 - corr_rshifts[ k ] ) */ + silk_assert( w[k] >= 0 ); + + r_ptr += subfr_length; + b_Q14_ptr += LTP_ORDER; + WLTP_ptr += LTP_ORDER * LTP_ORDER; + } + + maxRshifts = 0; + for( k = 0; k < nb_subfr; k++ ) { + maxRshifts = silk_max_int( corr_rshifts[ k ], maxRshifts ); + } + + /* Compute LTP coding gain */ + if( LTPredCodGain_Q7 != NULL ) { + LPC_LTP_res_nrg = 0; + LPC_res_nrg = 0; + silk_assert( LTP_CORRS_HEAD_ROOM >= 2 ); /* Check that no overflow will happen when adding */ + for( k = 0; k < nb_subfr; k++ ) { + LPC_res_nrg = silk_ADD32( 
LPC_res_nrg, silk_RSHIFT( silk_ADD32( silk_SMULWB( rr[ k ], Wght_Q15[ k ] ), 1 ), 1 + ( maxRshifts - corr_rshifts[ k ] ) ) ); /* Q( -maxRshifts ) */ + LPC_LTP_res_nrg = silk_ADD32( LPC_LTP_res_nrg, silk_RSHIFT( silk_ADD32( silk_SMULWB( nrg[ k ], Wght_Q15[ k ] ), 1 ), 1 + ( maxRshifts - corr_rshifts[ k ] ) ) ); /* Q( -maxRshifts ) */ + } + LPC_LTP_res_nrg = silk_max( LPC_LTP_res_nrg, 1 ); /* avoid division by zero */ + + div_Q16 = silk_DIV32_varQ( LPC_res_nrg, LPC_LTP_res_nrg, 16 ); + *LTPredCodGain_Q7 = ( opus_int )silk_SMULBB( 3, silk_lin2log( div_Q16 ) - ( 16 << 7 ) ); + + silk_assert( *LTPredCodGain_Q7 == ( opus_int )silk_SAT16( silk_MUL( 3, silk_lin2log( div_Q16 ) - ( 16 << 7 ) ) ) ); + } + + /* smoothing */ + /* d = sum( B, 1 ); */ + b_Q14_ptr = b_Q14; + for( k = 0; k < nb_subfr; k++ ) { + d_Q14[ k ] = 0; + for( i = 0; i < LTP_ORDER; i++ ) { + d_Q14[ k ] += b_Q14_ptr[ i ]; + } + b_Q14_ptr += LTP_ORDER; + } + + /* m = ( w * d' ) / ( sum( w ) + 1e-3 ); */ + + /* Find maximum absolute value of d_Q14 and the bits used by w in Q0 */ + max_abs_d_Q14 = 0; + max_w_bits = 0; + for( k = 0; k < nb_subfr; k++ ) { + max_abs_d_Q14 = silk_max_32( max_abs_d_Q14, silk_abs( d_Q14[ k ] ) ); + /* w[ k ] is in Q( 18 - corr_rshifts[ k ] ) */ + /* Find bits needed in Q( 18 - maxRshifts ) */ + max_w_bits = silk_max_32( max_w_bits, 32 - silk_CLZ32( w[ k ] ) + corr_rshifts[ k ] - maxRshifts ); + } + + /* max_abs_d_Q14 = (5 << 15); worst case, i.e. 
LTP_ORDER * -silk_int16_MIN */ + silk_assert( max_abs_d_Q14 <= ( 5 << 15 ) ); + + /* How many bits is needed for w*d' in Q( 18 - maxRshifts ) in the worst case, of all d_Q14's being equal to max_abs_d_Q14 */ + extra_shifts = max_w_bits + 32 - silk_CLZ32( max_abs_d_Q14 ) - 14; + + /* Subtract what we got available; bits in output var plus maxRshifts */ + extra_shifts -= ( 32 - 1 - 2 + maxRshifts ); /* Keep sign bit free as well as 2 bits for accumulation */ + extra_shifts = silk_max_int( extra_shifts, 0 ); + + maxRshifts_wxtra = maxRshifts + extra_shifts; + + temp32 = silk_RSHIFT( 262, maxRshifts + extra_shifts ) + 1; /* 1e-3f in Q( 18 - (maxRshifts + extra_shifts) ) */ + wd = 0; + for( k = 0; k < nb_subfr; k++ ) { + /* w has at least 2 bits of headroom so no overflow should happen */ + temp32 = silk_ADD32( temp32, silk_RSHIFT( w[ k ], maxRshifts_wxtra - corr_rshifts[ k ] ) ); /* Q( 18 - maxRshifts_wxtra ) */ + wd = silk_ADD32( wd, silk_LSHIFT( silk_SMULWW( silk_RSHIFT( w[ k ], maxRshifts_wxtra - corr_rshifts[ k ] ), d_Q14[ k ] ), 2 ) ); /* Q( 18 - maxRshifts_wxtra ) */ + } + m_Q12 = silk_DIV32_varQ( wd, temp32, 12 ); + + b_Q14_ptr = b_Q14; + for( k = 0; k < nb_subfr; k++ ) { + /* w_fix[ k ] from Q( 18 - corr_rshifts[ k ] ) to Q( 16 ) */ + if( 2 - corr_rshifts[k] > 0 ) { + temp32 = silk_RSHIFT( w[ k ], 2 - corr_rshifts[ k ] ); + } else { + temp32 = silk_LSHIFT_SAT32( w[ k ], corr_rshifts[ k ] - 2 ); + } + + g_Q26 = silk_MUL( + silk_DIV32( + SILK_FIX_CONST( LTP_SMOOTHING, 26 ), + silk_RSHIFT( SILK_FIX_CONST( LTP_SMOOTHING, 26 ), 10 ) + temp32 ), /* Q10 */ + silk_LSHIFT_SAT32( silk_SUB_SAT32( (opus_int32)m_Q12, silk_RSHIFT( d_Q14[ k ], 2 ) ), 4 ) ); /* Q16 */ + + temp32 = 0; + for( i = 0; i < LTP_ORDER; i++ ) { + delta_b_Q14[ i ] = silk_max_16( b_Q14_ptr[ i ], 1638 ); /* 1638_Q14 = 0.1_Q0 */ + temp32 += delta_b_Q14[ i ]; /* Q14 */ + } + temp32 = silk_DIV32( g_Q26, temp32 ); /* Q14 -> Q12 */ + for( i = 0; i < LTP_ORDER; i++ ) { + b_Q14_ptr[ i ] = silk_LIMIT_32( 
(opus_int32)b_Q14_ptr[ i ] + silk_SMULWB( silk_LSHIFT_SAT32( temp32, 4 ), delta_b_Q14[ i ] ), -16000, 28000 ); + } + b_Q14_ptr += LTP_ORDER; + } +} + +void silk_fit_LTP( + opus_int32 LTP_coefs_Q16[ LTP_ORDER ], + opus_int16 LTP_coefs_Q14[ LTP_ORDER ] +) +{ + opus_int i; + + for( i = 0; i < LTP_ORDER; i++ ) { + LTP_coefs_Q14[ i ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( LTP_coefs_Q16[ i ], 2 ) ); + } +} diff --git a/drivers/opus/silk/fixed/find_pitch_lags_FIX.c b/drivers/opus/silk/fixed/find_pitch_lags_FIX.c new file mode 100644 index 0000000000..0598477cd1 --- /dev/null +++ b/drivers/opus/silk/fixed/find_pitch_lags_FIX.c @@ -0,0 +1,145 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "main_FIX.h" +#include "stack_alloc.h" +#include "tuning_parameters.h" + +/* Find pitch lags */ +void silk_find_pitch_lags_FIX( + silk_encoder_state_FIX *psEnc, /* I/O encoder state */ + silk_encoder_control_FIX *psEncCtrl, /* I/O encoder control */ + opus_int16 res[], /* O residual */ + const opus_int16 x[], /* I Speech signal */ + int arch /* I Run-time architecture */ +) +{ + opus_int buf_len, i, scale; + opus_int32 thrhld_Q13, res_nrg; + const opus_int16 *x_buf, *x_buf_ptr; + VARDECL( opus_int16, Wsig ); + opus_int16 *Wsig_ptr; + opus_int32 auto_corr[ MAX_FIND_PITCH_LPC_ORDER + 1 ]; + opus_int16 rc_Q15[ MAX_FIND_PITCH_LPC_ORDER ]; + opus_int32 A_Q24[ MAX_FIND_PITCH_LPC_ORDER ]; + opus_int16 A_Q12[ MAX_FIND_PITCH_LPC_ORDER ]; + SAVE_STACK; + + /******************************************/ + /* Set up buffer lengths etc based on Fs */ + /******************************************/ + buf_len = psEnc->sCmn.la_pitch + psEnc->sCmn.frame_length + psEnc->sCmn.ltp_mem_length; + + /* Safety check */ + silk_assert( buf_len >= psEnc->sCmn.pitch_LPC_win_length ); + + x_buf = x - psEnc->sCmn.ltp_mem_length; + + /*************************************/ + /* Estimate LPC AR coefficients */ + /*************************************/ + + /* Calculate windowed signal */ + + ALLOC( Wsig, psEnc->sCmn.pitch_LPC_win_length, 
opus_int16 ); + + /* First LA_LTP samples */ + x_buf_ptr = x_buf + buf_len - psEnc->sCmn.pitch_LPC_win_length; + Wsig_ptr = Wsig; + silk_apply_sine_window( Wsig_ptr, x_buf_ptr, 1, psEnc->sCmn.la_pitch ); + + /* Middle un - windowed samples */ + Wsig_ptr += psEnc->sCmn.la_pitch; + x_buf_ptr += psEnc->sCmn.la_pitch; + silk_memcpy( Wsig_ptr, x_buf_ptr, ( psEnc->sCmn.pitch_LPC_win_length - silk_LSHIFT( psEnc->sCmn.la_pitch, 1 ) ) * sizeof( opus_int16 ) ); + + /* Last LA_LTP samples */ + Wsig_ptr += psEnc->sCmn.pitch_LPC_win_length - silk_LSHIFT( psEnc->sCmn.la_pitch, 1 ); + x_buf_ptr += psEnc->sCmn.pitch_LPC_win_length - silk_LSHIFT( psEnc->sCmn.la_pitch, 1 ); + silk_apply_sine_window( Wsig_ptr, x_buf_ptr, 2, psEnc->sCmn.la_pitch ); + + /* Calculate autocorrelation sequence */ + silk_autocorr( auto_corr, &scale, Wsig, psEnc->sCmn.pitch_LPC_win_length, psEnc->sCmn.pitchEstimationLPCOrder + 1, arch ); + + /* Add white noise, as fraction of energy */ + auto_corr[ 0 ] = silk_SMLAWB( auto_corr[ 0 ], auto_corr[ 0 ], SILK_FIX_CONST( FIND_PITCH_WHITE_NOISE_FRACTION, 16 ) ) + 1; + + /* Calculate the reflection coefficients using schur */ + res_nrg = silk_schur( rc_Q15, auto_corr, psEnc->sCmn.pitchEstimationLPCOrder ); + + /* Prediction gain */ + psEncCtrl->predGain_Q16 = silk_DIV32_varQ( auto_corr[ 0 ], silk_max_int( res_nrg, 1 ), 16 ); + + /* Convert reflection coefficients to prediction coefficients */ + silk_k2a( A_Q24, rc_Q15, psEnc->sCmn.pitchEstimationLPCOrder ); + + /* Convert From 32 bit Q24 to 16 bit Q12 coefs */ + for( i = 0; i < psEnc->sCmn.pitchEstimationLPCOrder; i++ ) { + A_Q12[ i ] = (opus_int16)silk_SAT16( silk_RSHIFT( A_Q24[ i ], 12 ) ); + } + + /* Do BWE */ + silk_bwexpander( A_Q12, psEnc->sCmn.pitchEstimationLPCOrder, SILK_FIX_CONST( FIND_PITCH_BANDWIDTH_EXPANSION, 16 ) ); + + /*****************************************/ + /* LPC analysis filtering */ + /*****************************************/ + silk_LPC_analysis_filter( res, x_buf, A_Q12, buf_len, 
psEnc->sCmn.pitchEstimationLPCOrder ); + + if( psEnc->sCmn.indices.signalType != TYPE_NO_VOICE_ACTIVITY && psEnc->sCmn.first_frame_after_reset == 0 ) { + /* Threshold for pitch estimator */ + thrhld_Q13 = SILK_FIX_CONST( 0.6, 13 ); + thrhld_Q13 = silk_SMLABB( thrhld_Q13, SILK_FIX_CONST( -0.004, 13 ), psEnc->sCmn.pitchEstimationLPCOrder ); + thrhld_Q13 = silk_SMLAWB( thrhld_Q13, SILK_FIX_CONST( -0.1, 21 ), psEnc->sCmn.speech_activity_Q8 ); + thrhld_Q13 = silk_SMLABB( thrhld_Q13, SILK_FIX_CONST( -0.15, 13 ), silk_RSHIFT( psEnc->sCmn.prevSignalType, 1 ) ); + thrhld_Q13 = silk_SMLAWB( thrhld_Q13, SILK_FIX_CONST( -0.1, 14 ), psEnc->sCmn.input_tilt_Q15 ); + thrhld_Q13 = silk_SAT16( thrhld_Q13 ); + + /*****************************************/ + /* Call pitch estimator */ + /*****************************************/ + if( silk_pitch_analysis_core( res, psEncCtrl->pitchL, &psEnc->sCmn.indices.lagIndex, &psEnc->sCmn.indices.contourIndex, + &psEnc->LTPCorr_Q15, psEnc->sCmn.prevLag, psEnc->sCmn.pitchEstimationThreshold_Q16, + (opus_int)thrhld_Q13, psEnc->sCmn.fs_kHz, psEnc->sCmn.pitchEstimationComplexity, psEnc->sCmn.nb_subfr, + psEnc->sCmn.arch) == 0 ) + { + psEnc->sCmn.indices.signalType = TYPE_VOICED; + } else { + psEnc->sCmn.indices.signalType = TYPE_UNVOICED; + } + } else { + silk_memset( psEncCtrl->pitchL, 0, sizeof( psEncCtrl->pitchL ) ); + psEnc->sCmn.indices.lagIndex = 0; + psEnc->sCmn.indices.contourIndex = 0; + psEnc->LTPCorr_Q15 = 0; + } + RESTORE_STACK; +} diff --git a/drivers/opus/silk/fixed/find_pred_coefs_FIX.c b/drivers/opus/silk/fixed/find_pred_coefs_FIX.c new file mode 100644 index 0000000000..0ab70df09d --- /dev/null +++ b/drivers/opus/silk/fixed/find_pred_coefs_FIX.c @@ -0,0 +1,147 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. 
+Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "main_FIX.h" +#include "stack_alloc.h" + +void silk_find_pred_coefs_FIX( + silk_encoder_state_FIX *psEnc, /* I/O encoder state */ + silk_encoder_control_FIX *psEncCtrl, /* I/O encoder control */ + const opus_int16 res_pitch[], /* I Residual from pitch analysis */ + const opus_int16 x[], /* I Speech signal */ + opus_int condCoding /* I The type of conditional coding to use */ +) +{ + opus_int i; + opus_int32 invGains_Q16[ MAX_NB_SUBFR ], local_gains[ MAX_NB_SUBFR ], Wght_Q15[ MAX_NB_SUBFR ]; + opus_int16 NLSF_Q15[ MAX_LPC_ORDER ]; + const opus_int16 *x_ptr; + opus_int16 *x_pre_ptr; + VARDECL( opus_int16, LPC_in_pre ); + opus_int32 tmp, min_gain_Q16, minInvGain_Q30; + opus_int LTP_corrs_rshift[ MAX_NB_SUBFR ]; + SAVE_STACK; + + /* weighting for weighted least squares */ + min_gain_Q16 = silk_int32_MAX >> 6; + for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) { + min_gain_Q16 = silk_min( min_gain_Q16, psEncCtrl->Gains_Q16[ i ] ); + } + for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) { + /* Divide to Q16 */ + silk_assert( psEncCtrl->Gains_Q16[ i ] > 0 ); + /* Invert and normalize gains, and ensure that maximum invGains_Q16 is within range of a 16 bit int */ + invGains_Q16[ i ] = silk_DIV32_varQ( min_gain_Q16, psEncCtrl->Gains_Q16[ i ], 16 - 2 ); + + /* Ensure Wght_Q15 a minimum value 1 */ + invGains_Q16[ i ] = silk_max( invGains_Q16[ i ], 363 ); + + /* Square the inverted gains */ + silk_assert( invGains_Q16[ i ] == silk_SAT16( invGains_Q16[ i ] ) ); + tmp = silk_SMULWB( invGains_Q16[ i ], invGains_Q16[ i ] ); + Wght_Q15[ i ] = silk_RSHIFT( tmp, 1 ); + + /* Invert the inverted and normalized gains */ + local_gains[ i ] = silk_DIV32( ( (opus_int32)1 << 16 ), invGains_Q16[ i ] ); + } + + ALLOC( LPC_in_pre, + psEnc->sCmn.nb_subfr * psEnc->sCmn.predictLPCOrder + + psEnc->sCmn.frame_length, opus_int16 ); + if( 
psEnc->sCmn.indices.signalType == TYPE_VOICED ) { + VARDECL( opus_int32, WLTP ); + + /**********/ + /* VOICED */ + /**********/ + silk_assert( psEnc->sCmn.ltp_mem_length - psEnc->sCmn.predictLPCOrder >= psEncCtrl->pitchL[ 0 ] + LTP_ORDER / 2 ); + + ALLOC( WLTP, psEnc->sCmn.nb_subfr * LTP_ORDER * LTP_ORDER, opus_int32 ); + + /* LTP analysis */ + silk_find_LTP_FIX( psEncCtrl->LTPCoef_Q14, WLTP, &psEncCtrl->LTPredCodGain_Q7, + res_pitch, psEncCtrl->pitchL, Wght_Q15, psEnc->sCmn.subfr_length, + psEnc->sCmn.nb_subfr, psEnc->sCmn.ltp_mem_length, LTP_corrs_rshift ); + + /* Quantize LTP gain parameters */ + silk_quant_LTP_gains( psEncCtrl->LTPCoef_Q14, psEnc->sCmn.indices.LTPIndex, &psEnc->sCmn.indices.PERIndex, + &psEnc->sCmn.sum_log_gain_Q7, WLTP, psEnc->sCmn.mu_LTP_Q9, psEnc->sCmn.LTPQuantLowComplexity, psEnc->sCmn.nb_subfr); + + /* Control LTP scaling */ + silk_LTP_scale_ctrl_FIX( psEnc, psEncCtrl, condCoding ); + + /* Create LTP residual */ + silk_LTP_analysis_filter_FIX( LPC_in_pre, x - psEnc->sCmn.predictLPCOrder, psEncCtrl->LTPCoef_Q14, + psEncCtrl->pitchL, invGains_Q16, psEnc->sCmn.subfr_length, psEnc->sCmn.nb_subfr, psEnc->sCmn.predictLPCOrder ); + + } else { + /************/ + /* UNVOICED */ + /************/ + /* Create signal with prepended subframes, scaled by inverse gains */ + x_ptr = x - psEnc->sCmn.predictLPCOrder; + x_pre_ptr = LPC_in_pre; + for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) { + silk_scale_copy_vector16( x_pre_ptr, x_ptr, invGains_Q16[ i ], + psEnc->sCmn.subfr_length + psEnc->sCmn.predictLPCOrder ); + x_pre_ptr += psEnc->sCmn.subfr_length + psEnc->sCmn.predictLPCOrder; + x_ptr += psEnc->sCmn.subfr_length; + } + + silk_memset( psEncCtrl->LTPCoef_Q14, 0, psEnc->sCmn.nb_subfr * LTP_ORDER * sizeof( opus_int16 ) ); + psEncCtrl->LTPredCodGain_Q7 = 0; + psEnc->sCmn.sum_log_gain_Q7 = 0; + } + + /* Limit on total predictive coding gain */ + if( psEnc->sCmn.first_frame_after_reset ) { + minInvGain_Q30 = SILK_FIX_CONST( 1.0f / 
MAX_PREDICTION_POWER_GAIN_AFTER_RESET, 30 ); + } else { + minInvGain_Q30 = silk_log2lin( silk_SMLAWB( 16 << 7, (opus_int32)psEncCtrl->LTPredCodGain_Q7, SILK_FIX_CONST( 1.0 / 3, 16 ) ) ); /* Q16 */ + minInvGain_Q30 = silk_DIV32_varQ( minInvGain_Q30, + silk_SMULWW( SILK_FIX_CONST( MAX_PREDICTION_POWER_GAIN, 0 ), + silk_SMLAWB( SILK_FIX_CONST( 0.25, 18 ), SILK_FIX_CONST( 0.75, 18 ), psEncCtrl->coding_quality_Q14 ) ), 14 ); + } + + /* LPC_in_pre contains the LTP-filtered input for voiced, and the unfiltered input for unvoiced */ + silk_find_LPC_FIX( &psEnc->sCmn, NLSF_Q15, LPC_in_pre, minInvGain_Q30 ); + + /* Quantize LSFs */ + silk_process_NLSFs( &psEnc->sCmn, psEncCtrl->PredCoef_Q12, NLSF_Q15, psEnc->sCmn.prev_NLSFq_Q15 ); + + /* Calculate residual energy using quantized LPC coefficients */ + silk_residual_energy_FIX( psEncCtrl->ResNrg, psEncCtrl->ResNrgQ, LPC_in_pre, psEncCtrl->PredCoef_Q12, local_gains, + psEnc->sCmn.subfr_length, psEnc->sCmn.nb_subfr, psEnc->sCmn.predictLPCOrder ); + + /* Copy to prediction struct for use in next frame for interpolation */ + silk_memcpy( psEnc->sCmn.prev_NLSFq_Q15, NLSF_Q15, sizeof( psEnc->sCmn.prev_NLSFq_Q15 ) ); + RESTORE_STACK; +} diff --git a/drivers/opus/silk/fixed/k2a_FIX.c b/drivers/opus/silk/fixed/k2a_FIX.c new file mode 100644 index 0000000000..848666ee3b --- /dev/null +++ b/drivers/opus/silk/fixed/k2a_FIX.c @@ -0,0 +1,53 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. 
+- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "SigProc_FIX.h" + +/* Step up function, converts reflection coefficients to prediction coefficients. Stage k rewrites A_Q24[ 0 .. k - 1 ] in place, two mirrored entries ( n, k - n - 1 ) at a time, and then stores A_Q24[ k ]. No scratch array is used, so order is not capped by the size of a local buffer. */ +void silk_k2a( + opus_int32 *A_Q24, /* O Prediction coefficients [order] Q24 */ + const opus_int16 *rc_Q15, /* I Reflection coefficients [order] Q15 */ + const opus_int32 order /* I Prediction order */ +) +{ + opus_int k, n; + opus_int32 tmp1, tmp2; + + for( k = 0; k < order; k++ ) { + for( n = 0; n < ( ( k + 1 ) >> 1 ); n++ ) { /* for odd k the middle entry pairs with itself */ + tmp1 = A_Q24[ n ]; /* read both old values before either one is overwritten */ + tmp2 = A_Q24[ k - n - 1 ]; + A_Q24[ n ] = silk_SMLAWB( tmp1, silk_LSHIFT( tmp2, 1 ), rc_Q15[ k ] ); + A_Q24[ k - n - 1 ] = silk_SMLAWB( tmp2, silk_LSHIFT( tmp1, 1 ), rc_Q15[ k ] ); + } + A_Q24[ k ] = -silk_LSHIFT( (opus_int32)rc_Q15[ k ], 9 ); /* negated, shifted left by 9 ( Q15 -> Q24 ) */ + } +} diff --git a/drivers/opus/silk/fixed/k2a_Q16_FIX.c b/drivers/opus/silk/fixed/k2a_Q16_FIX.c new file mode 100644 index 0000000000..f7e62e95fe --- /dev/null +++ b/drivers/opus/silk/fixed/k2a_Q16_FIX.c @@ -0,0 +1,53 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. 
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "SigProc_FIX.h" + +/* Step up function, converts reflection coefficients to prediction coefficients. Stage k rewrites A_Q24[ 0 .. k - 1 ] in place, two mirrored entries ( n, k - n - 1 ) at a time, and then stores A_Q24[ k ]. No scratch array is used, so order is not capped by the size of a local buffer. */ +void silk_k2a_Q16( + opus_int32 *A_Q24, /* O Prediction coefficients [order] Q24 */ + const opus_int32 *rc_Q16, /* I Reflection coefficients [order] Q16 */ + const opus_int32 order /* I Prediction order */ +) +{ + opus_int k, n; + opus_int32 tmp1, tmp2; + + for( k = 0; k < order; k++ ) { + for( n = 0; n < ( ( k + 1 ) >> 1 ); n++ ) { /* for odd k the middle entry pairs with itself */ + tmp1 = A_Q24[ n ]; /* read both old values before either one is overwritten */ + tmp2 = A_Q24[ k - n - 1 ]; + A_Q24[ n ] = silk_SMLAWW( tmp1, tmp2, rc_Q16[ k ] ); + A_Q24[ k - n - 1 ] = silk_SMLAWW( tmp2, tmp1, rc_Q16[ k ] ); + } + A_Q24[ k ] = -silk_LSHIFT( rc_Q16[ k ], 8 ); /* negated, shifted left by 8 ( Q16 -> Q24 ) */ + } +} diff --git a/drivers/opus/silk/fixed/main_FIX.h b/drivers/opus/silk/fixed/main_FIX.h new file mode 100644 index 0000000000..fb47ffe700 --- /dev/null +++ b/drivers/opus/silk/fixed/main_FIX.h @@ -0,0 +1,257 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. 
+Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+***********************************************************************/ + +#ifndef SILK_MAIN_FIX_H +#define SILK_MAIN_FIX_H + +#include "SigProc_FIX.h" +#include "structs_FIX.h" +#include "control.h" +#include "silk_main.h" +#include "PLC.h" +#include "debug.h" +#include "entenc.h" + +#ifndef FORCE_CPP_BUILD +#ifdef __cplusplus +extern "C" +{ +#endif +#endif +/* Bind the generic _Fxx names to their fixed-point _FIX counterparts */ +#define silk_encoder_state_Fxx silk_encoder_state_FIX +#define silk_encode_do_VAD_Fxx silk_encode_do_VAD_FIX +#define silk_encode_frame_Fxx silk_encode_frame_FIX + +/*********************/ +/* Encoder Functions */ +/*********************/ + +/* High-pass filter with cutoff frequency adaptation based on pitch lag statistics */ +void silk_HP_variable_cutoff( + silk_encoder_state_Fxx state_Fxx[] /* I/O Encoder states */ +); + +/* Encoder VAD step, going by the function name; the implementation is not visible from this header - TODO confirm */ +void silk_encode_do_VAD_FIX( + silk_encoder_state_FIX *psEnc /* I/O Pointer to Silk FIX encoder state */ +); + +/* Encoder main function */ +opus_int silk_encode_frame_FIX( + silk_encoder_state_FIX *psEnc, /* I/O Pointer to Silk FIX encoder state */ + opus_int32 *pnBytesOut, /* O Pointer to number of payload bytes; */ + ec_enc *psRangeEnc, /* I/O compressor data structure */ + opus_int condCoding, /* I The type of conditional coding to use */ + opus_int maxBits, /* I If > 0: maximum number of output bits */ + opus_int useCBR /* I Flag to force constant-bitrate operation */ +); + +/* Initializes the Silk encoder state */ +opus_int silk_init_encoder( + silk_encoder_state_Fxx *psEnc, /* I/O Pointer to Silk FIX encoder state */ + int arch /* I Run-time architecture */ +); + +/* Control the Silk encoder */ +opus_int silk_control_encoder( + silk_encoder_state_Fxx *psEnc, /* I/O Pointer to Silk encoder state */ + silk_EncControlStruct *encControl, /* I Control structure */ + const opus_int32 TargetRate_bps, /* I Target max bitrate (bps) */ + const opus_int allow_bw_switch, /* I Flag to allow switching audio bandwidth */ + const opus_int channelNb, /* I Channel 
number */ + const opus_int force_fs_kHz /* I NOTE(review): undocumented in the original; presumably a forced internal sampling rate in kHz - confirm against the implementation */ +); + +/****************/ +/* Prefiltering */ +/****************/ +void silk_prefilter_FIX( + silk_encoder_state_FIX *psEnc, /* I/O Encoder state */ + const silk_encoder_control_FIX *psEncCtrl, /* I Encoder control */ + opus_int32 xw_Q10[], /* O Weighted signal */ + const opus_int16 x[] /* I Speech signal */ +); + +/**************************/ +/* Noise shaping analysis */ +/**************************/ +/* Compute noise shaping coefficients and initial gain values */ +void silk_noise_shape_analysis_FIX( + silk_encoder_state_FIX *psEnc, /* I/O Encoder state FIX */ + silk_encoder_control_FIX *psEncCtrl, /* I/O Encoder control FIX */ + const opus_int16 *pitch_res, /* I LPC residual from pitch analysis */ + const opus_int16 *x, /* I Input signal [ frame_length + la_shape ] */ + int arch /* I Run-time architecture */ +); + +/* Autocorrelations for a warped frequency axis */ +void silk_warped_autocorrelation_FIX( + opus_int32 *corr, /* O Result [order + 1] */ + opus_int *scale, /* O Scaling of the correlation vector */ + const opus_int16 *input, /* I Input data to correlate */ + const opus_int warping_Q16, /* I Warping coefficient */ + const opus_int length, /* I Length of input */ + const opus_int order /* I Correlation order (even) */ +); + +/* Calculation of LTP state scaling */ +void silk_LTP_scale_ctrl_FIX( + silk_encoder_state_FIX *psEnc, /* I/O encoder state */ + silk_encoder_control_FIX *psEncCtrl, /* I/O encoder control */ + opus_int condCoding /* I The type of conditional coding to use */ +); + +/**********************************************/ +/* Prediction Analysis */ +/**********************************************/ +/* Find pitch lags */ +void silk_find_pitch_lags_FIX( + silk_encoder_state_FIX *psEnc, /* I/O encoder state */ + silk_encoder_control_FIX *psEncCtrl, /* I/O encoder control */ + opus_int16 res[], /* O residual */ + const opus_int16 x[], /* I Speech signal */ + int arch /* I Run-time architecture */ +); + 
+/* Find LPC and LTP coefficients */ +void silk_find_pred_coefs_FIX( + silk_encoder_state_FIX *psEnc, /* I/O encoder state */ + silk_encoder_control_FIX *psEncCtrl, /* I/O encoder control */ + const opus_int16 res_pitch[], /* I Residual from pitch analysis */ + const opus_int16 x[], /* I Speech signal */ + opus_int condCoding /* I The type of conditional coding to use */ +); + +/* LPC analysis */ +void silk_find_LPC_FIX( + silk_encoder_state *psEncC, /* I/O Encoder state */ + opus_int16 NLSF_Q15[], /* O NLSFs */ + const opus_int16 x[], /* I Input signal */ + const opus_int32 minInvGain_Q30 /* I Inverse of max prediction gain */ +); + +/* LTP analysis */ +void silk_find_LTP_FIX( + opus_int16 b_Q14[ MAX_NB_SUBFR * LTP_ORDER ], /* O LTP coefs */ + opus_int32 WLTP[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* O Weight for LTP quantization */ + opus_int *LTPredCodGain_Q7, /* O LTP coding gain */ + const opus_int16 r_lpc[], /* I residual signal after LPC signal + state for first 10 ms */ + const opus_int lag[ MAX_NB_SUBFR ], /* I LTP lags */ + const opus_int32 Wght_Q15[ MAX_NB_SUBFR ], /* I weights */ + const opus_int subfr_length, /* I subframe length */ + const opus_int nb_subfr, /* I number of subframes */ + const opus_int mem_offset, /* I number of samples in LTP memory */ + opus_int corr_rshifts[ MAX_NB_SUBFR ] /* O right shifts applied to correlations */ +); +/* LTP analysis filter: writes the LTP residual described by the parameter notes below into LTP_res */ +void silk_LTP_analysis_filter_FIX( + opus_int16 *LTP_res, /* O LTP residual signal of length MAX_NB_SUBFR * ( pre_length + subfr_length ) */ + const opus_int16 *x, /* I Pointer to input signal with at least max( pitchL ) preceding samples */ + const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ],/* I LTP_ORDER LTP coefficients for each MAX_NB_SUBFR subframe */ + const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lag, one for each subframe */ + const opus_int32 invGains_Q16[ MAX_NB_SUBFR ], /* I Inverse quantization gains, one for each subframe */ + const opus_int subfr_length, /* I Length of each subframe 
*/ + const opus_int nb_subfr, /* I Number of subframes */ + const opus_int pre_length /* I Length of the preceding samples starting at &x[0] for each subframe */ +); + +/* Calculates residual energies of input subframes where all subframes have LPC_order */ +/* of preceding samples */ +void silk_residual_energy_FIX( + opus_int32 nrgs[ MAX_NB_SUBFR ], /* O Residual energy per subframe */ + opus_int nrgsQ[ MAX_NB_SUBFR ], /* O Q value per subframe */ + const opus_int16 x[], /* I Input signal */ + opus_int16 a_Q12[ 2 ][ MAX_LPC_ORDER ], /* I AR coefs for each frame half */ + const opus_int32 gains[ MAX_NB_SUBFR ], /* I Quantization gains */ + const opus_int subfr_length, /* I Subframe length */ + const opus_int nb_subfr, /* I Number of subframes */ + const opus_int LPC_order /* I LPC order */ +); + +/* Residual energy: nrg = wxx - 2 * wXx * c + c' * wXX * c */ +opus_int32 silk_residual_energy16_covar_FIX( + const opus_int16 *c, /* I Prediction vector */ + const opus_int32 *wXX, /* I Correlation matrix */ + const opus_int32 *wXx, /* I Correlation vector */ + opus_int32 wxx, /* I Signal energy */ + opus_int D, /* I Dimension */ + opus_int cQ /* I Q value for c vector 0 - 15 */ +); + +/* Processing of gains */ +void silk_process_gains_FIX( + silk_encoder_state_FIX *psEnc, /* I/O Encoder state */ + silk_encoder_control_FIX *psEncCtrl, /* I/O Encoder control */ + opus_int condCoding /* I The type of conditional coding to use */ +); + +/******************/ +/* Linear Algebra */ +/******************/ +/* Calculates correlation matrix X'*X */ +void silk_corrMatrix_FIX( + const opus_int16 *x, /* I x vector [L + order - 1] used to form data matrix X */ + const opus_int L, /* I Length of vectors */ + const opus_int order, /* I Max lag for correlation */ + const opus_int head_room, /* I Desired headroom */ + opus_int32 *XX, /* O Pointer to X'*X correlation matrix [ order x order ] */ + opus_int *rshifts /* I/O Right shifts of correlations */ +); + +/* Calculates correlation 
vector X'*t */ +void silk_corrVector_FIX( + const opus_int16 *x, /* I x vector [L + order - 1] used to form data matrix X */ + const opus_int16 *t, /* I Target vector [L] */ + const opus_int L, /* I Length of vectors */ + const opus_int order, /* I Max lag for correlation */ + opus_int32 *Xt, /* O Pointer to X'*t correlation vector [order] */ + const opus_int rshifts /* I Right shifts of correlations */ +); + +/* Add noise to matrix diagonal */ +void silk_regularize_correlations_FIX( + opus_int32 *XX, /* I/O Correlation matrices */ + opus_int32 *xx, /* I/O Correlation values */ + opus_int32 noise, /* I Noise to add */ + opus_int D /* I Dimension of XX */ +); + +/* Solves Ax = b, assuming A is symmetric */ +void silk_solve_LDL_FIX( + opus_int32 *A, /* I Pointer to symmetric square matrix A */ + opus_int M, /* I Size of matrix */ + const opus_int32 *b, /* I Pointer to b vector */ + opus_int32 *x_Q16 /* O Pointer to x solution vector */ +); + +#ifndef FORCE_CPP_BUILD +#ifdef __cplusplus +} +#endif /* __cplusplus */ +#endif /* FORCE_CPP_BUILD */ +#endif /* SILK_MAIN_FIX_H */ diff --git a/drivers/opus/silk/fixed/noise_shape_analysis_FIX.c b/drivers/opus/silk/fixed/noise_shape_analysis_FIX.c new file mode 100644 index 0000000000..420cbeedfc --- /dev/null +++ b/drivers/opus/silk/fixed/noise_shape_analysis_FIX.c @@ -0,0 +1,445 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. 
+- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "main_FIX.h" +#include "stack_alloc.h" +#include "tuning_parameters.h" + +/* Compute gain to make warped filter coefficients have a zero mean log frequency response on a */ +/* non-warped frequency scale. (So that it can be implemented with a minimum-phase monic filter.) */ +/* Note: A monic filter is one with the first coefficient equal to 1.0. In Silk we omit the first */ +/* coefficient in an array of coefficients, for monic filters. 
*/ +static OPUS_INLINE opus_int32 warped_gain( /* Returns the gain (labelled Q16 by the original author), computed as silk_INVERSE32_varQ( gain_Q24, 40 ) */ + const opus_int32 *coefs_Q24, /* I Coefficients; entries 0 .. order - 1 are read */ + opus_int lambda_Q16, /* I Warping coefficient; negated locally for the recursion */ + opus_int order /* I Number of coefficients */ +) { + opus_int i; + opus_int32 gain_Q24; + + lambda_Q16 = -lambda_Q16; /* the recursion below runs with the negated warping coefficient */ + gain_Q24 = coefs_Q24[ order - 1 ]; + for( i = order - 2; i >= 0; i-- ) { /* accumulate from the last coefficient down to index 0 */ + gain_Q24 = silk_SMLAWB( coefs_Q24[ i ], gain_Q24, lambda_Q16 ); + } + gain_Q24 = silk_SMLAWB( SILK_FIX_CONST( 1.0, 24 ), gain_Q24, -lambda_Q16 ); /* -lambda_Q16 undoes the negation above, so this step uses the caller's sign */ + return silk_INVERSE32_varQ( gain_Q24, 40 ); +} + +/* Convert warped filter coefficients to monic pseudo-warped coefficients and limit maximum */ +/* amplitude of monic warped coefficients by using bandwidth expansion on the true coefficients */ +static OPUS_INLINE void limit_warped_coefs( + opus_int32 *coefs_syn_Q24, /* I/O Synthesis coefficients, rewritten in place */ + opus_int32 *coefs_ana_Q24, /* I/O Analysis coefficients, rewritten in place */ + opus_int lambda_Q16, /* I Warping coefficient; negated and restored locally */ + opus_int32 limit_Q24, /* I Largest allowed absolute coefficient value */ + opus_int order /* I Number of coefficients in each array */ +) { + opus_int i, iter, ind = 0; + opus_int32 tmp, maxabs_Q24, chirp_Q16, gain_syn_Q16, gain_ana_Q16; + opus_int32 nom_Q16, den_Q24; + + /* Convert to monic coefficients */ + lambda_Q16 = -lambda_Q16; + for( i = order - 1; i > 0; i-- ) { + coefs_syn_Q24[ i - 1 ] = silk_SMLAWB( coefs_syn_Q24[ i - 1 ], coefs_syn_Q24[ i ], lambda_Q16 ); + coefs_ana_Q24[ i - 1 ] = silk_SMLAWB( coefs_ana_Q24[ i - 1 ], coefs_ana_Q24[ i ], lambda_Q16 ); + } + lambda_Q16 = -lambda_Q16; /* restore the caller's sign before computing the gains */ + nom_Q16 = silk_SMLAWB( SILK_FIX_CONST( 1.0, 16 ), -(opus_int32)lambda_Q16, lambda_Q16 ); + den_Q24 = silk_SMLAWB( SILK_FIX_CONST( 1.0, 24 ), coefs_syn_Q24[ 0 ], lambda_Q16 ); + gain_syn_Q16 = silk_DIV32_varQ( nom_Q16, den_Q24, 24 ); + den_Q24 = silk_SMLAWB( SILK_FIX_CONST( 1.0, 24 ), coefs_ana_Q24[ 0 ], lambda_Q16 ); + gain_ana_Q16 = silk_DIV32_varQ( nom_Q16, den_Q24, 24 ); + for( i = 0; i < order; i++ ) { + coefs_syn_Q24[ i ] = silk_SMULWW( gain_syn_Q16, coefs_syn_Q24[ i ] ); + coefs_ana_Q24[ i ] = silk_SMULWW( gain_ana_Q16, coefs_ana_Q24[ i ] ); + } + + for( iter = 0; iter < 10; iter++ ) { /* at most 10 limiting passes */ + /* Find maximum absolute value */ + maxabs_Q24 = -1; + for( i = 0; i < order; i++ ) { 
+ tmp = silk_max( silk_abs_int32( coefs_syn_Q24[ i ] ), silk_abs_int32( coefs_ana_Q24[ i ] ) ); + if( tmp > maxabs_Q24 ) { + maxabs_Q24 = tmp; + ind = i; /* index of the largest coefficient; used in the chirp computation below */ + } + } + if( maxabs_Q24 <= limit_Q24 ) { + /* Coefficients are within range - done */ + return; + } + + /* Convert back to true warped coefficients */ + for( i = 1; i < order; i++ ) { + coefs_syn_Q24[ i - 1 ] = silk_SMLAWB( coefs_syn_Q24[ i - 1 ], coefs_syn_Q24[ i ], lambda_Q16 ); + coefs_ana_Q24[ i - 1 ] = silk_SMLAWB( coefs_ana_Q24[ i - 1 ], coefs_ana_Q24[ i ], lambda_Q16 ); + } + gain_syn_Q16 = silk_INVERSE32_varQ( gain_syn_Q16, 32 ); + gain_ana_Q16 = silk_INVERSE32_varQ( gain_ana_Q16, 32 ); + for( i = 0; i < order; i++ ) { + coefs_syn_Q24[ i ] = silk_SMULWW( gain_syn_Q16, coefs_syn_Q24[ i ] ); + coefs_ana_Q24[ i ] = silk_SMULWW( gain_ana_Q16, coefs_ana_Q24[ i ] ); + } + + /* Apply bandwidth expansion; the chirp depends on the excess over limit_Q24, the pass number iter and the index ind */ + chirp_Q16 = SILK_FIX_CONST( 0.99, 16 ) - silk_DIV32_varQ( + silk_SMULWB( maxabs_Q24 - limit_Q24, silk_SMLABB( SILK_FIX_CONST( 0.8, 10 ), SILK_FIX_CONST( 0.1, 10 ), iter ) ), + silk_MUL( maxabs_Q24, ind + 1 ), 22 ); + silk_bwexpander_32( coefs_syn_Q24, order, chirp_Q16 ); + silk_bwexpander_32( coefs_ana_Q24, order, chirp_Q16 ); + + /* Convert to monic warped coefficients (same steps as the initial conversion above) */ + lambda_Q16 = -lambda_Q16; + for( i = order - 1; i > 0; i-- ) { + coefs_syn_Q24[ i - 1 ] = silk_SMLAWB( coefs_syn_Q24[ i - 1 ], coefs_syn_Q24[ i ], lambda_Q16 ); + coefs_ana_Q24[ i - 1 ] = silk_SMLAWB( coefs_ana_Q24[ i - 1 ], coefs_ana_Q24[ i ], lambda_Q16 ); + } + lambda_Q16 = -lambda_Q16; + nom_Q16 = silk_SMLAWB( SILK_FIX_CONST( 1.0, 16 ), -(opus_int32)lambda_Q16, lambda_Q16 ); + den_Q24 = silk_SMLAWB( SILK_FIX_CONST( 1.0, 24 ), coefs_syn_Q24[ 0 ], lambda_Q16 ); + gain_syn_Q16 = silk_DIV32_varQ( nom_Q16, den_Q24, 24 ); + den_Q24 = silk_SMLAWB( SILK_FIX_CONST( 1.0, 24 ), coefs_ana_Q24[ 0 ], lambda_Q16 ); + gain_ana_Q16 = silk_DIV32_varQ( nom_Q16, den_Q24, 24 ); + for( i = 0; i < order; i++ ) { + coefs_syn_Q24[ i ] = silk_SMULWW( 
gain_syn_Q16, coefs_syn_Q24[ i ] ); + coefs_ana_Q24[ i ] = silk_SMULWW( gain_ana_Q16, coefs_ana_Q24[ i ] ); + } + } + silk_assert( 0 ); /* reached only when the coefficients still exceed limit_Q24 after all 10 passes */ +} + +/**************************************************************/ +/* Compute noise shaping coefficients and initial gain values */ +/**************************************************************/ +void silk_noise_shape_analysis_FIX( + silk_encoder_state_FIX *psEnc, /* I/O Encoder state FIX */ + silk_encoder_control_FIX *psEncCtrl, /* I/O Encoder control FIX */ + const opus_int16 *pitch_res, /* I LPC residual from pitch analysis */ + const opus_int16 *x, /* I Input signal [ frame_length + la_shape ] */ + int arch /* I Run-time architecture */ +) +{ + silk_shape_state_FIX *psShapeSt = &psEnc->sShape; + opus_int k, i, nSamples, Qnrg, b_Q14, warping_Q16, scale = 0; + opus_int32 SNR_adj_dB_Q7, HarmBoost_Q16, HarmShapeGain_Q16, Tilt_Q16, tmp32; + opus_int32 nrg, pre_nrg_Q30, log_energy_Q7, log_energy_prev_Q7, energy_variation_Q7; + opus_int32 delta_Q16, BWExp1_Q16, BWExp2_Q16, gain_mult_Q16, gain_add_Q16, strength_Q16, b_Q8; + opus_int32 auto_corr[ MAX_SHAPE_LPC_ORDER + 1 ]; + opus_int32 refl_coef_Q16[ MAX_SHAPE_LPC_ORDER ]; + opus_int32 AR1_Q24[ MAX_SHAPE_LPC_ORDER ]; + opus_int32 AR2_Q24[ MAX_SHAPE_LPC_ORDER ]; + VARDECL( opus_int16, x_windowed ); + const opus_int16 *x_ptr, *pitch_res_ptr; + SAVE_STACK; + + /* Point to start of first LPC analysis block */ + x_ptr = x - psEnc->sCmn.la_shape; + + /****************/ + /* GAIN CONTROL */ + /****************/ + SNR_adj_dB_Q7 = psEnc->sCmn.SNR_dB_Q7; + + /* Input quality is the average of the quality in the lowest two VAD bands */ + psEncCtrl->input_quality_Q14 = ( opus_int )silk_RSHIFT( (opus_int32)psEnc->sCmn.input_quality_bands_Q15[ 0 ] + + psEnc->sCmn.input_quality_bands_Q15[ 1 ], 2 ); + + /* Coding quality level, between 0.0_Q0 and 1.0_Q0, but in Q14 */ + psEncCtrl->coding_quality_Q14 = silk_RSHIFT( silk_sigm_Q15( silk_RSHIFT_ROUND( SNR_adj_dB_Q7 - + SILK_FIX_CONST( 20.0, 7 ), 4 
) ), 1 ); + + /* Reduce coding SNR during low speech activity */ + if( psEnc->sCmn.useCBR == 0 ) { + b_Q8 = SILK_FIX_CONST( 1.0, 8 ) - psEnc->sCmn.speech_activity_Q8; + b_Q8 = silk_SMULWB( silk_LSHIFT( b_Q8, 8 ), b_Q8 ); + SNR_adj_dB_Q7 = silk_SMLAWB( SNR_adj_dB_Q7, + silk_SMULBB( SILK_FIX_CONST( -BG_SNR_DECR_dB, 7 ) >> ( 4 + 1 ), b_Q8 ), /* Q11*/ + silk_SMULWB( SILK_FIX_CONST( 1.0, 14 ) + psEncCtrl->input_quality_Q14, psEncCtrl->coding_quality_Q14 ) ); /* Q12*/ + } + + if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) { + /* Reduce gains for periodic signals */ + SNR_adj_dB_Q7 = silk_SMLAWB( SNR_adj_dB_Q7, SILK_FIX_CONST( HARM_SNR_INCR_dB, 8 ), psEnc->LTPCorr_Q15 ); + } else { + /* For unvoiced signals and low-quality input, adjust the quality slower than SNR_dB setting */ + SNR_adj_dB_Q7 = silk_SMLAWB( SNR_adj_dB_Q7, + silk_SMLAWB( SILK_FIX_CONST( 6.0, 9 ), -SILK_FIX_CONST( 0.4, 18 ), psEnc->sCmn.SNR_dB_Q7 ), + SILK_FIX_CONST( 1.0, 14 ) - psEncCtrl->input_quality_Q14 ); + } + + /*************************/ + /* SPARSENESS PROCESSING */ + /*************************/ + /* Set quantizer offset */ + if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) { + /* Initially set to 0; may be overruled in process_gains(..) 
*/ + psEnc->sCmn.indices.quantOffsetType = 0; + psEncCtrl->sparseness_Q8 = 0; + } else { + /* Sparseness measure, based on relative fluctuations of energy per 2 milliseconds */ + nSamples = silk_LSHIFT( psEnc->sCmn.fs_kHz, 1 ); + energy_variation_Q7 = 0; + log_energy_prev_Q7 = 0; + pitch_res_ptr = pitch_res; + for( k = 0; k < silk_SMULBB( SUB_FRAME_LENGTH_MS, psEnc->sCmn.nb_subfr ) / 2; k++ ) { + silk_sum_sqr_shift( &nrg, &scale, pitch_res_ptr, nSamples ); + nrg += silk_RSHIFT( nSamples, scale ); /* Q(-scale)*/ + + log_energy_Q7 = silk_lin2log( nrg ); + if( k > 0 ) { + energy_variation_Q7 += silk_abs( log_energy_Q7 - log_energy_prev_Q7 ); + } + log_energy_prev_Q7 = log_energy_Q7; + pitch_res_ptr += nSamples; + } + + psEncCtrl->sparseness_Q8 = silk_RSHIFT( silk_sigm_Q15( silk_SMULWB( energy_variation_Q7 - + SILK_FIX_CONST( 5.0, 7 ), SILK_FIX_CONST( 0.1, 16 ) ) ), 7 ); + + /* Set quantization offset depending on sparseness measure */ + if( psEncCtrl->sparseness_Q8 > SILK_FIX_CONST( SPARSENESS_THRESHOLD_QNT_OFFSET, 8 ) ) { + psEnc->sCmn.indices.quantOffsetType = 0; + } else { + psEnc->sCmn.indices.quantOffsetType = 1; + } + + /* Increase coding SNR for sparse signals */ + SNR_adj_dB_Q7 = silk_SMLAWB( SNR_adj_dB_Q7, SILK_FIX_CONST( SPARSE_SNR_INCR_dB, 15 ), psEncCtrl->sparseness_Q8 - SILK_FIX_CONST( 0.5, 8 ) ); + } + + /*******************************/ + /* Control bandwidth expansion */ + /*******************************/ + /* More BWE for signals with high prediction gain */ + strength_Q16 = silk_SMULWB( psEncCtrl->predGain_Q16, SILK_FIX_CONST( FIND_PITCH_WHITE_NOISE_FRACTION, 16 ) ); + BWExp1_Q16 = BWExp2_Q16 = silk_DIV32_varQ( SILK_FIX_CONST( BANDWIDTH_EXPANSION, 16 ), + silk_SMLAWW( SILK_FIX_CONST( 1.0, 16 ), strength_Q16, strength_Q16 ), 16 ); + delta_Q16 = silk_SMULWB( SILK_FIX_CONST( 1.0, 16 ) - silk_SMULBB( 3, psEncCtrl->coding_quality_Q14 ), + SILK_FIX_CONST( LOW_RATE_BANDWIDTH_EXPANSION_DELTA, 16 ) ); + BWExp1_Q16 = silk_SUB32( BWExp1_Q16, delta_Q16 ); + 
BWExp2_Q16 = silk_ADD32( BWExp2_Q16, delta_Q16 ); + /* BWExp1 will be applied after BWExp2, so make it relative */ + BWExp1_Q16 = silk_DIV32_16( silk_LSHIFT( BWExp1_Q16, 14 ), silk_RSHIFT( BWExp2_Q16, 2 ) ); + + if( psEnc->sCmn.warping_Q16 > 0 ) { + /* Slightly more warping in analysis will move quantization noise up in frequency, where it's better masked */ + warping_Q16 = silk_SMLAWB( psEnc->sCmn.warping_Q16, (opus_int32)psEncCtrl->coding_quality_Q14, SILK_FIX_CONST( 0.01, 18 ) ); + } else { + warping_Q16 = 0; + } + + /********************************************/ + /* Compute noise shaping AR coefs and gains */ + /********************************************/ + ALLOC( x_windowed, psEnc->sCmn.shapeWinLength, opus_int16 ); + for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { + /* Apply window: sine slope followed by flat part followed by cosine slope */ + opus_int shift, slope_part, flat_part; + flat_part = psEnc->sCmn.fs_kHz * 3; + slope_part = silk_RSHIFT( psEnc->sCmn.shapeWinLength - flat_part, 1 ); + + silk_apply_sine_window( x_windowed, x_ptr, 1, slope_part ); + shift = slope_part; + silk_memcpy( x_windowed + shift, x_ptr + shift, flat_part * sizeof(opus_int16) ); + shift += flat_part; + silk_apply_sine_window( x_windowed + shift, x_ptr + shift, 2, slope_part ); + + /* Update pointer: next LPC analysis block */ + x_ptr += psEnc->sCmn.subfr_length; + + if( psEnc->sCmn.warping_Q16 > 0 ) { + /* Calculate warped auto correlation */ + silk_warped_autocorrelation_FIX( auto_corr, &scale, x_windowed, warping_Q16, psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder ); + } else { + /* Calculate regular auto correlation */ + silk_autocorr( auto_corr, &scale, x_windowed, psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder + 1, arch ); + } + + /* Add white noise, as a fraction of energy */ + auto_corr[0] = silk_ADD32( auto_corr[0], silk_max_32( silk_SMULWB( silk_RSHIFT( auto_corr[ 0 ], 4 ), + SILK_FIX_CONST( SHAPE_WHITE_NOISE_FRACTION, 20 ) ), 1 ) ); + + /* Calculate 
the reflection coefficients using schur */ + nrg = silk_schur64( refl_coef_Q16, auto_corr, psEnc->sCmn.shapingLPCOrder ); + silk_assert( nrg >= 0 ); + + /* Convert reflection coefficients to prediction coefficients */ + silk_k2a_Q16( AR2_Q24, refl_coef_Q16, psEnc->sCmn.shapingLPCOrder ); + + Qnrg = -scale; /* range: -12...30*/ + silk_assert( Qnrg >= -12 ); + silk_assert( Qnrg <= 30 ); + + /* Make sure that Qnrg is an even number */ + if( Qnrg & 1 ) { + Qnrg -= 1; + nrg >>= 1; + } + + tmp32 = silk_SQRT_APPROX( nrg ); + Qnrg >>= 1; /* range: -6...15*/ + + psEncCtrl->Gains_Q16[ k ] = silk_LSHIFT_SAT32( tmp32, 16 - Qnrg ); + + if( psEnc->sCmn.warping_Q16 > 0 ) { + /* Adjust gain for warping */ + gain_mult_Q16 = warped_gain( AR2_Q24, warping_Q16, psEnc->sCmn.shapingLPCOrder ); + silk_assert( psEncCtrl->Gains_Q16[ k ] >= 0 ); + if ( silk_SMULWW( silk_RSHIFT_ROUND( psEncCtrl->Gains_Q16[ k ], 1 ), gain_mult_Q16 ) >= ( silk_int32_MAX >> 1 ) ) { + psEncCtrl->Gains_Q16[ k ] = silk_int32_MAX; + } else { + psEncCtrl->Gains_Q16[ k ] = silk_SMULWW( psEncCtrl->Gains_Q16[ k ], gain_mult_Q16 ); + } + } + + /* Bandwidth expansion for synthesis filter shaping */ + silk_bwexpander_32( AR2_Q24, psEnc->sCmn.shapingLPCOrder, BWExp2_Q16 ); + + /* Compute noise shaping filter coefficients */ + silk_memcpy( AR1_Q24, AR2_Q24, psEnc->sCmn.shapingLPCOrder * sizeof( opus_int32 ) ); + + /* Bandwidth expansion for analysis filter shaping */ + silk_assert( BWExp1_Q16 <= SILK_FIX_CONST( 1.0, 16 ) ); + silk_bwexpander_32( AR1_Q24, psEnc->sCmn.shapingLPCOrder, BWExp1_Q16 ); + + /* Ratio of prediction gains, in energy domain */ + pre_nrg_Q30 = silk_LPC_inverse_pred_gain_Q24( AR2_Q24, psEnc->sCmn.shapingLPCOrder ); + nrg = silk_LPC_inverse_pred_gain_Q24( AR1_Q24, psEnc->sCmn.shapingLPCOrder ); + + /*psEncCtrl->GainsPre[ k ] = 1.0f - 0.7f * ( 1.0f - pre_nrg / nrg ) = 0.3f + 0.7f * pre_nrg / nrg;*/ + pre_nrg_Q30 = silk_LSHIFT32( silk_SMULWB( pre_nrg_Q30, SILK_FIX_CONST( 0.7, 15 ) ), 1 ); + 
psEncCtrl->GainsPre_Q14[ k ] = ( opus_int ) SILK_FIX_CONST( 0.3, 14 ) + silk_DIV32_varQ( pre_nrg_Q30, nrg, 14 ); + + /* Convert to monic warped prediction coefficients and limit absolute values */ + limit_warped_coefs( AR2_Q24, AR1_Q24, warping_Q16, SILK_FIX_CONST( 3.999, 24 ), psEnc->sCmn.shapingLPCOrder ); + + /* Convert from Q24 to Q13 and store in int16 */ + for( i = 0; i < psEnc->sCmn.shapingLPCOrder; i++ ) { + psEncCtrl->AR1_Q13[ k * MAX_SHAPE_LPC_ORDER + i ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( AR1_Q24[ i ], 11 ) ); + psEncCtrl->AR2_Q13[ k * MAX_SHAPE_LPC_ORDER + i ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( AR2_Q24[ i ], 11 ) ); + } + } + + /*****************/ + /* Gain tweaking */ + /*****************/ + /* Increase gains during low speech activity and put lower limit on gains */ + gain_mult_Q16 = silk_log2lin( -silk_SMLAWB( -SILK_FIX_CONST( 16.0, 7 ), SNR_adj_dB_Q7, SILK_FIX_CONST( 0.16, 16 ) ) ); + gain_add_Q16 = silk_log2lin( silk_SMLAWB( SILK_FIX_CONST( 16.0, 7 ), SILK_FIX_CONST( MIN_QGAIN_DB, 7 ), SILK_FIX_CONST( 0.16, 16 ) ) ); + silk_assert( gain_mult_Q16 > 0 ); + for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { + psEncCtrl->Gains_Q16[ k ] = silk_SMULWW( psEncCtrl->Gains_Q16[ k ], gain_mult_Q16 ); + silk_assert( psEncCtrl->Gains_Q16[ k ] >= 0 ); + psEncCtrl->Gains_Q16[ k ] = silk_ADD_POS_SAT32( psEncCtrl->Gains_Q16[ k ], gain_add_Q16 ); + } + + gain_mult_Q16 = SILK_FIX_CONST( 1.0, 16 ) + silk_RSHIFT_ROUND( silk_MLA( SILK_FIX_CONST( INPUT_TILT, 26 ), + psEncCtrl->coding_quality_Q14, SILK_FIX_CONST( HIGH_RATE_INPUT_TILT, 12 ) ), 10 ); + for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { + psEncCtrl->GainsPre_Q14[ k ] = silk_SMULWB( gain_mult_Q16, psEncCtrl->GainsPre_Q14[ k ] ); + } + + /************************************************/ + /* Control low-frequency shaping and noise tilt */ + /************************************************/ + /* Less low frequency shaping for noisy inputs */ + strength_Q16 = silk_MUL( SILK_FIX_CONST( 
LOW_FREQ_SHAPING, 4 ), silk_SMLAWB( SILK_FIX_CONST( 1.0, 12 ), + SILK_FIX_CONST( LOW_QUALITY_LOW_FREQ_SHAPING_DECR, 13 ), psEnc->sCmn.input_quality_bands_Q15[ 0 ] - SILK_FIX_CONST( 1.0, 15 ) ) ); + strength_Q16 = silk_RSHIFT( silk_MUL( strength_Q16, psEnc->sCmn.speech_activity_Q8 ), 8 ); + if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) { + /* Reduce low frequencies quantization noise for periodic signals, depending on pitch lag */ + /*f = 400; freqz([1, -0.98 + 2e-4 * f], [1, -0.97 + 7e-4 * f], 2^12, Fs); axis([0, 1000, -10, 1])*/ + opus_int fs_kHz_inv = silk_DIV32_16( SILK_FIX_CONST( 0.2, 14 ), psEnc->sCmn.fs_kHz ); + for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { + b_Q14 = fs_kHz_inv + silk_DIV32_16( SILK_FIX_CONST( 3.0, 14 ), psEncCtrl->pitchL[ k ] ); + /* Pack two coefficients in one int32 */ + psEncCtrl->LF_shp_Q14[ k ] = silk_LSHIFT( SILK_FIX_CONST( 1.0, 14 ) - b_Q14 - silk_SMULWB( strength_Q16, b_Q14 ), 16 ); + psEncCtrl->LF_shp_Q14[ k ] |= (opus_uint16)( b_Q14 - SILK_FIX_CONST( 1.0, 14 ) ); + } + silk_assert( SILK_FIX_CONST( HARM_HP_NOISE_COEF, 24 ) < SILK_FIX_CONST( 0.5, 24 ) ); /* Guarantees that second argument to SMULWB() is within range of an opus_int16*/ + Tilt_Q16 = - SILK_FIX_CONST( HP_NOISE_COEF, 16 ) - + silk_SMULWB( SILK_FIX_CONST( 1.0, 16 ) - SILK_FIX_CONST( HP_NOISE_COEF, 16 ), + silk_SMULWB( SILK_FIX_CONST( HARM_HP_NOISE_COEF, 24 ), psEnc->sCmn.speech_activity_Q8 ) ); + } else { + b_Q14 = silk_DIV32_16( 21299, psEnc->sCmn.fs_kHz ); /* 1.3_Q0 = 21299_Q14*/ + /* Pack two coefficients in one int32 */ + psEncCtrl->LF_shp_Q14[ 0 ] = silk_LSHIFT( SILK_FIX_CONST( 1.0, 14 ) - b_Q14 - + silk_SMULWB( strength_Q16, silk_SMULWB( SILK_FIX_CONST( 0.6, 16 ), b_Q14 ) ), 16 ); + psEncCtrl->LF_shp_Q14[ 0 ] |= (opus_uint16)( b_Q14 - SILK_FIX_CONST( 1.0, 14 ) ); + for( k = 1; k < psEnc->sCmn.nb_subfr; k++ ) { + psEncCtrl->LF_shp_Q14[ k ] = psEncCtrl->LF_shp_Q14[ 0 ]; + } + Tilt_Q16 = -SILK_FIX_CONST( HP_NOISE_COEF, 16 ); + } + + /****************************/ 
+ /* HARMONIC SHAPING CONTROL */ + /****************************/ + /* Control boosting of harmonic frequencies */ + HarmBoost_Q16 = silk_SMULWB( silk_SMULWB( SILK_FIX_CONST( 1.0, 17 ) - silk_LSHIFT( psEncCtrl->coding_quality_Q14, 3 ), + psEnc->LTPCorr_Q15 ), SILK_FIX_CONST( LOW_RATE_HARMONIC_BOOST, 16 ) ); + + /* More harmonic boost for noisy input signals */ + HarmBoost_Q16 = silk_SMLAWB( HarmBoost_Q16, + SILK_FIX_CONST( 1.0, 16 ) - silk_LSHIFT( psEncCtrl->input_quality_Q14, 2 ), SILK_FIX_CONST( LOW_INPUT_QUALITY_HARMONIC_BOOST, 16 ) ); + + if( USE_HARM_SHAPING && psEnc->sCmn.indices.signalType == TYPE_VOICED ) { + /* More harmonic noise shaping for high bitrates or noisy input */ + HarmShapeGain_Q16 = silk_SMLAWB( SILK_FIX_CONST( HARMONIC_SHAPING, 16 ), + SILK_FIX_CONST( 1.0, 16 ) - silk_SMULWB( SILK_FIX_CONST( 1.0, 18 ) - silk_LSHIFT( psEncCtrl->coding_quality_Q14, 4 ), + psEncCtrl->input_quality_Q14 ), SILK_FIX_CONST( HIGH_RATE_OR_LOW_QUALITY_HARMONIC_SHAPING, 16 ) ); + + /* Less harmonic noise shaping for less periodic signals */ + HarmShapeGain_Q16 = silk_SMULWB( silk_LSHIFT( HarmShapeGain_Q16, 1 ), + silk_SQRT_APPROX( silk_LSHIFT( psEnc->LTPCorr_Q15, 15 ) ) ); + } else { + HarmShapeGain_Q16 = 0; + } + + /*************************/ + /* Smooth over subframes */ + /*************************/ + for( k = 0; k < MAX_NB_SUBFR; k++ ) { + psShapeSt->HarmBoost_smth_Q16 = + silk_SMLAWB( psShapeSt->HarmBoost_smth_Q16, HarmBoost_Q16 - psShapeSt->HarmBoost_smth_Q16, SILK_FIX_CONST( SUBFR_SMTH_COEF, 16 ) ); + psShapeSt->HarmShapeGain_smth_Q16 = + silk_SMLAWB( psShapeSt->HarmShapeGain_smth_Q16, HarmShapeGain_Q16 - psShapeSt->HarmShapeGain_smth_Q16, SILK_FIX_CONST( SUBFR_SMTH_COEF, 16 ) ); + psShapeSt->Tilt_smth_Q16 = + silk_SMLAWB( psShapeSt->Tilt_smth_Q16, Tilt_Q16 - psShapeSt->Tilt_smth_Q16, SILK_FIX_CONST( SUBFR_SMTH_COEF, 16 ) ); + + psEncCtrl->HarmBoost_Q14[ k ] = ( opus_int )silk_RSHIFT_ROUND( psShapeSt->HarmBoost_smth_Q16, 2 ); + psEncCtrl->HarmShapeGain_Q14[ k ] = 
( opus_int )silk_RSHIFT_ROUND( psShapeSt->HarmShapeGain_smth_Q16, 2 ); + psEncCtrl->Tilt_Q14[ k ] = ( opus_int )silk_RSHIFT_ROUND( psShapeSt->Tilt_smth_Q16, 2 ); + } + RESTORE_STACK; +} diff --git a/drivers/opus/silk/fixed/pitch_analysis_core_FIX.c b/drivers/opus/silk/fixed/pitch_analysis_core_FIX.c new file mode 100644 index 0000000000..4d65c09d1d --- /dev/null +++ b/drivers/opus/silk/fixed/pitch_analysis_core_FIX.c @@ -0,0 +1,744 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +/*********************************************************** +* Pitch analyser function +********************************************************** */ +#include "SigProc_FIX.h" +#include "pitch_est_defines.h" +#include "stack_alloc.h" +#include "debug.h" +#include "pitch.h" + +#define SCRATCH_SIZE 22 +#define SF_LENGTH_4KHZ ( PE_SUBFR_LENGTH_MS * 4 ) +#define SF_LENGTH_8KHZ ( PE_SUBFR_LENGTH_MS * 8 ) +#define MIN_LAG_4KHZ ( PE_MIN_LAG_MS * 4 ) +#define MIN_LAG_8KHZ ( PE_MIN_LAG_MS * 8 ) +#define MAX_LAG_4KHZ ( PE_MAX_LAG_MS * 4 ) +#define MAX_LAG_8KHZ ( PE_MAX_LAG_MS * 8 - 1 ) +#define CSTRIDE_4KHZ ( MAX_LAG_4KHZ + 1 - MIN_LAG_4KHZ ) +#define CSTRIDE_8KHZ ( MAX_LAG_8KHZ + 3 - ( MIN_LAG_8KHZ - 2 ) ) +#define D_COMP_MIN ( MIN_LAG_8KHZ - 3 ) +#define D_COMP_MAX ( MAX_LAG_8KHZ + 4 ) +#define D_COMP_STRIDE ( D_COMP_MAX - D_COMP_MIN ) + +typedef opus_int32 silk_pe_stage3_vals[ PE_NB_STAGE3_LAGS ]; + +/************************************************************/ +/* Internally used functions */ +/************************************************************/ +static void silk_P_Ana_calc_corr_st3( + silk_pe_stage3_vals cross_corr_st3[], /* O 3 DIM correlation array */ + const opus_int16 frame[], /* I vector to correlate */ + opus_int start_lag, /* I lag offset to search around */ + opus_int sf_length, /* I length of a 5 ms 
subframe */ + opus_int nb_subfr, /* I number of subframes */ + opus_int complexity, /* I Complexity setting */ + int arch /* I Run-time architecture */ +); + +static void silk_P_Ana_calc_energy_st3( + silk_pe_stage3_vals energies_st3[], /* O 3 DIM energy array */ + const opus_int16 frame[], /* I vector to calc energy in */ + opus_int start_lag, /* I lag offset to search around */ + opus_int sf_length, /* I length of one 5 ms subframe */ + opus_int nb_subfr, /* I number of subframes */ + opus_int complexity /* I Complexity setting */ +); + +/*************************************************************/ +/* FIXED POINT CORE PITCH ANALYSIS FUNCTION */ +/*************************************************************/ +opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0 voiced, 1 unvoiced */ + const opus_int16 *frame, /* I Signal of length PE_FRAME_LENGTH_MS*Fs_kHz */ + opus_int *pitch_out, /* O nb_subfr pitch lag values (one per subframe) */ + opus_int16 *lagIndex, /* O Lag Index */ + opus_int8 *contourIndex, /* O Pitch contour Index */ + opus_int *LTPCorr_Q15, /* I/O Normalized correlation; input: value from previous frame */ + opus_int prevLag, /* I Last lag of previous frame; set to zero if unvoiced */ + const opus_int32 search_thres1_Q16, /* I First stage threshold for lag candidates 0 - 1 */ + const opus_int search_thres2_Q13, /* I Final threshold for lag candidates 0 - 1 */ + const opus_int Fs_kHz, /* I Sample frequency (kHz) */ + const opus_int complexity, /* I Complexity setting, 0-2, where 2 is highest */ + const opus_int nb_subfr, /* I number of 5 ms subframes */ + int arch /* I Run-time architecture */ +) +{ /* Three-stage search: stage 1 on the signal decimated to 4 kHz, stage 2 at 8 kHz, stage 3 on the input signal (only when Fs_kHz > 8). Both early exits below return 1 with zeroed outputs; 0 is returned once a lag has been selected. */ + VARDECL( opus_int16, frame_8kHz ); + VARDECL( opus_int16, frame_4kHz ); + opus_int32 filt_state[ 6 ]; + const opus_int16 *input_frame_ptr; + opus_int i, k, d, j; + VARDECL( opus_int16, C ); + VARDECL( opus_int32, xcorr32 ); + const opus_int16 *target_ptr, *basis_ptr; + opus_int32 cross_corr, normalizer, energy, shift, energy_basis, energy_target; + opus_int d_srch[
PE_D_SRCH_LENGTH ], Cmax, length_d_srch, length_d_comp; + VARDECL( opus_int16, d_comp ); + opus_int32 sum, threshold, lag_counter; + opus_int CBimax, CBimax_new, CBimax_old, lag, start_lag, end_lag, lag_new; + opus_int32 CC[ PE_NB_CBKS_STAGE2_EXT ], CCmax, CCmax_b, CCmax_new_b, CCmax_new; + VARDECL( silk_pe_stage3_vals, energies_st3 ); + VARDECL( silk_pe_stage3_vals, cross_corr_st3 ); + opus_int frame_length, frame_length_8kHz, frame_length_4kHz; + opus_int sf_length; + opus_int min_lag; + opus_int max_lag; + opus_int32 contour_bias_Q15, diff; + opus_int nb_cbk_search, cbk_size; + opus_int32 delta_lag_log2_sqr_Q7, lag_log2_Q7, prevLag_log2_Q7, prev_lag_bias_Q13; + const opus_int8 *Lag_CB_ptr; + SAVE_STACK; + /* Check for valid sampling frequency */ + silk_assert( Fs_kHz == 8 || Fs_kHz == 12 || Fs_kHz == 16 ); + + /* Check for valid complexity setting */ + silk_assert( complexity >= SILK_PE_MIN_COMPLEX ); + silk_assert( complexity <= SILK_PE_MAX_COMPLEX ); + + silk_assert( search_thres1_Q16 >= 0 && search_thres1_Q16 <= (1<<16) ); + silk_assert( search_thres2_Q13 >= 0 && search_thres2_Q13 <= (1<<13) ); + + /* Set up frame lengths max / min lag for the sampling frequency */ + frame_length = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * Fs_kHz; + frame_length_4kHz = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * 4; + frame_length_8kHz = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * 8; + sf_length = PE_SUBFR_LENGTH_MS * Fs_kHz; + min_lag = PE_MIN_LAG_MS * Fs_kHz; + max_lag = PE_MAX_LAG_MS * Fs_kHz - 1; + + /* Resample from input sampled at Fs_kHz to 8 kHz */ + ALLOC( frame_8kHz, frame_length_8kHz, opus_int16 ); + if( Fs_kHz == 16 ) { + silk_memset( filt_state, 0, 2 * sizeof( opus_int32 ) ); + silk_resampler_down2( filt_state, frame_8kHz, frame, frame_length ); + } else if( Fs_kHz == 12 ) { + silk_memset( filt_state, 0, 6 * sizeof( opus_int32 ) ); + silk_resampler_down2_3( filt_state, frame_8kHz, frame, frame_length ); + } else { +
silk_assert( Fs_kHz == 8 ); + silk_memcpy( frame_8kHz, frame, frame_length_8kHz * sizeof(opus_int16) ); + } + + /* Decimate again to 4 kHz */ + silk_memset( filt_state, 0, 2 * sizeof( opus_int32 ) );/* Set state to zero */ + ALLOC( frame_4kHz, frame_length_4kHz, opus_int16 ); + silk_resampler_down2( filt_state, frame_4kHz, frame_8kHz, frame_length_8kHz ); + + /* Low-pass filter */ + for( i = frame_length_4kHz - 1; i > 0; i-- ) { + frame_4kHz[ i ] = silk_ADD_SAT16( frame_4kHz[ i ], frame_4kHz[ i - 1 ] ); + } + + /******************************************************************************* + ** Scale 4 kHz signal down to prevent correlations measures from overflowing + ** find scaling as max scaling for each 8kHz(?) subframe + *******************************************************************************/ + + /* Inner product is calculated with different lengths, so scale for the worst case */ + silk_sum_sqr_shift( &energy, &shift, frame_4kHz, frame_length_4kHz ); + if( shift > 0 ) { + shift = silk_RSHIFT( shift, 1 ); + for( i = 0; i < frame_length_4kHz; i++ ) { + frame_4kHz[ i ] = silk_RSHIFT( frame_4kHz[ i ], shift ); + } + } + + /****************************************************************************** + * FIRST STAGE, operating in 4 khz + ******************************************************************************/ + ALLOC( C, nb_subfr * CSTRIDE_8KHZ, opus_int16 ); + ALLOC( xcorr32, MAX_LAG_4KHZ-MIN_LAG_4KHZ+1, opus_int32 ); + silk_memset( C, 0, (nb_subfr >> 1) * CSTRIDE_4KHZ * sizeof( opus_int16 ) ); + target_ptr = &frame_4kHz[ silk_LSHIFT( SF_LENGTH_4KHZ, 2 ) ]; + for( k = 0; k < nb_subfr >> 1; k++ ) { /* one pass per pair of subframes: SF_LENGTH_8KHZ samples at 4 kHz equal two SF_LENGTH_4KHZ subframes */ + /* Check that we are within range of the array */ + silk_assert( target_ptr >= frame_4kHz ); + silk_assert( target_ptr + SF_LENGTH_8KHZ <= frame_4kHz + frame_length_4kHz ); + + basis_ptr = target_ptr - MIN_LAG_4KHZ; + + /* Check that we are within range of the array */ + silk_assert( basis_ptr >= frame_4kHz ); + silk_assert( basis_ptr +
SF_LENGTH_8KHZ <= frame_4kHz + frame_length_4kHz ); + + celt_pitch_xcorr( target_ptr, target_ptr - MAX_LAG_4KHZ, xcorr32, SF_LENGTH_8KHZ, MAX_LAG_4KHZ - MIN_LAG_4KHZ + 1, arch ); /* xcorr32[ n ] is read below as the correlation at lag MAX_LAG_4KHZ - n */ + + /* Calculate first vector products before loop */ + cross_corr = xcorr32[ MAX_LAG_4KHZ - MIN_LAG_4KHZ ]; + normalizer = silk_inner_prod_aligned( target_ptr, target_ptr, SF_LENGTH_8KHZ ); + normalizer = silk_ADD32( normalizer, silk_inner_prod_aligned( basis_ptr, basis_ptr, SF_LENGTH_8KHZ ) ); + normalizer = silk_ADD32( normalizer, silk_SMULBB( SF_LENGTH_8KHZ, 4000 ) ); + + matrix_ptr( C, k, 0, CSTRIDE_4KHZ ) = + (opus_int16)silk_DIV32_varQ( cross_corr, normalizer, 13 + 1 ); /* Q13 */ + + /* From now on normalizer is computed recursively */ + for( d = MIN_LAG_4KHZ + 1; d <= MAX_LAG_4KHZ; d++ ) { + basis_ptr--; + + /* Check that we are within range of the array */ + silk_assert( basis_ptr >= frame_4kHz ); + silk_assert( basis_ptr + SF_LENGTH_8KHZ <= frame_4kHz + frame_length_4kHz ); + + cross_corr = xcorr32[ MAX_LAG_4KHZ - d ]; + + /* Add contribution of new sample and remove contribution from oldest sample */ + normalizer = silk_ADD32( normalizer, + silk_SMULBB( basis_ptr[ 0 ], basis_ptr[ 0 ] ) - + silk_SMULBB( basis_ptr[ SF_LENGTH_8KHZ ], basis_ptr[ SF_LENGTH_8KHZ ] ) ); + + matrix_ptr( C, k, d - MIN_LAG_4KHZ, CSTRIDE_4KHZ) = + (opus_int16)silk_DIV32_varQ( cross_corr, normalizer, 13 + 1 ); /* Q13 */ + } + /* Update target pointer */ + target_ptr += SF_LENGTH_8KHZ; + } + + /* Combine two subframes into single correlation measure and apply short-lag bias */ + if( nb_subfr == PE_MAX_NB_SUBFR ) { + for( i = MAX_LAG_4KHZ; i >= MIN_LAG_4KHZ; i-- ) { + sum = (opus_int32)matrix_ptr( C, 0, i - MIN_LAG_4KHZ, CSTRIDE_4KHZ ) + + (opus_int32)matrix_ptr( C, 1, i - MIN_LAG_4KHZ, CSTRIDE_4KHZ ); /* Q14 */ + sum = silk_SMLAWB( sum, sum, silk_LSHIFT( -i, 4 ) ); /* Q14 */ + C[ i - MIN_LAG_4KHZ ] = (opus_int16)sum; /* Q14 */ + } + } else { + /* Only short-lag bias */ + for( i = MAX_LAG_4KHZ; i >=
MIN_LAG_4KHZ; i-- ) { + sum = silk_LSHIFT( (opus_int32)C[ i - MIN_LAG_4KHZ ], 1 ); /* Q14 */ + sum = silk_SMLAWB( sum, sum, silk_LSHIFT( -i, 4 ) ); /* Q14 */ + C[ i - MIN_LAG_4KHZ ] = (opus_int16)sum; /* Q14 */ + } + } + + /* Sort; presumably leaves the length_d_srch largest values first in C with their original indices in d_srch, which is how both arrays are read below (TODO confirm against the sort helper) */ + length_d_srch = silk_ADD_LSHIFT32( 4, complexity, 1 ); + silk_assert( 3 * length_d_srch <= PE_D_SRCH_LENGTH ); + silk_insertion_sort_decreasing_int16( C, d_srch, CSTRIDE_4KHZ, + length_d_srch ); + + /* Escape if correlation is very low already here */ + Cmax = (opus_int)C[ 0 ]; /* Q14 */ + if( Cmax < SILK_FIX_CONST( 0.2, 14 ) ) { + silk_memset( pitch_out, 0, nb_subfr * sizeof( opus_int ) ); + *LTPCorr_Q15 = 0; + *lagIndex = 0; + *contourIndex = 0; + RESTORE_STACK; + return 1; + } + + threshold = silk_SMULWB( search_thres1_Q16, Cmax ); + for( i = 0; i < length_d_srch; i++ ) { + /* Convert to 8 kHz indices for the sorted correlation that exceeds the threshold */ + if( C[ i ] > threshold ) { + d_srch[ i ] = silk_LSHIFT( d_srch[ i ] + MIN_LAG_4KHZ, 1 ); + } else { + length_d_srch = i; + break; + } + } + silk_assert( length_d_srch > 0 ); + + ALLOC( d_comp, D_COMP_STRIDE, opus_int16 ); + for( i = D_COMP_MIN; i < D_COMP_MAX; i++ ) { + d_comp[ i - D_COMP_MIN ] = 0; + } + for( i = 0; i < length_d_srch; i++ ) { + d_comp[ d_srch[ i ] - D_COMP_MIN ] = 1; + } + + /* Convolution */ + for( i = D_COMP_MAX - 1; i >= MIN_LAG_8KHZ; i-- ) { + d_comp[ i - D_COMP_MIN ] += + d_comp[ i - 1 - D_COMP_MIN ] + d_comp[ i - 2 - D_COMP_MIN ]; + } + + length_d_srch = 0; + for( i = MIN_LAG_8KHZ; i < MAX_LAG_8KHZ + 1; i++ ) { + if( d_comp[ i + 1 - D_COMP_MIN ] > 0 ) { + d_srch[ length_d_srch ] = i; + length_d_srch++; + } + } + + /* Convolution */ + for( i = D_COMP_MAX - 1; i >= MIN_LAG_8KHZ; i-- ) { + d_comp[ i - D_COMP_MIN ] += d_comp[ i - 1 - D_COMP_MIN ] + + d_comp[ i - 2 - D_COMP_MIN ] + d_comp[ i - 3 - D_COMP_MIN ]; + } + + length_d_comp = 0; + for( i = MIN_LAG_8KHZ; i < D_COMP_MAX; i++ ) { + if( d_comp[ i - D_COMP_MIN ] > 0 ) { + d_comp[ length_d_comp ] = i -
2; /* d_comp is compacted in place: the flags become the list of lags evaluated in stage 2 */ + length_d_comp++; + } + } + + /********************************************************************************** + ** SECOND STAGE, operating at 8 kHz, on lag sections with high correlation + *************************************************************************************/ + + /****************************************************************************** + ** Scale signal down to avoid correlations measures from overflowing + *******************************************************************************/ + /* find scaling as max scaling for each subframe */ + silk_sum_sqr_shift( &energy, &shift, frame_8kHz, frame_length_8kHz ); + if( shift > 0 ) { + shift = silk_RSHIFT( shift, 1 ); + for( i = 0; i < frame_length_8kHz; i++ ) { + frame_8kHz[ i ] = silk_RSHIFT( frame_8kHz[ i ], shift ); + } + } + + /********************************************************************************* + * Find energy of each subframe projected onto its history, for a range of delays + *********************************************************************************/ + silk_memset( C, 0, nb_subfr * CSTRIDE_8KHZ * sizeof( opus_int16 ) ); + + target_ptr = &frame_8kHz[ PE_LTP_MEM_LENGTH_MS * 8 ]; + for( k = 0; k < nb_subfr; k++ ) { + + /* Check that we are within range of the array */ + silk_assert( target_ptr >= frame_8kHz ); + silk_assert( target_ptr + SF_LENGTH_8KHZ <= frame_8kHz + frame_length_8kHz ); + + energy_target = silk_ADD32( silk_inner_prod_aligned( target_ptr, target_ptr, SF_LENGTH_8KHZ ), 1 ); + for( j = 0; j < length_d_comp; j++ ) { + d = d_comp[ j ]; + basis_ptr = target_ptr - d; + + /* Check that we are within range of the array */ + silk_assert( basis_ptr >= frame_8kHz ); + silk_assert( basis_ptr + SF_LENGTH_8KHZ <= frame_8kHz + frame_length_8kHz ); + + cross_corr = silk_inner_prod_aligned( target_ptr, basis_ptr, SF_LENGTH_8KHZ ); + if( cross_corr > 0 ) { + energy_basis = silk_inner_prod_aligned( basis_ptr, basis_ptr, SF_LENGTH_8KHZ ); + matrix_ptr( C, k, d - (
MIN_LAG_8KHZ - 2 ), CSTRIDE_8KHZ ) = + (opus_int16)silk_DIV32_varQ( cross_corr, + silk_ADD32( energy_target, + energy_basis ), + 13 + 1 ); /* Q13 */ + } else { + matrix_ptr( C, k, d - ( MIN_LAG_8KHZ - 2 ), CSTRIDE_8KHZ ) = 0; + } + } + target_ptr += SF_LENGTH_8KHZ; + } + + /* search over lag range and lags codebook */ + /* scale factor for lag codebook, as a function of center lag */ + + CCmax = silk_int32_MIN; + CCmax_b = silk_int32_MIN; + + CBimax = 0; /* To avoid returning undefined lag values */ + lag = -1; /* To check if lag with strong enough correlation has been found */ + + if( prevLag > 0 ) { + if( Fs_kHz == 12 ) { + prevLag = silk_DIV32_16( silk_LSHIFT( prevLag, 1 ), 3 ); + } else if( Fs_kHz == 16 ) { + prevLag = silk_RSHIFT( prevLag, 1 ); + } + prevLag_log2_Q7 = silk_lin2log( (opus_int32)prevLag ); + } else { + prevLag_log2_Q7 = 0; + } + silk_assert( search_thres2_Q13 == silk_SAT16( search_thres2_Q13 ) ); + /* Set up stage 2 codebook based on number of subframes */ + if( nb_subfr == PE_MAX_NB_SUBFR ) { + cbk_size = PE_NB_CBKS_STAGE2_EXT; + Lag_CB_ptr = &silk_CB_lags_stage2[ 0 ][ 0 ]; + if( Fs_kHz == 8 && complexity > SILK_PE_MIN_COMPLEX ) { + /* If input is 8 khz use a larger codebook here because it is last stage */ + nb_cbk_search = PE_NB_CBKS_STAGE2_EXT; + } else { + nb_cbk_search = PE_NB_CBKS_STAGE2; + } + } else { + cbk_size = PE_NB_CBKS_STAGE2_10MS; + Lag_CB_ptr = &silk_CB_lags_stage2_10_ms[ 0 ][ 0 ]; + nb_cbk_search = PE_NB_CBKS_STAGE2_10MS; + } + + for( k = 0; k < length_d_srch; k++ ) { + d = d_srch[ k ]; + for( j = 0; j < nb_cbk_search; j++ ) { + CC[ j ] = 0; + for( i = 0; i < nb_subfr; i++ ) { + opus_int d_subfr; + /* Try all codebooks */ + d_subfr = d + matrix_ptr( Lag_CB_ptr, i, j, cbk_size ); + CC[ j ] = CC[ j ] + + (opus_int32)matrix_ptr( C, i, + d_subfr - ( MIN_LAG_8KHZ - 2 ), + CSTRIDE_8KHZ ); + } + } + /* Find best codebook */ + CCmax_new = silk_int32_MIN; + CBimax_new = 0; + for( i = 0; i < nb_cbk_search; i++ ) { + if( CC[ i ] >
CCmax_new ) { + CCmax_new = CC[ i ]; + CBimax_new = i; + } + } + + /* Bias towards shorter lags */ + lag_log2_Q7 = silk_lin2log( d ); /* Q7 */ + silk_assert( lag_log2_Q7 == silk_SAT16( lag_log2_Q7 ) ); + silk_assert( nb_subfr * SILK_FIX_CONST( PE_SHORTLAG_BIAS, 13 ) == silk_SAT16( nb_subfr * SILK_FIX_CONST( PE_SHORTLAG_BIAS, 13 ) ) ); + CCmax_new_b = CCmax_new - silk_RSHIFT( silk_SMULBB( nb_subfr * SILK_FIX_CONST( PE_SHORTLAG_BIAS, 13 ), lag_log2_Q7 ), 7 ); /* Q13 */ + + /* Bias towards previous lag */ + silk_assert( nb_subfr * SILK_FIX_CONST( PE_PREVLAG_BIAS, 13 ) == silk_SAT16( nb_subfr * SILK_FIX_CONST( PE_PREVLAG_BIAS, 13 ) ) ); + if( prevLag > 0 ) { + delta_lag_log2_sqr_Q7 = lag_log2_Q7 - prevLag_log2_Q7; + silk_assert( delta_lag_log2_sqr_Q7 == silk_SAT16( delta_lag_log2_sqr_Q7 ) ); + delta_lag_log2_sqr_Q7 = silk_RSHIFT( silk_SMULBB( delta_lag_log2_sqr_Q7, delta_lag_log2_sqr_Q7 ), 7 ); + prev_lag_bias_Q13 = silk_RSHIFT( silk_SMULBB( nb_subfr * SILK_FIX_CONST( PE_PREVLAG_BIAS, 13 ), *LTPCorr_Q15 ), 15 ); /* Q13 */ + prev_lag_bias_Q13 = silk_DIV32( silk_MUL( prev_lag_bias_Q13, delta_lag_log2_sqr_Q7 ), delta_lag_log2_sqr_Q7 + SILK_FIX_CONST( 0.5, 7 ) ); + CCmax_new_b -= prev_lag_bias_Q13; /* Q13 */ + } + + if( CCmax_new_b > CCmax_b && /* Find maximum biased correlation */ + CCmax_new > silk_SMULBB( nb_subfr, search_thres2_Q13 ) && /* Correlation needs to be high enough to be voiced */ + silk_CB_lags_stage2[ 0 ][ CBimax_new ] <= MIN_LAG_8KHZ /* Lag must be in range */ + ) { + CCmax_b = CCmax_new_b; + CCmax = CCmax_new; + lag = d; + CBimax = CBimax_new; + } + } + + if( lag == -1 ) { + /* No suitable candidate found */ + silk_memset( pitch_out, 0, nb_subfr * sizeof( opus_int ) ); + *LTPCorr_Q15 = 0; + *lagIndex = 0; + *contourIndex = 0; + RESTORE_STACK; + return 1; + } + + /* Output normalized correlation */ + *LTPCorr_Q15 = (opus_int)silk_LSHIFT( silk_DIV32_16( CCmax, nb_subfr ), 2 ); + silk_assert( *LTPCorr_Q15 >= 0 ); + + if( Fs_kHz > 8 ) { + VARDECL( opus_int16,
scratch_mem ); + /***************************************************************************/ + /* Scale input signal down to avoid correlations measures from overflowing */ + /***************************************************************************/ + /* find scaling as max scaling for each subframe */ + silk_sum_sqr_shift( &energy, &shift, frame, frame_length ); + ALLOC( scratch_mem, shift > 0 ? frame_length : ALLOC_NONE, opus_int16 ); + if( shift > 0 ) { + /* Move signal to scratch mem because the input signal should be unchanged */ + shift = silk_RSHIFT( shift, 1 ); + for( i = 0; i < frame_length; i++ ) { + scratch_mem[ i ] = silk_RSHIFT( frame[ i ], shift ); + } + input_frame_ptr = scratch_mem; + } else { + input_frame_ptr = frame; + } + + /* Search in original signal */ + + CBimax_old = CBimax; + /* Compensate for decimation */ + silk_assert( lag == silk_SAT16( lag ) ); + if( Fs_kHz == 12 ) { + lag = silk_RSHIFT( silk_SMULBB( lag, 3 ), 1 ); + } else if( Fs_kHz == 16 ) { + lag = silk_LSHIFT( lag, 1 ); + } else { /* not reached for the asserted rates: only 12 and 16 kHz satisfy Fs_kHz > 8 */ + lag = silk_SMULBB( lag, 3 ); + } + + lag = silk_LIMIT_int( lag, min_lag, max_lag ); + start_lag = silk_max_int( lag - 2, min_lag ); + end_lag = silk_min_int( lag + 2, max_lag ); + lag_new = lag; /* to avoid undefined lag */ + CBimax = 0; /* to avoid undefined lag */ + + CCmax = silk_int32_MIN; + /* pitch lags according to second stage */ + for( k = 0; k < nb_subfr; k++ ) { + pitch_out[ k ] = lag + 2 * silk_CB_lags_stage2[ k ][ CBimax_old ]; + } + + /* Set up codebook parameters according to complexity setting and frame length */ + if( nb_subfr == PE_MAX_NB_SUBFR ) { + nb_cbk_search = (opus_int)silk_nb_cbk_searchs_stage3[ complexity ]; + cbk_size = PE_NB_CBKS_STAGE3_MAX; + Lag_CB_ptr = &silk_CB_lags_stage3[ 0 ][ 0 ]; + } else { + nb_cbk_search = PE_NB_CBKS_STAGE3_10MS; + cbk_size = PE_NB_CBKS_STAGE3_10MS; + Lag_CB_ptr = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ]; + } + + /* Calculate the correlations and energies needed in stage 3 */ + ALLOC(
energies_st3, nb_subfr * nb_cbk_search, silk_pe_stage3_vals ); + ALLOC( cross_corr_st3, nb_subfr * nb_cbk_search, silk_pe_stage3_vals ); + silk_P_Ana_calc_corr_st3( cross_corr_st3, input_frame_ptr, start_lag, sf_length, nb_subfr, complexity, arch ); + silk_P_Ana_calc_energy_st3( energies_st3, input_frame_ptr, start_lag, sf_length, nb_subfr, complexity ); + + lag_counter = 0; + silk_assert( lag == silk_SAT16( lag ) ); + contour_bias_Q15 = silk_DIV32_16( SILK_FIX_CONST( PE_FLATCONTOUR_BIAS, 15 ), lag ); + + target_ptr = &input_frame_ptr[ PE_LTP_MEM_LENGTH_MS * Fs_kHz ]; + energy_target = silk_ADD32( silk_inner_prod_aligned( target_ptr, target_ptr, nb_subfr * sf_length ), 1 ); + for( d = start_lag; d <= end_lag; d++ ) { + for( j = 0; j < nb_cbk_search; j++ ) { + cross_corr = 0; + energy = energy_target; + for( k = 0; k < nb_subfr; k++ ) { + cross_corr = silk_ADD32( cross_corr, + matrix_ptr( cross_corr_st3, k, j, + nb_cbk_search )[ lag_counter ] ); + energy = silk_ADD32( energy, + matrix_ptr( energies_st3, k, j, + nb_cbk_search )[ lag_counter ] ); + silk_assert( energy >= 0 ); + } + if( cross_corr > 0 ) { + CCmax_new = silk_DIV32_varQ( cross_corr, energy, 13 + 1 ); /* Q13 */ + /* Reduce depending on flatness of contour */ + diff = silk_int16_MAX - silk_MUL( contour_bias_Q15, j ); /* Q15 */ + silk_assert( diff == silk_SAT16( diff ) ); + CCmax_new = silk_SMULWB( CCmax_new, diff ); /* Q14 */ + } else { + CCmax_new = 0; + } + + if( CCmax_new > CCmax && ( d + silk_CB_lags_stage3[ 0 ][ j ] ) <= max_lag ) { + CCmax = CCmax_new; + lag_new = d; + CBimax = j; + } + } + lag_counter++; + } + + for( k = 0; k < nb_subfr; k++ ) { + pitch_out[ k ] = lag_new + matrix_ptr( Lag_CB_ptr, k, CBimax, cbk_size ); + pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], min_lag, PE_MAX_LAG_MS * Fs_kHz ); + } + *lagIndex = (opus_int16)( lag_new - min_lag); + *contourIndex = (opus_int8)CBimax; + } else { /* Fs_kHz == 8 */ + /* Save Lags */ + for( k = 0; k < nb_subfr; k++ ) { + pitch_out[ k ] = lag +
matrix_ptr( Lag_CB_ptr, k, CBimax, cbk_size ); + pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], MIN_LAG_8KHZ, PE_MAX_LAG_MS * 8 ); + } + *lagIndex = (opus_int16)( lag - MIN_LAG_8KHZ ); + *contourIndex = (opus_int8)CBimax; + } + silk_assert( *lagIndex >= 0 ); + /* return as voiced */ + RESTORE_STACK; + return 0; +} + +/*********************************************************************** + * Calculates the correlations used in stage 3 search. In order to cover + * the whole lag codebook for all the searched offset lags (lag +- 2), + * the following correlations are needed in each sub frame: + * + * sf1: lag range [-8,...,7] total 16 correlations + * sf2: lag range [-4,...,4] total 9 correlations + * sf3: lag range [-3,....4] total 8 correlations + * sf4: lag range [-6,....8] total 15 correlations + * + * In total 48 correlations. The direct implementation computed in worst + * case 4*12*5 = 240 correlations, but more likely around 120. + ***********************************************************************/ +static void silk_P_Ana_calc_corr_st3( + silk_pe_stage3_vals cross_corr_st3[], /* O 3 DIM correlation array */ + const opus_int16 frame[], /* I vector to correlate */ + opus_int start_lag, /* I lag offset to search around */ + opus_int sf_length, /* I length of a 5 ms subframe */ + opus_int nb_subfr, /* I number of subframes */ + opus_int complexity, /* I Complexity setting */ + int arch /* I Run-time architecture */ +) +{ + const opus_int16 *target_ptr; + opus_int i, j, k, lag_counter, lag_low, lag_high; + opus_int nb_cbk_search, delta, idx, cbk_size; + VARDECL( opus_int32, scratch_mem ); + VARDECL( opus_int32, xcorr32 ); + const opus_int8 *Lag_range_ptr, *Lag_CB_ptr; + SAVE_STACK; + + silk_assert( complexity >= SILK_PE_MIN_COMPLEX ); + silk_assert( complexity <= SILK_PE_MAX_COMPLEX ); + + if( nb_subfr == PE_MAX_NB_SUBFR ) { + Lag_range_ptr = &silk_Lag_range_stage3[ complexity ][ 0 ][ 0 ]; + Lag_CB_ptr = &silk_CB_lags_stage3[ 0 ][ 0 ]; + nb_cbk_search =
silk_nb_cbk_searchs_stage3[ complexity ]; + cbk_size = PE_NB_CBKS_STAGE3_MAX; + } else { + silk_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1); + Lag_range_ptr = &silk_Lag_range_stage3_10_ms[ 0 ][ 0 ]; + Lag_CB_ptr = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ]; + nb_cbk_search = PE_NB_CBKS_STAGE3_10MS; + cbk_size = PE_NB_CBKS_STAGE3_10MS; + } + ALLOC( scratch_mem, SCRATCH_SIZE, opus_int32 ); + ALLOC( xcorr32, SCRATCH_SIZE, opus_int32 ); + + target_ptr = &frame[ silk_LSHIFT( sf_length, 2 ) ]; /* Pointer to middle of frame */ + for( k = 0; k < nb_subfr; k++ ) { + lag_counter = 0; + + /* Calculate the correlations for each subframe */ + lag_low = matrix_ptr( Lag_range_ptr, k, 0, 2 ); + lag_high = matrix_ptr( Lag_range_ptr, k, 1, 2 ); + silk_assert(lag_high-lag_low+1 <= SCRATCH_SIZE); + celt_pitch_xcorr( target_ptr, target_ptr - start_lag - lag_high, xcorr32, sf_length, lag_high - lag_low + 1, arch ); + for( j = lag_low; j <= lag_high; j++ ) { + silk_assert( lag_counter < SCRATCH_SIZE ); + scratch_mem[ lag_counter ] = xcorr32[ lag_high - j ]; /* reversed copy, so scratch_mem is indexed by j - lag_low */ + lag_counter++; + } + + delta = matrix_ptr( Lag_range_ptr, k, 0, 2 ); + for( i = 0; i < nb_cbk_search; i++ ) { + /* Fill out the 3 dim array that stores the correlations for */ + /* each code_book vector for each start lag */ + idx = matrix_ptr( Lag_CB_ptr, k, i, cbk_size ) - delta; + for( j = 0; j < PE_NB_STAGE3_LAGS; j++ ) { + silk_assert( idx + j < SCRATCH_SIZE ); + silk_assert( idx + j < lag_counter ); + matrix_ptr( cross_corr_st3, k, i, nb_cbk_search )[ j ] = + scratch_mem[ idx + j ]; + } + } + target_ptr += sf_length; + } + RESTORE_STACK; +} + +/********************************************************************/ +/* Calculate the energies used in the stage 3 search, for each of */ +/* the nb_subfr subframes. The energies are calculated recursively.
*/ +/********************************************************************/ +static void silk_P_Ana_calc_energy_st3( + silk_pe_stage3_vals energies_st3[], /* O 3 DIM energy array */ + const opus_int16 frame[], /* I vector to calc energy in */ + opus_int start_lag, /* I lag offset to search around */ + opus_int sf_length, /* I length of one 5 ms subframe */ + opus_int nb_subfr, /* I number of subframes */ + opus_int complexity /* I Complexity setting */ +) +{ /* For every subframe, scratch_mem[ n ] receives the energy of the sf_length-sample window that starts n samples before the first-lag window; those values are then copied out per codebook vector. */ + const opus_int16 *target_ptr, *basis_ptr; + opus_int32 energy; + opus_int k, i, j, lag_counter; + opus_int nb_cbk_search, delta, idx, cbk_size, lag_diff; + VARDECL( opus_int32, scratch_mem ); + const opus_int8 *Lag_range_ptr, *Lag_CB_ptr; + SAVE_STACK; + + silk_assert( complexity >= SILK_PE_MIN_COMPLEX ); + silk_assert( complexity <= SILK_PE_MAX_COMPLEX ); + + if( nb_subfr == PE_MAX_NB_SUBFR ) { + Lag_range_ptr = &silk_Lag_range_stage3[ complexity ][ 0 ][ 0 ]; + Lag_CB_ptr = &silk_CB_lags_stage3[ 0 ][ 0 ]; + nb_cbk_search = silk_nb_cbk_searchs_stage3[ complexity ]; + cbk_size = PE_NB_CBKS_STAGE3_MAX; + } else { + silk_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1); + Lag_range_ptr = &silk_Lag_range_stage3_10_ms[ 0 ][ 0 ]; + Lag_CB_ptr = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ]; + nb_cbk_search = PE_NB_CBKS_STAGE3_10MS; + cbk_size = PE_NB_CBKS_STAGE3_10MS; + } + ALLOC( scratch_mem, SCRATCH_SIZE, opus_int32 ); + + target_ptr = &frame[ silk_LSHIFT( sf_length, 2 ) ]; + for( k = 0; k < nb_subfr; k++ ) { + lag_counter = 0; + + /* Calculate the energy for first lag */ + basis_ptr = target_ptr - ( start_lag + matrix_ptr( Lag_range_ptr, k, 0, 2 ) ); + energy = silk_inner_prod_aligned( basis_ptr, basis_ptr, sf_length ); + silk_assert( energy >= 0 ); + scratch_mem[ lag_counter ] = energy; + lag_counter++; + + lag_diff = ( matrix_ptr( Lag_range_ptr, k, 1, 2 ) - matrix_ptr( Lag_range_ptr, k, 0, 2 ) + 1 ); + for( i = 1; i < lag_diff; i++ ) { /* slide the window one sample further back per step instead of recomputing the inner product */ + /* remove part outside new window */ + energy -= silk_SMULBB( basis_ptr[ sf_length - i ],
basis_ptr[ sf_length - i ] ); + silk_assert( energy >= 0 ); + + /* add part that comes into window */ + energy = silk_ADD_SAT32( energy, silk_SMULBB( basis_ptr[ -i ], basis_ptr[ -i ] ) ); + silk_assert( energy >= 0 ); + silk_assert( lag_counter < SCRATCH_SIZE ); + scratch_mem[ lag_counter ] = energy; + lag_counter++; + } + + delta = matrix_ptr( Lag_range_ptr, k, 0, 2 ); + for( i = 0; i < nb_cbk_search; i++ ) { + /* Fill out the 3 dim array that stores the energies for */ + /* each code_book vector for each start lag */ + idx = matrix_ptr( Lag_CB_ptr, k, i, cbk_size ) - delta; + for( j = 0; j < PE_NB_STAGE3_LAGS; j++ ) { + silk_assert( idx + j < SCRATCH_SIZE ); + silk_assert( idx + j < lag_counter ); + matrix_ptr( energies_st3, k, i, nb_cbk_search )[ j ] = + scratch_mem[ idx + j ]; + silk_assert( + matrix_ptr( energies_st3, k, i, nb_cbk_search )[ j ] >= 0 ); + } + } + target_ptr += sf_length; + } + RESTORE_STACK; +} diff --git a/drivers/opus/silk/fixed/prefilter_FIX.c b/drivers/opus/silk/fixed/prefilter_FIX.c new file mode 100644 index 0000000000..0b027eb836 --- /dev/null +++ b/drivers/opus/silk/fixed/prefilter_FIX.c @@ -0,0 +1,209 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "main_FIX.h"
+#include "stack_alloc.h"
+#include "tuning_parameters.h"
+
+/* Prefilter for finding Quantizer input signal.
+ * Forward declaration; the definition is at the end of this file and is
+ * called once per subframe by silk_prefilter_FIX(). */
+static OPUS_INLINE void silk_prefilt_FIX(
+    silk_prefilter_state_FIX *P,                        /* I/O state */
+    opus_int32               st_res_Q12[],              /* I short term residual signal */
+    opus_int32               xw_Q3[],                   /* O prefiltered signal */
+    opus_int32               HarmShapeFIRPacked_Q12,    /* I Harmonic shaping coefficients */
+    opus_int                 Tilt_Q14,                  /* I Tilt shaping coefficient */
+    opus_int32               LF_shp_Q14,                /* I Low-frequency shaping coefficients */
+    opus_int                 lag,                       /* I Lag for harmonic shaping */
+    opus_int                 length                     /* I Length of signals */
+);
+
+void silk_warped_LPC_analysis_filter_FIX( /* Warped LPC analysis filter.
+ * For each of the length input samples, pushes the sample through a chain
+ * of sections driven by lambda_Q16, keeping the chain memory in
+ * state[ 0 .. order ], accumulates the section outputs weighted by
+ * coef_Q13[ 0 .. order - 1 ] in acc_Q11, and writes
+ * res_Q2[ n ] = ( input[ n ] << 2 ) - round( acc_Q11 >> 9 ).
+ * state is updated in place so consecutive calls continue the filter. */
+    opus_int32       state[],                   /* I/O State [order + 1] */
+    opus_int32       res_Q2[],                  /* O Residual signal [length] */
+    const opus_int16 coef_Q13[],                /* I Coefficients [order] */
+    const opus_int16 input[],                   /* I Input signal [length] */
+    const opus_int16 lambda_Q16,                /* I Warping factor */
+    const opus_int   length,                    /* I Length of input signal */
+    const opus_int   order                      /* I Filter order (even) */
+)
+{
+    opus_int   n, i;
+    opus_int32 acc_Q11, tmp1, tmp2;
+
+    /* Order must be even (the section loop below advances two sections per iteration) */
+    silk_assert( ( order & 1 ) == 0 );
+
+    for( n = 0; n < length; n++ ) {
+        /* Output of lowpass section */
+        tmp2 = silk_SMLAWB( state[ 0 ], state[ 1 ], lambda_Q16 );
+        state[ 0 ] = silk_LSHIFT( input[ n ], 14 );
+        /* Output of allpass section */
+        tmp1 = silk_SMLAWB( state[ 1 ], state[ 2 ] - tmp2, lambda_Q16 );
+        state[ 1 ] = tmp2;
+        acc_Q11 = silk_RSHIFT( order, 1 ); /* accumulator starts at order / 2; presumably a rounding offset - TODO confirm */
+        acc_Q11 = silk_SMLAWB( acc_Q11, tmp2, coef_Q13[ 0 ] );
+        /* Loop over allpass sections, two per iteration; tmp1 and tmp2 alternate as section output */
+        for( i = 2; i < order; i += 2 ) {
+            /* Output of allpass section */
+            tmp2 = silk_SMLAWB( state[ i ], state[ i + 1 ] - tmp1, lambda_Q16 );
+            state[ i ] = tmp1;
+            acc_Q11 = silk_SMLAWB( acc_Q11, tmp1, coef_Q13[ i - 1 ] );
+            /* Output of allpass section */
+            tmp1 = silk_SMLAWB( state[ i + 1 ], state[ i + 2 ] - tmp2, lambda_Q16 );
+            state[ i + 1 ] = tmp2;
+            acc_Q11 = silk_SMLAWB( acc_Q11, tmp2, coef_Q13[ i ] );
+        }
+        state[ order ] = tmp1;
+        acc_Q11 = silk_SMLAWB( acc_Q11, tmp1, coef_Q13[ order - 1 ] );
+        res_Q2[ n ] = silk_LSHIFT( (opus_int32)input[ n ], 2 ) - silk_RSHIFT_ROUND( acc_Q11, 9 );
+    }
+}
+
+void silk_prefilter_FIX( /* Produces the weighted signal xw_Q3 from the speech signal x, one
+ * subframe (psEnc->sCmn.subfr_length samples) at a time for
+ * psEnc->sCmn.nb_subfr subframes. Per subframe: warped short-term
+ * analysis filtering into st_res_Q2, a two-tap filter B_Q10 into
+ * x_filt_Q12, then silk_prefilt_FIX() for the harmonic / tilt /
+ * low-frequency shaping. Filter memories live in psEnc->sPrefilt. */
+    silk_encoder_state_FIX         *psEnc,      /* I/O Encoder state */
+    const silk_encoder_control_FIX *psEncCtrl,  /* I Encoder control */
+    opus_int32                     xw_Q3[],     /* O Weighted signal */
+    const opus_int16               x[]          /* I Speech signal */
+)
+{
+    silk_prefilter_state_FIX *P = &psEnc->sPrefilt;
+    opus_int   j, k, lag;
+    opus_int32 tmp_32;
+    const opus_int16 *AR1_shp_Q13;
+    const opus_int16 *px;
+    opus_int32 *pxw_Q3;
+    opus_int   HarmShapeGain_Q12, Tilt_Q14;
+    opus_int32 HarmShapeFIRPacked_Q12, LF_shp_Q14;
+    VARDECL( opus_int32, x_filt_Q12 );
+    VARDECL( opus_int32, st_res_Q2 );
+    opus_int16 B_Q10[ 2 ];
+    SAVE_STACK;
+
+    /* Set up pointers */
+    px = x;
+    pxw_Q3 = xw_Q3;
+    lag = P->lagPrev; /* used for every subframe unless the frame is voiced */
+    ALLOC( x_filt_Q12, psEnc->sCmn.subfr_length, opus_int32 );
+    ALLOC( st_res_Q2, psEnc->sCmn.subfr_length, opus_int32 );
+    for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) {
+        /* Update Variables that change per sub frame */
+        if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) {
+            lag = psEncCtrl->pitchL[ k ];
+        }
+
+        /* Noise shape parameters */
+        HarmShapeGain_Q12 = silk_SMULWB( (opus_int32)psEncCtrl->HarmShapeGain_Q14[ k ], 16384 - psEncCtrl->HarmBoost_Q14[ k ] );
+        silk_assert( HarmShapeGain_Q12 >= 0 );
+        HarmShapeFIRPacked_Q12 = silk_RSHIFT( HarmShapeGain_Q12, 2 ); /* low 16 bits: gain / 4 */
+        HarmShapeFIRPacked_Q12 |= silk_LSHIFT( (opus_int32)silk_RSHIFT( HarmShapeGain_Q12, 1 ), 16 ); /* high 16 bits: gain / 2 */
+        Tilt_Q14 = psEncCtrl->Tilt_Q14[ k ];
+        LF_shp_Q14 = psEncCtrl->LF_shp_Q14[ k ];
+        AR1_shp_Q13 = &psEncCtrl->AR1_Q13[ k * MAX_SHAPE_LPC_ORDER ];
+
+        /* Short term FIR filtering */
+        silk_warped_LPC_analysis_filter_FIX( P->sAR_shp, st_res_Q2, AR1_shp_Q13, px,
+            psEnc->sCmn.warping_Q16, psEnc->sCmn.subfr_length, psEnc->sCmn.shapingLPCOrder );
+
+        /* Reduce (mainly) low frequencies during harmonic emphasis */
+        B_Q10[ 0 ] = silk_RSHIFT_ROUND( psEncCtrl->GainsPre_Q14[ k ], 4 );
+        tmp_32 = silk_SMLABB( SILK_FIX_CONST( INPUT_TILT, 26 ), psEncCtrl->HarmBoost_Q14[ k ], HarmShapeGain_Q12 ); /* Q26 */
+        tmp_32 = silk_SMLABB( tmp_32, psEncCtrl->coding_quality_Q14, SILK_FIX_CONST( HIGH_RATE_INPUT_TILT, 12 ) ); /* Q26 */
+        tmp_32 = silk_SMULWB( tmp_32, -psEncCtrl->GainsPre_Q14[ k ] ); /* Q24 */
+        tmp_32 = silk_RSHIFT_ROUND( tmp_32, 14 ); /* Q10 */
+        B_Q10[ 1 ]= silk_SAT16( tmp_32 );
+        x_filt_Q12[ 0 ] = silk_MLA( silk_MUL( st_res_Q2[ 0 ], B_Q10[ 0 ] ), P->sHarmHP_Q2, B_Q10[ 1 ] ); /* first sample uses the previous subframe's last residual */
+        for( j = 1; j < psEnc->sCmn.subfr_length; j++ ) {
+            x_filt_Q12[ j ] = silk_MLA( silk_MUL( st_res_Q2[ j ], B_Q10[ 0 ] ), st_res_Q2[ j - 1 ], B_Q10[ 1 ] );
+        }
+        P->sHarmHP_Q2 = st_res_Q2[ psEnc->sCmn.subfr_length - 1 ];
+
+        silk_prefilt_FIX( P, x_filt_Q12, pxw_Q3, HarmShapeFIRPacked_Q12, Tilt_Q14, LF_shp_Q14, lag, psEnc->sCmn.subfr_length );
+
+        px += psEnc->sCmn.subfr_length;
+        pxw_Q3 += psEnc->sCmn.subfr_length;
+    }
+
+    P->lagPrev = psEncCtrl->pitchL[ psEnc->sCmn.nb_subfr - 1 ];
+    RESTORE_STACK;
+}
+
+/* Prefilter for finding Quantizer input signal.
+ * Processes length samples of st_res_Q12 into xw_Q3. The low-frequency
+ * shaping states and the circular LTP shaping buffer (index wrapped with
+ * LTP_MASK) are read from P at entry and written back at exit. The
+ * harmonic term n_LTP_Q12 is only computed when lag > 0. */
+static OPUS_INLINE void silk_prefilt_FIX(
+    silk_prefilter_state_FIX *P,                        /* I/O state */
+    opus_int32               st_res_Q12[],              /* I short term residual signal */
+    opus_int32               xw_Q3[],                   /* O prefiltered signal */
+    opus_int32               HarmShapeFIRPacked_Q12,    /* I Harmonic shaping coefficients */
+    opus_int                 Tilt_Q14,                  /* I Tilt shaping coefficient */
+    opus_int32               LF_shp_Q14,                /* I Low-frequency shaping coefficients */
+    opus_int                 lag,                       /* I Lag for harmonic shaping */
+    opus_int                 length                     /* I Length of signals */
+)
+{
+    opus_int   i, idx, LTP_shp_buf_idx;
+    opus_int32 n_LTP_Q12, n_Tilt_Q10, n_LF_Q10;
+    opus_int32 sLF_MA_shp_Q12, sLF_AR_shp_Q12;
+    opus_int16 *LTP_shp_buf;
+
+    /* To speed up use temp variables instead of using the struct */
+    LTP_shp_buf = P->sLTP_shp;
+    LTP_shp_buf_idx = P->sLTP_shp_buf_idx;
+    sLF_AR_shp_Q12 = P->sLF_AR_shp_Q12;
+    sLF_MA_shp_Q12 = P->sLF_MA_shp_Q12;
+
+    for( i = 0; i < length; i++ ) {
+        if( lag > 0 ) {
+            /* unrolled loop: three taps centred on the sample lag positions back in the circular buffer */
+            silk_assert( HARM_SHAPE_FIR_TAPS == 3 );
+            idx = lag + LTP_shp_buf_idx;
+            n_LTP_Q12 = silk_SMULBB( LTP_shp_buf[ ( idx - HARM_SHAPE_FIR_TAPS / 2 - 1) & LTP_MASK ], HarmShapeFIRPacked_Q12 );
+            n_LTP_Q12 = silk_SMLABT( n_LTP_Q12, LTP_shp_buf[ ( idx - HARM_SHAPE_FIR_TAPS / 2 ) & LTP_MASK ], HarmShapeFIRPacked_Q12 );
+            n_LTP_Q12 = silk_SMLABB( n_LTP_Q12, LTP_shp_buf[ ( idx - HARM_SHAPE_FIR_TAPS / 2 + 1) & LTP_MASK ], HarmShapeFIRPacked_Q12 );
+        } else {
+            n_LTP_Q12 = 0;
+        }
+
+        n_Tilt_Q10 = silk_SMULWB( sLF_AR_shp_Q12, Tilt_Q14 );
+        n_LF_Q10 = silk_SMLAWB( silk_SMULWT( sLF_AR_shp_Q12, LF_shp_Q14 ), sLF_MA_shp_Q12, LF_shp_Q14 );
+
+        sLF_AR_shp_Q12 = silk_SUB32( st_res_Q12[ i ], silk_LSHIFT( n_Tilt_Q10, 2 ) );
+        sLF_MA_shp_Q12 = silk_SUB32( sLF_AR_shp_Q12, silk_LSHIFT( n_LF_Q10, 2 ) );
+
+        LTP_shp_buf_idx = ( LTP_shp_buf_idx - 1 ) & LTP_MASK; /* buffer is written backwards, wrapping with LTP_MASK */
+        LTP_shp_buf[ LTP_shp_buf_idx ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( sLF_MA_shp_Q12, 12 ) );
+
+        xw_Q3[i] = silk_RSHIFT_ROUND( silk_SUB32( sLF_MA_shp_Q12, n_LTP_Q12 ), 9 );
+    }
+
+    /* Copy temp variable back to state */
+    P->sLF_AR_shp_Q12 = sLF_AR_shp_Q12;
+    P->sLF_MA_shp_Q12 = sLF_MA_shp_Q12;
+    P->sLTP_shp_buf_idx = LTP_shp_buf_idx;
+}
diff --git a/drivers/opus/silk/fixed/process_gains_FIX.c b/drivers/opus/silk/fixed/process_gains_FIX.c
new file mode 100644
index 0000000000..3a78c475bb
--- /dev/null
+++ b/drivers/opus/silk/fixed/process_gains_FIX.c
@@ -0,0 +1,117 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "main_FIX.h"
+#include "tuning_parameters.h"
+
+/* Processing of gains.
+ * In order: (1) for voiced frames, reduces every subframe gain by a factor
+ * derived from LTPredCodGain_Q7; (2) soft-limits each gain using the
+ * subframe residual energy; (3) saves the unquantized gains and the
+ * previous gain index; (4) quantizes the gains with silk_gains_quant();
+ * (5) picks quantOffsetType for voiced frames; (6) computes Lambda_Q10.
+ * Results are written into psEncCtrl, psEnc->sCmn.indices and
+ * psEnc->sShape.LastGainIndex. */
+void silk_process_gains_FIX(
+    silk_encoder_state_FIX   *psEnc,        /* I/O Encoder state */
+    silk_encoder_control_FIX *psEncCtrl,    /* I/O Encoder control */
+    opus_int                 condCoding     /* I The type of conditional coding to use */
+)
+{
+    silk_shape_state_FIX *psShapeSt = &psEnc->sShape;
+    opus_int   k;
+    opus_int32 s_Q16, InvMaxSqrVal_Q16, gain, gain_squared, ResNrg, ResNrgPart, quant_offset_Q10;
+
+    /* Gain reduction when LTP coding gain is high */
+    if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) {
+        /*s = -0.5f * silk_sigmoid( 0.25f * ( psEncCtrl->LTPredCodGain - 12.0f ) ); */
+        s_Q16 = -silk_sigm_Q15( silk_RSHIFT_ROUND( psEncCtrl->LTPredCodGain_Q7 - SILK_FIX_CONST( 12.0, 7 ), 4 ) );
+        for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) {
+            psEncCtrl->Gains_Q16[ k ] = silk_SMLAWB( psEncCtrl->Gains_Q16[ k ], psEncCtrl->Gains_Q16[ k ], s_Q16 );
+        }
+    }
+
+    /* Limit the quantized signal */
+    /* InvMaxSqrVal = pow( 2.0f, 0.33f * ( 21.0f - SNR_dB ) ) / subfr_length; */
+    InvMaxSqrVal_Q16 = silk_DIV32_16( silk_log2lin(
+        silk_SMULWB( SILK_FIX_CONST( 21 + 16 / 0.33, 7 ) - psEnc->sCmn.SNR_dB_Q7, SILK_FIX_CONST( 0.33, 16 ) ) ), psEnc->sCmn.subfr_length );
+
+    for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) {
+        /* Soft limit on ratio residual energy and squared gains */
+        ResNrg = psEncCtrl->ResNrg[ k ];
+        ResNrgPart = silk_SMULWW( ResNrg, InvMaxSqrVal_Q16 );
+        if( psEncCtrl->ResNrgQ[ k ] > 0 ) {
+            ResNrgPart = silk_RSHIFT_ROUND( ResNrgPart, psEncCtrl->ResNrgQ[ k ] );
+        } else {
+            if( ResNrgPart >= silk_RSHIFT( silk_int32_MAX, -psEncCtrl->ResNrgQ[ k ] ) ) {
+                ResNrgPart = silk_int32_MAX; /* the left shift below would overflow: clamp instead */
+            } else {
+                ResNrgPart = silk_LSHIFT( ResNrgPart, -psEncCtrl->ResNrgQ[ k ] );
+            }
+        }
+        gain = psEncCtrl->Gains_Q16[ k ];
+        gain_squared = silk_ADD_SAT32( ResNrgPart, silk_SMMUL( gain, gain ) );
+        if( gain_squared < silk_int16_MAX ) {
+            /* recalculate with higher precision */
+            gain_squared = silk_SMLAWW( silk_LSHIFT( ResNrgPart, 16 ), gain, gain );
+            silk_assert( gain_squared > 0 );
+            gain = silk_SQRT_APPROX( gain_squared ); /* Q8 */
+            gain = silk_min( gain, silk_int32_MAX >> 8 );
+            psEncCtrl->Gains_Q16[ k ] = silk_LSHIFT_SAT32( gain, 8 ); /* Q16 */
+        } else {
+            gain = silk_SQRT_APPROX( gain_squared ); /* Q0 */
+            gain = silk_min( gain, silk_int32_MAX >> 16 );
+            psEncCtrl->Gains_Q16[ k ] = silk_LSHIFT_SAT32( gain, 16 ); /* Q16 */
+        }
+    }
+
+    /* Save unquantized gains and gain Index */
+    silk_memcpy( psEncCtrl->GainsUnq_Q16, psEncCtrl->Gains_Q16, psEnc->sCmn.nb_subfr * sizeof( opus_int32 ) );
+    psEncCtrl->lastGainIndexPrev = psShapeSt->LastGainIndex;
+
+    /* Quantize gains (silk_gains_quant is passed Gains_Q16 and LastGainIndex by pointer, so it can update both) */
+    silk_gains_quant( psEnc->sCmn.indices.GainsIndices, psEncCtrl->Gains_Q16,
+        &psShapeSt->LastGainIndex, condCoding == CODE_CONDITIONALLY, psEnc->sCmn.nb_subfr );
+
+    /* Set quantizer offset for voiced signals. Larger offset when LTP coding gain is low or tilt is high (ie low-pass) */
+    if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) {
+        if( psEncCtrl->LTPredCodGain_Q7 + silk_RSHIFT( psEnc->sCmn.input_tilt_Q15, 8 ) > SILK_FIX_CONST( 1.0, 7 ) ) {
+            psEnc->sCmn.indices.quantOffsetType = 0;
+        } else {
+            psEnc->sCmn.indices.quantOffsetType = 1;
+        }
+    }
+
+    /* Quantizer boundary adjustment: Lambda_Q10 is a sum of a constant and five weighted terms */
+    quant_offset_Q10 = silk_Quantization_Offsets_Q10[ psEnc->sCmn.indices.signalType >> 1 ][ psEnc->sCmn.indices.quantOffsetType ];
+    psEncCtrl->Lambda_Q10 = SILK_FIX_CONST( LAMBDA_OFFSET, 10 )
+        + silk_SMULBB( SILK_FIX_CONST( LAMBDA_DELAYED_DECISIONS, 10 ), psEnc->sCmn.nStatesDelayedDecision )
+        + silk_SMULWB( SILK_FIX_CONST( LAMBDA_SPEECH_ACT, 18 ), psEnc->sCmn.speech_activity_Q8 )
+        + silk_SMULWB( SILK_FIX_CONST( LAMBDA_INPUT_QUALITY, 12 ), psEncCtrl->input_quality_Q14 )
+        + silk_SMULWB( SILK_FIX_CONST( LAMBDA_CODING_QUALITY, 12 ), psEncCtrl->coding_quality_Q14 )
+        + silk_SMULWB( SILK_FIX_CONST( LAMBDA_QUANT_OFFSET, 16 ), quant_offset_Q10 );
+
+    silk_assert( psEncCtrl->Lambda_Q10 > 0 );
+    silk_assert( psEncCtrl->Lambda_Q10 < SILK_FIX_CONST( 2, 10 ) );
+}
diff --git a/drivers/opus/silk/fixed/regularize_correlations_FIX.c b/drivers/opus/silk/fixed/regularize_correlations_FIX.c
new file mode 100644
index 0000000000..a3378fdd17
--- /dev/null
+++ b/drivers/opus/silk/fixed/regularize_correlations_FIX.c
@@ -0,0 +1,47 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "main_FIX.h"
+
+/* Add noise to matrix diagonal.
+ * Adds noise to each of the D diagonal entries of XX (addressed through
+ * matrix_ptr with row == column) and to xx[ 0 ]. Both arrays are modified
+ * in place. The diagonal uses silk_ADD32; xx[ 0 ] uses a plain +=. */
+void silk_regularize_correlations_FIX(
+    opus_int32 *XX,     /* I/O Correlation matrices */
+    opus_int32 *xx,     /* I/O Correlation values */
+    opus_int32 noise,   /* I Noise to add */
+    opus_int   D        /* I Dimension of XX */
+)
+{
+    opus_int i;
+    for( i = 0; i < D; i++ ) {
+        matrix_ptr( &XX[ 0 ], i, i, D ) = silk_ADD32( matrix_ptr( &XX[ 0 ], i, i, D ), noise );
+    }
+    xx[ 0 ] += noise;
+}
diff --git a/drivers/opus/silk/fixed/residual_energy16_FIX.c b/drivers/opus/silk/fixed/residual_energy16_FIX.c
new file mode 100644
index 0000000000..39bdff2a72
--- /dev/null
+++ b/drivers/opus/silk/fixed/residual_energy16_FIX.c
@@ -0,0 +1,103 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "main_FIX.h"
+
+/* Residual energy: nrg = wxx - 2 * wXx * c + c' * wXX * c
+ * The prediction vector c is first scaled up by Qxtra bits into cn[] (as
+ * far as the magnitudes of c and of the wXX corner elements allow), the
+ * two terms are accumulated at a reduced Q, and the result is shifted back
+ * to Q0. The return value is clamped to the range
+ * [ 1, silk_int32_MAX >> 1 ]. */
+opus_int32 silk_residual_energy16_covar_FIX(
+    const opus_int16 *c,    /* I Prediction vector */
+    const opus_int32 *wXX,  /* I Correlation matrix */
+    const opus_int32 *wXx,  /* I Correlation vector */
+    opus_int32       wxx,   /* I Signal energy */
+    opus_int         D,     /* I Dimension */
+    opus_int         cQ     /* I Q value for c vector 0 - 15 (the asserts below require 0 < cQ < 16) */
+)
+{
+    opus_int   i, j, lshifts, Qxtra;
+    opus_int32 c_max, w_max, tmp, tmp2, nrg;
+    opus_int   cn[ MAX_MATRIX_SIZE ];
+    const opus_int32 *pRow;
+
+    /* Safety checks */
+    silk_assert( D >= 0 );
+    silk_assert( D <= 16 );
+    silk_assert( cQ > 0 );
+    silk_assert( cQ < 16 );
+
+    lshifts = 16 - cQ;
+    Qxtra = lshifts;
+
+    c_max = 0; /* largest absolute value in c */
+    for( i = 0; i < D; i++ ) {
+        c_max = silk_max_32( c_max, silk_abs( (opus_int32)c[ i ] ) );
+    }
+    Qxtra = silk_min_int( Qxtra, silk_CLZ32( c_max ) - 17 );
+
+    w_max = silk_max_32( wXX[ 0 ], wXX[ D * D - 1 ] ); /* larger of the first and last diagonal elements */
+    Qxtra = silk_min_int( Qxtra, silk_CLZ32( silk_MUL( D, silk_RSHIFT( silk_SMULWB( w_max, c_max ), 4 ) ) ) - 5 );
+    Qxtra = silk_max_int( Qxtra, 0 );
+    for( i = 0; i < D; i++ ) {
+        cn[ i ] = silk_LSHIFT( ( opus_int )c[ i ], Qxtra );
+        silk_assert( silk_abs(cn[i]) <= ( silk_int16_MAX + 1 ) ); /* Check that silk_SMLAWB can be used */
+    }
+    lshifts -= Qxtra;
+
+    /* Compute wxx - 2 * wXx * c */
+    tmp = 0;
+    for( i = 0; i < D; i++ ) {
+        tmp = silk_SMLAWB( tmp, wXx[ i ], cn[ i ] );
+    }
+    nrg = silk_RSHIFT( wxx, 1 + lshifts ) - tmp; /* Q: -lshifts - 1 */
+
+    /* Add c' * wXX * c, assuming wXX is symmetric: only the upper triangle is read, with half of the diagonal element */
+    tmp2 = 0;
+    for( i = 0; i < D; i++ ) {
+        tmp = 0;
+        pRow = &wXX[ i * D ];
+        for( j = i + 1; j < D; j++ ) {
+            tmp = silk_SMLAWB( tmp, pRow[ j ], cn[ j ] );
+        }
+        tmp = silk_SMLAWB( tmp, silk_RSHIFT( pRow[ i ], 1 ), cn[ i ] );
+        tmp2 = silk_SMLAWB( tmp2, tmp, cn[ i ] );
+    }
+    nrg = silk_ADD_LSHIFT32( nrg, tmp2, lshifts ); /* Q: -lshifts - 1 */
+
+    /* Keep one bit free always, because we add them for LSF interpolation */
+    if( nrg < 1 ) {
+        nrg = 1;
+    } else if( nrg > silk_RSHIFT( silk_int32_MAX, lshifts + 2 ) ) {
+        nrg = silk_int32_MAX >> 1;
+    } else {
+        nrg = silk_LSHIFT( nrg, lshifts + 1 ); /* Q0 */
+    }
+    return nrg;
+
+}
diff --git a/drivers/opus/silk/fixed/residual_energy_FIX.c b/drivers/opus/silk/fixed/residual_energy_FIX.c
new file mode 100644
index 0000000000..13dbc51e39
--- /dev/null
+++ b/drivers/opus/silk/fixed/residual_energy_FIX.c
@@ -0,0 +1,97 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef OPUS_HAVE_CONFIG_H
+#include "opus_config.h"
+#endif
+
+#include "main_FIX.h"
+#include "stack_alloc.h"
+
+/* Calculates residual energies of input subframes where all subframes have LPC_order */
+/* of preceding samples.
+ * Pass 1, per frame half: filter ( MAX_NB_SUBFR >> 1 ) * ( LPC_order +
+ * subfr_length ) input samples with that half's a_Q12 row, then measure the
+ * energy of each subframe's subfr_length residual samples, storing the
+ * energy in nrgs[] and the negated shift in nrgsQ[].
+ * Pass 2, per subframe: multiply the energy by the squared gain after
+ * normalising both, and adjust nrgsQ[] to match. */
+void silk_residual_energy_FIX(
+    opus_int32       nrgs[ MAX_NB_SUBFR ],          /* O Residual energy per subframe */
+    opus_int         nrgsQ[ MAX_NB_SUBFR ],         /* O Q value per subframe */
+    const opus_int16 x[],                           /* I Input signal */
+    opus_int16       a_Q12[ 2 ][ MAX_LPC_ORDER ],   /* I AR coefs for each frame half */
+    const opus_int32 gains[ MAX_NB_SUBFR ],         /* I Quantization gains */
+    const opus_int   subfr_length,                  /* I Subframe length */
+    const opus_int   nb_subfr,                      /* I Number of subframes */
+    const opus_int   LPC_order                      /* I LPC order */
+)
+{
+    opus_int   offset, i, j, rshift, lz1, lz2;
+    opus_int16 *LPC_res_ptr;
+    VARDECL( opus_int16, LPC_res );
+    const opus_int16 *x_ptr;
+    opus_int32 tmp32;
+    SAVE_STACK;
+
+    x_ptr = x;
+    offset = LPC_order + subfr_length; /* stride from one subframe to the next, including its preceding samples */
+
+    /* Filter input to create the LPC residual for each frame half, and measure subframe energies */
+    ALLOC( LPC_res, ( MAX_NB_SUBFR >> 1 ) * offset, opus_int16 );
+    silk_assert( ( nb_subfr >> 1 ) * ( MAX_NB_SUBFR >> 1 ) == nb_subfr );
+    for( i = 0; i < nb_subfr >> 1; i++ ) {
+        /* Calculate half frame LPC residual signal including preceding samples */
+        silk_LPC_analysis_filter( LPC_res, x_ptr, a_Q12[ i ], ( MAX_NB_SUBFR >> 1 ) * offset, LPC_order );
+
+        /* Point to first subframe of the just calculated LPC residual signal */
+        LPC_res_ptr = LPC_res + LPC_order;
+        for( j = 0; j < ( MAX_NB_SUBFR >> 1 ); j++ ) {
+            /* Measure subframe energy */
+            silk_sum_sqr_shift( &nrgs[ i * ( MAX_NB_SUBFR >> 1 ) + j ], &rshift, LPC_res_ptr, subfr_length );
+
+            /* Set Q values for the measured energy */
+            nrgsQ[ i * ( MAX_NB_SUBFR >> 1 ) + j ] = -rshift;
+
+            /* Move to next subframe */
+            LPC_res_ptr += offset;
+        }
+        /* Move to next frame half */
+        x_ptr += ( MAX_NB_SUBFR >> 1 ) * offset;
+    }
+
+    /* Apply the squared subframe gains */
+    for( i = 0; i < nb_subfr; i++ ) {
+        /* Fully upscale gains and energies (shift left by one less than the leading-zero count) */
+        lz1 = silk_CLZ32( nrgs[ i ] ) - 1;
+        lz2 = silk_CLZ32( gains[ i ] ) - 1;
+
+        tmp32 = silk_LSHIFT32( gains[ i ], lz2 );
+
+        /* Find squared gains */
+        tmp32 = silk_SMMUL( tmp32, tmp32 ); /* Q( 2 * lz2 - 32 ) */
+
+        /* Scale energies */
+        nrgs[ i ] = silk_SMMUL( tmp32, silk_LSHIFT32( nrgs[ i ], lz1 ) ); /* Q( nrgsQ[ i ] + lz1 + 2 * lz2 - 32 - 32 ) */
+        nrgsQ[ i ] += lz1 + 2 * lz2 - 32 - 32;
+    }
+    RESTORE_STACK;
+}
diff --git a/drivers/opus/silk/fixed/schur64_FIX.c b/drivers/opus/silk/fixed/schur64_FIX.c
new file mode 100644
index 0000000000..22c0952ffd
--- /dev/null
+++ b/drivers/opus/silk/fixed/schur64_FIX.c
@@ -0,0 +1,92 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "SigProc_FIX.h" + +/* Slower than schur(), but more accurate. 
*/
+/* Uses SMULL(), available on armv4.
+ * Computes order reflection coefficients (Q16) from the correlations
+ * c[ 0 .. order ] and returns the residual energy, never less than 1.
+ * If c[ 0 ] <= 0 all coefficients are set to 0 and 0 is returned. If a
+ * coefficient would be unstable, it is written as +/- 0.99, the recursion
+ * stops and the remaining coefficients are set to 0. */
+opus_int32 silk_schur64(        /* O returns residual energy */
+    opus_int32       rc_Q16[],  /* O Reflection coefficients [order] Q16 */
+    const opus_int32 c[],       /* I Correlations [order+1] */
+    opus_int32       order      /* I Prediction order */
+)
+{
+    opus_int   k, n;
+    opus_int32 C[ SILK_MAX_ORDER_LPC + 1 ][ 2 ];
+    opus_int32 Ctmp1_Q30, Ctmp2_Q30, rc_tmp_Q31;
+
+    silk_assert( order==6||order==8||order==10||order==12||order==14||order==16 );
+
+    /* Check for invalid input */
+    if( c[ 0 ] <= 0 ) {
+        silk_memset( rc_Q16, 0, order * sizeof( opus_int32 ) );
+        return 0;
+    }
+
+    for( k = 0; k < order + 1; k++ ) {
+        C[ k ][ 0 ] = C[ k ][ 1 ] = c[ k ]; /* both columns start as copies of the correlations */
+    }
+
+    for( k = 0; k < order; k++ ) {
+        /* Check that we won't be getting an unstable rc, otherwise stop here. */
+        if (silk_abs_int32(C[ k + 1 ][ 0 ]) >= C[ 0 ][ 1 ]) {
+            if ( C[ k + 1 ][ 0 ] > 0 ) {
+                rc_Q16[ k ] = -SILK_FIX_CONST( .99f, 16 );
+            } else {
+                rc_Q16[ k ] = SILK_FIX_CONST( .99f, 16 );
+            }
+            k++; /* this coefficient is set; the zero-fill loop below starts at the next one */
+            break;
+        }
+
+        /* Get reflection coefficient: divide two Q30 values and get result in Q31 */
+        rc_tmp_Q31 = silk_DIV32_varQ( -C[ k + 1 ][ 0 ], C[ 0 ][ 1 ], 31 );
+
+        /* Save the output */
+        rc_Q16[ k ] = silk_RSHIFT_ROUND( rc_tmp_Q31, 15 );
+
+        /* Update correlations */
+        for( n = 0; n < order - k; n++ ) {
+            Ctmp1_Q30 = C[ n + k + 1 ][ 0 ];
+            Ctmp2_Q30 = C[ n ][ 1 ];
+
+            /* Multiply and add the highest int32 */
+            C[ n + k + 1 ][ 0 ] = Ctmp1_Q30 + silk_SMMUL( silk_LSHIFT( Ctmp2_Q30, 1 ), rc_tmp_Q31 );
+            C[ n ][ 1 ] = Ctmp2_Q30 + silk_SMMUL( silk_LSHIFT( Ctmp1_Q30, 1 ), rc_tmp_Q31 );
+        }
+    }
+
+    for(; k < order; k++ ) {
+        rc_Q16[ k ] = 0;
+    }
+
+    return silk_max_32( 1, C[ 0 ][ 1 ] );
+}
diff --git a/drivers/opus/silk/fixed/schur_FIX.c b/drivers/opus/silk/fixed/schur_FIX.c
new file mode 100644
index 0000000000..e8b24cf068
--- /dev/null
+++ b/drivers/opus/silk/fixed/schur_FIX.c
@@ -0,0 +1,106 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype
Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "SigProc_FIX.h" + +/* Faster than schur64(), but much less accurate. */ +/* uses SMLAWB(), requiring armv5E and higher. 
*/
+opus_int32 silk_schur(          /* O Returns residual energy, never less than 1. The correlations are
+                                 *   first rescaled from the leading-zero count of c[ 0 ]; order reflection
+                                 *   coefficients are then produced. If a coefficient would be unstable it
+                                 *   is written as +/- 0.99, the recursion stops and the rest are set to 0. */
+    opus_int16       *rc_Q15,   /* O reflection coefficients [order] Q15 */
+    const opus_int32 *c,        /* I correlations [order+1] */
+    const opus_int32 order      /* I prediction order */
+)
+{
+    opus_int   k, n, lz;
+    opus_int32 C[ SILK_MAX_ORDER_LPC + 1 ][ 2 ];
+    opus_int32 Ctmp1, Ctmp2, rc_tmp_Q15;
+
+    silk_assert( order==6||order==8||order==10||order==12||order==14||order==16 );
+
+    /* Get number of leading zeros */
+    lz = silk_CLZ32( c[ 0 ] );
+
+    /* Copy correlations and adjust level to Q30 */
+    if( lz < 2 ) {
+        /* lz must be 1, so shift one to the right */
+        for( k = 0; k < order + 1; k++ ) {
+            C[ k ][ 0 ] = C[ k ][ 1 ] = silk_RSHIFT( c[ k ], 1 );
+        }
+    } else if( lz > 2 ) {
+        /* Shift to the left */
+        lz -= 2;
+        for( k = 0; k < order + 1; k++ ) {
+            C[ k ][ 0 ] = C[ k ][ 1 ] = silk_LSHIFT( c[ k ], lz );
+        }
+    } else {
+        /* No need to shift */
+        for( k = 0; k < order + 1; k++ ) {
+            C[ k ][ 0 ] = C[ k ][ 1 ] = c[ k ];
+        }
+    }
+
+    for( k = 0; k < order; k++ ) {
+        /* Check that we won't be getting an unstable rc, otherwise stop here. */
+        if (silk_abs_int32(C[ k + 1 ][ 0 ]) >= C[ 0 ][ 1 ]) {
+            if ( C[ k + 1 ][ 0 ] > 0 ) {
+                rc_Q15[ k ] = -SILK_FIX_CONST( .99f, 15 );
+            } else {
+                rc_Q15[ k ] = SILK_FIX_CONST( .99f, 15 );
+            }
+            k++; /* this coefficient is set; the zero-fill loop below starts at the next one */
+            break;
+        }
+
+        /* Get reflection coefficient (the divisor is kept at 1 or more) */
+        rc_tmp_Q15 = -silk_DIV32_16( C[ k + 1 ][ 0 ], silk_max_32( silk_RSHIFT( C[ 0 ][ 1 ], 15 ), 1 ) );
+
+        /* Clip (shouldn't happen for properly conditioned inputs) */
+        rc_tmp_Q15 = silk_SAT16( rc_tmp_Q15 );
+
+        /* Store */
+        rc_Q15[ k ] = (opus_int16)rc_tmp_Q15;
+
+        /* Update correlations */
+        for( n = 0; n < order - k; n++ ) {
+            Ctmp1 = C[ n + k + 1 ][ 0 ];
+            Ctmp2 = C[ n ][ 1 ];
+            C[ n + k + 1 ][ 0 ] = silk_SMLAWB( Ctmp1, silk_LSHIFT( Ctmp2, 1 ), rc_tmp_Q15 );
+            C[ n ][ 1 ] = silk_SMLAWB( Ctmp2, silk_LSHIFT( Ctmp1, 1 ), rc_tmp_Q15 );
+        }
+    }
+
+    for(; k < order; k++ ) {
+        rc_Q15[ k ] = 0;
+    }
+
+    /* return residual energy */
+    return silk_max_32( 1, C[ 0 ][ 1 ] );
+}
diff --git a/drivers/opus/silk/fixed/solve_LS_FIX.c b/drivers/opus/silk/fixed/solve_LS_FIX.c
new file mode 100644
index 0000000000..5d09284935
--- /dev/null
+++ b/drivers/opus/silk/fixed/solve_LS_FIX.c
@@ -0,0 +1,249 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "main_FIX.h" +#include "stack_alloc.h" +#include "tuning_parameters.h" + +/*****************************/ +/* Internal function headers */ +/*****************************/ +/* Inverse of one diagonal element of D, kept as a Q36 part plus a Q48 part derived from the residual error of the Q36 estimate */ +typedef struct { + opus_int32 Q36_part; + opus_int32 Q48_part; +} inv_D_t; + +/* Factorize square matrix A into LDL form */ +static OPUS_INLINE void silk_LDL_factorize_FIX( + opus_int32 *A, /* I/O Pointer to Symmetric Square Matrix */ + opus_int M, /* I Size of Matrix */ + opus_int32 *L_Q16, /* I/O Pointer to Square Lower Triangular Matrix */ + inv_D_t *inv_D /* I/O Pointer to vector holding inverted diagonal elements of D */ +); + +/* Solve Lx = b, when L is lower triangular and has ones on the diagonal */ +static OPUS_INLINE void silk_LS_SolveFirst_FIX( + const opus_int32 *L_Q16, /* I Pointer to Lower Triangular Matrix */ + opus_int M, /* I Dim of Matrix equation */ + const opus_int32 *b, /* I b Vector */ + opus_int32 *x_Q16 /* O x Vector */ +); + +/* Solve L^t*x = b, where L is lower triangular with ones on the diagonal */ +static OPUS_INLINE void silk_LS_SolveLast_FIX( + const opus_int32
*L_Q16, /* I Pointer to Lower Triangular Matrix */ + const opus_int M, /* I Dim of Matrix equation */ + const opus_int32 *b, /* I b Vector */ + opus_int32 *x_Q16 /* O x Vector */ +); +/* Divide each element of T by the matching diagonal element, using the inverses stored in inv_D */ +static OPUS_INLINE void silk_LS_divide_Q16_FIX( + opus_int32 T[], /* I/O Numerator vector */ + inv_D_t *inv_D, /* I 1 / D vector */ + opus_int M /* I dimension */ +); + +/* Solves Ax = b, assuming A is symmetric, by factorizing A = L*D*L' and solving the resulting systems in turn */ +void silk_solve_LDL_FIX( + opus_int32 *A, /* I Pointer to symmetric square matrix A; its diagonal may be modified by silk_LDL_factorize_FIX */ + opus_int M, /* I Size of matrix */ + const opus_int32 *b, /* I Pointer to b vector */ + opus_int32 *x_Q16 /* O Pointer to x solution vector */ +) +{ + VARDECL( opus_int32, L_Q16 ); + opus_int32 Y[ MAX_MATRIX_SIZE ]; + inv_D_t inv_D[ MAX_MATRIX_SIZE ]; + SAVE_STACK; + + silk_assert( M <= MAX_MATRIX_SIZE ); + ALLOC( L_Q16, M * M, opus_int32 ); + + /*************************************************** + Factorize A by LDL such that A = L*D*L', + where L is lower triangular with ones on diagonal + ****************************************************/ + silk_LDL_factorize_FIX( A, M, L_Q16, inv_D ); + + /**************************************************** + * substitute D*L'*x = Y.
ie: + L*D*L'*x = b => L*Y = b <=> Y = inv(L)*b + ******************************************************/ + silk_LS_SolveFirst_FIX( L_Q16, M, b, Y ); + + /**************************************************** + D*L'*x = Y <=> L'*x = inv(D)*Y, because D is + diagonal just multiply with 1/d_i + ****************************************************/ + silk_LS_divide_Q16_FIX( Y, inv_D, M ); + + /**************************************************** + x = inv(L') * inv(D) * Y + *****************************************************/ + silk_LS_SolveLast_FIX( L_Q16, M, Y, x_Q16 ); + RESTORE_STACK; +} +/* Factorize square matrix A into LDL form; the diagonal of A is raised and the factorization redone when a diagonal value falls below diag_min_value */ +static OPUS_INLINE void silk_LDL_factorize_FIX( + opus_int32 *A, /* I/O Pointer to Symmetric Square Matrix */ + opus_int M, /* I Size of Matrix */ + opus_int32 *L_Q16, /* I/O Pointer to Square Lower Triangular Matrix */ + inv_D_t *inv_D /* I/O Pointer to vector holding inverted diagonal elements of D */ +) +{ + opus_int i, j, k, status, loop_count; + const opus_int32 *ptr1, *ptr2; + opus_int32 diag_min_value, tmp_32, err; + opus_int32 v_Q0[ MAX_MATRIX_SIZE ], D_Q0[ MAX_MATRIX_SIZE ]; + opus_int32 one_div_diag_Q36, one_div_diag_Q40, one_div_diag_Q48; + + silk_assert( M <= MAX_MATRIX_SIZE ); + + status = 1; + diag_min_value = silk_max_32( silk_SMMUL( silk_ADD_SAT32( A[ 0 ], A[ silk_SMULBB( M, M ) - 1 ] ), SILK_FIX_CONST( FIND_LTP_COND_FAC, 31 ) ), 1 << 9 ); + for( loop_count = 0; loop_count < M && status == 1; loop_count++ ) { + status = 0; + for( j = 0; j < M; j++ ) { + ptr1 = matrix_adr( L_Q16, j, 0, M ); + tmp_32 = 0; + for( i = 0; i < j; i++ ) { + v_Q0[ i ] = silk_SMULWW( D_Q0[ i ], ptr1[ i ] ); /* Q0 */ + tmp_32 = silk_SMLAWW( tmp_32, v_Q0[ i ], ptr1[ i ] ); /* Q0 */ + } + tmp_32 = silk_SUB32( matrix_ptr( A, j, j, M ), tmp_32 ); + + if( tmp_32 < diag_min_value ) { + tmp_32 = silk_SUB32( silk_SMULBB( loop_count + 1, diag_min_value ), tmp_32 ); + /* Matrix not positive semi-definite, or ill conditioned: add the same offset to every diagonal element of A and redo the factorization */ + for( i = 0; i < M; i++ ) { + matrix_ptr( A, i, i, M ) = silk_ADD32(
matrix_ptr( A, i, i, M ), tmp_32 ); + } + status = 1; + break; + } + D_Q0[ j ] = tmp_32; /* always < max(Correlation) */ + + /* two-step division */ + one_div_diag_Q36 = silk_INVERSE32_varQ( tmp_32, 36 ); /* Q36 */ + one_div_diag_Q40 = silk_LSHIFT( one_div_diag_Q36, 4 ); /* Q40 */ + err = silk_SUB32( (opus_int32)1 << 24, silk_SMULWW( tmp_32, one_div_diag_Q40 ) ); /* Q24 */ + one_div_diag_Q48 = silk_SMULWW( err, one_div_diag_Q40 ); /* Q48 */ + + /* Save 1/Ds */ + inv_D[ j ].Q36_part = one_div_diag_Q36; + inv_D[ j ].Q48_part = one_div_diag_Q48; + + matrix_ptr( L_Q16, j, j, M ) = 65536; /* 1.0 in Q16 */ + ptr1 = matrix_adr( A, j, 0, M ); + ptr2 = matrix_adr( L_Q16, j + 1, 0, M ); + for( i = j + 1; i < M; i++ ) { + tmp_32 = 0; + for( k = 0; k < j; k++ ) { + tmp_32 = silk_SMLAWW( tmp_32, v_Q0[ k ], ptr2[ k ] ); /* Q0 */ + } + tmp_32 = silk_SUB32( ptr1[ i ], tmp_32 ); /* always < max(Correlation) */ + + /* tmp_32 / D_Q0[j] : Divide to Q16 */ + matrix_ptr( L_Q16, i, j, M ) = silk_ADD32( silk_SMMUL( tmp_32, one_div_diag_Q48 ), + silk_RSHIFT( silk_SMULWW( tmp_32, one_div_diag_Q36 ), 4 ) ); + + /* go to next column */ + ptr2 += M; + } + } + } + + silk_assert( status == 0 ); +} +/* Divide each element of T by the matching diagonal element, using the inverses stored in inv_D */ +static OPUS_INLINE void silk_LS_divide_Q16_FIX( + opus_int32 T[], /* I/O Numerator vector */ + inv_D_t *inv_D, /* I 1 / D vector */ + opus_int M /* I dimension */ +) +{ + opus_int i; + opus_int32 tmp_32; + opus_int32 one_div_diag_Q36, one_div_diag_Q48; + + for( i = 0; i < M; i++ ) { + one_div_diag_Q36 = inv_D[ i ].Q36_part; + one_div_diag_Q48 = inv_D[ i ].Q48_part; + /* Same two-part expression that silk_LDL_factorize_FIX uses for tmp_32 / D_Q0[j] */ + tmp_32 = T[ i ]; + T[ i ] = silk_ADD32( silk_SMMUL( tmp_32, one_div_diag_Q48 ), silk_RSHIFT( silk_SMULWW( tmp_32, one_div_diag_Q36 ), 4 ) ); + } +} + +/* Solve Lx = b, when L is lower triangular and has ones on the diagonal */ +static OPUS_INLINE void silk_LS_SolveFirst_FIX( + const opus_int32 *L_Q16, /* I Pointer to Lower Triangular Matrix */ + opus_int M, /* I Dim of Matrix equation */ + const opus_int32 *b, /* I b Vector */ +
opus_int32 *x_Q16 /* O x Vector */ +) +{ + opus_int i, j; + const opus_int32 *ptr32; + opus_int32 tmp_32; + /* Elements are computed from first to last; x_Q16[ i ] uses only the x_Q16[ j ] with j < i */ + for( i = 0; i < M; i++ ) { + ptr32 = matrix_adr( L_Q16, i, 0, M ); + tmp_32 = 0; + for( j = 0; j < i; j++ ) { + tmp_32 = silk_SMLAWW( tmp_32, ptr32[ j ], x_Q16[ j ] ); + } + x_Q16[ i ] = silk_SUB32( b[ i ], tmp_32 ); + } +} + +/* Solve L^t*x = b, where L is lower triangular with ones on the diagonal */ +static OPUS_INLINE void silk_LS_SolveLast_FIX( + const opus_int32 *L_Q16, /* I Pointer to Lower Triangular Matrix */ + const opus_int M, /* I Dim of Matrix equation */ + const opus_int32 *b, /* I b Vector */ + opus_int32 *x_Q16 /* O x Vector */ +) +{ + opus_int i, j; + const opus_int32 *ptr32; + opus_int32 tmp_32; + /* Elements are computed from last to first; x_Q16[ i ] uses only the x_Q16[ j ] with j > i, and ptr32 is indexed in steps of M */ + for( i = M - 1; i >= 0; i-- ) { + ptr32 = matrix_adr( L_Q16, 0, i, M ); + tmp_32 = 0; + for( j = M - 1; j > i; j-- ) { + tmp_32 = silk_SMLAWW( tmp_32, ptr32[ silk_SMULBB( j, M ) ], x_Q16[ j ] ); + } + x_Q16[ i ] = silk_SUB32( b[ i ], tmp_32 ); + } +} diff --git a/drivers/opus/silk/fixed/structs_FIX.h b/drivers/opus/silk/fixed/structs_FIX.h new file mode 100644 index 0000000000..0284dfa27a --- /dev/null +++ b/drivers/opus/silk/fixed/structs_FIX.h @@ -0,0 +1,133 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifndef SILK_STRUCTS_FIX_H +#define SILK_STRUCTS_FIX_H + +#include "typedef.h" +#include "silk_main.h" +#include "structs.h" + +#ifdef __cplusplus +extern "C" +{ +#endif + +/********************************/ +/* Noise shaping analysis state */ +/********************************/ +typedef struct { + opus_int8 LastGainIndex; + opus_int32 HarmBoost_smth_Q16; + opus_int32 HarmShapeGain_smth_Q16; + opus_int32 Tilt_smth_Q16; +} silk_shape_state_FIX; + +/********************************/ +/* Prefilter state */ +/********************************/ +typedef struct { + opus_int16 sLTP_shp[ LTP_BUF_LENGTH ]; + opus_int32 sAR_shp[ MAX_SHAPE_LPC_ORDER + 1 ]; + opus_int sLTP_shp_buf_idx; + opus_int32 sLF_AR_shp_Q12; + opus_int32 sLF_MA_shp_Q12; + opus_int32 sHarmHP_Q2; + opus_int32 rand_seed; + opus_int lagPrev; +} silk_prefilter_state_FIX; + +/********************************/ +/* Encoder state FIX */ +/********************************/
+typedef struct { + silk_encoder_state sCmn; /* Common struct, shared with floating-point code */ + silk_shape_state_FIX sShape; /* Noise shaping analysis state */ + silk_prefilter_state_FIX sPrefilt; /* Prefilter state */ + + /* Buffer for find pitch and noise shape analysis */ + silk_DWORD_ALIGN opus_int16 x_buf[ 2 * MAX_FRAME_LENGTH + LA_SHAPE_MAX ];/* Buffer for find pitch and noise shape analysis */ + opus_int LTPCorr_Q15; /* Normalized correlation from pitch lag estimator */ +} silk_encoder_state_FIX; + +/************************/ +/* Encoder control FIX */ +/************************/ +typedef struct { + /* Prediction and coding parameters */ + opus_int32 Gains_Q16[ MAX_NB_SUBFR ]; + silk_DWORD_ALIGN opus_int16 PredCoef_Q12[ 2 ][ MAX_LPC_ORDER ]; + opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ]; + opus_int LTP_scale_Q14; + opus_int pitchL[ MAX_NB_SUBFR ]; + + /* Noise shaping parameters */ + /* Testing */ + silk_DWORD_ALIGN opus_int16 AR1_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ]; + silk_DWORD_ALIGN opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ]; + opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ]; /* Packs two int16 coefficients per int32 value */ + opus_int GainsPre_Q14[ MAX_NB_SUBFR ]; + opus_int HarmBoost_Q14[ MAX_NB_SUBFR ]; + opus_int Tilt_Q14[ MAX_NB_SUBFR ]; + opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ]; + opus_int Lambda_Q10; + opus_int input_quality_Q14; + opus_int coding_quality_Q14; + + /* Measures */ + opus_int sparseness_Q8; + opus_int32 predGain_Q16; + opus_int LTPredCodGain_Q7; + opus_int32 ResNrg[ MAX_NB_SUBFR ]; /* Residual energy per subframe */ + opus_int ResNrgQ[ MAX_NB_SUBFR ]; /* Q domain for the residual energy > 0 */ + + /* Parameters for CBR mode */ + opus_int32 GainsUnq_Q16[ MAX_NB_SUBFR ]; + opus_int8 lastGainIndexPrev; +} silk_encoder_control_FIX; + +/************************/ +/* Encoder Super Struct */ +/************************/ +typedef struct { + silk_encoder_state_FIX state_Fxx[ ENCODER_NUM_CHANNELS ]; /* ENCODER_NUM_CHANNELS fixed-point encoder states */ + stereo_enc_state sStereo; +
opus_int32 nBitsExceeded; + opus_int nChannelsAPI; + opus_int nChannelsInternal; + opus_int nPrevChannelsInternal; + opus_int timeSinceSwitchAllowed_ms; + opus_int allowBandwidthSwitch; + opus_int prev_decode_only_middle; +} silk_encoder; + + +#ifdef __cplusplus +} +#endif + +#endif /* SILK_STRUCTS_FIX_H */ diff --git a/drivers/opus/silk/fixed/vector_ops_FIX.c b/drivers/opus/silk/fixed/vector_ops_FIX.c new file mode 100644 index 0000000000..b1e422eb91 --- /dev/null +++ b/drivers/opus/silk/fixed/vector_ops_FIX.c @@ -0,0 +1,96 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "SigProc_FIX.h" + +/* Copy and multiply a vector by a constant */ +void silk_scale_copy_vector16( + opus_int16 *data_out, /* O Scaled copy of data_in */ + const opus_int16 *data_in, /* I Vector to scale */ + opus_int32 gain_Q16, /* I Gain in Q16 */ + const opus_int dataSize /* I Length */ +) +{ + opus_int i; + opus_int32 tmp32; + + for( i = 0; i < dataSize; i++ ) { + tmp32 = silk_SMULWB( gain_Q16, data_in[ i ] ); + data_out[ i ] = (opus_int16)silk_CHECK_FIT16( tmp32 ); + } +} + +/* Multiply a vector by a constant, in place */ +void silk_scale_vector32_Q26_lshift_18( + opus_int32 *data1, /* I/O Q0/Q18 */ + opus_int32 gain_Q26, /* I Q26 */ + opus_int dataSize /* I length */ +) +{ + opus_int i; + + for( i = 0; i < dataSize; i++ ) { + data1[ i ] = (opus_int32)silk_CHECK_FIT32( silk_RSHIFT64( silk_SMULL( data1[ i ], gain_Q26 ), 8 ) ); /* OUTPUT: Q18 */ + } +} + +/* sum = for(i=0;i<len;i++)inVec1[i]*inVec2[i]; --- inner product */ +/* Note for ARM asm: */ +/* * inVec1 and inVec2 should be at least 2 byte aligned. */ +/* * len should be positive 16bit integer. */ +/* * only when len>6, memory access can be reduced by half.
*/ +opus_int32 silk_inner_prod_aligned( + const opus_int16 *const inVec1, /* I input vector 1 */ + const opus_int16 *const inVec2, /* I input vector 2 */ + const opus_int len /* I vector lengths */ +) +{ + opus_int i; + opus_int32 sum = 0; + for( i = 0; i < len; i++ ) { + sum = silk_SMLABB( sum, inVec1[ i ], inVec2[ i ] ); + } + return sum; +} +/* Inner product of inVec1 and inVec2, accumulated in an opus_int64 sum */ +opus_int64 silk_inner_prod16_aligned_64( + const opus_int16 *inVec1, /* I input vector 1 */ + const opus_int16 *inVec2, /* I input vector 2 */ + const opus_int len /* I vector lengths */ +) +{ + opus_int i; + opus_int64 sum = 0; + for( i = 0; i < len; i++ ) { + sum = silk_SMLALBB( sum, inVec1[ i ], inVec2[ i ] ); + } + return sum; +} diff --git a/drivers/opus/silk/fixed/warped_autocorrelation_FIX.c b/drivers/opus/silk/fixed/warped_autocorrelation_FIX.c new file mode 100644 index 0000000000..3f04df775c --- /dev/null +++ b/drivers/opus/silk/fixed/warped_autocorrelation_FIX.c @@ -0,0 +1,88 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "main_FIX.h" + +#define QC 10 /* Q-domain of the correlation accumulators corr_QC */ +#define QS 14 /* Q-domain of the allpass states state_QS */ + +/* Autocorrelations for a warped frequency axis */ +void silk_warped_autocorrelation_FIX( + opus_int32 *corr, /* O Result [order + 1] */ + opus_int *scale, /* O Scaling of the correlation vector */ + const opus_int16 *input, /* I Input data to correlate */ + const opus_int warping_Q16, /* I Warping coefficient */ + const opus_int length, /* I Length of input */ + const opus_int order /* I Correlation order (even) */ +) +{ + opus_int n, i, lsh; + opus_int32 tmp1_QS, tmp2_QS; + opus_int32 state_QS[ MAX_SHAPE_LPC_ORDER + 1 ] = { 0 }; + opus_int64 corr_QC[ MAX_SHAPE_LPC_ORDER + 1 ] = { 0 }; + + /* Order must be even */ + silk_assert( ( order & 1 ) == 0 ); + silk_assert( 2 * QS - QC >= 0 ); + + /* Loop over samples */ + for( n = 0; n < length; n++ ) { + tmp1_QS = silk_LSHIFT32( (opus_int32)input[ n ], QS ); + /* Loop over allpass sections, two per iteration */ + for( i = 0; i < order; i += 2 ) { + /* Output of allpass section */ + tmp2_QS = silk_SMLAWB( state_QS[ i ], state_QS[ i + 1 ] - tmp1_QS, warping_Q16 ); + state_QS[ i ] = tmp1_QS; +
corr_QC[ i ] += silk_RSHIFT64( silk_SMULL( tmp1_QS, state_QS[ 0 ] ), 2 * QS - QC ); + /* Output of allpass section */ + tmp1_QS = silk_SMLAWB( state_QS[ i + 1 ], state_QS[ i + 2 ] - tmp2_QS, warping_Q16 ); + state_QS[ i + 1 ] = tmp2_QS; + corr_QC[ i + 1 ] += silk_RSHIFT64( silk_SMULL( tmp2_QS, state_QS[ 0 ] ), 2 * QS - QC ); + } + state_QS[ order ] = tmp1_QS; + corr_QC[ order ] += silk_RSHIFT64( silk_SMULL( tmp1_QS, state_QS[ 0 ] ), 2 * QS - QC ); + } + /* Derive the output shift from the leading-zero count of corr_QC[ 0 ], limited so that *scale stays within [-30, 12] */ + lsh = silk_CLZ64( corr_QC[ 0 ] ) - 35; + lsh = silk_LIMIT( lsh, -12 - QC, 30 - QC ); + *scale = -( QC + lsh ); + silk_assert( *scale >= -30 && *scale <= 12 ); + if( lsh >= 0 ) { + for( i = 0; i < order + 1; i++ ) { + corr[ i ] = (opus_int32)silk_CHECK_FIT32( silk_LSHIFT64( corr_QC[ i ], lsh ) ); + } + } else { + for( i = 0; i < order + 1; i++ ) { + corr[ i ] = (opus_int32)silk_CHECK_FIT32( silk_RSHIFT64( corr_QC[ i ], -lsh ) ); + } + } + silk_assert( corr_QC[ 0 ] >= 0 ); /* If breaking, decrease QC */ +} diff --git a/drivers/opus/silk/float/LPC_analysis_filter_FLP.c b/drivers/opus/silk/float/LPC_analysis_filter_FLP.c new file mode 100644 index 0000000000..8d26c093bf --- /dev/null +++ b/drivers/opus/silk/float/LPC_analysis_filter_FLP.c @@ -0,0 +1,249 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include <stdlib.h> +#include "main_FLP.h" + +/************************************************/ +/* LPC analysis filter */ +/* NB!
State is kept internally and the */ +/* filter always starts with zero state */ +/* first Order output samples are set to zero */ +/************************************************/ + +/* 16th order LPC analysis filter, does not write first 16 samples */ +static OPUS_INLINE void silk_LPC_analysis_filter16_FLP( + silk_float r_LPC[], /* O LPC residual signal */ + const silk_float PredCoef[], /* I LPC coefficients */ + const silk_float s[], /* I Input signal */ + const opus_int length /* I Length of input signal */ +) +{ + opus_int ix; + silk_float LPC_pred; + const silk_float *s_ptr; + + for( ix = 16; ix < length; ix++ ) { + s_ptr = &s[ix - 1]; + + /* short-term prediction from the 16 samples preceding s[ ix ] */ + LPC_pred = s_ptr[ 0 ] * PredCoef[ 0 ] + + s_ptr[ -1 ] * PredCoef[ 1 ] + + s_ptr[ -2 ] * PredCoef[ 2 ] + + s_ptr[ -3 ] * PredCoef[ 3 ] + + s_ptr[ -4 ] * PredCoef[ 4 ] + + s_ptr[ -5 ] * PredCoef[ 5 ] + + s_ptr[ -6 ] * PredCoef[ 6 ] + + s_ptr[ -7 ] * PredCoef[ 7 ] + + s_ptr[ -8 ] * PredCoef[ 8 ] + + s_ptr[ -9 ] * PredCoef[ 9 ] + + s_ptr[ -10 ] * PredCoef[ 10 ] + + s_ptr[ -11 ] * PredCoef[ 11 ] + + s_ptr[ -12 ] * PredCoef[ 12 ] + + s_ptr[ -13 ] * PredCoef[ 13 ] + + s_ptr[ -14 ] * PredCoef[ 14 ] + + s_ptr[ -15 ] * PredCoef[ 15 ]; + + /* prediction error: s[ ix ] minus the prediction */ + r_LPC[ix] = s_ptr[ 1 ] - LPC_pred; + } +} + +/* 12th order LPC analysis filter, does not write first 12 samples */ +static OPUS_INLINE void silk_LPC_analysis_filter12_FLP( + silk_float r_LPC[], /* O LPC residual signal */ + const silk_float PredCoef[], /* I LPC coefficients */ + const silk_float s[], /* I Input signal */ + const opus_int length /* I Length of input signal */ +) +{ + opus_int ix; + silk_float LPC_pred; + const silk_float *s_ptr; + + for( ix = 12; ix < length; ix++ ) { + s_ptr = &s[ix - 1]; + + /* short-term prediction from the 12 samples preceding s[ ix ] */ + LPC_pred = s_ptr[ 0 ] * PredCoef[ 0 ] + + s_ptr[ -1 ] * PredCoef[ 1 ] + + s_ptr[ -2 ] * PredCoef[ 2 ] + + s_ptr[ -3 ] * PredCoef[ 3 ] + + s_ptr[ -4 ] * PredCoef[ 4 ] + + s_ptr[ -5 ] * PredCoef[ 5 ] + + s_ptr[
-6 ] * PredCoef[ 6 ] + + s_ptr[ -7 ] * PredCoef[ 7 ] + + s_ptr[ -8 ] * PredCoef[ 8 ] + + s_ptr[ -9 ] * PredCoef[ 9 ] + + s_ptr[ -10 ] * PredCoef[ 10 ] + + s_ptr[ -11 ] * PredCoef[ 11 ]; + + /* prediction error: s[ ix ] minus the prediction */ + r_LPC[ix] = s_ptr[ 1 ] - LPC_pred; + } +} + +/* 10th order LPC analysis filter, does not write first 10 samples */ +static OPUS_INLINE void silk_LPC_analysis_filter10_FLP( + silk_float r_LPC[], /* O LPC residual signal */ + const silk_float PredCoef[], /* I LPC coefficients */ + const silk_float s[], /* I Input signal */ + const opus_int length /* I Length of input signal */ +) +{ + opus_int ix; + silk_float LPC_pred; + const silk_float *s_ptr; + + for( ix = 10; ix < length; ix++ ) { + s_ptr = &s[ix - 1]; + + /* short-term prediction from the 10 samples preceding s[ ix ] */ + LPC_pred = s_ptr[ 0 ] * PredCoef[ 0 ] + + s_ptr[ -1 ] * PredCoef[ 1 ] + + s_ptr[ -2 ] * PredCoef[ 2 ] + + s_ptr[ -3 ] * PredCoef[ 3 ] + + s_ptr[ -4 ] * PredCoef[ 4 ] + + s_ptr[ -5 ] * PredCoef[ 5 ] + + s_ptr[ -6 ] * PredCoef[ 6 ] + + s_ptr[ -7 ] * PredCoef[ 7 ] + + s_ptr[ -8 ] * PredCoef[ 8 ] + + s_ptr[ -9 ] * PredCoef[ 9 ]; + + /* prediction error: s[ ix ] minus the prediction */ + r_LPC[ix] = s_ptr[ 1 ] - LPC_pred; + } +} + +/* 8th order LPC analysis filter, does not write first 8 samples */ +static OPUS_INLINE void silk_LPC_analysis_filter8_FLP( + silk_float r_LPC[], /* O LPC residual signal */ + const silk_float PredCoef[], /* I LPC coefficients */ + const silk_float s[], /* I Input signal */ + const opus_int length /* I Length of input signal */ +) +{ + opus_int ix; + silk_float LPC_pred; + const silk_float *s_ptr; + + for( ix = 8; ix < length; ix++ ) { + s_ptr = &s[ix - 1]; + + /* short-term prediction from the 8 samples preceding s[ ix ] */ + LPC_pred = s_ptr[ 0 ] * PredCoef[ 0 ] + + s_ptr[ -1 ] * PredCoef[ 1 ] + + s_ptr[ -2 ] * PredCoef[ 2 ] + + s_ptr[ -3 ] * PredCoef[ 3 ] + + s_ptr[ -4 ] * PredCoef[ 4 ] + + s_ptr[ -5 ] * PredCoef[ 5 ] + + s_ptr[ -6 ] * PredCoef[ 6 ] + + s_ptr[ -7 ] * PredCoef[ 7 ]; + + /* prediction error: s[ ix ] minus the prediction */ + r_LPC[ix] = s_ptr[ 1 ] - LPC_pred; + } +} + +/*
6th order LPC analysis filter, does not write first 6 samples */ +static OPUS_INLINE void silk_LPC_analysis_filter6_FLP( + silk_float r_LPC[], /* O LPC residual signal */ + const silk_float PredCoef[], /* I LPC coefficients */ + const silk_float s[], /* I Input signal */ + const opus_int length /* I Length of input signal */ +) +{ + opus_int ix; + silk_float LPC_pred; + const silk_float *s_ptr; + + for( ix = 6; ix < length; ix++ ) { + s_ptr = &s[ix - 1]; + + /* short-term prediction from the 6 samples preceding s[ ix ] */ + LPC_pred = s_ptr[ 0 ] * PredCoef[ 0 ] + + s_ptr[ -1 ] * PredCoef[ 1 ] + + s_ptr[ -2 ] * PredCoef[ 2 ] + + s_ptr[ -3 ] * PredCoef[ 3 ] + + s_ptr[ -4 ] * PredCoef[ 4 ] + + s_ptr[ -5 ] * PredCoef[ 5 ]; + + /* prediction error: s[ ix ] minus the prediction */ + r_LPC[ix] = s_ptr[ 1 ] - LPC_pred; + } +} + +/************************************************/ +/* LPC analysis filter */ +/* NB! State is kept internally and the */ +/* filter always starts with zero state */ +/* first Order output samples are set to zero */ +/************************************************/ +void silk_LPC_analysis_filter_FLP( + silk_float r_LPC[], /* O LPC residual signal */ + const silk_float PredCoef[], /* I LPC coefficients */ + const silk_float s[], /* I Input signal */ + const opus_int length, /* I Length of input signal */ + const opus_int Order /* I LPC order */ +) +{ + silk_assert( Order <= length ); + /* Dispatch to the unrolled filter matching Order; any other order trips the assertion in the default case */ + switch( Order ) { + case 6: + silk_LPC_analysis_filter6_FLP( r_LPC, PredCoef, s, length ); + break; + + case 8: + silk_LPC_analysis_filter8_FLP( r_LPC, PredCoef, s, length ); + break; + + case 10: + silk_LPC_analysis_filter10_FLP( r_LPC, PredCoef, s, length ); + break; + + case 12: + silk_LPC_analysis_filter12_FLP( r_LPC, PredCoef, s, length ); + break; + + case 16: + silk_LPC_analysis_filter16_FLP( r_LPC, PredCoef, s, length ); + break; + + default: + silk_assert( 0 ); + break; + } + + /* Set first Order output samples to zero */ + silk_memset( r_LPC, 0, Order * sizeof( silk_float ) ); +} + +diff --git
a/drivers/opus/silk/float/LPC_inv_pred_gain_FLP.c b/drivers/opus/silk/float/LPC_inv_pred_gain_FLP.c new file mode 100644 index 0000000000..968edfb189 --- /dev/null +++ b/drivers/opus/silk/float/LPC_inv_pred_gain_FLP.c @@ -0,0 +1,76 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "SigProc_FIX.h" +#include "SigProc_FLP.h" + +#define RC_THRESHOLD 0.9999f + +/* compute inverse of LPC prediction gain, and */ +/* test if LPC coefficients are stable (all poles within unit circle) */ +/* this code is based on silk_a2k_FLP() */ +silk_float silk_LPC_inverse_pred_gain_FLP( /* O return inverse prediction gain, energy domain; 0.0f when a coefficient magnitude exceeds RC_THRESHOLD */ + const silk_float *A, /* I prediction coefficients [order] */ + opus_int32 order /* I prediction order; must not exceed SILK_MAX_ORDER_LPC (not checked here) */ +) +{ + opus_int k, n; + double invGain, rc, rc_mult1, rc_mult2; + silk_float Atmp[ 2 ][ SILK_MAX_ORDER_LPC ]; /* two work rows used alternately by the recursion below */ + silk_float *Aold, *Anew; + + Anew = Atmp[ order & 1 ]; + silk_memcpy( Anew, A, order * sizeof(silk_float) ); /* work on a copy; A itself is never modified */ + + invGain = 1.0; + for( k = order - 1; k > 0; k-- ) { /* step the coefficient set down one order per iteration */ + rc = -Anew[ k ]; + if( rc > RC_THRESHOLD || rc < -RC_THRESHOLD ) { + return 0.0f; /* rc outside [-RC_THRESHOLD, RC_THRESHOLD]: report zero inverse gain */ + } + rc_mult1 = 1.0f - rc * rc; + rc_mult2 = 1.0f / rc_mult1; + invGain *= rc_mult1; /* accumulate the product of ( 1 - rc^2 ) terms */ + /* swap pointers: the previous row becomes the source, the row selected by the parity of k becomes the destination */ + Aold = Anew; + Anew = Atmp[ k & 1 ]; + for( n = 0; n < k; n++ ) { + Anew[ n ] = (silk_float)( ( Aold[ n ] - Aold[ k - n - 1 ] * rc ) * rc_mult2 ); + } + } + rc = -Anew[ 0 ]; /* last remaining coefficient, handled outside the loop because no further update is needed */ + if( rc > RC_THRESHOLD || rc < -RC_THRESHOLD ) { + return 0.0f; + } + rc_mult1 = 1.0f - rc * rc; + invGain *= rc_mult1; + return (silk_float)invGain; +} diff --git a/drivers/opus/silk/float/LTP_analysis_filter_FLP.c b/drivers/opus/silk/float/LTP_analysis_filter_FLP.c new file mode 100644 index 0000000000..fc729e99b1 --- /dev/null +++ b/drivers/opus/silk/float/LTP_analysis_filter_FLP.c @@ -0,0 +1,75 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. 
+Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "main_FLP.h" + +void silk_LTP_analysis_filter_FLP( + silk_float *LTP_res, /* O LTP residual, MAX_NB_SUBFR*(pre_length+subfr_length); one block of (pre_length + subfr_length) samples is written per subframe */ + const silk_float *x, /* I Input signal, with preceding samples */ + const silk_float B[ LTP_ORDER * MAX_NB_SUBFR ], /* I LTP coefficients for each subframe */ + const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ + const silk_float invGains[ MAX_NB_SUBFR ], /* I Inverse quantization gains */ + const opus_int subfr_length, /* I Length of each subframe */ + const opus_int nb_subfr, /* I number of subframes */ + const opus_int pre_length /* I Preceding samples for each subframe */ +) +{ + const silk_float *x_ptr, *x_lag_ptr; + silk_float Btmp[ LTP_ORDER ]; + silk_float *LTP_res_ptr; + silk_float inv_gain; + opus_int k, i, j; + + x_ptr = x; + LTP_res_ptr = LTP_res; + for( k = 0; k < nb_subfr; k++ ) { + x_lag_ptr = x_ptr - pitchL[ k ]; /* points pitchL[ k ] samples before x_ptr, so x needs that much history in front of it */ + inv_gain = invGains[ k ]; + for( i = 0; i < LTP_ORDER; i++ ) { /* local copy of this subframe's LTP_ORDER coefficients */ + Btmp[ i ] = B[ k * LTP_ORDER + i ]; + } + + /* LTP analysis FIR filter, run over subfr_length + pre_length samples */ + for( i = 0; i < subfr_length + pre_length; i++ ) { + LTP_res_ptr[ i ] = x_ptr[ i ]; + /* Subtract long-term prediction: taps run from x_lag_ptr[ LTP_ORDER / 2 ] down to x_lag_ptr[ LTP_ORDER / 2 - ( LTP_ORDER - 1 ) ] */ + for( j = 0; j < LTP_ORDER; j++ ) { + LTP_res_ptr[ i ] -= Btmp[ j ] * x_lag_ptr[ LTP_ORDER / 2 - j ]; + } + LTP_res_ptr[ i ] *= inv_gain; /* scale the residual by this subframe's inverse gain */ + x_lag_ptr++; + } + + /* Update pointers: the output advances by a whole (subfr_length + pre_length) block, the input only by subfr_length */ + LTP_res_ptr += subfr_length + pre_length; + x_ptr += subfr_length; + } +} diff --git a/drivers/opus/silk/float/LTP_scale_ctrl_FLP.c b/drivers/opus/silk/float/LTP_scale_ctrl_FLP.c new file mode 100644 index 0000000000..60e1119d5a --- /dev/null +++ b/drivers/opus/silk/float/LTP_scale_ctrl_FLP.c @@ -0,0 +1,52 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. 
+Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "main_FLP.h" + +void silk_LTP_scale_ctrl_FLP( + silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP; sCmn.indices.LTP_scaleIndex is written */ + silk_encoder_control_FLP *psEncCtrl, /* I/O Encoder control FLP; LTPredCodGain is read, LTP_scale is written */ + opus_int condCoding /* I The type of conditional coding to use */ +) +{ + opus_int round_loss; + + if( condCoding == CODE_INDEPENDENTLY ) { + /* Only scale if first frame in packet */ + round_loss = psEnc->sCmn.PacketLoss_perc + psEnc->sCmn.nFramesPerPacket; + psEnc->sCmn.indices.LTP_scaleIndex = (opus_int8)silk_LIMIT( round_loss * psEncCtrl->LTPredCodGain * 0.1f, 0.0f, 2.0f ); /* silk_LIMIT presumably clamps to [0.0f, 2.0f] (macro defined elsewhere); the cast then truncates to an integer index */ + } else { + /* Default is minimum scaling */ + psEnc->sCmn.indices.LTP_scaleIndex = 0; + } + + psEncCtrl->LTP_scale = (silk_float)silk_LTPScales_table_Q14[ psEnc->sCmn.indices.LTP_scaleIndex ] / 16384.0f; /* table lookup by the index chosen above, divided by 16384 (2^14) to get the float scale */ +} diff --git a/drivers/opus/silk/float/SigProc_FLP.h b/drivers/opus/silk/float/SigProc_FLP.h new file mode 100644 index 0000000000..f0cb3733be --- /dev/null +++ b/drivers/opus/silk/float/SigProc_FLP.h @@ -0,0 +1,204 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. 
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifndef SILK_SIGPROC_FLP_H +#define SILK_SIGPROC_FLP_H + +#include "SigProc_FIX.h" +#include "float_cast.h" +#include <math.h> + +#ifdef __cplusplus +extern "C" +{ +#endif + +/********************************************************************/ +/* SIGNAL PROCESSING FUNCTIONS */ +/********************************************************************/ + +/* Chirp (bw expand) LP AR filter */ +void silk_bwexpander_FLP( + silk_float *ar, /* I/O AR filter to be expanded (without leading 1) */ + const opus_int d, /* I length of ar */ + const silk_float chirp /* I chirp factor (typically in range (0..1) ) */ +); + +/* compute inverse of LPC prediction gain, and */ +/* test if LPC coefficients are stable (all poles within unit circle) */ +/* this code is based on silk_a2k_FLP() */ +silk_float silk_LPC_inverse_pred_gain_FLP( /* O return inverse prediction gain, energy domain */ + const silk_float *A, /* I prediction coefficients [order] */ + opus_int32 order /* I prediction order */ +); + +silk_float silk_schur_FLP( /* O returns residual energy */ + silk_float refl_coef[], /* O reflection coefficients (length order) */ + const silk_float 
auto_corr[], /* I autocorrelation sequence (length order+1) */ + opus_int order /* I order */ +); + +void silk_k2a_FLP( + silk_float *A, /* O prediction coefficients [order] */ + const silk_float *rc, /* I reflection coefficients [order] */ + opus_int32 order /* I prediction order */ +); + +/* Solve the normal equations using the Levinson-Durbin recursion */ +silk_float silk_levinsondurbin_FLP( /* O prediction error energy */ + silk_float A[], /* O prediction coefficients [order] */ + const silk_float corr[], /* I input auto-correlations [order + 1] */ + const opus_int order /* I prediction order */ +); + +/* compute autocorrelation */ +void silk_autocorrelation_FLP( + silk_float *results, /* O result (length correlationCount) */ + const silk_float *inputData, /* I input data to correlate */ + opus_int inputDataSize, /* I length of input */ + opus_int correlationCount /* I number of correlation taps to compute */ +); + +opus_int silk_pitch_analysis_core_FLP( /* O Voicing estimate: 0 voiced, 1 unvoiced */ + const silk_float *frame, /* I Signal of length PE_FRAME_LENGTH_MS*Fs_kHz */ + opus_int *pitch_out, /* O Pitch lag values [nb_subfr] */ + opus_int16 *lagIndex, /* O Lag Index */ + opus_int8 *contourIndex, /* O Pitch contour Index */ + silk_float *LTPCorr, /* I/O Normalized correlation; input: value from previous frame */ + opus_int prevLag, /* I Last lag of previous frame; set to zero if unvoiced */ + const silk_float search_thres1, /* I First stage threshold for lag candidates 0 - 1 */ + const silk_float search_thres2, /* I Final threshold for lag candidates 0 - 1 */ + const opus_int Fs_kHz, /* I sample frequency (kHz) */ + const opus_int complexity, /* I Complexity setting, 0-2, where 2 is highest */ + const opus_int nb_subfr, /* I Number of 5 ms subframes */ + int arch /* I Run-time architecture */ +); + +void silk_insertion_sort_decreasing_FLP( + silk_float *a, /* I/O Unsorted / Sorted vector */ + opus_int *idx, /* O Index vector for the sorted elements */ + 
const opus_int L, /* I Vector length */ + const opus_int K /* I Number of correctly sorted positions */ +); + +/* Burg-method analysis of the input signal: writes prediction coefficients to A and returns the residual energy */ +silk_float silk_burg_modified_FLP( /* O returns residual energy */ + silk_float A[], /* O prediction coefficients (length order) */ + const silk_float x[], /* I input signal, length: nb_subfr*(D+L_sub) */ + const silk_float minInvGain, /* I minimum inverse prediction gain */ + const opus_int subfr_length, /* I input signal subframe length (incl. D preceding samples) */ + const opus_int nb_subfr, /* I number of subframes stacked in x */ + const opus_int D /* I order */ +); + +/* multiply a vector by a constant */ +void silk_scale_vector_FLP( + silk_float *data1, /* I/O vector; non-const, presumably scaled in place - confirm against the implementation */ + silk_float gain, /* I the constant multiplier */ + opus_int dataSize /* I number of elements */ +); + +/* copy and multiply a vector by a constant */ +void silk_scale_copy_vector_FLP( + silk_float *data_out, /* O scaled copy */ + const silk_float *data_in, /* I source vector */ + silk_float gain, /* I the constant multiplier */ + opus_int dataSize /* I number of elements */ +); + +/* inner product of two silk_float arrays, with result as double */ +double silk_inner_product_FLP( + const silk_float *data1, + const silk_float *data2, + opus_int dataSize +); + +/* sum of squares of a silk_float array, with result as double */ +double silk_energy_FLP( + const silk_float *data, + opus_int dataSize +); + +/********************************************************************/ +/* MACROS */ +/********************************************************************/ + +#define PI (3.1415926536f) + +#define silk_min_float( a, b ) (((a) < (b)) ? (a) : (b)) +#define silk_max_float( a, b ) (((a) > (b)) ? 
(a) : (b)) +#define silk_abs_float( a ) ((silk_float)fabs(a)) + +/* sigmoid function: 1 / (1 + exp(-x)), evaluated in double and narrowed to silk_float */ +static OPUS_INLINE silk_float silk_sigmoid( silk_float x ) +{ + return (silk_float)(1.0 / (1.0 + exp(-x))); +} + +/* floating-point to integer conversion (rounding); thin wrapper around float2int() */ +static OPUS_INLINE opus_int32 silk_float2int( silk_float x ) +{ + return (opus_int32)float2int( x ); +} + +/* float array to 16-bit integer array: every element goes through float2int() and then silk_SAT16() */ +static OPUS_INLINE void silk_float2short_array( + opus_int16 *out, + const silk_float *in, + opus_int32 length +) +{ + opus_int32 k; + for( k = length - 1; k >= 0; k-- ) { /* walks from the last element down to index 0 */ + out[k] = silk_SAT16( (opus_int32)float2int( in[k] ) ); + } +} + +/* 16-bit integer array to silk_float array, converted element by element */ +static OPUS_INLINE void silk_short2float_array( + silk_float *out, + const opus_int16 *in, + opus_int32 length +) +{ + opus_int32 k; + for( k = length - 1; k >= 0; k-- ) { /* walks from the last element down to index 0 */ + out[k] = (silk_float)in[k]; + } +} + +/* using log2() helps the fixed-point conversion; computed as log10( x ) times 3.32192809488736, i.e. 1 / log10( 2 ) */ +static OPUS_INLINE silk_float silk_log2( double x ) +{ + return ( silk_float )( 3.32192809488736 * log10( x ) ); +} + +#ifdef __cplusplus +} +#endif + +#endif /* SILK_SIGPROC_FLP_H */ diff --git a/drivers/opus/silk/float/apply_sine_window_FLP.c b/drivers/opus/silk/float/apply_sine_window_FLP.c new file mode 100644 index 0000000000..d904585d17 --- /dev/null +++ b/drivers/opus/silk/float/apply_sine_window_FLP.c @@ -0,0 +1,81 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "main_FLP.h" + +/* Apply sine window to signal vector */ +/* Window types: */ +/* 1 -> sine window from 0 to pi/2 */ +/* 2 -> sine window from pi/2 to pi */ +void silk_apply_sine_window_FLP( + silk_float px_win[], /* O Pointer to windowed signal */ + const silk_float px[], /* I Pointer to input signal */ + const opus_int win_type, /* I Selects a window type */ + const opus_int length /* I Window length, multiple of 4 */ +) +{ + opus_int k; + silk_float freq, c, S0, S1; + + silk_assert( win_type == 1 || win_type == 2 ); + + /* Length must be multiple of 4 */ + silk_assert( ( length & 3 ) == 0 ); + + freq = PI / ( length + 1 ); + + /* Approximation of 2 * cos(f) */ + c = 2.0f - freq * freq; + + /* Initialize state */ + if( win_type < 2 ) { + /* Start from 0 */ + S0 = 0.0f; + /* Approximation of sin(f) */ + S1 = freq; + } else { + /* Start from 1 */ + S0 = 1.0f; + /* Approximation of cos(f) */ + S1 = 0.5f * c; + } + + /* Uses the recursive equation: sin(n*f) = 2 * cos(f) * sin((n-1)*f) - sin((n-2)*f) */ + /* 4 samples at a time */ + for( k = 0; k < length; k += 4 ) { + px_win[ k + 0 ] = px[ k + 0 ] * 0.5f * ( S0 + S1 ); + px_win[ k + 1 ] = px[ k + 1 ] * S1; + S0 = c * S1 - S0; + px_win[ k + 2 ] = px[ k + 2 ] * 0.5f * ( S1 + S0 ); + px_win[ k + 3 ] = px[ k + 3 ] * S0; + S1 = c * S0 - S1; + } +} diff --git a/drivers/opus/silk/float/autocorrelation_FLP.c b/drivers/opus/silk/float/autocorrelation_FLP.c new file mode 100644 index 0000000000..192a001b16 --- /dev/null +++ b/drivers/opus/silk/float/autocorrelation_FLP.c @@ -0,0 +1,52 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. 
+Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "typedef.h" +#include "SigProc_FLP.h" + +/* compute autocorrelation */ +void silk_autocorrelation_FLP( + silk_float *results, /* O result (length correlationCount) */ + const silk_float *inputData, /* I input data to correlate */ + opus_int inputDataSize, /* I length of input */ + opus_int correlationCount /* I number of correlation taps to compute */ +) +{ + opus_int i; + + if( correlationCount > inputDataSize ) { + correlationCount = inputDataSize; + } + + for( i = 0; i < correlationCount; i++ ) { + results[ i ] = (silk_float)silk_inner_product_FLP( inputData, inputData + i, inputDataSize - i ); + } +} diff --git a/drivers/opus/silk/float/burg_modified_FLP.c b/drivers/opus/silk/float/burg_modified_FLP.c new file mode 100644 index 0000000000..0f30ca2280 --- /dev/null +++ b/drivers/opus/silk/float/burg_modified_FLP.c @@ -0,0 +1,186 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. 
#define MAX_FRAME_SIZE              384 /* subfr_length * nb_subfr = ( 0.005 * 16000 + 16 ) * 4 = 384*/

/* Compute reflection coefficients from input signal */
/* The input x holds nb_subfr subframes of subfr_length samples each, stacked back to back.  */
/* Correlations are accumulated over all subframes. All internal arithmetic is done in       */
/* double precision; the prediction coefficients are written to A as silk_float with the     */
/* sign of the internal filter Af flipped. Once the inverse prediction gain would fall to    */
/* minInvGain or below, the current reflection coefficient is clamped, the remaining         */
/* coefficients are zeroed and the recursion stops early.                                    */
silk_float silk_burg_modified_FLP(          /* O    returns residual energy                                     */
    silk_float          A[],                /* O    prediction coefficients (length order)                      */
    const silk_float    x[],                /* I    input signal, length: nb_subfr*(D+L_sub)                    */
    const silk_float    minInvGain,         /* I    minimum inverse prediction gain                             */
    const opus_int      subfr_length,       /* I    input signal subframe length (incl. D preceding samples)    */
    const opus_int      nb_subfr,           /* I    number of subframes stacked in x                            */
    const opus_int      D                   /* I    order                                                       */
)
{
    opus_int         k, n, s, reached_max_gain;
    double           C0, invGain, num, nrg_f, nrg_b, rc, Atmp, tmp1, tmp2;
    const silk_float *x_ptr;
    double           C_first_row[ SILK_MAX_ORDER_LPC ], C_last_row[ SILK_MAX_ORDER_LPC ];
    double           CAf[ SILK_MAX_ORDER_LPC + 1 ], CAb[ SILK_MAX_ORDER_LPC + 1 ];
    /* Af is not initialized here: at order n only Af[ 0 .. n-1 ] is read, and those were written by earlier orders */
    double           Af[ SILK_MAX_ORDER_LPC ];

    silk_assert( subfr_length * nb_subfr <= MAX_FRAME_SIZE );

    /* Compute autocorrelations, added over subframes */
    C0 = silk_energy_FLP( x, nb_subfr * subfr_length );
    silk_memset( C_first_row, 0, SILK_MAX_ORDER_LPC * sizeof( double ) );
    for( s = 0; s < nb_subfr; s++ ) {
        x_ptr = x + s * subfr_length;
        for( n = 1; n < D + 1; n++ ) {
            /* Lag-n correlation of this subframe is stored at index n - 1 */
            C_first_row[ n - 1 ] += silk_inner_product_FLP( x_ptr, x_ptr + n, subfr_length - n );
        }
    }
    silk_memcpy( C_last_row, C_first_row, SILK_MAX_ORDER_LPC * sizeof( double ) );

    /* Initialize */
    /* C0 is inflated by FIND_LPC_COND_FAC * C0 plus a small constant so the start value is strictly positive */
    CAb[ 0 ] = CAf[ 0 ] = C0 + FIND_LPC_COND_FAC * C0 + 1e-9f;
    invGain = 1.0f;
    reached_max_gain = 0;
    for( n = 0; n < D; n++ ) {
        /* Update first row of correlation matrix (without first element) */
        /* Update last row of correlation matrix (without last element, stored in reversed order) */
        /* Update C * Af */
        /* Update C * flipud(Af) (stored in reversed order) */
        for( s = 0; s < nb_subfr; s++ ) {
            x_ptr = x + s * subfr_length;
            tmp1 = x_ptr[ n ];
            tmp2 = x_ptr[ subfr_length - n - 1 ];
            for( k = 0; k < n; k++ ) {
                C_first_row[ k ] -= x_ptr[ n ] * x_ptr[ n - k - 1 ];
                C_last_row[ k ]  -= x_ptr[ subfr_length - n - 1 ] * x_ptr[ subfr_length - n + k ];
                Atmp = Af[ k ];
                tmp1 += x_ptr[ n - k - 1 ] * Atmp;
                tmp2 += x_ptr[ subfr_length - n + k ] * Atmp;
            }
            for( k = 0; k <= n; k++ ) {
                CAf[ k ] -= tmp1 * x_ptr[ n - k ];
                CAb[ k ] -= tmp2 * x_ptr[ subfr_length - n + k - 1 ];
            }
        }
        tmp1 = C_first_row[ n ];
        tmp2 = C_last_row[ n ];
        for( k = 0; k < n; k++ ) {
            Atmp = Af[ k ];
            tmp1 += C_last_row[  n - k - 1 ] * Atmp;
            tmp2 += C_first_row[ n - k - 1 ] * Atmp;
        }
        CAf[ n + 1 ] = tmp1;
        CAb[ n + 1 ] = tmp2;

        /* Calculate numerator and denominator for the next order reflection (parcor) coefficient */
        num   = CAb[ n + 1 ];
        nrg_b = CAb[ 0 ];
        nrg_f = CAf[ 0 ];
        for( k = 0; k < n; k++ ) {
            Atmp = Af[ k ];
            num   += CAb[ n - k ] * Atmp;
            nrg_b += CAb[ k + 1 ] * Atmp;
            nrg_f += CAf[ k + 1 ] * Atmp;
        }
        silk_assert( nrg_f > 0.0 );
        silk_assert( nrg_b > 0.0 );

        /* Calculate the next order reflection (parcor) coefficient */
        rc = -2.0 * num / ( nrg_f + nrg_b );
        silk_assert( rc > -1.0 && rc < 1.0 );

        /* Update inverse prediction gain */
        tmp1 = invGain * ( 1.0 - rc * rc );
        if( tmp1 <= minInvGain ) {
            /* Max prediction gain exceeded; set reflection coefficient such that max prediction gain is exactly hit */
            rc = sqrt( 1.0 - minInvGain / invGain );
            if( num > 0 ) {
                /* Ensure adjusted reflection coefficients has the original sign */
                rc = -rc;
            }
            invGain = minInvGain;
            reached_max_gain = 1;
        } else {
            invGain = tmp1;
        }

        /* Update the AR coefficients */
        /* Entries are updated in mirrored pairs ( k, n - k - 1 ); both old values are read before either is written */
        for( k = 0; k < (n + 1) >> 1; k++ ) {
            tmp1 = Af[ k ];
            tmp2 = Af[ n - k - 1 ];
            Af[ k ]         = tmp1 + rc * tmp2;
            Af[ n - k - 1 ] = tmp2 + rc * tmp1;
        }
        Af[ n ] = rc;

        if( reached_max_gain ) {
            /* Reached max prediction gain; set remaining coefficients to zero and exit loop */
            for( k = n + 1; k < D; k++ ) {
                Af[ k ] = 0.0;
            }
            break;
        }

        /* Update C * Af and C * Ab */
        for( k = 0; k <= n + 1; k++ ) {
            tmp1 = CAf[ k ];
            CAf[ k ]          += rc * CAb[ n - k + 1 ];
            CAb[ n - k + 1  ] += rc * tmp1;
        }
    }

    if( reached_max_gain ) {
        /* Convert to silk_float */
        for( k = 0; k < D; k++ ) {
            A[ k ] = (silk_float)( -Af[ k ] );
        }
        /* Subtract energy of preceding samples from C0 */
        for( s = 0; s < nb_subfr; s++ ) {
            C0 -= silk_energy_FLP( x + s * subfr_length, D );
        }
        /* Approximate residual energy */
        nrg_f = C0 * invGain;
    } else {
        /* Compute residual energy and store coefficients as silk_float */
        nrg_f = CAf[ 0 ];
        tmp1 = 1.0;
        for( k = 0; k < D; k++ ) {
            Atmp = Af[ k ];
            nrg_f += CAf[ k + 1 ] * Atmp;
            tmp1  += Atmp * Atmp;
            A[ k ] = (silk_float)(-Atmp);
        }
        /* Remove the FIND_LPC_COND_FAC * C0 term added at initialization, scaled by 1 + sum of squared coefficients */
        nrg_f -= FIND_LPC_COND_FAC * C0 * tmp1;
    }

    /* Return residual energy */
    return (silk_float)nrg_f;
}
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "SigProc_FLP.h" + +/* Chirp (bw expand) LP AR filter */ +void silk_bwexpander_FLP( + silk_float *ar, /* I/O AR filter to be expanded (without leading 1) */ + const opus_int d, /* I length of ar */ + const silk_float chirp /* I chirp factor (typically in range (0..1) ) */ +) +{ + opus_int i; + silk_float cfac = chirp; + + for( i = 0; i < d - 1; i++ ) { + ar[ i ] *= cfac; + cfac *= chirp; + } + ar[ d - 1 ] *= cfac; +} diff --git a/drivers/opus/silk/float/corrMatrix_FLP.c b/drivers/opus/silk/float/corrMatrix_FLP.c new file mode 100644 index 0000000000..e193c98f11 --- /dev/null +++ b/drivers/opus/silk/float/corrMatrix_FLP.c @@ -0,0 +1,93 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. 
+- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +/********************************************************************** + * Correlation matrix computations for LS estimate. 
+ **********************************************************************/ + +#include "main_FLP.h" + +/* Calculates correlation vector X'*t */ +void silk_corrVector_FLP( + const silk_float *x, /* I x vector [L+order-1] used to create X */ + const silk_float *t, /* I Target vector [L] */ + const opus_int L, /* I Length of vecors */ + const opus_int Order, /* I Max lag for correlation */ + silk_float *Xt /* O X'*t correlation vector [order] */ +) +{ + opus_int lag; + const silk_float *ptr1; + + ptr1 = &x[ Order - 1 ]; /* Points to first sample of column 0 of X: X[:,0] */ + for( lag = 0; lag < Order; lag++ ) { + /* Calculate X[:,lag]'*t */ + Xt[ lag ] = (silk_float)silk_inner_product_FLP( ptr1, t, L ); + ptr1--; /* Next column of X */ + } +} + +/* Calculates correlation matrix X'*X */ +void silk_corrMatrix_FLP( + const silk_float *x, /* I x vector [ L+order-1 ] used to create X */ + const opus_int L, /* I Length of vectors */ + const opus_int Order, /* I Max lag for correlation */ + silk_float *XX /* O X'*X correlation matrix [order x order] */ +) +{ + opus_int j, lag; + double energy; + const silk_float *ptr1, *ptr2; + + ptr1 = &x[ Order - 1 ]; /* First sample of column 0 of X */ + energy = silk_energy_FLP( ptr1, L ); /* X[:,0]'*X[:,0] */ + matrix_ptr( XX, 0, 0, Order ) = ( silk_float )energy; + for( j = 1; j < Order; j++ ) { + /* Calculate X[:,j]'*X[:,j] */ + energy += ptr1[ -j ] * ptr1[ -j ] - ptr1[ L - j ] * ptr1[ L - j ]; + matrix_ptr( XX, j, j, Order ) = ( silk_float )energy; + } + + ptr2 = &x[ Order - 2 ]; /* First sample of column 1 of X */ + for( lag = 1; lag < Order; lag++ ) { + /* Calculate X[:,0]'*X[:,lag] */ + energy = silk_inner_product_FLP( ptr1, ptr2, L ); + matrix_ptr( XX, lag, 0, Order ) = ( silk_float )energy; + matrix_ptr( XX, 0, lag, Order ) = ( silk_float )energy; + /* Calculate X[:,j]'*X[:,j + lag] */ + for( j = 1; j < ( Order - lag ); j++ ) { + energy += ptr1[ -j ] * ptr2[ -j ] - ptr1[ L - j ] * ptr2[ L - j ]; + matrix_ptr( XX, lag + j, j, 
Order ) = ( silk_float )energy; + matrix_ptr( XX, j, lag + j, Order ) = ( silk_float )energy; + } + ptr2--; /* Next column of X */ + } +} diff --git a/drivers/opus/silk/float/encode_frame_FLP.c b/drivers/opus/silk/float/encode_frame_FLP.c new file mode 100644 index 0000000000..90e5357ced --- /dev/null +++ b/drivers/opus/silk/float/encode_frame_FLP.c @@ -0,0 +1,372 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
/* Low Bitrate Redundancy (LBRR) encoding. Reuse all parameters but encode with lower bitrate */
/* Forward declaration; the definition follows silk_encode_frame_FLP() in this file */
static OPUS_INLINE void silk_LBRR_encode_FLP(
    silk_encoder_state_FLP          *psEnc,                             /* I/O  Encoder state FLP                           */
    silk_encoder_control_FLP        *psEncCtrl,                         /* I/O  Encoder control FLP                         */
    const silk_float                xfw[],                              /* I    Input signal                                */
    opus_int                        condCoding                          /* I    The type of conditional coding used so far for this frame */
);

/* Run voice activity detection on the buffered input and update the signal type, */
/* the no-speech counter, the DTX state and the VAD flag of the current frame.    */
/* Note that this function only ever clears inDTX; it never sets it.              */
void silk_encode_do_VAD_FLP(
    silk_encoder_state_FLP          *psEnc                              /* I/O  Encoder state FLP                           */
)
{
    /****************************/
    /* Voice Activity Detection */
    /****************************/
    silk_VAD_GetSA_Q8( &psEnc->sCmn, psEnc->sCmn.inputBuf + 1 );

    /**************************************************/
    /* Convert speech activity into VAD and DTX flags */
    /**************************************************/
    if( psEnc->sCmn.speech_activity_Q8 < SILK_FIX_CONST( SPEECH_ACTIVITY_DTX_THRES, 8 ) ) {
        psEnc->sCmn.indices.signalType = TYPE_NO_VOICE_ACTIVITY;
        psEnc->sCmn.noSpeechCounter++;
        if( psEnc->sCmn.noSpeechCounter < NB_SPEECH_FRAMES_BEFORE_DTX ) {
            /* Not enough consecutive inactive frames yet */
            psEnc->sCmn.inDTX = 0;
        } else if( psEnc->sCmn.noSpeechCounter > MAX_CONSECUTIVE_DTX + NB_SPEECH_FRAMES_BEFORE_DTX ) {
            /* Too many consecutive inactive frames: rewind the counter and leave DTX */
            psEnc->sCmn.noSpeechCounter = NB_SPEECH_FRAMES_BEFORE_DTX;
            psEnc->sCmn.inDTX           = 0;
        }
        psEnc->sCmn.VAD_flags[ psEnc->sCmn.nFramesEncoded ] = 0;
    } else {
        psEnc->sCmn.noSpeechCounter    = 0;
        psEnc->sCmn.inDTX              = 0;
        psEnc->sCmn.indices.signalType = TYPE_UNVOICED;
        psEnc->sCmn.VAD_flags[ psEnc->sCmn.nFramesEncoded ] = 1;
    }
}

/****************/
/* Encode frame */
/****************/
/* Analyses the frame held in the encoder's input buffer and, unless prefillFlag is set,   */
/* quantizes and range-codes it. The quantize/encode step is repeated with adjusted gains  */
/* for up to maxIter + 1 passes to steer the bit count towards maxBits.                    */
/* The returned value is the local ret, which is initialized to 0 and never modified here. */
opus_int silk_encode_frame_FLP(
    silk_encoder_state_FLP          *psEnc,                             /* I/O  Encoder state FLP                           */
    opus_int32                      *pnBytesOut,                        /* O    Number of payload bytes;                    */
    ec_enc                          *psRangeEnc,                        /* I/O  compressor data structure                   */
    opus_int                        condCoding,                         /* I    The type of conditional coding to use       */
    opus_int                        maxBits,                            /* I    If > 0: maximum number of output bits       */
    opus_int                        useCBR                              /* I    Flag to force constant-bitrate operation    */
)
{
    silk_encoder_control_FLP sEncCtrl;
    opus_int     i, iter, maxIter, found_upper, found_lower, ret = 0;
    silk_float   *x_frame, *res_pitch_frame;
    silk_float   xfw[ MAX_FRAME_LENGTH ];
    silk_float   res_pitch[ 2 * MAX_FRAME_LENGTH + LA_PITCH_MAX ];
    ec_enc       sRangeEnc_copy, sRangeEnc_copy2;
    silk_nsq_state sNSQ_copy, sNSQ_copy2;
    opus_int32   seed_copy, nBits, nBits_lower, nBits_upper, gainMult_lower, gainMult_upper;
    opus_int32   gainsID, gainsID_lower, gainsID_upper;
    opus_int16   gainMult_Q8;
    opus_int16   ec_prevLagIndex_copy;
    opus_int     ec_prevSignalType_copy;
    opus_int8    LastGainIndex_copy2;
    opus_int32   pGains_Q16[ MAX_NB_SUBFR ];
    /* Backup of the range coder output bytes; the asserts below check offs <= 1275 before each copy */
    opus_uint8   ec_buf_copy[ 1275 ];

    /* This is totally unnecessary but many compilers (including gcc) are too dumb to realise it */
    LastGainIndex_copy2 = nBits_lower = nBits_upper = gainMult_lower = gainMult_upper = 0;

    /* Seed cycles through 0..3 with the frame counter */
    psEnc->sCmn.indices.Seed = psEnc->sCmn.frameCounter++ & 3;

    /**************************************************************/
    /* Set up Input Pointers, and insert frame in input buffer    */
    /**************************************************************/
    /* pointers aligned with start of frame to encode */
    x_frame         = psEnc->x_buf + psEnc->sCmn.ltp_mem_length;    /* start of frame to encode */
    res_pitch_frame = res_pitch    + psEnc->sCmn.ltp_mem_length;    /* start of pitch LPC residual frame */

    /***************************************/
    /* Ensure smooth bandwidth transitions */
    /***************************************/
    silk_LP_variable_cutoff( &psEnc->sCmn.sLP, psEnc->sCmn.inputBuf + 1, psEnc->sCmn.frame_length );

    /*******************************************/
    /* Copy new frame to front of input buffer */
    /*******************************************/
    silk_short2float_array( x_frame + LA_SHAPE_MS * psEnc->sCmn.fs_kHz, psEnc->sCmn.inputBuf + 1, psEnc->sCmn.frame_length );

    /* Add tiny signal to avoid high CPU load from denormalized floating point numbers */
    /* ( 1 - ( i & 2 ) ) is +1 when bit 1 of i is clear and -1 when it is set, at 8 evenly spaced positions */
    for( i = 0; i < 8; i++ ) {
        x_frame[ LA_SHAPE_MS * psEnc->sCmn.fs_kHz + i * ( psEnc->sCmn.frame_length >> 3 ) ] += ( 1 - ( i & 2 ) ) * 1e-6f;
    }

    if( !psEnc->sCmn.prefillFlag ) {
        /*****************************************/
        /* Find pitch lags, initial LPC analysis */
        /*****************************************/
        silk_find_pitch_lags_FLP( psEnc, &sEncCtrl, res_pitch, x_frame, psEnc->sCmn.arch );

        /************************/
        /* Noise shape analysis */
        /************************/
        silk_noise_shape_analysis_FLP( psEnc, &sEncCtrl, res_pitch_frame, x_frame );

        /***************************************************/
        /* Find linear prediction coefficients (LPC + LTP) */
        /***************************************************/
        silk_find_pred_coefs_FLP( psEnc, &sEncCtrl, res_pitch, x_frame, condCoding );

        /****************************************/
        /* Process gains                        */
        /****************************************/
        silk_process_gains_FLP( psEnc, &sEncCtrl, condCoding );

        /*****************************************/
        /* Prefiltering for noise shaper         */
        /*****************************************/
        silk_prefilter_FLP( psEnc, &sEncCtrl, xfw, x_frame );

        /****************************************/
        /* Low Bitrate Redundant Encoding       */
        /****************************************/
        silk_LBRR_encode_FLP( psEnc, &sEncCtrl, xfw, condCoding );

        /* Loop over quantizer and entropy coding to control bitrate */
        maxIter = 6;
        gainMult_Q8 = SILK_FIX_CONST( 1, 8 );
        found_lower = 0;
        found_upper = 0;
        gainsID = silk_gains_ID( psEnc->sCmn.indices.GainsIndices, psEnc->sCmn.nb_subfr );
        gainsID_lower = -1;
        gainsID_upper = -1;
        /* Copy part of the input state */
        silk_memcpy( &sRangeEnc_copy, psRangeEnc, sizeof( ec_enc ) );
        silk_memcpy( &sNSQ_copy, &psEnc->sCmn.sNSQ, sizeof( silk_nsq_state ) );
        seed_copy = psEnc->sCmn.indices.Seed;
        ec_prevLagIndex_copy = psEnc->sCmn.ec_prevLagIndex;
        ec_prevSignalType_copy = psEnc->sCmn.ec_prevSignalType;
        for( iter = 0; ; iter++ ) {
            /* If this gains vector was already tried, reuse its bit count instead of re-encoding */
            if( gainsID == gainsID_lower ) {
                nBits = nBits_lower;
            } else if( gainsID == gainsID_upper ) {
                nBits = nBits_upper;
            } else {
                /* Restore part of the input state */
                if( iter > 0 ) {
                    silk_memcpy( psRangeEnc, &sRangeEnc_copy, sizeof( ec_enc ) );
                    silk_memcpy( &psEnc->sCmn.sNSQ, &sNSQ_copy, sizeof( silk_nsq_state ) );
                    psEnc->sCmn.indices.Seed = seed_copy;
                    psEnc->sCmn.ec_prevLagIndex = ec_prevLagIndex_copy;
                    psEnc->sCmn.ec_prevSignalType = ec_prevSignalType_copy;
                }

                /*****************************************/
                /* Noise shaping quantization            */
                /*****************************************/
                silk_NSQ_wrapper_FLP( psEnc, &sEncCtrl, &psEnc->sCmn.indices, &psEnc->sCmn.sNSQ, psEnc->sCmn.pulses, xfw );

                /****************************************/
                /* Encode Parameters                    */
                /****************************************/
                silk_encode_indices( &psEnc->sCmn, psRangeEnc, psEnc->sCmn.nFramesEncoded, 0, condCoding );

                /****************************************/
                /* Encode Excitation Signal             */
                /****************************************/
                silk_encode_pulses( psRangeEnc, psEnc->sCmn.indices.signalType, psEnc->sCmn.indices.quantOffsetType,
                      psEnc->sCmn.pulses, psEnc->sCmn.frame_length );

                nBits = ec_tell( psRangeEnc );

                /* Without CBR, a first pass that already fits the budget is accepted as is */
                if( useCBR == 0 && iter == 0 && nBits <= maxBits ) {
                    break;
                }
            }

            if( iter == maxIter ) {
                /* Last allowed pass: fall back to the saved in-budget result if the current one is a cached or over-budget one */
                if( found_lower && ( gainsID == gainsID_lower || nBits > maxBits ) ) {
                    /* Restore output state from earlier iteration that did meet the bitrate budget */
                    silk_memcpy( psRangeEnc, &sRangeEnc_copy2, sizeof( ec_enc ) );
                    silk_assert( sRangeEnc_copy2.offs <= 1275 );
                    silk_memcpy( psRangeEnc->buf, ec_buf_copy, sRangeEnc_copy2.offs );
                    silk_memcpy( &psEnc->sCmn.sNSQ, &sNSQ_copy2, sizeof( silk_nsq_state ) );
                    psEnc->sShape.LastGainIndex = LastGainIndex_copy2;
                }
                break;
            }

            if( nBits > maxBits ) {
                if( found_lower == 0 && iter >= 2 ) {
                    /* Adjust the quantizer's rate/distortion tradeoff and discard previous "upper" results */
                    sEncCtrl.Lambda *= 1.5f;
                    found_upper = 0;
                    gainsID_upper = -1;
                } else {
                    found_upper = 1;
                    nBits_upper = nBits;
                    gainMult_upper = gainMult_Q8;
                    gainsID_upper = gainsID;
                }
            } else if( nBits < maxBits - 5 ) {
                found_lower = 1;
                nBits_lower = nBits;
                gainMult_lower = gainMult_Q8;
                if( gainsID != gainsID_lower ) {
                    gainsID_lower = gainsID;
                    /* Copy part of the output state */
                    silk_memcpy( &sRangeEnc_copy2, psRangeEnc, sizeof( ec_enc ) );
                    silk_assert( psRangeEnc->offs <= 1275 );
                    silk_memcpy( ec_buf_copy, psRangeEnc->buf, psRangeEnc->offs );
                    silk_memcpy( &sNSQ_copy2, &psEnc->sCmn.sNSQ, sizeof( silk_nsq_state ) );
                    LastGainIndex_copy2 = psEnc->sShape.LastGainIndex;
                }
            } else {
                /* Within 5 bits of budget: close enough */
                break;
            }

            if( ( found_lower & found_upper ) == 0 ) {
                /* Adjust gain according to high-rate rate/distortion curve */
                opus_int32 gain_factor_Q16;
                gain_factor_Q16 = silk_log2lin( silk_LSHIFT( nBits - maxBits, 7 ) / psEnc->sCmn.frame_length + SILK_FIX_CONST( 16, 7 ) );
                gain_factor_Q16 = silk_min_32( gain_factor_Q16, SILK_FIX_CONST( 2, 16 ) );
                if( nBits > maxBits ) {
                    gain_factor_Q16 = silk_max_32( gain_factor_Q16, SILK_FIX_CONST( 1.3, 16 ) );
                }
                gainMult_Q8 = silk_SMULWB( gain_factor_Q16, gainMult_Q8 );
            } else {
                /* Adjust gain by interpolating */
                gainMult_Q8 = gainMult_lower + ( ( gainMult_upper - gainMult_lower ) * ( maxBits - nBits_lower ) ) / ( nBits_upper - nBits_lower );
                /* New gain multiplier must be between 25% and 75% of old range (note that gainMult_upper < gainMult_lower) */
                if( gainMult_Q8 > silk_ADD_RSHIFT32( gainMult_lower, gainMult_upper - gainMult_lower, 2 ) ) {
                    gainMult_Q8 = silk_ADD_RSHIFT32( gainMult_lower, gainMult_upper - gainMult_lower, 2 );
                } else
                if( gainMult_Q8 < silk_SUB_RSHIFT32( gainMult_upper, gainMult_upper - gainMult_lower, 2 ) ) {
                    gainMult_Q8 = silk_SUB_RSHIFT32( gainMult_upper, gainMult_upper - gainMult_lower, 2 );
                }
            }

            /* Apply the new multiplier to the unquantized gains */
            for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) {
                pGains_Q16[ i ] = silk_LSHIFT_SAT32( silk_SMULWB( sEncCtrl.GainsUnq_Q16[ i ], gainMult_Q8 ), 8 );
            }

            /* Quantize gains */
            /* Start every pass from the gain index saved in sEncCtrl.lastGainIndexPrev */
            psEnc->sShape.LastGainIndex = sEncCtrl.lastGainIndexPrev;
            silk_gains_quant( psEnc->sCmn.indices.GainsIndices, pGains_Q16,
                  &psEnc->sShape.LastGainIndex, condCoding == CODE_CONDITIONALLY, psEnc->sCmn.nb_subfr );

            /* Unique identifier of gains vector */
            gainsID = silk_gains_ID( psEnc->sCmn.indices.GainsIndices, psEnc->sCmn.nb_subfr );

            /* Overwrite unquantized gains with quantized gains and convert back to Q0 from Q16 */
            for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) {
                sEncCtrl.Gains[ i ] = pGains_Q16[ i ] / 65536.0f;
            }
        }
    }

    /* Update input buffer */
    /* Shift the buffer contents down by one frame length */
    silk_memmove( psEnc->x_buf, &psEnc->x_buf[ psEnc->sCmn.frame_length ],
        ( psEnc->sCmn.ltp_mem_length + LA_SHAPE_MS * psEnc->sCmn.fs_kHz ) * sizeof( silk_float ) );

    /* Exit without entropy coding */
    if( psEnc->sCmn.prefillFlag ) {
        /* No payload */
        *pnBytesOut = 0;
        return ret;
    }

    /* Parameters needed for next frame */
    psEnc->sCmn.prevLag        = sEncCtrl.pitchL[ psEnc->sCmn.nb_subfr - 1 ];
    psEnc->sCmn.prevSignalType = psEnc->sCmn.indices.signalType;

    /****************************************/
    /* Finalize payload                     */
    /****************************************/
    psEnc->sCmn.first_frame_after_reset = 0;
    /* Payload size */
    /* Bit count rounded up to whole bytes */
    *pnBytesOut = silk_RSHIFT( ec_tell( psRangeEnc ) + 7, 3 );

    return ret;
}
Reuse all parameters but encode excitation at lower bitrate */ +static OPUS_INLINE void silk_LBRR_encode_FLP( + silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */ + silk_encoder_control_FLP *psEncCtrl, /* I/O Encoder control FLP */ + const silk_float xfw[], /* I Input signal */ + opus_int condCoding /* I The type of conditional coding used so far for this frame */ +) +{ + opus_int k; + opus_int32 Gains_Q16[ MAX_NB_SUBFR ]; + silk_float TempGains[ MAX_NB_SUBFR ]; + SideInfoIndices *psIndices_LBRR = &psEnc->sCmn.indices_LBRR[ psEnc->sCmn.nFramesEncoded ]; + silk_nsq_state sNSQ_LBRR; + + /*******************************************/ + /* Control use of inband LBRR */ + /*******************************************/ + if( psEnc->sCmn.LBRR_enabled && psEnc->sCmn.speech_activity_Q8 > SILK_FIX_CONST( LBRR_SPEECH_ACTIVITY_THRES, 8 ) ) { + psEnc->sCmn.LBRR_flags[ psEnc->sCmn.nFramesEncoded ] = 1; + + /* Copy noise shaping quantizer state and quantization indices from regular encoding */ + silk_memcpy( &sNSQ_LBRR, &psEnc->sCmn.sNSQ, sizeof( silk_nsq_state ) ); + silk_memcpy( psIndices_LBRR, &psEnc->sCmn.indices, sizeof( SideInfoIndices ) ); + + /* Save original gains */ + silk_memcpy( TempGains, psEncCtrl->Gains, psEnc->sCmn.nb_subfr * sizeof( silk_float ) ); + + if( psEnc->sCmn.nFramesEncoded == 0 || psEnc->sCmn.LBRR_flags[ psEnc->sCmn.nFramesEncoded - 1 ] == 0 ) { + /* First frame in packet or previous frame not LBRR coded */ + psEnc->sCmn.LBRRprevLastGainIndex = psEnc->sShape.LastGainIndex; + + /* Increase Gains to get target LBRR rate */ + psIndices_LBRR->GainsIndices[ 0 ] += psEnc->sCmn.LBRR_GainIncreases; + psIndices_LBRR->GainsIndices[ 0 ] = silk_min_int( psIndices_LBRR->GainsIndices[ 0 ], N_LEVELS_QGAIN - 1 ); + } + + /* Decode to get gains in sync with decoder */ + silk_gains_dequant( Gains_Q16, psIndices_LBRR->GainsIndices, + &psEnc->sCmn.LBRRprevLastGainIndex, condCoding == CODE_CONDITIONALLY, psEnc->sCmn.nb_subfr ); + + /* Overwrite unquantized gains with 
quantized gains and convert back to Q0 from Q16 */ + for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { + psEncCtrl->Gains[ k ] = Gains_Q16[ k ] * ( 1.0f / 65536.0f ); + } + + /*****************************************/ + /* Noise shaping quantization */ + /*****************************************/ + silk_NSQ_wrapper_FLP( psEnc, psEncCtrl, psIndices_LBRR, &sNSQ_LBRR, + psEnc->sCmn.pulses_LBRR[ psEnc->sCmn.nFramesEncoded ], xfw ); + + /* Restore original gains */ + silk_memcpy( psEncCtrl->Gains, TempGains, psEnc->sCmn.nb_subfr * sizeof( silk_float ) ); + } +} diff --git a/drivers/opus/silk/float/energy_FLP.c b/drivers/opus/silk/float/energy_FLP.c new file mode 100644 index 0000000000..d441526df3 --- /dev/null +++ b/drivers/opus/silk/float/energy_FLP.c @@ -0,0 +1,60 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "SigProc_FLP.h" + +/* sum of squares of a silk_float array, with result as double */ +double silk_energy_FLP( + const silk_float *data, + opus_int dataSize +) +{ + opus_int i, dataSize4; + double result; + + /* 4x unrolled loop */ + result = 0.0; + dataSize4 = dataSize & 0xFFFC; + for( i = 0; i < dataSize4; i += 4 ) { + result += data[ i + 0 ] * (double)data[ i + 0 ] + + data[ i + 1 ] * (double)data[ i + 1 ] + + data[ i + 2 ] * (double)data[ i + 2 ] + + data[ i + 3 ] * (double)data[ i + 3 ]; + } + + /* add any remaining products */ + for( ; i < dataSize; i++ ) { + result += data[ i ] * (double)data[ i ]; + } + + silk_assert( result >= 0.0 ); + return result; +} diff --git a/drivers/opus/silk/float/find_LPC_FLP.c b/drivers/opus/silk/float/find_LPC_FLP.c new file mode 100644 index 0000000000..212f2de3cd --- /dev/null +++ b/drivers/opus/silk/float/find_LPC_FLP.c @@ -0,0 +1,104 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. 
+- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "define.h" +#include "main_FLP.h" +#include "tuning_parameters.h" + +/* LPC analysis */ +void silk_find_LPC_FLP( + silk_encoder_state *psEncC, /* I/O Encoder state */ + opus_int16 NLSF_Q15[], /* O NLSFs */ + const silk_float x[], /* I Input signal */ + const silk_float minInvGain /* I Inverse of max prediction gain */ +) +{ + opus_int k, subfr_length; + silk_float a[ MAX_LPC_ORDER ]; + + /* Used only for NLSF interpolation */ + silk_float res_nrg, res_nrg_2nd, res_nrg_interp; + opus_int16 NLSF0_Q15[ MAX_LPC_ORDER ]; + silk_float a_tmp[ MAX_LPC_ORDER ]; + silk_float LPC_res[ MAX_FRAME_LENGTH + MAX_NB_SUBFR * MAX_LPC_ORDER ]; + + subfr_length = psEncC->subfr_length + psEncC->predictLPCOrder; + + /* Default: No interpolation */ + psEncC->indices.NLSFInterpCoef_Q2 = 4; + + /* Burg AR analysis for the full frame */ + res_nrg = silk_burg_modified_FLP( a, x, minInvGain, subfr_length, psEncC->nb_subfr, psEncC->predictLPCOrder ); + + if( psEncC->useInterpolatedNLSFs && !psEncC->first_frame_after_reset && psEncC->nb_subfr == MAX_NB_SUBFR ) { + /* Optimal solution for last 10 ms; subtract residual energy here, as that's easier than */ + /* adding it to the residual energy of the first 10 ms in each iteration of the search below */ + res_nrg -= silk_burg_modified_FLP( a_tmp, x + ( MAX_NB_SUBFR / 2 ) * subfr_length, minInvGain, subfr_length, MAX_NB_SUBFR / 2, psEncC->predictLPCOrder ); + + /* Convert to NLSFs */ + silk_A2NLSF_FLP( NLSF_Q15, a_tmp, psEncC->predictLPCOrder ); + + /* Search over interpolation indices to find the one with lowest residual energy */ + res_nrg_2nd = silk_float_MAX; + for( k = 3; k >= 0; k-- ) { + /* Interpolate NLSFs for first half */ + silk_interpolate( NLSF0_Q15, psEncC->prev_NLSFq_Q15, NLSF_Q15, k, psEncC->predictLPCOrder ); + + /* Convert to LPC for residual energy evaluation */ + silk_NLSF2A_FLP( 
a_tmp, NLSF0_Q15, psEncC->predictLPCOrder ); + + /* Calculate residual energy with LSF interpolation */ + silk_LPC_analysis_filter_FLP( LPC_res, a_tmp, x, 2 * subfr_length, psEncC->predictLPCOrder ); + res_nrg_interp = (silk_float)( + silk_energy_FLP( LPC_res + psEncC->predictLPCOrder, subfr_length - psEncC->predictLPCOrder ) + + silk_energy_FLP( LPC_res + psEncC->predictLPCOrder + subfr_length, subfr_length - psEncC->predictLPCOrder ) ); + + /* Determine whether current interpolated NLSFs are best so far */ + if( res_nrg_interp < res_nrg ) { + /* Interpolation has lower residual energy */ + res_nrg = res_nrg_interp; + psEncC->indices.NLSFInterpCoef_Q2 = (opus_int8)k; + } else if( res_nrg_interp > res_nrg_2nd ) { + /* No reason to continue iterating - residual energies will continue to climb */ + break; + } + res_nrg_2nd = res_nrg_interp; + } + } + + if( psEncC->indices.NLSFInterpCoef_Q2 == 4 ) { + /* NLSF interpolation is currently inactive, calculate NLSFs from full frame AR coefficients */ + silk_A2NLSF_FLP( NLSF_Q15, a, psEncC->predictLPCOrder ); + } + + silk_assert( psEncC->indices.NLSFInterpCoef_Q2 == 4 || + ( psEncC->useInterpolatedNLSFs && !psEncC->first_frame_after_reset && psEncC->nb_subfr == MAX_NB_SUBFR ) ); +} diff --git a/drivers/opus/silk/float/find_LTP_FLP.c b/drivers/opus/silk/float/find_LTP_FLP.c new file mode 100644 index 0000000000..5c62851f20 --- /dev/null +++ b/drivers/opus/silk/float/find_LTP_FLP.c @@ -0,0 +1,132 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. 
+- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "main_FLP.h" +#include "tuning_parameters.h" + +void silk_find_LTP_FLP( + silk_float b[ MAX_NB_SUBFR * LTP_ORDER ], /* O LTP coefs */ + silk_float WLTP[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* O Weight for LTP quantization */ + silk_float *LTPredCodGain, /* O LTP coding gain */ + const silk_float r_lpc[], /* I LPC residual */ + const opus_int lag[ MAX_NB_SUBFR ], /* I LTP lags */ + const silk_float Wght[ MAX_NB_SUBFR ], /* I Weights */ + const opus_int subfr_length, /* I Subframe length */ + const opus_int nb_subfr, /* I number of subframes */ + const opus_int mem_offset /* I Number of samples in LTP memory */ +) +{ + opus_int i, k; + silk_float *b_ptr, temp, *WLTP_ptr; + silk_float LPC_res_nrg, LPC_LTP_res_nrg; + silk_float d[ MAX_NB_SUBFR ], m, g, delta_b[ LTP_ORDER ]; + silk_float w[ MAX_NB_SUBFR ], nrg[ MAX_NB_SUBFR ], regu; + silk_float Rr[ LTP_ORDER ], rr[ MAX_NB_SUBFR ]; + const silk_float *r_ptr, *lag_ptr; + + b_ptr = b; + WLTP_ptr = WLTP; + r_ptr = &r_lpc[ mem_offset ]; + for( k = 0; k < nb_subfr; k++ ) { + lag_ptr = r_ptr - ( lag[ k ] + LTP_ORDER / 2 ); + + silk_corrMatrix_FLP( lag_ptr, subfr_length, LTP_ORDER, WLTP_ptr ); + silk_corrVector_FLP( lag_ptr, r_ptr, subfr_length, LTP_ORDER, Rr ); + + rr[ k ] = ( silk_float )silk_energy_FLP( r_ptr, subfr_length ); + regu = 1.0f + rr[ k ] + + matrix_ptr( WLTP_ptr, 0, 0, LTP_ORDER ) + + matrix_ptr( WLTP_ptr, LTP_ORDER-1, LTP_ORDER-1, LTP_ORDER ); + regu *= LTP_DAMPING / 3; + silk_regularize_correlations_FLP( WLTP_ptr, &rr[ k ], regu, LTP_ORDER ); + silk_solve_LDL_FLP( WLTP_ptr, LTP_ORDER, Rr, b_ptr ); + + /* Calculate residual energy */ + nrg[ k ] = silk_residual_energy_covar_FLP( b_ptr, WLTP_ptr, Rr, rr[ k ], LTP_ORDER ); + + temp = Wght[ k ] / ( nrg[ k ] * Wght[ k ] + 0.01f * subfr_length ); + silk_scale_vector_FLP( WLTP_ptr, temp, LTP_ORDER * LTP_ORDER ); + w[ 
k ] = matrix_ptr( WLTP_ptr, LTP_ORDER / 2, LTP_ORDER / 2, LTP_ORDER ); + + r_ptr += subfr_length; + b_ptr += LTP_ORDER; + WLTP_ptr += LTP_ORDER * LTP_ORDER; + } + + /* Compute LTP coding gain */ + if( LTPredCodGain != NULL ) { + LPC_LTP_res_nrg = 1e-6f; + LPC_res_nrg = 0.0f; + for( k = 0; k < nb_subfr; k++ ) { + LPC_res_nrg += rr[ k ] * Wght[ k ]; + LPC_LTP_res_nrg += nrg[ k ] * Wght[ k ]; + } + + silk_assert( LPC_LTP_res_nrg > 0 ); + *LTPredCodGain = 3.0f * silk_log2( LPC_res_nrg / LPC_LTP_res_nrg ); + } + + /* Smoothing */ + /* d = sum( B, 1 ); */ + b_ptr = b; + for( k = 0; k < nb_subfr; k++ ) { + d[ k ] = 0; + for( i = 0; i < LTP_ORDER; i++ ) { + d[ k ] += b_ptr[ i ]; + } + b_ptr += LTP_ORDER; + } + /* m = ( w * d' ) / ( sum( w ) + 1e-3 ); */ + temp = 1e-3f; + for( k = 0; k < nb_subfr; k++ ) { + temp += w[ k ]; + } + m = 0; + for( k = 0; k < nb_subfr; k++ ) { + m += d[ k ] * w[ k ]; + } + m = m / temp; + + b_ptr = b; + for( k = 0; k < nb_subfr; k++ ) { + g = LTP_SMOOTHING / ( LTP_SMOOTHING + w[ k ] ) * ( m - d[ k ] ); + temp = 0; + for( i = 0; i < LTP_ORDER; i++ ) { + delta_b[ i ] = silk_max_float( b_ptr[ i ], 0.1f ); + temp += delta_b[ i ]; + } + temp = g / temp; + for( i = 0; i < LTP_ORDER; i++ ) { + b_ptr[ i ] = b_ptr[ i ] + delta_b[ i ] * temp; + } + b_ptr += LTP_ORDER; + } +} diff --git a/drivers/opus/silk/float/find_pitch_lags_FLP.c b/drivers/opus/silk/float/find_pitch_lags_FLP.c new file mode 100644 index 0000000000..d74d5941b5 --- /dev/null +++ b/drivers/opus/silk/float/find_pitch_lags_FLP.c @@ -0,0 +1,132 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. 
+- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include <stdlib.h> +#include "main_FLP.h" +#include "tuning_parameters.h" + +void silk_find_pitch_lags_FLP( + silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */ + silk_encoder_control_FLP *psEncCtrl, /* I/O Encoder control FLP */ + silk_float res[], /* O Residual */ + const silk_float x[], /* I Speech signal */ + int arch /* I Run-time architecture */ +) +{ + opus_int buf_len; + silk_float thrhld, res_nrg; + const silk_float *x_buf_ptr, *x_buf; + silk_float auto_corr[ MAX_FIND_PITCH_LPC_ORDER + 1 ]; + silk_float A[ MAX_FIND_PITCH_LPC_ORDER ]; + silk_float refl_coef[ MAX_FIND_PITCH_LPC_ORDER ]; + silk_float Wsig[ FIND_PITCH_LPC_WIN_MAX ]; + silk_float *Wsig_ptr; + + /******************************************/ + /* Set up buffer lengths etc based on Fs */ + /******************************************/ + buf_len = psEnc->sCmn.la_pitch + psEnc->sCmn.frame_length + psEnc->sCmn.ltp_mem_length; + + /* Safety check */ + silk_assert( buf_len >= psEnc->sCmn.pitch_LPC_win_length ); + + x_buf = x - psEnc->sCmn.ltp_mem_length; + + /******************************************/ + /* Estimate LPC AR coeficients */ + /******************************************/ + + /* Calculate windowed signal */ + + /* First LA_LTP samples */ + x_buf_ptr = x_buf + buf_len - psEnc->sCmn.pitch_LPC_win_length; + Wsig_ptr = Wsig; + silk_apply_sine_window_FLP( Wsig_ptr, x_buf_ptr, 1, psEnc->sCmn.la_pitch ); + + /* Middle non-windowed samples */ + Wsig_ptr += psEnc->sCmn.la_pitch; + x_buf_ptr += psEnc->sCmn.la_pitch; + silk_memcpy( Wsig_ptr, x_buf_ptr, ( psEnc->sCmn.pitch_LPC_win_length - ( psEnc->sCmn.la_pitch << 1 ) ) * sizeof( silk_float ) ); + + /* Last LA_LTP samples */ + Wsig_ptr += psEnc->sCmn.pitch_LPC_win_length - ( psEnc->sCmn.la_pitch << 1 ); + x_buf_ptr += psEnc->sCmn.pitch_LPC_win_length - ( psEnc->sCmn.la_pitch << 1 ); + 
silk_apply_sine_window_FLP( Wsig_ptr, x_buf_ptr, 2, psEnc->sCmn.la_pitch ); + + /* Calculate autocorrelation sequence */ + silk_autocorrelation_FLP( auto_corr, Wsig, psEnc->sCmn.pitch_LPC_win_length, psEnc->sCmn.pitchEstimationLPCOrder + 1 ); + + /* Add white noise, as a fraction of the energy */ + auto_corr[ 0 ] += auto_corr[ 0 ] * FIND_PITCH_WHITE_NOISE_FRACTION + 1; + + /* Calculate the reflection coefficients using Schur */ + res_nrg = silk_schur_FLP( refl_coef, auto_corr, psEnc->sCmn.pitchEstimationLPCOrder ); + + /* Prediction gain */ + psEncCtrl->predGain = auto_corr[ 0 ] / silk_max_float( res_nrg, 1.0f ); + + /* Convert reflection coefficients to prediction coefficients */ + silk_k2a_FLP( A, refl_coef, psEnc->sCmn.pitchEstimationLPCOrder ); + + /* Bandwidth expansion */ + silk_bwexpander_FLP( A, psEnc->sCmn.pitchEstimationLPCOrder, FIND_PITCH_BANDWIDTH_EXPANSION ); + + /*****************************************/ + /* LPC analysis filtering */ + /*****************************************/ + silk_LPC_analysis_filter_FLP( res, A, x_buf, buf_len, psEnc->sCmn.pitchEstimationLPCOrder ); + + if( psEnc->sCmn.indices.signalType != TYPE_NO_VOICE_ACTIVITY && psEnc->sCmn.first_frame_after_reset == 0 ) { + /* Threshold for pitch estimator */ + thrhld = 0.6f; + thrhld -= 0.004f * psEnc->sCmn.pitchEstimationLPCOrder; + thrhld -= 0.1f * psEnc->sCmn.speech_activity_Q8 * ( 1.0f / 256.0f ); + thrhld -= 0.15f * (psEnc->sCmn.prevSignalType >> 1); + thrhld -= 0.1f * psEnc->sCmn.input_tilt_Q15 * ( 1.0f / 32768.0f ); + + /*****************************************/ + /* Call Pitch estimator */ + /*****************************************/ + if( silk_pitch_analysis_core_FLP( res, psEncCtrl->pitchL, &psEnc->sCmn.indices.lagIndex, + &psEnc->sCmn.indices.contourIndex, &psEnc->LTPCorr, psEnc->sCmn.prevLag, psEnc->sCmn.pitchEstimationThreshold_Q16 / 65536.0f, + thrhld, psEnc->sCmn.fs_kHz, psEnc->sCmn.pitchEstimationComplexity, psEnc->sCmn.nb_subfr, arch ) == 0 ) + { + 
psEnc->sCmn.indices.signalType = TYPE_VOICED; + } else { + psEnc->sCmn.indices.signalType = TYPE_UNVOICED; + } + } else { + silk_memset( psEncCtrl->pitchL, 0, sizeof( psEncCtrl->pitchL ) ); + psEnc->sCmn.indices.lagIndex = 0; + psEnc->sCmn.indices.contourIndex = 0; + psEnc->LTPCorr = 0; + } +} diff --git a/drivers/opus/silk/float/find_pred_coefs_FLP.c b/drivers/opus/silk/float/find_pred_coefs_FLP.c new file mode 100644 index 0000000000..e0d8804cc9 --- /dev/null +++ b/drivers/opus/silk/float/find_pred_coefs_FLP.c @@ -0,0 +1,117 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "main_FLP.h" + +/* Find LPC and LTP coefficients */ +void silk_find_pred_coefs_FLP( + silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */ + silk_encoder_control_FLP *psEncCtrl, /* I/O Encoder control FLP */ + const silk_float res_pitch[], /* I Residual from pitch analysis */ + const silk_float x[], /* I Speech signal */ + opus_int condCoding /* I The type of conditional coding to use */ +) +{ + opus_int i; + silk_float WLTP[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ]; + silk_float invGains[ MAX_NB_SUBFR ], Wght[ MAX_NB_SUBFR ]; + opus_int16 NLSF_Q15[ MAX_LPC_ORDER ]; + const silk_float *x_ptr; + silk_float *x_pre_ptr, LPC_in_pre[ MAX_NB_SUBFR * MAX_LPC_ORDER + MAX_FRAME_LENGTH ]; + silk_float minInvGain; + + /* Weighting for weighted least squares */ + for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) { + silk_assert( psEncCtrl->Gains[ i ] > 0.0f ); + invGains[ i ] = 1.0f / psEncCtrl->Gains[ i ]; + Wght[ i ] = invGains[ i ] * invGains[ i ]; + } + + if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) { + /**********/ + /* VOICED */ + /**********/ + silk_assert( psEnc->sCmn.ltp_mem_length - psEnc->sCmn.predictLPCOrder >= psEncCtrl->pitchL[ 0 ] + LTP_ORDER / 2 ); + + /* LTP analysis */ + silk_find_LTP_FLP( psEncCtrl->LTPCoef, WLTP, &psEncCtrl->LTPredCodGain, res_pitch, + psEncCtrl->pitchL, Wght, 
psEnc->sCmn.subfr_length, psEnc->sCmn.nb_subfr, psEnc->sCmn.ltp_mem_length ); + + /* Quantize LTP gain parameters */ + silk_quant_LTP_gains_FLP( psEncCtrl->LTPCoef, psEnc->sCmn.indices.LTPIndex, &psEnc->sCmn.indices.PERIndex, + &psEnc->sCmn.sum_log_gain_Q7, WLTP, psEnc->sCmn.mu_LTP_Q9, psEnc->sCmn.LTPQuantLowComplexity, psEnc->sCmn.nb_subfr ); + + /* Control LTP scaling */ + silk_LTP_scale_ctrl_FLP( psEnc, psEncCtrl, condCoding ); + + /* Create LTP residual */ + silk_LTP_analysis_filter_FLP( LPC_in_pre, x - psEnc->sCmn.predictLPCOrder, psEncCtrl->LTPCoef, + psEncCtrl->pitchL, invGains, psEnc->sCmn.subfr_length, psEnc->sCmn.nb_subfr, psEnc->sCmn.predictLPCOrder ); + } else { + /************/ + /* UNVOICED */ + /************/ + /* Create signal with prepended subframes, scaled by inverse gains */ + x_ptr = x - psEnc->sCmn.predictLPCOrder; + x_pre_ptr = LPC_in_pre; + for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) { + silk_scale_copy_vector_FLP( x_pre_ptr, x_ptr, invGains[ i ], + psEnc->sCmn.subfr_length + psEnc->sCmn.predictLPCOrder ); + x_pre_ptr += psEnc->sCmn.subfr_length + psEnc->sCmn.predictLPCOrder; + x_ptr += psEnc->sCmn.subfr_length; + } + silk_memset( psEncCtrl->LTPCoef, 0, psEnc->sCmn.nb_subfr * LTP_ORDER * sizeof( silk_float ) ); + psEncCtrl->LTPredCodGain = 0.0f; + psEnc->sCmn.sum_log_gain_Q7 = 0; + } + + /* Limit on total predictive coding gain */ + if( psEnc->sCmn.first_frame_after_reset ) { + minInvGain = 1.0f / MAX_PREDICTION_POWER_GAIN_AFTER_RESET; + } else { + minInvGain = (silk_float)pow( 2, psEncCtrl->LTPredCodGain / 3 ) / MAX_PREDICTION_POWER_GAIN; + minInvGain /= 0.25f + 0.75f * psEncCtrl->coding_quality; + } + + /* LPC_in_pre contains the LTP-filtered input for voiced, and the unfiltered input for unvoiced */ + silk_find_LPC_FLP( &psEnc->sCmn, NLSF_Q15, LPC_in_pre, minInvGain ); + + /* Quantize LSFs */ + silk_process_NLSFs_FLP( &psEnc->sCmn, psEncCtrl->PredCoef, NLSF_Q15, psEnc->sCmn.prev_NLSFq_Q15 ); + + /* Calculate residual energy using 
quantized LPC coefficients */ + silk_residual_energy_FLP( psEncCtrl->ResNrg, LPC_in_pre, psEncCtrl->PredCoef, psEncCtrl->Gains, + psEnc->sCmn.subfr_length, psEnc->sCmn.nb_subfr, psEnc->sCmn.predictLPCOrder ); + + /* Copy to prediction struct for use in next frame for interpolation */ + silk_memcpy( psEnc->sCmn.prev_NLSFq_Q15, NLSF_Q15, sizeof( psEnc->sCmn.prev_NLSFq_Q15 ) ); +} + diff --git a/drivers/opus/silk/float/inner_product_FLP.c b/drivers/opus/silk/float/inner_product_FLP.c new file mode 100644 index 0000000000..57acf5ffba --- /dev/null +++ b/drivers/opus/silk/float/inner_product_FLP.c @@ -0,0 +1,60 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "SigProc_FLP.h" + +/* inner product of two silk_float arrays, with result as double */ +double silk_inner_product_FLP( + const silk_float *data1, + const silk_float *data2, + opus_int dataSize +) +{ + opus_int i, dataSize4; + double result; + + /* 4x unrolled loop */ + result = 0.0; + dataSize4 = dataSize & 0xFFFC; + for( i = 0; i < dataSize4; i += 4 ) { + result += data1[ i + 0 ] * (double)data2[ i + 0 ] + + data1[ i + 1 ] * (double)data2[ i + 1 ] + + data1[ i + 2 ] * (double)data2[ i + 2 ] + + data1[ i + 3 ] * (double)data2[ i + 3 ]; + } + + /* add any remaining products */ + for( ; i < dataSize; i++ ) { + result += data1[ i ] * (double)data2[ i ]; + } + + return result; +} diff --git a/drivers/opus/silk/float/k2a_FLP.c b/drivers/opus/silk/float/k2a_FLP.c new file mode 100644 index 0000000000..a668a32127 --- /dev/null +++ b/drivers/opus/silk/float/k2a_FLP.c @@ -0,0 +1,53 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. 
+- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "SigProc_FLP.h" + +/* step up function, converts reflection coefficients to prediction coefficients */ +void silk_k2a_FLP( + silk_float *A, /* O prediction coefficients [order] */ + const silk_float *rc, /* I reflection coefficients [order] */ + opus_int32 order /* I prediction order */ +) +{ + opus_int k, n; + silk_float Atmp[ SILK_MAX_ORDER_LPC ]; + + for( k = 0; k < order; k++ ) { + for( n = 0; n < k; n++ ) { + Atmp[ n ] = A[ n ]; + } + for( n = 0; n < k; n++ ) { + A[ n ] += Atmp[ k - n - 1 ] * rc[ k ]; + } + A[ k ] = -rc[ k ]; + } +} diff --git a/drivers/opus/silk/float/levinsondurbin_FLP.c b/drivers/opus/silk/float/levinsondurbin_FLP.c new file mode 100644 index 0000000000..64aaf0fb29 --- /dev/null +++ b/drivers/opus/silk/float/levinsondurbin_FLP.c @@ -0,0 +1,81 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. 
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "SigProc_FLP.h" + +/* Solve the normal equations using the Levinson-Durbin recursion */ +silk_float silk_levinsondurbin_FLP( /* O prediction error energy */ + silk_float A[], /* O prediction coefficients [order] */ + const silk_float corr[], /* I input auto-correlations [order + 1] */ + const opus_int order /* I prediction order */ +) +{ + opus_int i, mHalf, m; + silk_float min_nrg, nrg, t, km, Atmp1, Atmp2; + + min_nrg = 1e-12f * corr[ 0 ] + 1e-9f; + nrg = corr[ 0 ]; + nrg = silk_max_float(min_nrg, nrg); + A[ 0 ] = corr[ 1 ] / nrg; + nrg -= A[ 0 ] * corr[ 1 ]; + nrg = silk_max_float(min_nrg, nrg); + + for( m = 1; m < order; m++ ) + { + t = corr[ m + 1 ]; + for( i = 0; i < m; i++ ) { + t -= A[ i ] * corr[ m - i ]; + } + + /* reflection coefficient */ + km = t / nrg; + + /* residual energy */ + nrg -= km * t; + nrg = silk_max_float(min_nrg, nrg); + + mHalf = m >> 1; + for( i = 0; i < mHalf; i++ ) { + Atmp1 = A[ i ]; + Atmp2 = A[ m - i - 1 ]; + A[ m - i - 1 ] -= km * Atmp1; + A[ i ] -= km * Atmp2; + } + if( m & 1 ) { + A[ mHalf ] -= km * A[ mHalf ]; + } + A[ m ] = km; + 
} + + /* return the residual energy */ + return nrg; +} + diff --git a/drivers/opus/silk/float/main_FLP.h b/drivers/opus/silk/float/main_FLP.h new file mode 100644 index 0000000000..92d6ec3df1 --- /dev/null +++ b/drivers/opus/silk/float/main_FLP.h @@ -0,0 +1,312 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
#ifndef SILK_MAIN_FLP_H
#define SILK_MAIN_FLP_H

/* Prototypes for the floating-point (FLP) SILK encoder routines. */

#include "SigProc_FLP.h"
#include "SigProc_FIX.h"
#include "structs_FLP.h"
#include "silk_main.h"
#include "define.h"
#include "debug.h"
#include "entenc.h"

#ifdef __cplusplus
extern "C"
{
#endif

/* Map the precision-neutral "Fxx" names onto their floating-point counterparts */
#define silk_encoder_state_Fxx      silk_encoder_state_FLP
#define silk_encode_do_VAD_Fxx      silk_encode_do_VAD_FLP
#define silk_encode_frame_Fxx       silk_encode_frame_FLP

/*********************/
/* Encoder Functions */
/*********************/

/* High-pass filter with cutoff frequency adaptation based on pitch lag statistics */
void silk_HP_variable_cutoff(
    silk_encoder_state_Fxx          state_Fxx[]                         /* I/O  Encoder states                              */
);

/* NOTE(review): presumably the voice activity detection step of the encoder, judging by the name; */
/* the implementation is not visible from this header - confirm                                    */
void silk_encode_do_VAD_FLP(
    silk_encoder_state_FLP          *psEnc                              /* I/O  Encoder state FLP                           */
);

/* Encoder main function */
opus_int silk_encode_frame_FLP(
    silk_encoder_state_FLP          *psEnc,                             /* I/O  Encoder state FLP                           */
    opus_int32                      *pnBytesOut,                        /* O    Number of payload bytes                     */
    ec_enc                          *psRangeEnc,                        /* I/O  compressor data structure                   */
    opus_int                        condCoding,                         /* I    The type of conditional coding to use       */
    opus_int                        maxBits,                            /* I    If > 0: maximum number of output bits       */
    opus_int                        useCBR                              /* I    Flag to force constant-bitrate operation    */
);

/* Initializes the Silk encoder state */
opus_int silk_init_encoder(
    silk_encoder_state_FLP          *psEnc,                             /* I/O  Encoder state FLP                           */
    int                             arch                                /* I    Run-time architecture                       */
);

/* Control the Silk encoder */
opus_int silk_control_encoder(
    silk_encoder_state_FLP          *psEnc,                             /* I/O  Pointer to Silk encoder state FLP           */
    silk_EncControlStruct           *encControl,                        /* I    Control structure                           */
    const opus_int32                TargetRate_bps,                     /* I    Target max bitrate (bps)                    */
    const opus_int                  allow_bw_switch,                    /* I    Flag to allow switching audio bandwidth     */
    const opus_int                  channelNb,                          /* I    Channel number                              */
    const opus_int                  force_fs_kHz                        /* I    NOTE(review): undocumented here; presumably a forced internal sampling rate in kHz - confirm */
);

/****************/
/* Prefiltering */
/****************/
void silk_prefilter_FLP(
    silk_encoder_state_FLP          *psEnc,                             /* I/O  Encoder state FLP                           */
    const silk_encoder_control_FLP  *psEncCtrl,                         /* I    Encoder control FLP                         */
    silk_float                      xw[],                               /* O    Weighted signal                             */
    const silk_float                x[]                                 /* I    Speech signal                               */
);

/**************************/
/* Noise shaping analysis */
/**************************/
/* Compute noise shaping coefficients and initial gain values */
void silk_noise_shape_analysis_FLP(
    silk_encoder_state_FLP          *psEnc,                             /* I/O  Encoder state FLP                           */
    silk_encoder_control_FLP        *psEncCtrl,                         /* I/O  Encoder control FLP                         */
    const silk_float                *pitch_res,                         /* I    LPC residual from pitch analysis            */
    const silk_float                *x                                  /* I    Input signal [frame_length + la_shape]      */
);

/* Autocorrelations for a warped frequency axis */
void silk_warped_autocorrelation_FLP(
    silk_float                      *corr,                              /* O    Result [order + 1]                          */
    const silk_float                *input,                             /* I    Input data to correlate                     */
    const silk_float                warping,                            /* I    Warping coefficient                         */
    const opus_int                  length,                             /* I    Length of input                             */
    const opus_int                  order                               /* I    Correlation order (even)                    */
);

/* Calculation of LTP state scaling */
void silk_LTP_scale_ctrl_FLP(
    silk_encoder_state_FLP          *psEnc,                             /* I/O  Encoder state FLP                           */
    silk_encoder_control_FLP        *psEncCtrl,                         /* I/O  Encoder control FLP                         */
    opus_int                        condCoding                          /* I    The type of conditional coding to use       */
);

/**********************************************/
/* Prediction Analysis                        */
/**********************************************/
/* Find pitch lags */
void silk_find_pitch_lags_FLP(
    silk_encoder_state_FLP          *psEnc,                             /* I/O  Encoder state FLP                           */
    silk_encoder_control_FLP        *psEncCtrl,                         /* I/O  Encoder control FLP                         */
    silk_float                      res[],                              /* O    Residual                                    */
    const silk_float                x[],                                /* I    Speech signal                               */
    int                             arch                                /* I    Run-time architecture                       */
);

/* Find LPC and LTP coefficients */
void silk_find_pred_coefs_FLP(
    silk_encoder_state_FLP          *psEnc,                             /* I/O  Encoder state FLP                           */
    silk_encoder_control_FLP        *psEncCtrl,                         /* I/O  Encoder control FLP                         */
    const silk_float                res_pitch[],                        /* I    Residual from pitch analysis                */
    const silk_float                x[],                                /* I    Speech signal                               */
    opus_int                        condCoding                          /* I    The type of conditional coding to use       */
);

/* LPC analysis */
/* NOTE(review): the name minInvGain suggests an inverse-gain bound rather than a value in dB - */
/* confirm the unit against the implementation                                                 */
void silk_find_LPC_FLP(
    silk_encoder_state              *psEncC,                            /* I/O  Encoder state                               */
    opus_int16                      NLSF_Q15[],                         /* O    NLSFs                                       */
    const silk_float                x[],                                /* I    Input signal                                */
    const silk_float                minInvGain                          /* I    Prediction gain from LTP (dB)               */
);

/* LTP analysis */
void silk_find_LTP_FLP(
    silk_float                      b[ MAX_NB_SUBFR * LTP_ORDER ],      /* O    LTP coefs                                   */
    silk_float                      WLTP[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* O    Weight for LTP quantization       */
    silk_float                      *LTPredCodGain,                     /* O    LTP coding gain                             */
    const silk_float                r_lpc[],                            /* I    LPC residual                                */
    const opus_int                  lag[ MAX_NB_SUBFR ],                /* I    LTP lags                                    */
    const silk_float                Wght[ MAX_NB_SUBFR ],               /* I    Weights                                     */
    const opus_int                  subfr_length,                       /* I    Subframe length                             */
    const opus_int                  nb_subfr,                           /* I    number of subframes                         */
    const opus_int                  mem_offset                          /* I    Number of samples in LTP memory             */
);

/* LTP analysis filter: produces the LTP residual for each subframe */
void silk_LTP_analysis_filter_FLP(
    silk_float                      *LTP_res,                           /* O    LTP res MAX_NB_SUBFR*(pre_lgth+subfr_lngth) */
    const silk_float                *x,                                 /* I    Input signal, with preceding samples        */
    const silk_float                B[ LTP_ORDER * MAX_NB_SUBFR ],      /* I    LTP coefficients for each subframe          */
    const opus_int                  pitchL[   MAX_NB_SUBFR ],           /* I    Pitch lags                                  */
    const silk_float                invGains[ MAX_NB_SUBFR ],           /* I    Inverse quantization gains                  */
    const opus_int                  subfr_length,                       /* I    Length of each subframe                     */
    const opus_int                  nb_subfr,                           /* I    number of subframes                         */
    const opus_int                  pre_length                          /* I    Preceding samples for each subframe         */
);

/* Calculates residual energies of input subframes where all subframes have LPC_order   */
/* of preceding samples                                                                 */
void silk_residual_energy_FLP(
    silk_float                      nrgs[ MAX_NB_SUBFR ],               /* O    Residual energy per subframe                */
    const silk_float                x[],                                /* I    Input signal                                */
    silk_float                      a[ 2 ][ MAX_LPC_ORDER ],            /* I    AR coefs for each frame half                */
    const silk_float                gains[],                            /* I    Quantization gains                          */
    const opus_int                  subfr_length,                       /* I    Subframe length                             */
    const opus_int                  nb_subfr,                           /* I    number of subframes                         */
    const opus_int                  LPC_order                           /* I    LPC order                                   */
);

/* 16th order LPC analysis filter */
void silk_LPC_analysis_filter_FLP(
    silk_float                      r_LPC[],                            /* O    LPC residual signal                         */
    const silk_float                PredCoef[],                         /* I    LPC coefficients                            */
    const silk_float                s[],                                /* I    Input signal                                */
    const opus_int                  length,                             /* I    Length of input signal                      */
    const opus_int                  Order                               /* I    LPC order                                   */
);

/* LTP tap quantizer */
void silk_quant_LTP_gains_FLP(
    silk_float                      B[ MAX_NB_SUBFR * LTP_ORDER ],      /* I/O  (Un-)quantized LTP gains                    */
    opus_int8                       cbk_index[ MAX_NB_SUBFR ],          /* O    Codebook index                              */
    opus_int8                       *periodicity_index,                 /* O    Periodicity index                           */
    opus_int32                      *sum_log_gain_Q7,                   /* I/O  Cumulative max prediction gain              */
    const silk_float                W[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* I    Error weights                        */
    const opus_int                  mu_Q10,                             /* I    Mu value (R/D tradeoff)                     */
    const opus_int                  lowComplexity,                      /* I    Flag for low complexity                     */
    const opus_int                  nb_subfr                            /* I    number of subframes                         */
);

/* Residual energy: nrg = wxx - 2 * wXx * c + c' * wXX * c */
silk_float silk_residual_energy_covar_FLP(                              /* O    Weighted residual energy                    */
    const silk_float                *c,                                 /* I    Filter coefficients                         */
    silk_float                      *wXX,                               /* I/O  Weighted correlation matrix, reg. out       */
    const silk_float                *wXx,                               /* I    Weighted correlation vector                 */
    const silk_float                wxx,                                /* I    Weighted correlation value                  */
    const opus_int                  D                                   /* I    Dimension                                   */
);

/* Processing of gains */
void silk_process_gains_FLP(
    silk_encoder_state_FLP          *psEnc,                             /* I/O  Encoder state FLP                           */
    silk_encoder_control_FLP        *psEncCtrl,                         /* I/O  Encoder control FLP                         */
    opus_int                        condCoding                          /* I    The type of conditional coding to use       */
);

/******************/
/* Linear Algebra */
/******************/
/* Calculates correlation matrix X'*X */
void silk_corrMatrix_FLP(
    const silk_float                *x,                                 /* I    x vector [ L+order-1 ] used to create X     */
    const opus_int                  L,                                  /* I    Length of vectors                           */
    const opus_int                  Order,                              /* I    Max lag for correlation                     */
    silk_float                      *XX                                 /* O    X'*X correlation matrix [order x order]     */
);

/* Calculates correlation vector X'*t */
void silk_corrVector_FLP(
    const silk_float                *x,                                 /* I    x vector [L+order-1] used to create X       */
    const silk_float                *t,                                 /* I    Target vector [L]                           */
    const opus_int                  L,                                  /* I    Length of vectors                           */
    const opus_int                  Order,                              /* I    Max lag for correlation                     */
    silk_float                      *Xt                                 /* O    X'*t correlation vector [order]             */
);

/* Add noise to matrix diagonal */
void silk_regularize_correlations_FLP(
    silk_float                      *XX,                                /* I/O  Correlation matrices                        */
    silk_float                      *xx,                                /* I/O  Correlation values                          */
    const silk_float                noise,                              /* I    Noise energy to add                         */
    const opus_int                  D                                   /* I    Dimension of XX                             */
);

/* Function to solve linear equation Ax = b, where A is an MxM symmetric matrix */
void silk_solve_LDL_FLP(
    silk_float                      *A,                                 /* I/O  Symmetric square matrix, out: reg.          */
    const opus_int                  M,                                  /* I    Size of matrix                              */
    const silk_float                *b,                                 /* I    Pointer to b vector                         */
    silk_float                      *x                                  /* O    Pointer to x solution vector                */
);

/* Apply sine window to signal vector.  */
/* Window types:                        */
/*  1 -> sine window from 0 to pi/2     */
/*  2 -> sine window from pi/2 to pi    */
void silk_apply_sine_window_FLP(
    silk_float                      px_win[],                           /* O    Pointer to windowed signal                  */
    const silk_float                px[],                               /* I    Pointer to input signal                     */
    const opus_int                  win_type,                           /* I    Selects a window type                       */
    const opus_int                  length                              /* I    Window length, multiple of 4                */
);

/* Wrapper functions. Call flp / fix code */

/* Convert AR filter coefficients to NLSF parameters */
void silk_A2NLSF_FLP(
    opus_int16                      *NLSF_Q15,                          /* O    NLSF vector      [ LPC_order ]              */
    const silk_float                *pAR,                               /* I    LPC coefficients [ LPC_order ]              */
    const opus_int                  LPC_order                           /* I    LPC order                                   */
);

/* Convert NLSF parameters to AR prediction filter coefficients */
void silk_NLSF2A_FLP(
    silk_float                      *pAR,                               /* O    LPC coefficients [ LPC_order ]              */
    const opus_int16                *NLSF_Q15,                          /* I    NLSF vector      [ LPC_order ]              */
    const opus_int                  LPC_order                           /* I    LPC order                                   */
);

/* Limit, stabilize, and quantize NLSFs */
void silk_process_NLSFs_FLP(
    silk_encoder_state              *psEncC,                            /* I/O  Encoder state                               */
    silk_float                      PredCoef[ 2 ][ MAX_LPC_ORDER ],     /* O    Prediction coefficients                     */
    opus_int16                      NLSF_Q15[      MAX_LPC_ORDER ],     /* I/O  Normalized LSFs (quant out) (0 - (2^15-1))  */
    const opus_int16                prev_NLSF_Q15[ MAX_LPC_ORDER ]      /* I    Previous Normalized LSFs (0 - (2^15-1))     */
);

/* Floating-point Silk NSQ wrapper */
void silk_NSQ_wrapper_FLP(
    silk_encoder_state_FLP          *psEnc,                             /* I/O  Encoder state FLP                           */
    silk_encoder_control_FLP        *psEncCtrl,                         /* I/O  Encoder control FLP                         */
    SideInfoIndices                 *psIndices,                         /* I/O  Quantization indices                        */
    silk_nsq_state                  *psNSQ,                             /* I/O  Noise Shaping Quantization state            */
    opus_int8                       pulses[],                           /* O    Quantized pulse signal                      */
    const silk_float                x[]                                 /* I    Prefiltered input signal                    */
);

#ifdef __cplusplus
}
#endif

#endif
/dev/null +++ b/drivers/opus/silk/float/noise_shape_analysis_FLP.c @@ -0,0 +1,365 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
/* Compute gain to make warped filter coefficients have a zero mean log frequency response on a   */
/* non-warped frequency scale. (So that it can be implemented with a minimum-phase monic filter.) */
/* Note: A monic filter is one with the first coefficient equal to 1.0. In Silk we omit the first */
/* coefficient in an array of coefficients, for monic filters.                                    */
/*                                                                                                */
/* coefs   I  filter coefficients; coefs[ order - 1 ] is read unconditionally, so order >= 1      */
/* lambda  I  warping coefficient                                                                 */
/* order   I  number of coefficients                                                              */
/* Returns 1 / ( 1 + lambda * P ), with P the polynomial sum of coefs[ i ] * ( -lambda )^i        */
static OPUS_INLINE silk_float warped_gain(
    const silk_float     *coefs,
    silk_float           lambda,
    opus_int             order
) {
    opus_int   i;
    silk_float gain;

    /* Evaluate the polynomial at -lambda using Horner's scheme, highest coefficient first */
    lambda = -lambda;
    gain = coefs[ order - 1 ];
    for( i = order - 2; i >= 0; i-- ) {
        gain = lambda * gain + coefs[ i ];
    }
    return (silk_float)( 1.0f / ( 1.0f - lambda * gain ) );
}

/* Convert warped filter coefficients to monic pseudo-warped coefficients and limit maximum     */
/* amplitude of monic warped coefficients by using bandwidth expansion on the true coefficients */
/*                                                                                              */
/* coefs_syn  I/O  first coefficient set [order], converted in place                            */
/* coefs_ana  I/O  second coefficient set [order], converted in place with the same chirp       */
/* lambda     I    warping coefficient                                                          */
/* limit      I    largest absolute coefficient value accepted                                  */
/* order      I    number of coefficients in each set                                           */
/*                                                                                              */
/* At most 10 limiting passes are made; if the coefficients still exceed the limit after the    */
/* last pass, silk_assert( 0 ) is reached.                                                      */
static OPUS_INLINE void warped_true2monic_coefs(
    silk_float           *coefs_syn,
    silk_float           *coefs_ana,
    silk_float           lambda,
    silk_float           limit,
    opus_int             order
) {
    opus_int   i, iter, ind = 0;
    silk_float tmp, maxabs, chirp, gain_syn, gain_ana;

    /* Convert to monic coefficients */
    for( i = order - 1; i > 0; i-- ) {
        coefs_syn[ i - 1 ] -= lambda * coefs_syn[ i ];
        coefs_ana[ i - 1 ] -= lambda * coefs_ana[ i ];
    }
    gain_syn = ( 1.0f - lambda * lambda ) / ( 1.0f + lambda * coefs_syn[ 0 ] );
    gain_ana = ( 1.0f - lambda * lambda ) / ( 1.0f + lambda * coefs_ana[ 0 ] );
    for( i = 0; i < order; i++ ) {
        coefs_syn[ i ] *= gain_syn;
        coefs_ana[ i ] *= gain_ana;
    }

    /* Limit */
    for( iter = 0; iter < 10; iter++ ) {
        /* Find maximum absolute value */
        maxabs = -1.0f;
        for( i = 0; i < order; i++ ) {
            tmp = silk_max( silk_abs_float( coefs_syn[ i ] ), silk_abs_float( coefs_ana[ i ] ) );
            if( tmp > maxabs ) {
                maxabs = tmp;
                ind = i;                /* index of the largest coefficient, used to scale the chirp below */
            }
        }
        if( maxabs <= limit ) {
            /* Coefficients are within range - done */
            return;
        }

        /* Convert back to true warped coefficients */
        /* (undoes the monic conversion: ascending index order, inverse gains) */
        for( i = 1; i < order; i++ ) {
            coefs_syn[ i - 1 ] += lambda * coefs_syn[ i ];
            coefs_ana[ i - 1 ] += lambda * coefs_ana[ i ];
        }
        gain_syn = 1.0f / gain_syn;
        gain_ana = 1.0f / gain_ana;
        for( i = 0; i < order; i++ ) {
            coefs_syn[ i ] *= gain_syn;
            coefs_ana[ i ] *= gain_ana;
        }

        /* Apply bandwidth expansion */
        /* The chirp shrinks with each pass (0.8 + 0.1 * iter) and with the relative overshoot */
        chirp = 0.99f - ( 0.8f + 0.1f * iter ) * ( maxabs - limit ) / ( maxabs * ( ind + 1 ) );
        silk_bwexpander_FLP( coefs_syn, order, chirp );
        silk_bwexpander_FLP( coefs_ana, order, chirp );

        /* Convert to monic warped coefficients */
        for( i = order - 1; i > 0; i-- ) {
            coefs_syn[ i - 1 ] -= lambda * coefs_syn[ i ];
            coefs_ana[ i - 1 ] -= lambda * coefs_ana[ i ];
        }
        gain_syn = ( 1.0f - lambda * lambda ) / ( 1.0f + lambda * coefs_syn[ 0 ] );
        gain_ana = ( 1.0f - lambda * lambda ) / ( 1.0f + lambda * coefs_ana[ 0 ] );
        for( i = 0; i < order; i++ ) {
            coefs_syn[ i ] *= gain_syn;
            coefs_ana[ i ] *= gain_ana;
        }
    }
    /* Not within the limit after 10 passes */
    silk_assert( 0 );
}

/* Compute noise shaping coefficients and initial gain values */
/*                                                                                              */
/* Fills, per subframe, psEncCtrl->AR1, AR2, Gains, GainsPre, LF_MA_shp, LF_AR_shp, HarmBoost,  */
/* HarmShapeGain and Tilt, plus the scalar input_quality, coding_quality and sparseness.        */
/* Also sets psEnc->sCmn.indices.quantOffsetType and advances the smoothing state in            */
/* psEnc->sShape.                                                                               */
/* Note that samples are read starting at x - psEnc->sCmn.la_shape, i.e. before the pointer     */
/* passed in.                                                                                   */
void silk_noise_shape_analysis_FLP(
    silk_encoder_state_FLP          *psEnc,                             /* I/O  Encoder state FLP                           */
    silk_encoder_control_FLP        *psEncCtrl,                         /* I/O  Encoder control FLP                         */
    const silk_float                *pitch_res,                         /* I    LPC residual from pitch analysis            */
    const silk_float                *x                                  /* I    Input signal [frame_length + la_shape]      */
)
{
    silk_shape_state_FLP *psShapeSt = &psEnc->sShape;
    opus_int     k, nSamples;
    silk_float   SNR_adj_dB, HarmBoost, HarmShapeGain, Tilt;
    silk_float   nrg, pre_nrg, log_energy, log_energy_prev, energy_variation;
    silk_float   delta, BWExp1, BWExp2, gain_mult, gain_add, strength, b, warping;
    silk_float   x_windowed[ SHAPE_LPC_WIN_MAX ];
    silk_float   auto_corr[ MAX_SHAPE_LPC_ORDER + 1 ];
    const silk_float *x_ptr, *pitch_res_ptr;

    /* Point to start of first LPC analysis block */
    x_ptr = x - psEnc->sCmn.la_shape;

    /****************/
    /* GAIN CONTROL */
    /****************/
    /* SNR_dB_Q7 is scaled by 1/128 to obtain a floating-point value */
    SNR_adj_dB = psEnc->sCmn.SNR_dB_Q7 * ( 1 / 128.0f );

    /* Input quality is the average of the quality in the lowest two VAD bands */
    psEncCtrl->input_quality = 0.5f * ( psEnc->sCmn.input_quality_bands_Q15[ 0 ] + psEnc->sCmn.input_quality_bands_Q15[ 1 ] ) * ( 1.0f / 32768.0f );

    /* Coding quality level, between 0.0 and 1.0 */
    psEncCtrl->coding_quality = silk_sigmoid( 0.25f * ( SNR_adj_dB - 20.0f ) );

    if( psEnc->sCmn.useCBR == 0 ) {
        /* Reduce coding SNR during low speech activity */
        b = 1.0f - psEnc->sCmn.speech_activity_Q8 * ( 1.0f / 256.0f );
        SNR_adj_dB -= BG_SNR_DECR_dB * psEncCtrl->coding_quality * ( 0.5f + 0.5f * psEncCtrl->input_quality ) * b * b;
    }

    if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) {
        /* Reduce gains for periodic signals */
        SNR_adj_dB += HARM_SNR_INCR_dB * psEnc->LTPCorr;
    } else {
        /* For unvoiced signals and low-quality input, adjust the quality slower than SNR_dB setting */
        SNR_adj_dB += ( -0.4f * psEnc->sCmn.SNR_dB_Q7 * ( 1 / 128.0f ) + 6.0f ) * ( 1.0f - psEncCtrl->input_quality );
    }

    /*************************/
    /* SPARSENESS PROCESSING */
    /*************************/
    /* Set quantizer offset */
    if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) {
        /* Initially set to 0; may be overruled in process_gains(..) */
        psEnc->sCmn.indices.quantOffsetType = 0;
        psEncCtrl->sparseness = 0.0f;
    } else {
        /* Sparseness measure, based on relative fluctuations of energy per 2 milliseconds */
        nSamples = 2 * psEnc->sCmn.fs_kHz;
        energy_variation = 0.0f;
        log_energy_prev  = 0.0f;
        pitch_res_ptr = pitch_res;
        for( k = 0; k < silk_SMULBB( SUB_FRAME_LENGTH_MS, psEnc->sCmn.nb_subfr ) / 2; k++ ) {
            /* nSamples is added to the energy, which keeps the log2 argument at or above nSamples */
            nrg = ( silk_float )nSamples + ( silk_float )silk_energy_FLP( pitch_res_ptr, nSamples );
            log_energy = silk_log2( nrg );
            if( k > 0 ) {
                energy_variation += silk_abs_float( log_energy - log_energy_prev );
            }
            log_energy_prev = log_energy;
            pitch_res_ptr += nSamples;
        }
        psEncCtrl->sparseness = silk_sigmoid( 0.4f * ( energy_variation - 5.0f ) );

        /* Set quantization offset depending on sparseness measure */
        if( psEncCtrl->sparseness > SPARSENESS_THRESHOLD_QNT_OFFSET ) {
            psEnc->sCmn.indices.quantOffsetType = 0;
        } else {
            psEnc->sCmn.indices.quantOffsetType = 1;
        }

        /* Increase coding SNR for sparse signals */
        SNR_adj_dB += SPARSE_SNR_INCR_dB * ( psEncCtrl->sparseness - 0.5f );
    }

    /*******************************/
    /* Control bandwidth expansion */
    /*******************************/
    /* More BWE for signals with high prediction gain */
    strength = FIND_PITCH_WHITE_NOISE_FRACTION * psEncCtrl->predGain;           /* between 0.0 and 1.0 */
    BWExp1 = BWExp2 = BANDWIDTH_EXPANSION / ( 1.0f + strength * strength );
    delta  = LOW_RATE_BANDWIDTH_EXPANSION_DELTA * ( 1.0f - 0.75f * psEncCtrl->coding_quality );
    BWExp1 -= delta;
    BWExp2 += delta;
    /* BWExp1 will be applied after BWExp2, so make it relative */
    BWExp1 /= BWExp2;

    if( psEnc->sCmn.warping_Q16 > 0 ) {
        /* Slightly more warping in analysis will move quantization noise up in frequency, where it's better masked */
        warping = (silk_float)psEnc->sCmn.warping_Q16 / 65536.0f + 0.01f * psEncCtrl->coding_quality;
    } else {
        warping = 0.0f;
    }

    /********************************************/
    /* Compute noise shaping AR coefs and gains */
    /********************************************/
    for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) {
        /* Apply window: sine slope followed by flat part followed by cosine slope */
        opus_int shift, slope_part, flat_part;
        flat_part = psEnc->sCmn.fs_kHz * 3;
        /* The two slopes share whatever remains of the window after the flat part */
        slope_part = ( psEnc->sCmn.shapeWinLength - flat_part ) / 2;

        silk_apply_sine_window_FLP( x_windowed, x_ptr, 1, slope_part );
        shift = slope_part;
        silk_memcpy( x_windowed + shift, x_ptr + shift, flat_part * sizeof(silk_float) );
        shift += flat_part;
        silk_apply_sine_window_FLP( x_windowed + shift, x_ptr + shift, 2, slope_part );

        /* Update pointer: next LPC analysis block */
        x_ptr += psEnc->sCmn.subfr_length;

        if( psEnc->sCmn.warping_Q16 > 0 ) {
            /* Calculate warped auto correlation */
            silk_warped_autocorrelation_FLP( auto_corr, x_windowed, warping,
                psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder );
        } else {
            /* Calculate regular auto correlation */
            silk_autocorrelation_FLP( auto_corr, x_windowed, psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder + 1 );
        }

        /* Add white noise, as a fraction of energy */
        auto_corr[ 0 ] += auto_corr[ 0 ] * SHAPE_WHITE_NOISE_FRACTION;

        /* Convert correlations to prediction coefficients, and compute residual energy */
        nrg = silk_levinsondurbin_FLP( &psEncCtrl->AR2[ k * MAX_SHAPE_LPC_ORDER ], auto_corr, psEnc->sCmn.shapingLPCOrder );
        psEncCtrl->Gains[ k ] = ( silk_float )sqrt( nrg );

        if( psEnc->sCmn.warping_Q16 > 0 ) {
            /* Adjust gain for warping */
            psEncCtrl->Gains[ k ] *= warped_gain( &psEncCtrl->AR2[ k * MAX_SHAPE_LPC_ORDER ], warping, psEnc->sCmn.shapingLPCOrder );
        }

        /* Bandwidth expansion for synthesis filter shaping */
        silk_bwexpander_FLP( &psEncCtrl->AR2[ k * MAX_SHAPE_LPC_ORDER ], psEnc->sCmn.shapingLPCOrder, BWExp2 );

        /* Compute noise shaping filter coefficients */
        /* AR1 starts as a copy of the already expanded AR2 and is expanded further below */
        silk_memcpy(
            &psEncCtrl->AR1[ k * MAX_SHAPE_LPC_ORDER ],
            &psEncCtrl->AR2[ k * MAX_SHAPE_LPC_ORDER ],
            psEnc->sCmn.shapingLPCOrder * sizeof( silk_float ) );

        /* Bandwidth expansion for analysis filter shaping */
        silk_bwexpander_FLP( &psEncCtrl->AR1[ k * MAX_SHAPE_LPC_ORDER ], psEnc->sCmn.shapingLPCOrder, BWExp1 );

        /* Ratio of prediction gains, in energy domain */
        pre_nrg = silk_LPC_inverse_pred_gain_FLP( &psEncCtrl->AR2[ k * MAX_SHAPE_LPC_ORDER ], psEnc->sCmn.shapingLPCOrder );
        nrg     = silk_LPC_inverse_pred_gain_FLP( &psEncCtrl->AR1[ k * MAX_SHAPE_LPC_ORDER ], psEnc->sCmn.shapingLPCOrder );
        psEncCtrl->GainsPre[ k ] = 1.0f - 0.7f * ( 1.0f - pre_nrg / nrg );

        /* Convert to monic warped prediction coefficients and limit absolute values */
        warped_true2monic_coefs( &psEncCtrl->AR2[ k * MAX_SHAPE_LPC_ORDER ], &psEncCtrl->AR1[ k * MAX_SHAPE_LPC_ORDER ],
            warping, 3.999f, psEnc->sCmn.shapingLPCOrder );
    }

    /*****************/
    /* Gain tweaking */
    /*****************/
    /* Increase gains during low speech activity */
    gain_mult = (silk_float)pow( 2.0f, -0.16f * SNR_adj_dB );
    gain_add  = (silk_float)pow( 2.0f,  0.16f * MIN_QGAIN_DB );
    for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) {
        psEncCtrl->Gains[ k ] *= gain_mult;
        psEncCtrl->Gains[ k ] += gain_add;
    }

    /* gain_mult is reused here as the scale factor for the GainsPre values */
    gain_mult = 1.0f + INPUT_TILT + psEncCtrl->coding_quality * HIGH_RATE_INPUT_TILT;
    for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) {
        psEncCtrl->GainsPre[ k ] *= gain_mult;
    }

    /************************************************/
    /* Control low-frequency shaping and noise tilt */
    /************************************************/
    /* Less low frequency shaping for noisy inputs */
    strength = LOW_FREQ_SHAPING * ( 1.0f + LOW_QUALITY_LOW_FREQ_SHAPING_DECR * ( psEnc->sCmn.input_quality_bands_Q15[ 0 ] * ( 1.0f / 32768.0f ) - 1.0f ) );
    strength *= psEnc->sCmn.speech_activity_Q8 * ( 1.0f / 256.0f );
    if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) {
        /* Reduce low frequencies quantization noise for periodic signals, depending on pitch lag */
        /*f = 400; freqz([1, -0.98 + 2e-4 * f], [1, -0.97 + 7e-4 * f], 2^12, Fs); axis([0, 1000, -10, 1])*/
        for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) {
            /* NOTE(review): divides by pitchL[ k ]; assumes voiced frames always carry a non-zero lag - confirm */
            b = 0.2f / psEnc->sCmn.fs_kHz + 3.0f / psEncCtrl->pitchL[ k ];
            psEncCtrl->LF_MA_shp[ k ] = -1.0f + b;
            psEncCtrl->LF_AR_shp[ k ] =  1.0f - b - b * strength;
        }
        Tilt = - HP_NOISE_COEF -
            (1 - HP_NOISE_COEF) * HARM_HP_NOISE_COEF * psEnc->sCmn.speech_activity_Q8 * ( 1.0f / 256.0f );
    } else {
        /* Not voiced: one pair of values, computed once and copied to every subframe */
        b = 1.3f / psEnc->sCmn.fs_kHz;
        psEncCtrl->LF_MA_shp[ 0 ] = -1.0f + b;
        psEncCtrl->LF_AR_shp[ 0 ] =  1.0f - b - b * strength * 0.6f;
        for( k = 1; k < psEnc->sCmn.nb_subfr; k++ ) {
            psEncCtrl->LF_MA_shp[ k ] = psEncCtrl->LF_MA_shp[ 0 ];
            psEncCtrl->LF_AR_shp[ k ] = psEncCtrl->LF_AR_shp[ 0 ];
        }
        Tilt = -HP_NOISE_COEF;
    }

    /****************************/
    /* HARMONIC SHAPING CONTROL */
    /****************************/
    /* Control boosting of harmonic frequencies */
    HarmBoost = LOW_RATE_HARMONIC_BOOST * ( 1.0f - psEncCtrl->coding_quality ) * psEnc->LTPCorr;

    /* More harmonic boost for noisy input signals */
    HarmBoost += LOW_INPUT_QUALITY_HARMONIC_BOOST * ( 1.0f - psEncCtrl->input_quality );

    if( USE_HARM_SHAPING && psEnc->sCmn.indices.signalType == TYPE_VOICED ) {
        /* Harmonic noise shaping */
        HarmShapeGain = HARMONIC_SHAPING;

        /* More harmonic noise shaping for high bitrates or noisy input */
        HarmShapeGain += HIGH_RATE_OR_LOW_QUALITY_HARMONIC_SHAPING *
            ( 1.0f - ( 1.0f - psEncCtrl->coding_quality ) * psEncCtrl->input_quality );

        /* Less harmonic noise shaping for less periodic signals */
        HarmShapeGain *= ( silk_float )sqrt( psEnc->LTPCorr );
    } else {
        HarmShapeGain = 0.0f;
    }

    /*************************/
    /* Smooth over subframes */
    /*************************/
    /* Each smoothed state moves a fraction SUBFR_SMTH_COEF of the way towards its target per subframe; */
    /* the state lives in psEnc->sShape and therefore carries over to the next call                     */
    for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) {
        psShapeSt->HarmBoost_smth     += SUBFR_SMTH_COEF * ( HarmBoost - psShapeSt->HarmBoost_smth );
        psEncCtrl->HarmBoost[ k ]      = psShapeSt->HarmBoost_smth;
        psShapeSt->HarmShapeGain_smth += SUBFR_SMTH_COEF * ( HarmShapeGain - psShapeSt->HarmShapeGain_smth );
        psEncCtrl->HarmShapeGain[ k ]  = psShapeSt->HarmShapeGain_smth;
        psShapeSt->Tilt_smth          += SUBFR_SMTH_COEF * ( Tilt - psShapeSt->Tilt_smth );
        psEncCtrl->Tilt[ k ]           = psShapeSt->Tilt_smth;
    }
}
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +/***************************************************************************** +* Pitch analyser function +******************************************************************************/ +#include "SigProc_FLP.h" +#include "SigProc_FIX.h" +#include "pitch_est_defines.h" +#include "pitch.h" + +#define SCRATCH_SIZE 22 + +/************************************************************/ +/* Internally used functions */ +/************************************************************/ +static void silk_P_Ana_calc_corr_st3( + silk_float cross_corr_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ], /* O 3 DIM correlation array */ + const silk_float frame[], /* I vector to correlate */ + opus_int start_lag, /* I start lag */ + opus_int sf_length, /* I sub frame length */ + opus_int nb_subfr, /* I number of subframes */ + opus_int complexity, /* I Complexity setting */ + int arch /* I Run-time architecture */ +); + +static void silk_P_Ana_calc_energy_st3( + silk_float energies_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ], /* O 3 DIM correlation array */ + const silk_float frame[], /* I vector to correlate */ + opus_int start_lag, /* I start lag */ + opus_int sf_length, /* I sub frame length */ + opus_int nb_subfr, /* I number of subframes */ + opus_int complexity /* I Complexity setting 
*/ +); + +/************************************************************/ +/* CORE PITCH ANALYSIS FUNCTION */ +/************************************************************/ +opus_int silk_pitch_analysis_core_FLP( /* O Voicing estimate: 0 voiced, 1 unvoiced */ + const silk_float *frame, /* I Signal of length PE_FRAME_LENGTH_MS*Fs_kHz */ + opus_int *pitch_out, /* O Pitch lag values [nb_subfr] */ + opus_int16 *lagIndex, /* O Lag Index */ + opus_int8 *contourIndex, /* O Pitch contour Index */ + silk_float *LTPCorr, /* I/O Normalized correlation; input: value from previous frame */ + opus_int prevLag, /* I Last lag of previous frame; set to zero is unvoiced */ + const silk_float search_thres1, /* I First stage threshold for lag candidates 0 - 1 */ + const silk_float search_thres2, /* I Final threshold for lag candidates 0 - 1 */ + const opus_int Fs_kHz, /* I sample frequency (kHz) */ + const opus_int complexity, /* I Complexity setting, 0-2, where 2 is highest */ + const opus_int nb_subfr, /* I Number of 5 ms subframes */ + int arch /* I Run-time architecture */ +) +{ + opus_int i, k, d, j; + silk_float frame_8kHz[ PE_MAX_FRAME_LENGTH_MS * 8 ]; + silk_float frame_4kHz[ PE_MAX_FRAME_LENGTH_MS * 4 ]; + opus_int16 frame_8_FIX[ PE_MAX_FRAME_LENGTH_MS * 8 ]; + opus_int16 frame_4_FIX[ PE_MAX_FRAME_LENGTH_MS * 4 ]; + opus_int32 filt_state[ 6 ]; + silk_float threshold, contour_bias; + silk_float C[ PE_MAX_NB_SUBFR][ (PE_MAX_LAG >> 1) + 5 ]; + opus_val32 xcorr[ PE_MAX_LAG_MS * 4 - PE_MIN_LAG_MS * 4 + 1 ]; + silk_float CC[ PE_NB_CBKS_STAGE2_EXT ]; + const silk_float *target_ptr, *basis_ptr; + double cross_corr, normalizer, energy, energy_tmp; + opus_int d_srch[ PE_D_SRCH_LENGTH ]; + opus_int16 d_comp[ (PE_MAX_LAG >> 1) + 5 ]; + opus_int length_d_srch, length_d_comp; + silk_float Cmax, CCmax, CCmax_b, CCmax_new_b, CCmax_new; + opus_int CBimax, CBimax_new, lag, start_lag, end_lag, lag_new; + opus_int cbk_size; + silk_float lag_log2, prevLag_log2, delta_lag_log2_sqr; + silk_float 
energies_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ]; + silk_float cross_corr_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ]; + opus_int lag_counter; + opus_int frame_length, frame_length_8kHz, frame_length_4kHz; + opus_int sf_length, sf_length_8kHz, sf_length_4kHz; + opus_int min_lag, min_lag_8kHz, min_lag_4kHz; + opus_int max_lag, max_lag_8kHz, max_lag_4kHz; + opus_int nb_cbk_search; + const opus_int8 *Lag_CB_ptr; + + /* Check for valid sampling frequency */ + silk_assert( Fs_kHz == 8 || Fs_kHz == 12 || Fs_kHz == 16 ); + + /* Check for valid complexity setting */ + silk_assert( complexity >= SILK_PE_MIN_COMPLEX ); + silk_assert( complexity <= SILK_PE_MAX_COMPLEX ); + + silk_assert( search_thres1 >= 0.0f && search_thres1 <= 1.0f ); + silk_assert( search_thres2 >= 0.0f && search_thres2 <= 1.0f ); + + /* Set up frame lengths max / min lag for the sampling frequency */ + frame_length = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * Fs_kHz; + frame_length_4kHz = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * 4; + frame_length_8kHz = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * 8; + sf_length = PE_SUBFR_LENGTH_MS * Fs_kHz; + sf_length_4kHz = PE_SUBFR_LENGTH_MS * 4; + sf_length_8kHz = PE_SUBFR_LENGTH_MS * 8; + min_lag = PE_MIN_LAG_MS * Fs_kHz; + min_lag_4kHz = PE_MIN_LAG_MS * 4; + min_lag_8kHz = PE_MIN_LAG_MS * 8; + max_lag = PE_MAX_LAG_MS * Fs_kHz - 1; + max_lag_4kHz = PE_MAX_LAG_MS * 4; + max_lag_8kHz = PE_MAX_LAG_MS * 8 - 1; + + /* Resample from input sampled at Fs_kHz to 8 kHz */ + if( Fs_kHz == 16 ) { + /* Resample to 16 -> 8 khz */ + opus_int16 frame_16_FIX[ 16 * PE_MAX_FRAME_LENGTH_MS ]; + silk_float2short_array( frame_16_FIX, frame, frame_length ); + silk_memset( filt_state, 0, 2 * sizeof( opus_int32 ) ); + silk_resampler_down2( filt_state, frame_8_FIX, frame_16_FIX, frame_length ); + silk_short2float_array( frame_8kHz, frame_8_FIX, frame_length_8kHz ); + } else if( Fs_kHz == 12 
) { + /* Resample to 12 -> 8 khz */ + opus_int16 frame_12_FIX[ 12 * PE_MAX_FRAME_LENGTH_MS ]; + silk_float2short_array( frame_12_FIX, frame, frame_length ); + silk_memset( filt_state, 0, 6 * sizeof( opus_int32 ) ); + silk_resampler_down2_3( filt_state, frame_8_FIX, frame_12_FIX, frame_length ); + silk_short2float_array( frame_8kHz, frame_8_FIX, frame_length_8kHz ); + } else { + silk_assert( Fs_kHz == 8 ); + silk_float2short_array( frame_8_FIX, frame, frame_length_8kHz ); + } + + /* Decimate again to 4 kHz */ + silk_memset( filt_state, 0, 2 * sizeof( opus_int32 ) ); + silk_resampler_down2( filt_state, frame_4_FIX, frame_8_FIX, frame_length_8kHz ); + silk_short2float_array( frame_4kHz, frame_4_FIX, frame_length_4kHz ); + + /* Low-pass filter */ + for( i = frame_length_4kHz - 1; i > 0; i-- ) { + frame_4kHz[ i ] += frame_4kHz[ i - 1 ]; + } + + /****************************************************************************** + * FIRST STAGE, operating in 4 khz + ******************************************************************************/ + silk_memset(C, 0, sizeof(silk_float) * nb_subfr * ((PE_MAX_LAG >> 1) + 5)); + target_ptr = &frame_4kHz[ silk_LSHIFT( sf_length_4kHz, 2 ) ]; + for( k = 0; k < nb_subfr >> 1; k++ ) { + /* Check that we are within range of the array */ + silk_assert( target_ptr >= frame_4kHz ); + silk_assert( target_ptr + sf_length_8kHz <= frame_4kHz + frame_length_4kHz ); + + basis_ptr = target_ptr - min_lag_4kHz; + + /* Check that we are within range of the array */ + silk_assert( basis_ptr >= frame_4kHz ); + silk_assert( basis_ptr + sf_length_8kHz <= frame_4kHz + frame_length_4kHz ); + + celt_pitch_xcorr( target_ptr, target_ptr-max_lag_4kHz, xcorr, sf_length_8kHz, max_lag_4kHz - min_lag_4kHz + 1, arch ); + + /* Calculate first vector products before loop */ + cross_corr = xcorr[ max_lag_4kHz - min_lag_4kHz ]; + normalizer = silk_energy_FLP( target_ptr, sf_length_8kHz ) + + silk_energy_FLP( basis_ptr, sf_length_8kHz ) + + sf_length_8kHz * 4000.0f; + + 
C[ 0 ][ min_lag_4kHz ] += (silk_float)( 2 * cross_corr / normalizer ); + + /* From now on normalizer is computed recursively */ + for( d = min_lag_4kHz + 1; d <= max_lag_4kHz; d++ ) { + basis_ptr--; + + /* Check that we are within range of the array */ + silk_assert( basis_ptr >= frame_4kHz ); + silk_assert( basis_ptr + sf_length_8kHz <= frame_4kHz + frame_length_4kHz ); + + cross_corr = xcorr[ max_lag_4kHz - d ]; + + /* Add contribution of new sample and remove contribution from oldest sample */ + normalizer += + basis_ptr[ 0 ] * (double)basis_ptr[ 0 ] - + basis_ptr[ sf_length_8kHz ] * (double)basis_ptr[ sf_length_8kHz ]; + C[ 0 ][ d ] += (silk_float)( 2 * cross_corr / normalizer ); + } + /* Update target pointer */ + target_ptr += sf_length_8kHz; + } + + /* Apply short-lag bias */ + for( i = max_lag_4kHz; i >= min_lag_4kHz; i-- ) { + C[ 0 ][ i ] -= C[ 0 ][ i ] * i / 4096.0f; + } + + /* Sort */ + length_d_srch = 4 + 2 * complexity; + silk_assert( 3 * length_d_srch <= PE_D_SRCH_LENGTH ); + silk_insertion_sort_decreasing_FLP( &C[ 0 ][ min_lag_4kHz ], d_srch, max_lag_4kHz - min_lag_4kHz + 1, length_d_srch ); + + /* Escape if correlation is very low already here */ + Cmax = C[ 0 ][ min_lag_4kHz ]; + if( Cmax < 0.2f ) { + silk_memset( pitch_out, 0, nb_subfr * sizeof( opus_int ) ); + *LTPCorr = 0.0f; + *lagIndex = 0; + *contourIndex = 0; + return 1; + } + + threshold = search_thres1 * Cmax; + for( i = 0; i < length_d_srch; i++ ) { + /* Convert to 8 kHz indices for the sorted correlation that exceeds the threshold */ + if( C[ 0 ][ min_lag_4kHz + i ] > threshold ) { + d_srch[ i ] = silk_LSHIFT( d_srch[ i ] + min_lag_4kHz, 1 ); + } else { + length_d_srch = i; + break; + } + } + silk_assert( length_d_srch > 0 ); + + for( i = min_lag_8kHz - 5; i < max_lag_8kHz + 5; i++ ) { + d_comp[ i ] = 0; + } + for( i = 0; i < length_d_srch; i++ ) { + d_comp[ d_srch[ i ] ] = 1; + } + + /* Convolution */ + for( i = max_lag_8kHz + 3; i >= min_lag_8kHz; i-- ) { + d_comp[ i ] += d_comp[ i - 1 
] + d_comp[ i - 2 ]; + } + + length_d_srch = 0; + for( i = min_lag_8kHz; i < max_lag_8kHz + 1; i++ ) { + if( d_comp[ i + 1 ] > 0 ) { + d_srch[ length_d_srch ] = i; + length_d_srch++; + } + } + + /* Convolution */ + for( i = max_lag_8kHz + 3; i >= min_lag_8kHz; i-- ) { + d_comp[ i ] += d_comp[ i - 1 ] + d_comp[ i - 2 ] + d_comp[ i - 3 ]; + } + + length_d_comp = 0; + for( i = min_lag_8kHz; i < max_lag_8kHz + 4; i++ ) { + if( d_comp[ i ] > 0 ) { + d_comp[ length_d_comp ] = (opus_int16)( i - 2 ); + length_d_comp++; + } + } + + /********************************************************************************** + ** SECOND STAGE, operating at 8 kHz, on lag sections with high correlation + *************************************************************************************/ + /********************************************************************************* + * Find energy of each subframe projected onto its history, for a range of delays + *********************************************************************************/ + silk_memset( C, 0, PE_MAX_NB_SUBFR*((PE_MAX_LAG >> 1) + 5) * sizeof(silk_float)); + + if( Fs_kHz == 8 ) { + target_ptr = &frame[ PE_LTP_MEM_LENGTH_MS * 8 ]; + } else { + target_ptr = &frame_8kHz[ PE_LTP_MEM_LENGTH_MS * 8 ]; + } + for( k = 0; k < nb_subfr; k++ ) { + energy_tmp = silk_energy_FLP( target_ptr, sf_length_8kHz ) + 1.0; + for( j = 0; j < length_d_comp; j++ ) { + d = d_comp[ j ]; + basis_ptr = target_ptr - d; + cross_corr = silk_inner_product_FLP( basis_ptr, target_ptr, sf_length_8kHz ); + if( cross_corr > 0.0f ) { + energy = silk_energy_FLP( basis_ptr, sf_length_8kHz ); + C[ k ][ d ] = (silk_float)( 2 * cross_corr / ( energy + energy_tmp ) ); + } else { + C[ k ][ d ] = 0.0f; + } + } + target_ptr += sf_length_8kHz; + } + + /* search over lag range and lags codebook */ + /* scale factor for lag codebook, as a function of center lag */ + + CCmax = 0.0f; /* This value doesn't matter */ + CCmax_b = -1000.0f; + + CBimax = 0; /* To avoid returning 
undefined lag values */ + lag = -1; /* To check if lag with strong enough correlation has been found */ + + if( prevLag > 0 ) { + if( Fs_kHz == 12 ) { + prevLag = silk_LSHIFT( prevLag, 1 ) / 3; + } else if( Fs_kHz == 16 ) { + prevLag = silk_RSHIFT( prevLag, 1 ); + } + prevLag_log2 = silk_log2( (silk_float)prevLag ); + } else { + prevLag_log2 = 0; + } + + /* Set up stage 2 codebook based on number of subframes */ + if( nb_subfr == PE_MAX_NB_SUBFR ) { + cbk_size = PE_NB_CBKS_STAGE2_EXT; + Lag_CB_ptr = &silk_CB_lags_stage2[ 0 ][ 0 ]; + if( Fs_kHz == 8 && complexity > SILK_PE_MIN_COMPLEX ) { + /* If input is 8 khz use a larger codebook here because it is last stage */ + nb_cbk_search = PE_NB_CBKS_STAGE2_EXT; + } else { + nb_cbk_search = PE_NB_CBKS_STAGE2; + } + } else { + cbk_size = PE_NB_CBKS_STAGE2_10MS; + Lag_CB_ptr = &silk_CB_lags_stage2_10_ms[ 0 ][ 0 ]; + nb_cbk_search = PE_NB_CBKS_STAGE2_10MS; + } + + for( k = 0; k < length_d_srch; k++ ) { + d = d_srch[ k ]; + for( j = 0; j < nb_cbk_search; j++ ) { + CC[j] = 0.0f; + for( i = 0; i < nb_subfr; i++ ) { + /* Try all codebooks */ + CC[ j ] += C[ i ][ d + matrix_ptr( Lag_CB_ptr, i, j, cbk_size )]; + } + } + /* Find best codebook */ + CCmax_new = -1000.0f; + CBimax_new = 0; + for( i = 0; i < nb_cbk_search; i++ ) { + if( CC[ i ] > CCmax_new ) { + CCmax_new = CC[ i ]; + CBimax_new = i; + } + } + + /* Bias towards shorter lags */ + lag_log2 = silk_log2( (silk_float)d ); + CCmax_new_b = CCmax_new - PE_SHORTLAG_BIAS * nb_subfr * lag_log2; + + /* Bias towards previous lag */ + if( prevLag > 0 ) { + delta_lag_log2_sqr = lag_log2 - prevLag_log2; + delta_lag_log2_sqr *= delta_lag_log2_sqr; + CCmax_new_b -= PE_PREVLAG_BIAS * nb_subfr * (*LTPCorr) * delta_lag_log2_sqr / ( delta_lag_log2_sqr + 0.5f ); + } + + if( CCmax_new_b > CCmax_b && /* Find maximum biased correlation */ + CCmax_new > nb_subfr * search_thres2 /* Correlation needs to be high enough to be voiced */ + ) { + CCmax_b = CCmax_new_b; + CCmax = CCmax_new; + lag = d; + 
CBimax = CBimax_new; + } + } + + if( lag == -1 ) { + /* No suitable candidate found */ + silk_memset( pitch_out, 0, PE_MAX_NB_SUBFR * sizeof(opus_int) ); + *LTPCorr = 0.0f; + *lagIndex = 0; + *contourIndex = 0; + return 1; + } + + /* Output normalized correlation */ + *LTPCorr = (silk_float)( CCmax / nb_subfr ); + silk_assert( *LTPCorr >= 0.0f ); + + if( Fs_kHz > 8 ) { + /* Search in original signal */ + + /* Compensate for decimation */ + silk_assert( lag == silk_SAT16( lag ) ); + if( Fs_kHz == 12 ) { + lag = silk_RSHIFT_ROUND( silk_SMULBB( lag, 3 ), 1 ); + } else { /* Fs_kHz == 16 */ + lag = silk_LSHIFT( lag, 1 ); + } + + lag = silk_LIMIT_int( lag, min_lag, max_lag ); + start_lag = silk_max_int( lag - 2, min_lag ); + end_lag = silk_min_int( lag + 2, max_lag ); + lag_new = lag; /* to avoid undefined lag */ + CBimax = 0; /* to avoid undefined lag */ + + CCmax = -1000.0f; + + /* Calculate the correlations and energies needed in stage 3 */ + silk_P_Ana_calc_corr_st3( cross_corr_st3, frame, start_lag, sf_length, nb_subfr, complexity, arch ); + silk_P_Ana_calc_energy_st3( energies_st3, frame, start_lag, sf_length, nb_subfr, complexity ); + + lag_counter = 0; + silk_assert( lag == silk_SAT16( lag ) ); + contour_bias = PE_FLATCONTOUR_BIAS / lag; + + /* Set up cbk parameters according to complexity setting and frame length */ + if( nb_subfr == PE_MAX_NB_SUBFR ) { + nb_cbk_search = (opus_int)silk_nb_cbk_searchs_stage3[ complexity ]; + cbk_size = PE_NB_CBKS_STAGE3_MAX; + Lag_CB_ptr = &silk_CB_lags_stage3[ 0 ][ 0 ]; + } else { + nb_cbk_search = PE_NB_CBKS_STAGE3_10MS; + cbk_size = PE_NB_CBKS_STAGE3_10MS; + Lag_CB_ptr = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ]; + } + + target_ptr = &frame[ PE_LTP_MEM_LENGTH_MS * Fs_kHz ]; + energy_tmp = silk_energy_FLP( target_ptr, nb_subfr * sf_length ) + 1.0; + for( d = start_lag; d <= end_lag; d++ ) { + for( j = 0; j < nb_cbk_search; j++ ) { + cross_corr = 0.0; + energy = energy_tmp; + for( k = 0; k < nb_subfr; k++ ) { + cross_corr += 
cross_corr_st3[ k ][ j ][ lag_counter ]; + energy += energies_st3[ k ][ j ][ lag_counter ]; + } + if( cross_corr > 0.0 ) { + CCmax_new = (silk_float)( 2 * cross_corr / energy ); + /* Reduce depending on flatness of contour */ + CCmax_new *= 1.0f - contour_bias * j; + } else { + CCmax_new = 0.0f; + } + + if( CCmax_new > CCmax && ( d + (opus_int)silk_CB_lags_stage3[ 0 ][ j ] ) <= max_lag ) { + CCmax = CCmax_new; + lag_new = d; + CBimax = j; + } + } + lag_counter++; + } + + for( k = 0; k < nb_subfr; k++ ) { + pitch_out[ k ] = lag_new + matrix_ptr( Lag_CB_ptr, k, CBimax, cbk_size ); + pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], min_lag, PE_MAX_LAG_MS * Fs_kHz ); + } + *lagIndex = (opus_int16)( lag_new - min_lag ); + *contourIndex = (opus_int8)CBimax; + } else { /* Fs_kHz == 8 */ + /* Save Lags */ + for( k = 0; k < nb_subfr; k++ ) { + pitch_out[ k ] = lag + matrix_ptr( Lag_CB_ptr, k, CBimax, cbk_size ); + pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], min_lag_8kHz, PE_MAX_LAG_MS * 8 ); + } + *lagIndex = (opus_int16)( lag - min_lag_8kHz ); + *contourIndex = (opus_int8)CBimax; + } + silk_assert( *lagIndex >= 0 ); + /* return as voiced */ + return 0; +} + +/*********************************************************************** + * Calculates the correlations used in stage 3 search. In order to cover + * the whole lag codebook for all the searched offset lags (lag +- 2), + * the following correlations are needed in each sub frame: + * + * sf1: lag range [-8,...,7] total 16 correlations + * sf2: lag range [-4,...,4] total 9 correlations + * sf3: lag range [-3,....4] total 8 correltions + * sf4: lag range [-6,....8] total 15 correlations + * + * In total 48 correlations. The direct implementation computed in worst + * case 4*12*5 = 240 correlations, but more likely around 120. 
+ ***********************************************************************/ +static void silk_P_Ana_calc_corr_st3( + silk_float cross_corr_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ], /* O 3 DIM correlation array */ + const silk_float frame[], /* I vector to correlate */ + opus_int start_lag, /* I start lag */ + opus_int sf_length, /* I sub frame length */ + opus_int nb_subfr, /* I number of subframes */ + opus_int complexity, /* I Complexity setting */ + int arch /* I Run-time architecture */ +) +{ + const silk_float *target_ptr; + opus_int i, j, k, lag_counter, lag_low, lag_high; + opus_int nb_cbk_search, delta, idx, cbk_size; + silk_float scratch_mem[ SCRATCH_SIZE ]; + opus_val32 xcorr[ SCRATCH_SIZE ]; + const opus_int8 *Lag_range_ptr, *Lag_CB_ptr; + + silk_assert( complexity >= SILK_PE_MIN_COMPLEX ); + silk_assert( complexity <= SILK_PE_MAX_COMPLEX ); + + if( nb_subfr == PE_MAX_NB_SUBFR ) { + Lag_range_ptr = &silk_Lag_range_stage3[ complexity ][ 0 ][ 0 ]; + Lag_CB_ptr = &silk_CB_lags_stage3[ 0 ][ 0 ]; + nb_cbk_search = silk_nb_cbk_searchs_stage3[ complexity ]; + cbk_size = PE_NB_CBKS_STAGE3_MAX; + } else { + silk_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1); + Lag_range_ptr = &silk_Lag_range_stage3_10_ms[ 0 ][ 0 ]; + Lag_CB_ptr = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ]; + nb_cbk_search = PE_NB_CBKS_STAGE3_10MS; + cbk_size = PE_NB_CBKS_STAGE3_10MS; + } + + target_ptr = &frame[ silk_LSHIFT( sf_length, 2 ) ]; /* Pointer to middle of frame */ + for( k = 0; k < nb_subfr; k++ ) { + lag_counter = 0; + + /* Calculate the correlations for each subframe */ + lag_low = matrix_ptr( Lag_range_ptr, k, 0, 2 ); + lag_high = matrix_ptr( Lag_range_ptr, k, 1, 2 ); + silk_assert(lag_high-lag_low+1 <= SCRATCH_SIZE); + celt_pitch_xcorr( target_ptr, target_ptr - start_lag - lag_high, xcorr, sf_length, lag_high - lag_low + 1, arch ); + for( j = lag_low; j <= lag_high; j++ ) { + silk_assert( lag_counter < SCRATCH_SIZE ); + scratch_mem[ lag_counter ] = xcorr[ lag_high 
- j ]; + lag_counter++; + } + + delta = matrix_ptr( Lag_range_ptr, k, 0, 2 ); + for( i = 0; i < nb_cbk_search; i++ ) { + /* Fill out the 3 dim array that stores the correlations for */ + /* each code_book vector for each start lag */ + idx = matrix_ptr( Lag_CB_ptr, k, i, cbk_size ) - delta; + for( j = 0; j < PE_NB_STAGE3_LAGS; j++ ) { + silk_assert( idx + j < SCRATCH_SIZE ); + silk_assert( idx + j < lag_counter ); + cross_corr_st3[ k ][ i ][ j ] = scratch_mem[ idx + j ]; + } + } + target_ptr += sf_length; + } +} + +/********************************************************************/ +/* Calculate the energies for first two subframes. The energies are */ +/* calculated recursively. */ +/********************************************************************/ +static void silk_P_Ana_calc_energy_st3( + silk_float energies_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ], /* O 3 DIM correlation array */ + const silk_float frame[], /* I vector to correlate */ + opus_int start_lag, /* I start lag */ + opus_int sf_length, /* I sub frame length */ + opus_int nb_subfr, /* I number of subframes */ + opus_int complexity /* I Complexity setting */ +) +{ + const silk_float *target_ptr, *basis_ptr; + double energy; + opus_int k, i, j, lag_counter; + opus_int nb_cbk_search, delta, idx, cbk_size, lag_diff; + silk_float scratch_mem[ SCRATCH_SIZE ]; + const opus_int8 *Lag_range_ptr, *Lag_CB_ptr; + + silk_assert( complexity >= SILK_PE_MIN_COMPLEX ); + silk_assert( complexity <= SILK_PE_MAX_COMPLEX ); + + if( nb_subfr == PE_MAX_NB_SUBFR ) { + Lag_range_ptr = &silk_Lag_range_stage3[ complexity ][ 0 ][ 0 ]; + Lag_CB_ptr = &silk_CB_lags_stage3[ 0 ][ 0 ]; + nb_cbk_search = silk_nb_cbk_searchs_stage3[ complexity ]; + cbk_size = PE_NB_CBKS_STAGE3_MAX; + } else { + silk_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1); + Lag_range_ptr = &silk_Lag_range_stage3_10_ms[ 0 ][ 0 ]; + Lag_CB_ptr = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ]; + nb_cbk_search = PE_NB_CBKS_STAGE3_10MS; + cbk_size 
= PE_NB_CBKS_STAGE3_10MS; + } + + target_ptr = &frame[ silk_LSHIFT( sf_length, 2 ) ]; + for( k = 0; k < nb_subfr; k++ ) { + lag_counter = 0; + + /* Calculate the energy for first lag */ + basis_ptr = target_ptr - ( start_lag + matrix_ptr( Lag_range_ptr, k, 0, 2 ) ); + energy = silk_energy_FLP( basis_ptr, sf_length ) + 1e-3; + silk_assert( energy >= 0.0 ); + scratch_mem[lag_counter] = (silk_float)energy; + lag_counter++; + + lag_diff = ( matrix_ptr( Lag_range_ptr, k, 1, 2 ) - matrix_ptr( Lag_range_ptr, k, 0, 2 ) + 1 ); + for( i = 1; i < lag_diff; i++ ) { + /* remove part outside new window */ + energy -= basis_ptr[sf_length - i] * (double)basis_ptr[sf_length - i]; + silk_assert( energy >= 0.0 ); + + /* add part that comes into window */ + energy += basis_ptr[ -i ] * (double)basis_ptr[ -i ]; + silk_assert( energy >= 0.0 ); + silk_assert( lag_counter < SCRATCH_SIZE ); + scratch_mem[lag_counter] = (silk_float)energy; + lag_counter++; + } + + delta = matrix_ptr( Lag_range_ptr, k, 0, 2 ); + for( i = 0; i < nb_cbk_search; i++ ) { + /* Fill out the 3 dim array that stores the correlations for */ + /* each code_book vector for each start lag */ + idx = matrix_ptr( Lag_CB_ptr, k, i, cbk_size ) - delta; + for( j = 0; j < PE_NB_STAGE3_LAGS; j++ ) { + silk_assert( idx + j < SCRATCH_SIZE ); + silk_assert( idx + j < lag_counter ); + energies_st3[ k ][ i ][ j ] = scratch_mem[ idx + j ]; + silk_assert( energies_st3[ k ][ i ][ j ] >= 0.0f ); + } + } + target_ptr += sf_length; + } +} diff --git a/drivers/opus/silk/float/prefilter_FLP.c b/drivers/opus/silk/float/prefilter_FLP.c new file mode 100644 index 0000000000..aa43852ff1 --- /dev/null +++ b/drivers/opus/silk/float/prefilter_FLP.c @@ -0,0 +1,206 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. 
+Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "main_FLP.h" +#include "tuning_parameters.h" + +/* +* Prefilter for finding Quantizer input signal +*/ +static OPUS_INLINE void silk_prefilt_FLP( + silk_prefilter_state_FLP *P, /* I/O state */ + silk_float st_res[], /* I */ + silk_float xw[], /* O */ + silk_float *HarmShapeFIR, /* I */ + silk_float Tilt, /* I */ + silk_float LF_MA_shp, /* I */ + silk_float LF_AR_shp, /* I */ + opus_int lag, /* I */ + opus_int length /* I */ +); + +static void silk_warped_LPC_analysis_filter_FLP( + silk_float state[], /* I/O State [order + 1] */ + silk_float res[], /* O Residual signal [length] */ + const silk_float coef[], /* I Coefficients [order] */ + const silk_float input[], /* I Input signal [length] */ + const silk_float lambda, /* I Warping factor */ + const opus_int length, /* I Length of input signal */ + const opus_int order /* I Filter order (even) */ +) +{ + opus_int n, i; + silk_float acc, tmp1, tmp2; + + /* Order must be even */ + silk_assert( ( order & 1 ) == 0 ); + + for( n = 0; n < length; n++ ) { + /* Output of lowpass section */ + tmp2 = state[ 0 ] + lambda * state[ 1 ]; + state[ 0 ] = input[ n ]; + /* Output of allpass section */ + tmp1 = state[ 1 ] + lambda * ( state[ 2 ] - tmp2 ); + state[ 1 ] = tmp2; + acc = coef[ 0 ] * tmp2; + /* Loop over allpass sections */ + for( i = 2; i < order; i += 2 ) { + /* Output of allpass section */ + tmp2 = state[ i ] + lambda * ( state[ i + 1 ] - tmp1 ); + state[ i ] = tmp1; + acc += coef[ i - 1 ] * tmp1; + /* Output of allpass section */ + tmp1 = state[ i + 1 ] + lambda * ( state[ i + 2 ] - tmp2 ); + state[ i + 1 ] = tmp2; + acc += coef[ i ] * tmp2; + } + state[ order ] = tmp1; + acc += coef[ order - 1 ] * tmp1; + res[ n ] = input[ n ] - acc; + } +} + +/* +* silk_prefilter. 
Main prefilter function +*/ +void silk_prefilter_FLP( + silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */ + const silk_encoder_control_FLP *psEncCtrl, /* I Encoder control FLP */ + silk_float xw[], /* O Weighted signal */ + const silk_float x[] /* I Speech signal */ +) +{ + silk_prefilter_state_FLP *P = &psEnc->sPrefilt; + opus_int j, k, lag; + silk_float HarmShapeGain, Tilt, LF_MA_shp, LF_AR_shp; + silk_float B[ 2 ]; + const silk_float *AR1_shp; + const silk_float *px; + silk_float *pxw; + silk_float HarmShapeFIR[ 3 ]; + silk_float st_res[ MAX_SUB_FRAME_LENGTH + MAX_LPC_ORDER ]; + + /* Set up pointers */ + px = x; + pxw = xw; + lag = P->lagPrev; + for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { + /* Update Variables that change per sub frame */ + if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) { + lag = psEncCtrl->pitchL[ k ]; + } + + /* Noise shape parameters */ + HarmShapeGain = psEncCtrl->HarmShapeGain[ k ] * ( 1.0f - psEncCtrl->HarmBoost[ k ] ); + HarmShapeFIR[ 0 ] = 0.25f * HarmShapeGain; + HarmShapeFIR[ 1 ] = 32767.0f / 65536.0f * HarmShapeGain; + HarmShapeFIR[ 2 ] = 0.25f * HarmShapeGain; + Tilt = psEncCtrl->Tilt[ k ]; + LF_MA_shp = psEncCtrl->LF_MA_shp[ k ]; + LF_AR_shp = psEncCtrl->LF_AR_shp[ k ]; + AR1_shp = &psEncCtrl->AR1[ k * MAX_SHAPE_LPC_ORDER ]; + + /* Short term FIR filtering */ + silk_warped_LPC_analysis_filter_FLP( P->sAR_shp, st_res, AR1_shp, px, + (silk_float)psEnc->sCmn.warping_Q16 / 65536.0f, psEnc->sCmn.subfr_length, psEnc->sCmn.shapingLPCOrder ); + + /* Reduce (mainly) low frequencies during harmonic emphasis */ + B[ 0 ] = psEncCtrl->GainsPre[ k ]; + B[ 1 ] = -psEncCtrl->GainsPre[ k ] * + ( psEncCtrl->HarmBoost[ k ] * HarmShapeGain + INPUT_TILT + psEncCtrl->coding_quality * HIGH_RATE_INPUT_TILT ); + pxw[ 0 ] = B[ 0 ] * st_res[ 0 ] + B[ 1 ] * P->sHarmHP; + for( j = 1; j < psEnc->sCmn.subfr_length; j++ ) { + pxw[ j ] = B[ 0 ] * st_res[ j ] + B[ 1 ] * st_res[ j - 1 ]; + } + P->sHarmHP = st_res[ psEnc->sCmn.subfr_length - 1 ]; + + 
silk_prefilt_FLP( P, pxw, pxw, HarmShapeFIR, Tilt, LF_MA_shp, LF_AR_shp, lag, psEnc->sCmn.subfr_length ); + + px += psEnc->sCmn.subfr_length; + pxw += psEnc->sCmn.subfr_length; + } + P->lagPrev = psEncCtrl->pitchL[ psEnc->sCmn.nb_subfr - 1 ]; +} + +/* +* Prefilter for finding Quantizer input signal +*/ +static OPUS_INLINE void silk_prefilt_FLP( + silk_prefilter_state_FLP *P, /* I/O state */ + silk_float st_res[], /* I */ + silk_float xw[], /* O */ + silk_float *HarmShapeFIR, /* I */ + silk_float Tilt, /* I */ + silk_float LF_MA_shp, /* I */ + silk_float LF_AR_shp, /* I */ + opus_int lag, /* I */ + opus_int length /* I */ +) +{ + opus_int i; + opus_int idx, LTP_shp_buf_idx; + silk_float n_Tilt, n_LF, n_LTP; + silk_float sLF_AR_shp, sLF_MA_shp; + silk_float *LTP_shp_buf; + + /* To speed up use temp variables instead of using the struct */ + LTP_shp_buf = P->sLTP_shp; + LTP_shp_buf_idx = P->sLTP_shp_buf_idx; + sLF_AR_shp = P->sLF_AR_shp; + sLF_MA_shp = P->sLF_MA_shp; + + for( i = 0; i < length; i++ ) { + if( lag > 0 ) { + silk_assert( HARM_SHAPE_FIR_TAPS == 3 ); + idx = lag + LTP_shp_buf_idx; + n_LTP = LTP_shp_buf[ ( idx - HARM_SHAPE_FIR_TAPS / 2 - 1) & LTP_MASK ] * HarmShapeFIR[ 0 ]; + n_LTP += LTP_shp_buf[ ( idx - HARM_SHAPE_FIR_TAPS / 2 ) & LTP_MASK ] * HarmShapeFIR[ 1 ]; + n_LTP += LTP_shp_buf[ ( idx - HARM_SHAPE_FIR_TAPS / 2 + 1) & LTP_MASK ] * HarmShapeFIR[ 2 ]; + } else { + n_LTP = 0; + } + + n_Tilt = sLF_AR_shp * Tilt; + n_LF = sLF_AR_shp * LF_AR_shp + sLF_MA_shp * LF_MA_shp; + + sLF_AR_shp = st_res[ i ] - n_Tilt; + sLF_MA_shp = sLF_AR_shp - n_LF; + + LTP_shp_buf_idx = ( LTP_shp_buf_idx - 1 ) & LTP_MASK; + LTP_shp_buf[ LTP_shp_buf_idx ] = sLF_MA_shp; + + xw[ i ] = sLF_MA_shp - n_LTP; + } + /* Copy temp variable back to state */ + P->sLF_AR_shp = sLF_AR_shp; + P->sLF_MA_shp = sLF_MA_shp; + P->sLTP_shp_buf_idx = LTP_shp_buf_idx; +} diff --git a/drivers/opus/silk/float/process_gains_FLP.c b/drivers/opus/silk/float/process_gains_FLP.c new file mode 100644 index 
0000000000..e83d05552a --- /dev/null +++ b/drivers/opus/silk/float/process_gains_FLP.c @@ -0,0 +1,103 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "main_FLP.h" +#include "tuning_parameters.h" + +/* Processing of gains */ +void silk_process_gains_FLP( + silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */ + silk_encoder_control_FLP *psEncCtrl, /* I/O Encoder control FLP */ + opus_int condCoding /* I The type of conditional coding to use */ +) +{ + silk_shape_state_FLP *psShapeSt = &psEnc->sShape; + opus_int k; + opus_int32 pGains_Q16[ MAX_NB_SUBFR ]; + silk_float s, InvMaxSqrVal, gain, quant_offset; + + /* Gain reduction when LTP coding gain is high */ + if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) { + s = 1.0f - 0.5f * silk_sigmoid( 0.25f * ( psEncCtrl->LTPredCodGain - 12.0f ) ); + for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { + psEncCtrl->Gains[ k ] *= s; + } + } + + /* Limit the quantized signal */ + InvMaxSqrVal = ( silk_float )( pow( 2.0f, 0.33f * ( 21.0f - psEnc->sCmn.SNR_dB_Q7 * ( 1 / 128.0f ) ) ) / psEnc->sCmn.subfr_length ); + + for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { + /* Soft limit on ratio residual energy and squared gains */ + gain = psEncCtrl->Gains[ k ]; + gain = ( silk_float )sqrt( gain * gain + psEncCtrl->ResNrg[ k ] * InvMaxSqrVal ); + psEncCtrl->Gains[ k ] = silk_min_float( gain, 32767.0f ); + } + + /* Prepare gains for noise shaping quantization */ + for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { + pGains_Q16[ k ] = (opus_int32)( psEncCtrl->Gains[ k ] * 65536.0f ); + } + + /* Save unquantized gains and gain Index */ + silk_memcpy( psEncCtrl->GainsUnq_Q16, pGains_Q16, psEnc->sCmn.nb_subfr * sizeof( opus_int32 ) ); + psEncCtrl->lastGainIndexPrev = psShapeSt->LastGainIndex; + + /* Quantize gains */ + silk_gains_quant( psEnc->sCmn.indices.GainsIndices, pGains_Q16, + &psShapeSt->LastGainIndex, condCoding == CODE_CONDITIONALLY, psEnc->sCmn.nb_subfr ); + + /* Overwrite unquantized gains with quantized gains and convert back to Q0 
from Q16 */ + for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { + psEncCtrl->Gains[ k ] = pGains_Q16[ k ] / 65536.0f; + } + + /* Set quantizer offset for voiced signals. Larger offset when LTP coding gain is low or tilt is high (ie low-pass) */ + if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) { + if( psEncCtrl->LTPredCodGain + psEnc->sCmn.input_tilt_Q15 * ( 1.0f / 32768.0f ) > 1.0f ) { + psEnc->sCmn.indices.quantOffsetType = 0; + } else { + psEnc->sCmn.indices.quantOffsetType = 1; + } + } + + /* Quantizer boundary adjustment */ + quant_offset = silk_Quantization_Offsets_Q10[ psEnc->sCmn.indices.signalType >> 1 ][ psEnc->sCmn.indices.quantOffsetType ] / 1024.0f; + psEncCtrl->Lambda = LAMBDA_OFFSET + + LAMBDA_DELAYED_DECISIONS * psEnc->sCmn.nStatesDelayedDecision + + LAMBDA_SPEECH_ACT * psEnc->sCmn.speech_activity_Q8 * ( 1.0f / 256.0f ) + + LAMBDA_INPUT_QUALITY * psEncCtrl->input_quality + + LAMBDA_CODING_QUALITY * psEncCtrl->coding_quality + + LAMBDA_QUANT_OFFSET * quant_offset; + + silk_assert( psEncCtrl->Lambda > 0.0f ); + silk_assert( psEncCtrl->Lambda < 2.0f ); +} diff --git a/drivers/opus/silk/float/regularize_correlations_FLP.c b/drivers/opus/silk/float/regularize_correlations_FLP.c new file mode 100644 index 0000000000..f056eadc57 --- /dev/null +++ b/drivers/opus/silk/float/regularize_correlations_FLP.c @@ -0,0 +1,48 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. 
+- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "main_FLP.h" + +/* Add noise to matrix diagonal */ +void silk_regularize_correlations_FLP( + silk_float *XX, /* I/O Correlation matrices */ + silk_float *xx, /* I/O Correlation values */ + const silk_float noise, /* I Noise energy to add */ + const opus_int D /* I Dimension of XX */ +) +{ + opus_int i; + + for( i = 0; i < D; i++ ) { + matrix_ptr( &XX[ 0 ], i, i, D ) += noise; + } + xx[ 0 ] += noise; +} diff --git a/drivers/opus/silk/float/residual_energy_FLP.c b/drivers/opus/silk/float/residual_energy_FLP.c new file mode 100644 index 0000000000..011efcef04 --- /dev/null +++ b/drivers/opus/silk/float/residual_energy_FLP.c @@ -0,0 +1,117 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. 
+Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "main_FLP.h" + +#define MAX_ITERATIONS_RESIDUAL_NRG 10 +#define REGULARIZATION_FACTOR 1e-8f + +/* Residual energy: nrg = wxx - 2 * wXx * c + c' * wXX * c */ +silk_float silk_residual_energy_covar_FLP( /* O Weighted residual energy */ + const silk_float *c, /* I Filter coefficients */ + silk_float *wXX, /* I/O Weighted correlation matrix, reg. 
out */ + const silk_float *wXx, /* I Weighted correlation vector */ + const silk_float wxx, /* I Weighted correlation value */ + const opus_int D /* I Dimension */ +) +{ + opus_int i, j, k; + silk_float tmp, nrg = 0.0f, regularization; + + /* Safety checks */ + silk_assert( D >= 0 ); + + regularization = REGULARIZATION_FACTOR * ( wXX[ 0 ] + wXX[ D * D - 1 ] ); + for( k = 0; k < MAX_ITERATIONS_RESIDUAL_NRG; k++ ) { + nrg = wxx; + + tmp = 0.0f; + for( i = 0; i < D; i++ ) { + tmp += wXx[ i ] * c[ i ]; + } + nrg -= 2.0f * tmp; + + /* compute c' * wXX * c, assuming wXX is symmetric */ + for( i = 0; i < D; i++ ) { + tmp = 0.0f; + for( j = i + 1; j < D; j++ ) { + tmp += matrix_c_ptr( wXX, i, j, D ) * c[ j ]; + } + nrg += c[ i ] * ( 2.0f * tmp + matrix_c_ptr( wXX, i, i, D ) * c[ i ] ); + } + if( nrg > 0 ) { + break; + } else { + /* Add white noise */ + for( i = 0; i < D; i++ ) { + matrix_c_ptr( wXX, i, i, D ) += regularization; + } + /* Increase noise for next run */ + regularization *= 2.0f; + } + } + if( k == MAX_ITERATIONS_RESIDUAL_NRG ) { + silk_assert( nrg == 0 ); + nrg = 1.0f; + } + + return nrg; +} + +/* Calculates residual energies of input subframes where all subframes have LPC_order */ +/* of preceding samples */ +void silk_residual_energy_FLP( + silk_float nrgs[ MAX_NB_SUBFR ], /* O Residual energy per subframe */ + const silk_float x[], /* I Input signal */ + silk_float a[ 2 ][ MAX_LPC_ORDER ], /* I AR coefs for each frame half */ + const silk_float gains[], /* I Quantization gains */ + const opus_int subfr_length, /* I Subframe length */ + const opus_int nb_subfr, /* I number of subframes */ + const opus_int LPC_order /* I LPC order */ +) +{ + opus_int shift; + silk_float *LPC_res_ptr, LPC_res[ ( MAX_FRAME_LENGTH + MAX_NB_SUBFR * MAX_LPC_ORDER ) / 2 ]; + + LPC_res_ptr = LPC_res + LPC_order; + shift = LPC_order + subfr_length; + + /* Filter input to create the LPC residual for each frame half, and measure subframe energies */ + silk_LPC_analysis_filter_FLP( 
LPC_res, a[ 0 ], x + 0 * shift, 2 * shift, LPC_order ); + nrgs[ 0 ] = ( silk_float )( gains[ 0 ] * gains[ 0 ] * silk_energy_FLP( LPC_res_ptr + 0 * shift, subfr_length ) ); + nrgs[ 1 ] = ( silk_float )( gains[ 1 ] * gains[ 1 ] * silk_energy_FLP( LPC_res_ptr + 1 * shift, subfr_length ) ); + + if( nb_subfr == MAX_NB_SUBFR ) { + silk_LPC_analysis_filter_FLP( LPC_res, a[ 1 ], x + 2 * shift, 2 * shift, LPC_order ); + nrgs[ 2 ] = ( silk_float )( gains[ 2 ] * gains[ 2 ] * silk_energy_FLP( LPC_res_ptr + 0 * shift, subfr_length ) ); + nrgs[ 3 ] = ( silk_float )( gains[ 3 ] * gains[ 3 ] * silk_energy_FLP( LPC_res_ptr + 1 * shift, subfr_length ) ); + } +} diff --git a/drivers/opus/silk/float/scale_copy_vector_FLP.c b/drivers/opus/silk/float/scale_copy_vector_FLP.c new file mode 100644 index 0000000000..7578d44894 --- /dev/null +++ b/drivers/opus/silk/float/scale_copy_vector_FLP.c @@ -0,0 +1,57 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "SigProc_FLP.h" + +/* copy and multiply a vector by a constant */ +void silk_scale_copy_vector_FLP( + silk_float *data_out, + const silk_float *data_in, + silk_float gain, + opus_int dataSize +) +{ + opus_int i, dataSize4; + + /* 4x unrolled loop */ + dataSize4 = dataSize & 0xFFFC; + for( i = 0; i < dataSize4; i += 4 ) { + data_out[ i + 0 ] = gain * data_in[ i + 0 ]; + data_out[ i + 1 ] = gain * data_in[ i + 1 ]; + data_out[ i + 2 ] = gain * data_in[ i + 2 ]; + data_out[ i + 3 ] = gain * data_in[ i + 3 ]; + } + + /* any remaining elements */ + for( ; i < dataSize; i++ ) { + data_out[ i ] = gain * data_in[ i ]; + } +} diff --git a/drivers/opus/silk/float/scale_vector_FLP.c b/drivers/opus/silk/float/scale_vector_FLP.c new file mode 100644 index 0000000000..03345d519d --- /dev/null +++ b/drivers/opus/silk/float/scale_vector_FLP.c @@ -0,0 +1,56 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. 
+- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "SigProc_FLP.h" + +/* multiply a vector by a constant */ +void silk_scale_vector_FLP( + silk_float *data1, + silk_float gain, + opus_int dataSize +) +{ + opus_int i, dataSize4; + + /* 4x unrolled loop */ + dataSize4 = dataSize & 0xFFFC; + for( i = 0; i < dataSize4; i += 4 ) { + data1[ i + 0 ] *= gain; + data1[ i + 1 ] *= gain; + data1[ i + 2 ] *= gain; + data1[ i + 3 ] *= gain; + } + + /* any remaining elements */ + for( ; i < dataSize; i++ ) { + data1[ i ] *= gain; + } +} diff --git a/drivers/opus/silk/float/schur_FLP.c b/drivers/opus/silk/float/schur_FLP.c new file mode 100644 index 0000000000..76b87f1304 --- /dev/null +++ b/drivers/opus/silk/float/schur_FLP.c @@ -0,0 +1,70 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "SigProc_FLP.h" + +silk_float silk_schur_FLP( /* O returns residual energy */ + silk_float refl_coef[], /* O reflection coefficients (length order) */ + const silk_float auto_corr[], /* I autocorrelation sequence (length order+1) */ + opus_int order /* I order */ +) +{ + opus_int k, n; + silk_float C[ SILK_MAX_ORDER_LPC + 1 ][ 2 ]; + silk_float Ctmp1, Ctmp2, rc_tmp; + + silk_assert( order==6||order==8||order==10||order==12||order==14||order==16 ); + + /* Copy correlations */ + for( k = 0; k < order+1; k++ ) { + C[ k ][ 0 ] = C[ k ][ 1 ] = auto_corr[ k ]; + } + + for( k = 0; k < order; k++ ) { + /* Get reflection coefficient */ + rc_tmp = -C[ k + 1 ][ 0 ] / silk_max_float( C[ 0 ][ 1 ], 1e-9f ); + + /* Save the output */ + refl_coef[ k ] = rc_tmp; + + /* Update correlations */ + for( n = 0; n < order - k; n++ ) { + Ctmp1 = C[ n + k + 1 ][ 0 ]; + Ctmp2 = C[ n ][ 1 ]; + C[ n + k + 1 ][ 0 ] = Ctmp1 + Ctmp2 * rc_tmp; + C[ n ][ 1 ] = Ctmp2 + Ctmp1 * rc_tmp; + } + } + + /* Return residual energy */ + return C[ 0 ][ 1 ]; +} + diff --git a/drivers/opus/silk/float/solve_LS_FLP.c b/drivers/opus/silk/float/solve_LS_FLP.c new file mode 100644 index 0000000000..9fd962b33d --- /dev/null +++ b/drivers/opus/silk/float/solve_LS_FLP.c @@ -0,0 +1,207 @@ +/*********************************************************************** 
+Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "main_FLP.h" +#include "tuning_parameters.h" + +/********************************************************************** + * LDL Factorisation. Finds the upper triangular matrix L and the diagonal + * Matrix D (only the diagonal elements returned in a vector)such that + * the symmetric matric A is given by A = L*D*L'. 
+ **********************************************************************/ +static OPUS_INLINE void silk_LDL_FLP( + silk_float *A, /* I/O Pointer to Symetric Square Matrix */ + opus_int M, /* I Size of Matrix */ + silk_float *L, /* I/O Pointer to Square Upper triangular Matrix */ + silk_float *Dinv /* I/O Pointer to vector holding the inverse diagonal elements of D */ +); + +/********************************************************************** + * Function to solve linear equation Ax = b, when A is a MxM lower + * triangular matrix, with ones on the diagonal. + **********************************************************************/ +static OPUS_INLINE void silk_SolveWithLowerTriangularWdiagOnes_FLP( + const silk_float *L, /* I Pointer to Lower Triangular Matrix */ + opus_int M, /* I Dim of Matrix equation */ + const silk_float *b, /* I b Vector */ + silk_float *x /* O x Vector */ +); + +/********************************************************************** + * Function to solve linear equation (A^T)x = b, when A is a MxM lower + * triangular, with ones on the diagonal. (ie then A^T is upper triangular) + **********************************************************************/ +static OPUS_INLINE void silk_SolveWithUpperTriangularFromLowerWdiagOnes_FLP( + const silk_float *L, /* I Pointer to Lower Triangular Matrix */ + opus_int M, /* I Dim of Matrix equation */ + const silk_float *b, /* I b Vector */ + silk_float *x /* O x Vector */ +); + +/********************************************************************** + * Function to solve linear equation Ax = b, when A is a MxM + * symmetric square matrix - using LDL factorisation + **********************************************************************/ +void silk_solve_LDL_FLP( + silk_float *A, /* I/O Symmetric square matrix, out: reg. 
*/ + const opus_int M, /* I Size of matrix */ + const silk_float *b, /* I Pointer to b vector */ + silk_float *x /* O Pointer to x solution vector */ +) +{ + opus_int i; + silk_float L[ MAX_MATRIX_SIZE ][ MAX_MATRIX_SIZE ]; + silk_float T[ MAX_MATRIX_SIZE ]; + silk_float Dinv[ MAX_MATRIX_SIZE ]; /* inverse diagonal elements of D*/ + + silk_assert( M <= MAX_MATRIX_SIZE ); + + /*************************************************** + Factorize A by LDL such that A = L*D*(L^T), + where L is lower triangular with ones on diagonal + ****************************************************/ + silk_LDL_FLP( A, M, &L[ 0 ][ 0 ], Dinv ); + + /**************************************************** + * substitute D*(L^T) = T. ie: + L*D*(L^T)*x = b => L*T = b <=> T = inv(L)*b + ******************************************************/ + silk_SolveWithLowerTriangularWdiagOnes_FLP( &L[ 0 ][ 0 ], M, b, T ); + + /**************************************************** + D*(L^T)*x = T <=> (L^T)*x = inv(D)*T, because D is + diagonal just multiply with 1/d_i + ****************************************************/ + for( i = 0; i < M; i++ ) { + T[ i ] = T[ i ] * Dinv[ i ]; + } + /**************************************************** + x = inv(L') * inv(D) * T + *****************************************************/ + silk_SolveWithUpperTriangularFromLowerWdiagOnes_FLP( &L[ 0 ][ 0 ], M, T, x ); +} + +static OPUS_INLINE void silk_SolveWithUpperTriangularFromLowerWdiagOnes_FLP( + const silk_float *L, /* I Pointer to Lower Triangular Matrix */ + opus_int M, /* I Dim of Matrix equation */ + const silk_float *b, /* I b Vector */ + silk_float *x /* O x Vector */ +) +{ + opus_int i, j; + silk_float temp; + const silk_float *ptr1; + + for( i = M - 1; i >= 0; i-- ) { + ptr1 = matrix_adr( L, 0, i, M ); + temp = 0; + for( j = M - 1; j > i ; j-- ) { + temp += ptr1[ j * M ] * x[ j ]; + } + temp = b[ i ] - temp; + x[ i ] = temp; + } +} + +static OPUS_INLINE void silk_SolveWithLowerTriangularWdiagOnes_FLP( + const 
silk_float *L, /* I Pointer to Lower Triangular Matrix */ + opus_int M, /* I Dim of Matrix equation */ + const silk_float *b, /* I b Vector */ + silk_float *x /* O x Vector */ +) +{ + opus_int i, j; + silk_float temp; + const silk_float *ptr1; + + for( i = 0; i < M; i++ ) { + ptr1 = matrix_adr( L, i, 0, M ); + temp = 0; + for( j = 0; j < i; j++ ) { + temp += ptr1[ j ] * x[ j ]; + } + temp = b[ i ] - temp; + x[ i ] = temp; + } +} + +static OPUS_INLINE void silk_LDL_FLP( + silk_float *A, /* I/O Pointer to Symetric Square Matrix */ + opus_int M, /* I Size of Matrix */ + silk_float *L, /* I/O Pointer to Square Upper triangular Matrix */ + silk_float *Dinv /* I/O Pointer to vector holding the inverse diagonal elements of D */ +) +{ + opus_int i, j, k, loop_count, err = 1; + silk_float *ptr1, *ptr2; + double temp, diag_min_value; + silk_float v[ MAX_MATRIX_SIZE ], D[ MAX_MATRIX_SIZE ]; /* temp arrays*/ + + silk_assert( M <= MAX_MATRIX_SIZE ); + + diag_min_value = FIND_LTP_COND_FAC * 0.5f * ( A[ 0 ] + A[ M * M - 1 ] ); + for( loop_count = 0; loop_count < M && err == 1; loop_count++ ) { + err = 0; + for( j = 0; j < M; j++ ) { + ptr1 = matrix_adr( L, j, 0, M ); + temp = matrix_ptr( A, j, j, M ); /* element in row j column j*/ + for( i = 0; i < j; i++ ) { + v[ i ] = ptr1[ i ] * D[ i ]; + temp -= ptr1[ i ] * v[ i ]; + } + if( temp < diag_min_value ) { + /* Badly conditioned matrix: add white noise and run again */ + temp = ( loop_count + 1 ) * diag_min_value - temp; + for( i = 0; i < M; i++ ) { + matrix_ptr( A, i, i, M ) += ( silk_float )temp; + } + err = 1; + break; + } + D[ j ] = ( silk_float )temp; + Dinv[ j ] = ( silk_float )( 1.0f / temp ); + matrix_ptr( L, j, j, M ) = 1.0f; + + ptr1 = matrix_adr( A, j, 0, M ); + ptr2 = matrix_adr( L, j + 1, 0, M); + for( i = j + 1; i < M; i++ ) { + temp = 0.0; + for( k = 0; k < j; k++ ) { + temp += ptr2[ k ] * v[ k ]; + } + matrix_ptr( L, i, j, M ) = ( silk_float )( ( ptr1[ i ] - temp ) * Dinv[ j ] ); + ptr2 += M; /* go to next column*/ 
+ } + } + } + silk_assert( err == 0 ); +} + diff --git a/drivers/opus/silk/float/sort_FLP.c b/drivers/opus/silk/float/sort_FLP.c new file mode 100644 index 0000000000..58ea485116 --- /dev/null +++ b/drivers/opus/silk/float/sort_FLP.c @@ -0,0 +1,83 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +/* Insertion sort (fast for already almost sorted arrays): */ +/* Best case: O(n) for an already sorted array */ +/* Worst case: O(n^2) for an inversely sorted array */ + +#include "typedef.h" +#include "SigProc_FLP.h" + +void silk_insertion_sort_decreasing_FLP( + silk_float *a, /* I/O Unsorted / Sorted vector */ + opus_int *idx, /* O Index vector for the sorted elements */ + const opus_int L, /* I Vector length */ + const opus_int K /* I Number of correctly sorted positions */ +) +{ + silk_float value; + opus_int i, j; + + /* Safety checks */ + silk_assert( K > 0 ); + silk_assert( L > 0 ); + silk_assert( L >= K ); + + /* Write start indices in index vector */ + for( i = 0; i < K; i++ ) { + idx[ i ] = i; + } + + /* Sort vector elements by value, decreasing order */ + for( i = 1; i < K; i++ ) { + value = a[ i ]; + for( j = i - 1; ( j >= 0 ) && ( value > a[ j ] ); j-- ) { + a[ j + 1 ] = a[ j ]; /* Shift value */ + idx[ j + 1 ] = idx[ j ]; /* Shift index */ + } + a[ j + 1 ] = value; /* Write value */ + idx[ j + 1 ] = i; /* Write index */ + } + + /* If less than L values are asked check the remaining values, */ + /* but only spend CPU to ensure that the K first values are correct */ + for( i = K; i < L; i++ ) { + value = a[ i ]; + if( value > a[ K - 1 ] ) { + for( j = K - 2; ( j >= 0 ) && ( value > a[ j ] ); j-- ) { + a[ j + 1 ] = a[ j ]; /* Shift value */ + idx[ j + 1 ] = idx[ j ]; /* Shift index */ + } + a[ j + 1 ] = value; /* Write value */ + idx[ j + 1 ] = i; /* Write index */ + } + } +} diff --git a/drivers/opus/silk/float/structs_FLP.h b/drivers/opus/silk/float/structs_FLP.h new file mode 100644 index 0000000000..4082914d93 --- /dev/null +++ b/drivers/opus/silk/float/structs_FLP.h @@ -0,0 +1,131 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. 
All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
/* State and control structures of the floating-point (FLP) SILK encoder. */

#ifndef SILK_STRUCTS_FLP_H
#define SILK_STRUCTS_FLP_H

#include "typedef.h"
#include "silk_main.h"
#include "structs.h"

#ifdef __cplusplus
extern "C"
{
#endif

/********************************/
/* Noise shaping analysis state */
/********************************/
typedef struct {
    opus_int8                   LastGainIndex;      /* Gain index state; handed by pointer to silk_gains_quant() from silk_process_gains_FLP() */
    silk_float                  HarmBoost_smth;
    silk_float                  HarmShapeGain_smth;
    silk_float                  Tilt_smth;
} silk_shape_state_FLP;

/********************************/
/* Prefilter state              */
/********************************/
typedef struct {
    silk_float                  sLTP_shp[ LTP_BUF_LENGTH ];
    silk_float                  sAR_shp[ MAX_SHAPE_LPC_ORDER + 1 ];
    opus_int                    sLTP_shp_buf_idx;
    silk_float                  sLF_AR_shp;
    silk_float                  sLF_MA_shp;
    silk_float                  sHarmHP;
    opus_int32                  rand_seed;
    opus_int                    lagPrev;
} silk_prefilter_state_FLP;

/********************************/
/* Encoder state FLP            */
/********************************/
typedef struct {
    silk_encoder_state          sCmn;                               /* Common struct, shared with fixed-point code */
    silk_shape_state_FLP        sShape;                             /* Noise shaping state */
    silk_prefilter_state_FLP    sPrefilt;                           /* Prefilter State */

    /* Buffer for find pitch and noise shape analysis */
    silk_float                  x_buf[ 2 * MAX_FRAME_LENGTH + LA_SHAPE_MAX ];/* Buffer for find pitch and noise shape analysis */
    silk_float                  LTPCorr;                            /* Normalized correlation from pitch lag estimator */
} silk_encoder_state_FLP;

/************************/
/* Encoder control FLP  */
/************************/
typedef struct {
    /* Prediction and coding parameters */
    silk_float                  Gains[ MAX_NB_SUBFR ];              /* Per-subframe gains; silk_process_gains_FLP() limits them and then overwrites them with the quantized values */
    silk_float                  PredCoef[ 2 ][ MAX_LPC_ORDER ];     /* holds interpolated and final coefficients */
    silk_float                  LTPCoef[LTP_ORDER * MAX_NB_SUBFR];
    silk_float                  LTP_scale;
    opus_int                    pitchL[ MAX_NB_SUBFR ];

    /* Noise shaping parameters */
    silk_float                  AR1[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ];
    silk_float                  AR2[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ];
    silk_float                  LF_MA_shp[     MAX_NB_SUBFR ];
    silk_float                  LF_AR_shp[     MAX_NB_SUBFR ];
    silk_float                  GainsPre[      MAX_NB_SUBFR ];
    silk_float                  HarmBoost[     MAX_NB_SUBFR ];
    silk_float                  Tilt[          MAX_NB_SUBFR ];
    silk_float                  HarmShapeGain[ MAX_NB_SUBFR ];
    silk_float                  Lambda;                             /* Written by silk_process_gains_FLP(), which asserts 0 < Lambda < 2 */
    silk_float                  input_quality;                      /* Input to the Lambda computation in silk_process_gains_FLP() */
    silk_float                  coding_quality;                     /* Input to the Lambda computation in silk_process_gains_FLP() */

    /* Measures */
    silk_float                  sparseness;
    silk_float                  predGain;
    silk_float                  LTPredCodGain;                      /* LTP coding gain; read by silk_process_gains_FLP() (units not visible here - presumably dB, TODO confirm) */
    silk_float                  ResNrg[ MAX_NB_SUBFR ];             /* Residual energy per subframe */

    /* Parameters for CBR mode */
    opus_int32                  GainsUnq_Q16[ MAX_NB_SUBFR ];       /* Q16 gains saved by silk_process_gains_FLP() before quantization */
    opus_int8                   lastGainIndexPrev;                  /* sShape.LastGainIndex as it was before gain quantization */
} silk_encoder_control_FLP;

/************************/
/* Encoder Super Struct */
/************************/
typedef struct {
    silk_encoder_state_FLP      state_Fxx[ ENCODER_NUM_CHANNELS ];  /* One encoder state per channel */
    stereo_enc_state            sStereo;
    opus_int32                  nBitsExceeded;
    opus_int                    nChannelsAPI;
    opus_int                    nChannelsInternal;
    opus_int                    nPrevChannelsInternal;
    opus_int                    timeSinceSwitchAllowed_ms;
    opus_int                    allowBandwidthSwitch;
    opus_int                    prev_decode_only_middle;
} silk_encoder;

#ifdef __cplusplus
}
#endif

#endif
+- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "main_FLP.h" + +/* Autocorrelations for a warped frequency axis */ +void silk_warped_autocorrelation_FLP( + silk_float *corr, /* O Result [order + 1] */ + const silk_float *input, /* I Input data to correlate */ + const silk_float warping, /* I Warping coefficient */ + const opus_int length, /* I Length of input */ + const opus_int order /* I Correlation order (even) */ +) +{ + opus_int n, i; + double tmp1, tmp2; + double state[ MAX_SHAPE_LPC_ORDER + 1 ] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }; + double C[ MAX_SHAPE_LPC_ORDER + 1 ] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }; + + /* Order must be even */ + silk_assert( ( order & 1 ) == 0 ); + + /* Loop over samples */ + for( n = 0; n < length; n++ ) { + tmp1 = input[ n ]; + /* Loop over allpass sections */ + for( i = 0; i < order; i += 2 ) { + /* Output of allpass section */ + tmp2 = state[ i ] + warping * ( 
state[ i + 1 ] - tmp1 ); + state[ i ] = tmp1; + C[ i ] += state[ 0 ] * tmp1; + /* Output of allpass section */ + tmp1 = state[ i + 1 ] + warping * ( state[ i + 2 ] - tmp2 ); + state[ i + 1 ] = tmp2; + C[ i + 1 ] += state[ 0 ] * tmp2; + } + state[ order ] = tmp1; + C[ order ] += state[ 0 ] * tmp1; + } + + /* Copy correlations in silk_float output format */ + for( i = 0; i < order + 1; i++ ) { + corr[ i ] = ( silk_float )C[ i ]; + } +} diff --git a/drivers/opus/silk/float/wrappers_FLP.c b/drivers/opus/silk/float/wrappers_FLP.c new file mode 100644 index 0000000000..c4e34e5578 --- /dev/null +++ b/drivers/opus/silk/float/wrappers_FLP.c @@ -0,0 +1,201 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "main_FLP.h" + +/* Wrappers. Calls flp / fix code */ + +/* Convert AR filter coefficients to NLSF parameters */ +void silk_A2NLSF_FLP( + opus_int16 *NLSF_Q15, /* O NLSF vector [ LPC_order ] */ + const silk_float *pAR, /* I LPC coefficients [ LPC_order ] */ + const opus_int LPC_order /* I LPC order */ +) +{ + opus_int i; + opus_int32 a_fix_Q16[ MAX_LPC_ORDER ]; + + for( i = 0; i < LPC_order; i++ ) { + a_fix_Q16[ i ] = silk_float2int( pAR[ i ] * 65536.0f ); + } + + silk_A2NLSF( NLSF_Q15, a_fix_Q16, LPC_order ); +} + +/* Convert LSF parameters to AR prediction filter coefficients */ +void silk_NLSF2A_FLP( + silk_float *pAR, /* O LPC coefficients [ LPC_order ] */ + const opus_int16 *NLSF_Q15, /* I NLSF vector [ LPC_order ] */ + const opus_int LPC_order /* I LPC order */ +) +{ + opus_int i; + opus_int16 a_fix_Q12[ MAX_LPC_ORDER ]; + + silk_NLSF2A( a_fix_Q12, NLSF_Q15, LPC_order ); + + for( i = 0; i < LPC_order; i++ ) { + pAR[ i ] = ( silk_float )a_fix_Q12[ i ] * ( 1.0f / 4096.0f ); + } +} + +/******************************************/ +/* Floating-point NLSF processing wrapper */ +/******************************************/ +void silk_process_NLSFs_FLP( + silk_encoder_state *psEncC, /* I/O Encoder state */ + silk_float PredCoef[ 2 ][ MAX_LPC_ORDER ], /* O Prediction coefficients */ + opus_int16 NLSF_Q15[ 
MAX_LPC_ORDER ], /* I/O Normalized LSFs (quant out) (0 - (2^15-1)) */ + const opus_int16 prev_NLSF_Q15[ MAX_LPC_ORDER ] /* I Previous Normalized LSFs (0 - (2^15-1)) */ +) +{ + opus_int i, j; + opus_int16 PredCoef_Q12[ 2 ][ MAX_LPC_ORDER ]; + + silk_process_NLSFs( psEncC, PredCoef_Q12, NLSF_Q15, prev_NLSF_Q15); + + for( j = 0; j < 2; j++ ) { + for( i = 0; i < psEncC->predictLPCOrder; i++ ) { + PredCoef[ j ][ i ] = ( silk_float )PredCoef_Q12[ j ][ i ] * ( 1.0f / 4096.0f ); + } + } +} + +/****************************************/ +/* Floating-point Silk NSQ wrapper */ +/****************************************/ +void silk_NSQ_wrapper_FLP( + silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */ + silk_encoder_control_FLP *psEncCtrl, /* I/O Encoder control FLP */ + SideInfoIndices *psIndices, /* I/O Quantization indices */ + silk_nsq_state *psNSQ, /* I/O Noise Shaping Quantzation state */ + opus_int8 pulses[], /* O Quantized pulse signal */ + const silk_float x[] /* I Prefiltered input signal */ +) +{ + opus_int i, j; + opus_int32 x_Q3[ MAX_FRAME_LENGTH ]; + opus_int32 Gains_Q16[ MAX_NB_SUBFR ]; + silk_DWORD_ALIGN opus_int16 PredCoef_Q12[ 2 ][ MAX_LPC_ORDER ]; + opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ]; + opus_int LTP_scale_Q14; + + /* Noise shaping parameters */ + opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ]; + opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ]; /* Packs two int16 coefficients per int32 value */ + opus_int Lambda_Q10; + opus_int Tilt_Q14[ MAX_NB_SUBFR ]; + opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ]; + + /* Convert control struct to fix control struct */ + /* Noise shape parameters */ + for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) { + for( j = 0; j < psEnc->sCmn.shapingLPCOrder; j++ ) { + AR2_Q13[ i * MAX_SHAPE_LPC_ORDER + j ] = silk_float2int( psEncCtrl->AR2[ i * MAX_SHAPE_LPC_ORDER + j ] * 8192.0f ); + } + } + + for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) { + LF_shp_Q14[ i ] = silk_LSHIFT32( silk_float2int( psEncCtrl->LF_AR_shp[ i ] * 
16384.0f ), 16 ) | + (opus_uint16)silk_float2int( psEncCtrl->LF_MA_shp[ i ] * 16384.0f ); + Tilt_Q14[ i ] = (opus_int)silk_float2int( psEncCtrl->Tilt[ i ] * 16384.0f ); + HarmShapeGain_Q14[ i ] = (opus_int)silk_float2int( psEncCtrl->HarmShapeGain[ i ] * 16384.0f ); + } + Lambda_Q10 = ( opus_int )silk_float2int( psEncCtrl->Lambda * 1024.0f ); + + /* prediction and coding parameters */ + for( i = 0; i < psEnc->sCmn.nb_subfr * LTP_ORDER; i++ ) { + LTPCoef_Q14[ i ] = (opus_int16)silk_float2int( psEncCtrl->LTPCoef[ i ] * 16384.0f ); + } + + for( j = 0; j < 2; j++ ) { + for( i = 0; i < psEnc->sCmn.predictLPCOrder; i++ ) { + PredCoef_Q12[ j ][ i ] = (opus_int16)silk_float2int( psEncCtrl->PredCoef[ j ][ i ] * 4096.0f ); + } + } + + for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) { + Gains_Q16[ i ] = silk_float2int( psEncCtrl->Gains[ i ] * 65536.0f ); + silk_assert( Gains_Q16[ i ] > 0 ); + } + + if( psIndices->signalType == TYPE_VOICED ) { + LTP_scale_Q14 = silk_LTPScales_table_Q14[ psIndices->LTP_scaleIndex ]; + } else { + LTP_scale_Q14 = 0; + } + + /* Convert input to fix */ + for( i = 0; i < psEnc->sCmn.frame_length; i++ ) { + x_Q3[ i ] = silk_float2int( 8.0f * x[ i ] ); + } + + /* Call NSQ */ + if( psEnc->sCmn.nStatesDelayedDecision > 1 || psEnc->sCmn.warping_Q16 > 0 ) { + silk_NSQ_del_dec( &psEnc->sCmn, psNSQ, psIndices, x_Q3, pulses, PredCoef_Q12[ 0 ], LTPCoef_Q14, + AR2_Q13, HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, psEncCtrl->pitchL, Lambda_Q10, LTP_scale_Q14 ); + } else { + silk_NSQ( &psEnc->sCmn, psNSQ, psIndices, x_Q3, pulses, PredCoef_Q12[ 0 ], LTPCoef_Q14, + AR2_Q13, HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, psEncCtrl->pitchL, Lambda_Q10, LTP_scale_Q14 ); + } +} + +/***********************************************/ +/* Floating-point Silk LTP quantiation wrapper */ +/***********************************************/ +void silk_quant_LTP_gains_FLP( + silk_float B[ MAX_NB_SUBFR * LTP_ORDER ], /* I/O (Un-)quantized LTP gains */ + opus_int8 cbk_index[ 
MAX_NB_SUBFR ], /* O Codebook index */ + opus_int8 *periodicity_index, /* O Periodicity index */ + opus_int32 *sum_log_gain_Q7, /* I/O Cumulative max prediction gain */ + const silk_float W[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* I Error weights */ + const opus_int mu_Q10, /* I Mu value (R/D tradeoff) */ + const opus_int lowComplexity, /* I Flag for low complexity */ + const opus_int nb_subfr /* I number of subframes */ +) +{ + opus_int i; + opus_int16 B_Q14[ MAX_NB_SUBFR * LTP_ORDER ]; + opus_int32 W_Q18[ MAX_NB_SUBFR*LTP_ORDER*LTP_ORDER ]; + + for( i = 0; i < nb_subfr * LTP_ORDER; i++ ) { + B_Q14[ i ] = (opus_int16)silk_float2int( B[ i ] * 16384.0f ); + } + for( i = 0; i < nb_subfr * LTP_ORDER * LTP_ORDER; i++ ) { + W_Q18[ i ] = (opus_int32)silk_float2int( W[ i ] * 262144.0f ); + } + + silk_quant_LTP_gains( B_Q14, cbk_index, periodicity_index, sum_log_gain_Q7, W_Q18, mu_Q10, lowComplexity, nb_subfr ); + + for( i = 0; i < nb_subfr * LTP_ORDER; i++ ) { + B[ i ] = (silk_float)B_Q14[ i ] * ( 1.0f / 16384.0f ); + } +} diff --git a/drivers/opus/silk/gain_quant.c b/drivers/opus/silk/gain_quant.c new file mode 100644 index 0000000000..e9467198eb --- /dev/null +++ b/drivers/opus/silk/gain_quant.c @@ -0,0 +1,141 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. 
+- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "silk_main.h" + +#define OFFSET ( ( MIN_QGAIN_DB * 128 ) / 6 + 16 * 128 ) +#define SCALE_Q16 ( ( 65536 * ( N_LEVELS_QGAIN - 1 ) ) / ( ( ( MAX_QGAIN_DB - MIN_QGAIN_DB ) * 128 ) / 6 ) ) +#define INV_SCALE_Q16 ( ( 65536 * ( ( ( MAX_QGAIN_DB - MIN_QGAIN_DB ) * 128 ) / 6 ) ) / ( N_LEVELS_QGAIN - 1 ) ) + +/* Gain scalar quantization with hysteresis, uniform on log scale */ +void silk_gains_quant( + opus_int8 ind[ MAX_NB_SUBFR ], /* O gain indices */ + opus_int32 gain_Q16[ MAX_NB_SUBFR ], /* I/O gains (quantized out) */ + opus_int8 *prev_ind, /* I/O last index in previous frame */ + const opus_int conditional, /* I first gain is delta coded if 1 */ + const opus_int nb_subfr /* I number of subframes */ +) +{ + opus_int k, double_step_size_threshold; + + for( k = 0; k < nb_subfr; k++ ) { + /* Convert to log scale, scale, floor() */ + ind[ k ] = silk_SMULWB( 
SCALE_Q16, silk_lin2log( gain_Q16[ k ] ) - OFFSET ); + + /* Round towards previous quantized gain (hysteresis) */ + if( ind[ k ] < *prev_ind ) { + ind[ k ]++; + } + ind[ k ] = silk_LIMIT_int( ind[ k ], 0, N_LEVELS_QGAIN - 1 ); + + /* Compute delta indices and limit */ + if( k == 0 && conditional == 0 ) { + /* Full index */ + ind[ k ] = silk_LIMIT_int( ind[ k ], *prev_ind + MIN_DELTA_GAIN_QUANT, N_LEVELS_QGAIN - 1 ); + *prev_ind = ind[ k ]; + } else { + /* Delta index */ + ind[ k ] = ind[ k ] - *prev_ind; + + /* Double the quantization step size for large gain increases, so that the max gain level can be reached */ + double_step_size_threshold = 2 * MAX_DELTA_GAIN_QUANT - N_LEVELS_QGAIN + *prev_ind; + if( ind[ k ] > double_step_size_threshold ) { + ind[ k ] = double_step_size_threshold + silk_RSHIFT( ind[ k ] - double_step_size_threshold + 1, 1 ); + } + + ind[ k ] = silk_LIMIT_int( ind[ k ], MIN_DELTA_GAIN_QUANT, MAX_DELTA_GAIN_QUANT ); + + /* Accumulate deltas */ + if( ind[ k ] > double_step_size_threshold ) { + *prev_ind += silk_LSHIFT( ind[ k ], 1 ) - double_step_size_threshold; + } else { + *prev_ind += ind[ k ]; + } + + /* Shift to make non-negative */ + ind[ k ] -= MIN_DELTA_GAIN_QUANT; + } + + /* Scale and convert to linear scale */ + gain_Q16[ k ] = silk_log2lin( silk_min_32( silk_SMULWB( INV_SCALE_Q16, *prev_ind ) + OFFSET, 3967 ) ); /* 3967 = 31 in Q7 */ + } +} + +/* Gains scalar dequantization, uniform on log scale */ +void silk_gains_dequant( + opus_int32 gain_Q16[ MAX_NB_SUBFR ], /* O quantized gains */ + const opus_int8 ind[ MAX_NB_SUBFR ], /* I gain indices */ + opus_int8 *prev_ind, /* I/O last index in previous frame */ + const opus_int conditional, /* I first gain is delta coded if 1 */ + const opus_int nb_subfr /* I number of subframes */ +) +{ + opus_int k, ind_tmp, double_step_size_threshold; + + for( k = 0; k < nb_subfr; k++ ) { + if( k == 0 && conditional == 0 ) { + /* Gain index is not allowed to go down more than 16 steps (~21.8 dB) */ + 
*prev_ind = silk_max_int( ind[ k ], *prev_ind - 16 ); + } else { + /* Delta index */ + ind_tmp = ind[ k ] + MIN_DELTA_GAIN_QUANT; + + /* Accumulate deltas */ + double_step_size_threshold = 2 * MAX_DELTA_GAIN_QUANT - N_LEVELS_QGAIN + *prev_ind; + if( ind_tmp > double_step_size_threshold ) { + *prev_ind += silk_LSHIFT( ind_tmp, 1 ) - double_step_size_threshold; + } else { + *prev_ind += ind_tmp; + } + } + *prev_ind = silk_LIMIT_int( *prev_ind, 0, N_LEVELS_QGAIN - 1 ); + + /* Scale and convert to linear scale */ + gain_Q16[ k ] = silk_log2lin( silk_min_32( silk_SMULWB( INV_SCALE_Q16, *prev_ind ) + OFFSET, 3967 ) ); /* 3967 = 31 in Q7 */ + } +} + +/* Compute unique identifier of gain indices vector */ +opus_int32 silk_gains_ID( /* O returns unique identifier of gains */ + const opus_int8 ind[ MAX_NB_SUBFR ], /* I gain indices */ + const opus_int nb_subfr /* I number of subframes */ +) +{ + opus_int k; + opus_int32 gainsID; + + gainsID = 0; + for( k = 0; k < nb_subfr; k++ ) { + gainsID = silk_ADD_LSHIFT32( ind[ k ], gainsID, 8 ); + } + + return gainsID; +} diff --git a/drivers/opus/silk/init_decoder.c b/drivers/opus/silk/init_decoder.c new file mode 100644 index 0000000000..88c1ff7b43 --- /dev/null +++ b/drivers/opus/silk/init_decoder.c @@ -0,0 +1,56 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. 
+- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "silk_main.h" + +/************************/ +/* Init Decoder State */ +/************************/ +opus_int silk_init_decoder( + silk_decoder_state *psDec /* I/O Decoder state pointer */ +) +{ + /* Clear the entire encoder state, except anything copied */ + silk_memset( psDec, 0, sizeof( silk_decoder_state ) ); + + /* Used to deactivate LSF interpolation */ + psDec->first_frame_after_reset = 1; + psDec->prev_gain_Q16 = 65536; + + /* Reset CNG state */ + silk_CNG_Reset( psDec ); + + /* Reset PLC state */ + silk_PLC_Reset( psDec ); + + return(0); +} + diff --git a/drivers/opus/silk/init_encoder.c b/drivers/opus/silk/init_encoder.c new file mode 100644 index 0000000000..baf97d49e7 --- /dev/null +++ b/drivers/opus/silk/init_encoder.c @@ -0,0 +1,64 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype 
Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif +#ifdef OPUS_FIXED_POINT +#include "main_FIX.h" +#else +#include "main_FLP.h" +#endif +#include "tuning_parameters.h" +#include "cpu_support.h" + +/*********************************/ +/* Initialize Silk Encoder state */ +/*********************************/ +opus_int silk_init_encoder( + silk_encoder_state_Fxx *psEnc, /* I/O Pointer to Silk FIX encoder state */ + int arch /* I Run-time architecture */ +) +{ + opus_int ret = 0; + + /* Clear the entire encoder state */ + silk_memset( psEnc, 0, sizeof( silk_encoder_state_Fxx ) ); + + psEnc->sCmn.arch = arch; + + psEnc->sCmn.variable_HP_smth1_Q15 = silk_LSHIFT( silk_lin2log( SILK_FIX_CONST( VARIABLE_HP_MIN_CUTOFF_HZ, 16 ) ) - ( 16 << 7 ), 8 ); + psEnc->sCmn.variable_HP_smth2_Q15 = psEnc->sCmn.variable_HP_smth1_Q15; + + /* Used to deactivate LSF interpolation, pitch prediction */ + psEnc->sCmn.first_frame_after_reset = 1; + + /* Initialize Silk VAD */ + ret += silk_VAD_Init( &psEnc->sCmn.sVAD ); + + return ret; +} diff --git a/drivers/opus/silk/inner_prod_aligned.c b/drivers/opus/silk/inner_prod_aligned.c new file mode 100644 index 0000000000..d625001db7 --- /dev/null +++ b/drivers/opus/silk/inner_prod_aligned.c @@ -0,0 +1,47 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. 
+- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "SigProc_FIX.h" + +opus_int32 silk_inner_prod_aligned_scale( + const opus_int16 *const inVec1, /* I input vector 1 */ + const opus_int16 *const inVec2, /* I input vector 2 */ + const opus_int scale, /* I number of bits to shift */ + const opus_int len /* I vector lengths */ +) +{ + opus_int i; + opus_int32 sum = 0; + for( i = 0; i < len; i++ ) { + sum = silk_ADD_RSHIFT32( sum, silk_SMULBB( inVec1[ i ], inVec2[ i ] ), scale ); + } + return sum; +} diff --git a/drivers/opus/silk/interpolate.c b/drivers/opus/silk/interpolate.c new file mode 100644 index 0000000000..d5df0feddb --- /dev/null +++ b/drivers/opus/silk/interpolate.c @@ -0,0 +1,51 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. 
+Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "silk_main.h" + +/* Interpolate two vectors */ +void silk_interpolate( + opus_int16 xi[ MAX_LPC_ORDER ], /* O interpolated vector */ + const opus_int16 x0[ MAX_LPC_ORDER ], /* I first vector */ + const opus_int16 x1[ MAX_LPC_ORDER ], /* I second vector */ + const opus_int ifact_Q2, /* I interp. 
factor, weight on 2nd vector */ + const opus_int d /* I number of parameters */ +) +{ + opus_int i; + + silk_assert( ifact_Q2 >= 0 ); + silk_assert( ifact_Q2 <= 4 ); + + for( i = 0; i < d; i++ ) { + xi[ i ] = (opus_int16)silk_ADD_RSHIFT( x0[ i ], silk_SMULBB( x1[ i ] - x0[ i ], ifact_Q2 ), 2 ); + } +} diff --git a/drivers/opus/silk/lin2log.c b/drivers/opus/silk/lin2log.c new file mode 100644 index 0000000000..77bfc8c8ab --- /dev/null +++ b/drivers/opus/silk/lin2log.c @@ -0,0 +1,46 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "SigProc_FIX.h" +/* Approximation of 128 * log2() (very close inverse of silk_log2lin()) */ +/* Convert input to a log scale */ +opus_int32 silk_lin2log( + const opus_int32 inLin /* I input in linear scale */ +) +{ + opus_int32 lz, frac_Q7; + + silk_CLZ_FRAC( inLin, &lz, &frac_Q7 ); + + /* Piece-wise parabolic approximation */ + return silk_LSHIFT( 31 - lz, 7 ) + silk_SMLAWB( frac_Q7, silk_MUL( frac_Q7, 128 - frac_Q7 ), 179 ); +} + diff --git a/drivers/opus/silk/log2lin.c b/drivers/opus/silk/log2lin.c new file mode 100644 index 0000000000..0ed2a12efd --- /dev/null +++ b/drivers/opus/silk/log2lin.c @@ -0,0 +1,58 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. 
+- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "SigProc_FIX.h" + +/* Approximation of 2^() (very close inverse of silk_lin2log()) */ +/* Convert input to a linear scale */ +opus_int32 silk_log2lin( + const opus_int32 inLog_Q7 /* I input on log scale */ +) +{ + opus_int32 out, frac_Q7; + + if( inLog_Q7 < 0 ) { + return 0; + } else if ( inLog_Q7 >= 3967 ) { + return silk_int32_MAX; + } + + out = silk_LSHIFT( 1, silk_RSHIFT( inLog_Q7, 7 ) ); + frac_Q7 = inLog_Q7 & 0x7F; + if( inLog_Q7 < 2048 ) { + /* Piece-wise parabolic approximation */ + out = silk_ADD_RSHIFT32( out, silk_MUL( out, silk_SMLAWB( frac_Q7, silk_SMULBB( frac_Q7, 128 - frac_Q7 ), -174 ) ), 7 ); + } else { + /* Piece-wise parabolic approximation */ + out = silk_MLA( out, silk_RSHIFT( out, 7 ), silk_SMLAWB( frac_Q7, silk_SMULBB( frac_Q7, 128 - frac_Q7 ), -174 ) ); + } + return out; +} diff --git a/drivers/opus/silk/macros.h 
b/drivers/opus/silk/macros.h new file mode 100644 index 0000000000..6cf2e93dbc --- /dev/null +++ b/drivers/opus/silk/macros.h @@ -0,0 +1,113 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+***********************************************************************/ + +#ifndef SILK_MACROS_H +#define SILK_MACROS_H + +#include "opus_config.h" + +#include "opus_types.h" +#include "opus_defines.h" + +/* This is an OPUS_INLINE header file for general platform. */ + +/* (a32 * (opus_int32)((opus_int16)(b32))) >> 16 output have to be 32bit int */ +#define silk_SMULWB(a32, b32) ((((a32) >> 16) * (opus_int32)((opus_int16)(b32))) + ((((a32) & 0x0000FFFF) * (opus_int32)((opus_int16)(b32))) >> 16)) + +/* a32 + (b32 * (opus_int32)((opus_int16)(c32))) >> 16 output have to be 32bit int */ +#define silk_SMLAWB(a32, b32, c32) ((a32) + ((((b32) >> 16) * (opus_int32)((opus_int16)(c32))) + ((((b32) & 0x0000FFFF) * (opus_int32)((opus_int16)(c32))) >> 16))) + +/* (a32 * (b32 >> 16)) >> 16 */ +#define silk_SMULWT(a32, b32) (((a32) >> 16) * ((b32) >> 16) + ((((a32) & 0x0000FFFF) * ((b32) >> 16)) >> 16)) + +/* a32 + (b32 * (c32 >> 16)) >> 16 */ +#define silk_SMLAWT(a32, b32, c32) ((a32) + (((b32) >> 16) * ((c32) >> 16)) + ((((b32) & 0x0000FFFF) * ((c32) >> 16)) >> 16)) + +/* (opus_int32)((opus_int16)(a3))) * (opus_int32)((opus_int16)(b32)) output have to be 32bit int */ +#define silk_SMULBB(a32, b32) ((opus_int32)((opus_int16)(a32)) * (opus_int32)((opus_int16)(b32))) + +/* a32 + (opus_int32)((opus_int16)(b32)) * (opus_int32)((opus_int16)(c32)) output have to be 32bit int */ +#define silk_SMLABB(a32, b32, c32) ((a32) + ((opus_int32)((opus_int16)(b32))) * (opus_int32)((opus_int16)(c32))) + +/* (opus_int32)((opus_int16)(a32)) * (b32 >> 16) */ +#define silk_SMULBT(a32, b32) ((opus_int32)((opus_int16)(a32)) * ((b32) >> 16)) + +/* a32 + (opus_int32)((opus_int16)(b32)) * (c32 >> 16) */ +#define silk_SMLABT(a32, b32, c32) ((a32) + ((opus_int32)((opus_int16)(b32))) * ((c32) >> 16)) + +/* a64 + (b32 * c32) */ +#define silk_SMLAL(a64, b32, c32) (silk_ADD64((a64), ((opus_int64)(b32) * (opus_int64)(c32)))) + +/* (a32 * b32) >> 16 */ +#define silk_SMULWW(a32, b32) silk_MLA(silk_SMULWB((a32), 
(b32)), (a32), silk_RSHIFT_ROUND((b32), 16)) + +/* a32 + ((b32 * c32) >> 16) */ +#define silk_SMLAWW(a32, b32, c32) silk_MLA(silk_SMLAWB((a32), (b32), (c32)), (b32), silk_RSHIFT_ROUND((c32), 16)) + +/* add/subtract with output saturated */ +#define silk_ADD_SAT32(a, b) ((((opus_uint32)(a) + (opus_uint32)(b)) & 0x80000000) == 0 ? \ + ((((a) & (b)) & 0x80000000) != 0 ? silk_int32_MIN : (a)+(b)) : \ + ((((a) | (b)) & 0x80000000) == 0 ? silk_int32_MAX : (a)+(b)) ) + +#define silk_SUB_SAT32(a, b) ((((opus_uint32)(a)-(opus_uint32)(b)) & 0x80000000) == 0 ? \ + (( (a) & ((b)^0x80000000) & 0x80000000) ? silk_int32_MIN : (a)-(b)) : \ + ((((a)^0x80000000) & (b) & 0x80000000) ? silk_int32_MAX : (a)-(b)) ) + +#include "ecintrin.h" + +static OPUS_INLINE opus_int32 silk_CLZ16(opus_int16 in16) +{ + return 32 - EC_ILOG(in16<<16|0x8000); +} + +static OPUS_INLINE opus_int32 silk_CLZ32(opus_int32 in32) +{ + return in32 ? 32 - EC_ILOG(in32) : 32; +} + +/* Row based */ +#define matrix_ptr(Matrix_base_adr, row, column, N) \ + (*((Matrix_base_adr) + ((row)*(N)+(column)))) +#define matrix_adr(Matrix_base_adr, row, column, N) \ + ((Matrix_base_adr) + ((row)*(N)+(column))) + +/* Column based */ +#ifndef matrix_c_ptr +# define matrix_c_ptr(Matrix_base_adr, row, column, M) \ + (*((Matrix_base_adr) + ((row)+(M)*(column)))) +#endif + +#ifdef OPUS_ARM_INLINE_ASM +#include "arm/macros_armv4.h" +#endif + +#ifdef OPUS_ARM_INLINE_EDSP +#include "arm/macros_armv5e.h" +#endif + +#endif /* SILK_MACROS_H */ + diff --git a/drivers/opus/silk/pitch_est_defines.h b/drivers/opus/silk/pitch_est_defines.h new file mode 100644 index 0000000000..e1e4b5d768 --- /dev/null +++ b/drivers/opus/silk/pitch_est_defines.h @@ -0,0 +1,88 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. 
+Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+***********************************************************************/ + +#ifndef SILK_PE_DEFINES_H +#define SILK_PE_DEFINES_H + +#include "SigProc_FIX.h" + +/********************************************************/ +/* Definitions for pitch estimator */ +/********************************************************/ + +#define PE_MAX_FS_KHZ 16 /* Maximum sampling frequency used */ + +#define PE_MAX_NB_SUBFR 4 +#define PE_SUBFR_LENGTH_MS 5 /* 5 ms */ + +#define PE_LTP_MEM_LENGTH_MS ( 4 * PE_SUBFR_LENGTH_MS ) + +#define PE_MAX_FRAME_LENGTH_MS ( PE_LTP_MEM_LENGTH_MS + PE_MAX_NB_SUBFR * PE_SUBFR_LENGTH_MS ) +#define PE_MAX_FRAME_LENGTH ( PE_MAX_FRAME_LENGTH_MS * PE_MAX_FS_KHZ ) +#define PE_MAX_FRAME_LENGTH_ST_1 ( PE_MAX_FRAME_LENGTH >> 2 ) +#define PE_MAX_FRAME_LENGTH_ST_2 ( PE_MAX_FRAME_LENGTH >> 1 ) + +#define PE_MAX_LAG_MS 18 /* 18 ms -> 56 Hz */ +#define PE_MIN_LAG_MS 2 /* 2 ms -> 500 Hz */ +#define PE_MAX_LAG ( PE_MAX_LAG_MS * PE_MAX_FS_KHZ ) +#define PE_MIN_LAG ( PE_MIN_LAG_MS * PE_MAX_FS_KHZ ) + +#define PE_D_SRCH_LENGTH 24 + +#define PE_NB_STAGE3_LAGS 5 + +#define PE_NB_CBKS_STAGE2 3 +#define PE_NB_CBKS_STAGE2_EXT 11 + +#define PE_NB_CBKS_STAGE3_MAX 34 +#define PE_NB_CBKS_STAGE3_MID 24 +#define PE_NB_CBKS_STAGE3_MIN 16 + +#define PE_NB_CBKS_STAGE3_10MS 12 +#define PE_NB_CBKS_STAGE2_10MS 3 + +#define PE_SHORTLAG_BIAS 0.2f /* for logarithmic weighting */ +#define PE_PREVLAG_BIAS 0.2f /* for logarithmic weighting */ +#define PE_FLATCONTOUR_BIAS 0.05f + +#define SILK_PE_MIN_COMPLEX 0 +#define SILK_PE_MID_COMPLEX 1 +#define SILK_PE_MAX_COMPLEX 2 + +/* Tables for 20 ms frames */ +extern const opus_int8 silk_CB_lags_stage2[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE2_EXT ]; +extern const opus_int8 silk_CB_lags_stage3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ]; +extern const opus_int8 silk_Lag_range_stage3[ SILK_PE_MAX_COMPLEX + 1 ] [ PE_MAX_NB_SUBFR ][ 2 ]; +extern const opus_int8 silk_nb_cbk_searchs_stage3[ SILK_PE_MAX_COMPLEX + 1 ]; + +/* Tables for 10 ms frames */ 
+extern const opus_int8 silk_CB_lags_stage2_10_ms[ PE_MAX_NB_SUBFR >> 1][ 3 ]; +extern const opus_int8 silk_CB_lags_stage3_10_ms[ PE_MAX_NB_SUBFR >> 1 ][ 12 ]; +extern const opus_int8 silk_Lag_range_stage3_10_ms[ PE_MAX_NB_SUBFR >> 1 ][ 2 ]; + +#endif + diff --git a/drivers/opus/silk/pitch_est_tables.c b/drivers/opus/silk/pitch_est_tables.c new file mode 100644 index 0000000000..97ddbab010 --- /dev/null +++ b/drivers/opus/silk/pitch_est_tables.c @@ -0,0 +1,99 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "typedef.h" +#include "pitch_est_defines.h" + +const opus_int8 silk_CB_lags_stage2_10_ms[ PE_MAX_NB_SUBFR >> 1][ PE_NB_CBKS_STAGE2_10MS ] = +{ + {0, 1, 0}, + {0, 0, 1} +}; + +const opus_int8 silk_CB_lags_stage3_10_ms[ PE_MAX_NB_SUBFR >> 1 ][ PE_NB_CBKS_STAGE3_10MS ] = +{ + { 0, 0, 1,-1, 1,-1, 2,-2, 2,-2, 3,-3}, + { 0, 1, 0, 1,-1, 2,-1, 2,-2, 3,-2, 3} +}; + +const opus_int8 silk_Lag_range_stage3_10_ms[ PE_MAX_NB_SUBFR >> 1 ][ 2 ] = +{ + {-3, 7}, + {-2, 7} +}; + +const opus_int8 silk_CB_lags_stage2[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE2_EXT ] = +{ + {0, 2,-1,-1,-1, 0, 0, 1, 1, 0, 1}, + {0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0}, + {0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0}, + {0,-1, 2, 1, 0, 1, 1, 0, 0,-1,-1} +}; + +const opus_int8 silk_CB_lags_stage3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ] = +{ + {0, 0, 1,-1, 0, 1,-1, 0,-1, 1,-2, 2,-2,-2, 2,-3, 2, 3,-3,-4, 3,-4, 4, 4,-5, 5,-6,-5, 6,-7, 6, 5, 8,-9}, + {0, 0, 1, 0, 0, 0, 0, 0, 0, 0,-1, 1, 0, 0, 1,-1, 0, 1,-1,-1, 1,-1, 2, 1,-1, 2,-2,-2, 2,-2, 2, 2, 3,-3}, + {0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1,-1, 1, 0, 0, 2, 1,-1, 2,-1,-1, 2,-1, 2, 2,-1, 3,-2,-2,-2, 3}, + {0, 1, 0, 0, 1, 0, 1,-1, 2,-1, 2,-1, 2, 3,-2, 3,-2,-2, 4, 4,-3, 5,-3,-4, 6,-4, 6, 5,-5, 8,-6,-5,-7, 9} +}; + +const opus_int8 silk_Lag_range_stage3[ SILK_PE_MAX_COMPLEX + 1 ] [ PE_MAX_NB_SUBFR ][ 2 ] = +{ + /* Lags to 
search for low number of stage3 cbks */ + { + {-5,8}, + {-1,6}, + {-1,6}, + {-4,10} + }, + /* Lags to search for middle number of stage3 cbks */ + { + {-6,10}, + {-2,6}, + {-1,6}, + {-5,10} + }, + /* Lags to search for max number of stage3 cbks */ + { + {-9,12}, + {-3,7}, + {-2,7}, + {-7,13} + } +}; + +const opus_int8 silk_nb_cbk_searchs_stage3[ SILK_PE_MAX_COMPLEX + 1 ] = +{ + PE_NB_CBKS_STAGE3_MIN, + PE_NB_CBKS_STAGE3_MID, + PE_NB_CBKS_STAGE3_MAX +}; diff --git a/drivers/opus/silk/process_NLSFs.c b/drivers/opus/silk/process_NLSFs.c new file mode 100644 index 0000000000..0193fda1f1 --- /dev/null +++ b/drivers/opus/silk/process_NLSFs.c @@ -0,0 +1,105 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "silk_main.h" + +/* Limit, stabilize, convert and quantize NLSFs */ +void silk_process_NLSFs( + silk_encoder_state *psEncC, /* I/O Encoder state */ + opus_int16 PredCoef_Q12[ 2 ][ MAX_LPC_ORDER ], /* O Prediction coefficients */ + opus_int16 pNLSF_Q15[ MAX_LPC_ORDER ], /* I/O Normalized LSFs (quant out) (0 - (2^15-1)) */ + const opus_int16 prev_NLSFq_Q15[ MAX_LPC_ORDER ] /* I Previous Normalized LSFs (0 - (2^15-1)) */ +) +{ + opus_int i, doInterpolate; + opus_int NLSF_mu_Q20; + opus_int32 i_sqr_Q15; + opus_int16 pNLSF0_temp_Q15[ MAX_LPC_ORDER ]; + opus_int16 pNLSFW_QW[ MAX_LPC_ORDER ]; + opus_int16 pNLSFW0_temp_QW[ MAX_LPC_ORDER ]; + + silk_assert( psEncC->speech_activity_Q8 >= 0 ); + silk_assert( psEncC->speech_activity_Q8 <= SILK_FIX_CONST( 1.0, 8 ) ); + silk_assert( psEncC->useInterpolatedNLSFs == 1 || psEncC->indices.NLSFInterpCoef_Q2 == ( 1 << 2 ) ); + + /***********************/ + /* Calculate mu values */ + /***********************/ + /* NLSF_mu = 0.003 - 0.0015 * psEnc->speech_activity; */ + NLSF_mu_Q20 = silk_SMLAWB( SILK_FIX_CONST( 0.003, 20 ), SILK_FIX_CONST( -0.001, 28 ), psEncC->speech_activity_Q8 ); + if( psEncC->nb_subfr == 2 ) { + /* Multiply by 1.5 for 10 ms packets */ + NLSF_mu_Q20 = silk_ADD_RSHIFT( NLSF_mu_Q20, NLSF_mu_Q20, 1 ); + } + + silk_assert( NLSF_mu_Q20 > 0 ); + silk_assert( 
NLSF_mu_Q20 <= SILK_FIX_CONST( 0.005, 20 ) ); + + /* Calculate NLSF weights */ + silk_NLSF_VQ_weights_laroia( pNLSFW_QW, pNLSF_Q15, psEncC->predictLPCOrder ); + + /* Update NLSF weights for interpolated NLSFs */ + doInterpolate = ( psEncC->useInterpolatedNLSFs == 1 ) && ( psEncC->indices.NLSFInterpCoef_Q2 < 4 ); + if( doInterpolate ) { + /* Calculate the interpolated NLSF vector for the first half */ + silk_interpolate( pNLSF0_temp_Q15, prev_NLSFq_Q15, pNLSF_Q15, + psEncC->indices.NLSFInterpCoef_Q2, psEncC->predictLPCOrder ); + + /* Calculate first half NLSF weights for the interpolated NLSFs */ + silk_NLSF_VQ_weights_laroia( pNLSFW0_temp_QW, pNLSF0_temp_Q15, psEncC->predictLPCOrder ); + + /* Update NLSF weights with contribution from first half */ + i_sqr_Q15 = silk_LSHIFT( silk_SMULBB( psEncC->indices.NLSFInterpCoef_Q2, psEncC->indices.NLSFInterpCoef_Q2 ), 11 ); + for( i = 0; i < psEncC->predictLPCOrder; i++ ) { + pNLSFW_QW[ i ] = silk_SMLAWB( silk_RSHIFT( pNLSFW_QW[ i ], 1 ), (opus_int32)pNLSFW0_temp_QW[ i ], i_sqr_Q15 ); + silk_assert( pNLSFW_QW[ i ] >= 1 ); + } + } + + silk_NLSF_encode( psEncC->indices.NLSFIndices, pNLSF_Q15, psEncC->psNLSF_CB, pNLSFW_QW, + NLSF_mu_Q20, psEncC->NLSF_MSVQ_Survivors, psEncC->indices.signalType ); + + /* Convert quantized NLSFs back to LPC coefficients */ + silk_NLSF2A( PredCoef_Q12[ 1 ], pNLSF_Q15, psEncC->predictLPCOrder ); + + if( doInterpolate ) { + /* Calculate the interpolated, quantized LSF vector for the first half */ + silk_interpolate( pNLSF0_temp_Q15, prev_NLSFq_Q15, pNLSF_Q15, + psEncC->indices.NLSFInterpCoef_Q2, psEncC->predictLPCOrder ); + + /* Convert back to LPC coefficients */ + silk_NLSF2A( PredCoef_Q12[ 0 ], pNLSF0_temp_Q15, psEncC->predictLPCOrder ); + + } else { + /* Copy LPC coefficients for first half from second half */ + silk_memcpy( PredCoef_Q12[ 0 ], PredCoef_Q12[ 1 ], psEncC->predictLPCOrder * sizeof( opus_int16 ) ); + } +} diff --git a/drivers/opus/silk/quant_LTP_gains.c 
b/drivers/opus/silk/quant_LTP_gains.c new file mode 100644 index 0000000000..34bcd3acdb --- /dev/null +++ b/drivers/opus/silk/quant_LTP_gains.c @@ -0,0 +1,128 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "silk_main.h" +#include "tuning_parameters.h" + +void silk_quant_LTP_gains( + opus_int16 B_Q14[ MAX_NB_SUBFR * LTP_ORDER ], /* I/O (un)quantized LTP gains */ + opus_int8 cbk_index[ MAX_NB_SUBFR ], /* O Codebook Index */ + opus_int8 *periodicity_index, /* O Periodicity Index */ + opus_int32 *sum_log_gain_Q7, /* I/O Cumulative max prediction gain */ + const opus_int32 W_Q18[ MAX_NB_SUBFR*LTP_ORDER*LTP_ORDER ], /* I Error Weights in Q18 */ + opus_int mu_Q9, /* I Mu value (R/D tradeoff) */ + opus_int lowComplexity, /* I Flag for low complexity */ + const opus_int nb_subfr /* I number of subframes */ +) +{ + opus_int j, k, cbk_size; + opus_int8 temp_idx[ MAX_NB_SUBFR ]; + const opus_uint8 *cl_ptr_Q5; + const opus_int8 *cbk_ptr_Q7; + const opus_uint8 *cbk_gain_ptr_Q7; + const opus_int16 *b_Q14_ptr; + const opus_int32 *W_Q18_ptr; + opus_int32 rate_dist_Q14_subfr, rate_dist_Q14, min_rate_dist_Q14; + opus_int32 sum_log_gain_tmp_Q7, best_sum_log_gain_Q7, max_gain_Q7, gain_Q7; + + /***************************************************/ + /* iterate over different codebooks with different */ + /* rates/distortions, and choose best */ + /***************************************************/ + min_rate_dist_Q14 = silk_int32_MAX; + best_sum_log_gain_Q7 = 0; + for( k = 0; k < 3; k++ ) { + /* Safety margin for pitch gain control, to take into account factors + such as state rescaling/rewhitening. 
*/ + opus_int32 gain_safety = SILK_FIX_CONST( 0.4, 7 ); + + cl_ptr_Q5 = silk_LTP_gain_BITS_Q5_ptrs[ k ]; + cbk_ptr_Q7 = silk_LTP_vq_ptrs_Q7[ k ]; + cbk_gain_ptr_Q7 = silk_LTP_vq_gain_ptrs_Q7[ k ]; + cbk_size = silk_LTP_vq_sizes[ k ]; + + /* Set up pointer to first subframe */ + W_Q18_ptr = W_Q18; + b_Q14_ptr = B_Q14; + + rate_dist_Q14 = 0; + sum_log_gain_tmp_Q7 = *sum_log_gain_Q7; + for( j = 0; j < nb_subfr; j++ ) { + max_gain_Q7 = silk_log2lin( ( SILK_FIX_CONST( MAX_SUM_LOG_GAIN_DB / 6.0, 7 ) - sum_log_gain_tmp_Q7 ) + + SILK_FIX_CONST( 7, 7 ) ) - gain_safety; + + silk_VQ_WMat_EC( + &temp_idx[ j ], /* O index of best codebook vector */ + &rate_dist_Q14_subfr, /* O best weighted quantization error + mu * rate */ + &gain_Q7, /* O sum of absolute LTP coefficients */ + b_Q14_ptr, /* I input vector to be quantized */ + W_Q18_ptr, /* I weighting matrix */ + cbk_ptr_Q7, /* I codebook */ + cbk_gain_ptr_Q7, /* I codebook effective gains */ + cl_ptr_Q5, /* I code length for each codebook vector */ + mu_Q9, /* I tradeoff between weighted error and rate */ + max_gain_Q7, /* I maximum sum of absolute LTP coefficients */ + cbk_size /* I number of vectors in codebook */ + ); + + rate_dist_Q14 = silk_ADD_POS_SAT32( rate_dist_Q14, rate_dist_Q14_subfr ); + sum_log_gain_tmp_Q7 = silk_max(0, sum_log_gain_tmp_Q7 + + silk_lin2log( gain_safety + gain_Q7 ) - SILK_FIX_CONST( 7, 7 )); + + b_Q14_ptr += LTP_ORDER; + W_Q18_ptr += LTP_ORDER * LTP_ORDER; + } + + /* Avoid never finding a codebook */ + rate_dist_Q14 = silk_min( silk_int32_MAX - 1, rate_dist_Q14 ); + + if( rate_dist_Q14 < min_rate_dist_Q14 ) { + min_rate_dist_Q14 = rate_dist_Q14; + *periodicity_index = (opus_int8)k; + silk_memcpy( cbk_index, temp_idx, nb_subfr * sizeof( opus_int8 ) ); + best_sum_log_gain_Q7 = sum_log_gain_tmp_Q7; + } + + /* Break early in low-complexity mode if rate distortion is below threshold */ + if( lowComplexity && ( rate_dist_Q14 < silk_LTP_gain_middle_avg_RD_Q14 ) ) { + break; + } + } + + cbk_ptr_Q7 = 
silk_LTP_vq_ptrs_Q7[ *periodicity_index ]; + for( j = 0; j < nb_subfr; j++ ) { + for( k = 0; k < LTP_ORDER; k++ ) { + B_Q14[ j * LTP_ORDER + k ] = silk_LSHIFT( cbk_ptr_Q7[ cbk_index[ j ] * LTP_ORDER + k ], 7 ); + } + } + *sum_log_gain_Q7 = best_sum_log_gain_Q7; +} + diff --git a/drivers/opus/silk/resampler.c b/drivers/opus/silk/resampler.c new file mode 100644 index 0000000000..14b185c45e --- /dev/null +++ b/drivers/opus/silk/resampler.c @@ -0,0 +1,215 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +/* + * Matrix of resampling methods used: + * Fs_out (kHz) + * 8 12 16 24 48 + * + * 8 C UF U UF UF + * 12 AF C UF U UF + * Fs_in (kHz) 16 D AF C UF UF + * 24 AF D AF C U + * 48 AF AF AF D C + * + * C -> Copy (no resampling) + * D -> Allpass-based 2x downsampling + * U -> Allpass-based 2x upsampling + * UF -> Allpass-based 2x upsampling followed by FIR interpolation + * AF -> AR2 filter followed by FIR interpolation + */ + +#include "resampler_private.h" + +/* Tables with delay compensation values to equalize total delay for different modes */ +static const opus_int8 delay_matrix_enc[ 5 ][ 3 ] = { +/* in \ out 8 12 16 */ +/* 8 */ { 6, 0, 3 }, +/* 12 */ { 0, 7, 3 }, +/* 16 */ { 0, 1, 10 }, +/* 24 */ { 0, 2, 6 }, +/* 48 */ { 18, 10, 12 } +}; + +static const opus_int8 delay_matrix_dec[ 3 ][ 5 ] = { +/* in \ out 8 12 16 24 48 */ +/* 8 */ { 4, 0, 2, 0, 0 }, +/* 12 */ { 0, 9, 4, 7, 4 }, +/* 16 */ { 0, 3, 12, 7, 7 } +}; + +/* Simple way to make [8000, 12000, 16000, 24000, 48000] to [0, 1, 2, 3, 4] */ +#define rateID(R) ( ( ( ((R)>>12) - ((R)>16000) ) >> ((R)>24000) ) - 1 ) + +#define USE_silk_resampler_copy (0) +#define USE_silk_resampler_private_up2_HQ_wrapper (1) +#define USE_silk_resampler_private_IIR_FIR (2) +#define USE_silk_resampler_private_down_FIR (3) + +/* Initialize/reset the resampler state for a given pair of 
input/output sampling rates */ +opus_int silk_resampler_init( + silk_resampler_state_struct *S, /* I/O Resampler state */ + opus_int32 Fs_Hz_in, /* I Input sampling rate (Hz) */ + opus_int32 Fs_Hz_out, /* I Output sampling rate (Hz) */ + opus_int forEnc /* I If 1: encoder; if 0: decoder */ +) +{ + opus_int up2x; + + /* Clear state */ + silk_memset( S, 0, sizeof( silk_resampler_state_struct ) ); + + /* Input checking */ + if( forEnc ) { + if( ( Fs_Hz_in != 8000 && Fs_Hz_in != 12000 && Fs_Hz_in != 16000 && Fs_Hz_in != 24000 && Fs_Hz_in != 48000 ) || + ( Fs_Hz_out != 8000 && Fs_Hz_out != 12000 && Fs_Hz_out != 16000 ) ) { + silk_assert( 0 ); + return -1; + } + S->inputDelay = delay_matrix_enc[ rateID( Fs_Hz_in ) ][ rateID( Fs_Hz_out ) ]; + } else { + if( ( Fs_Hz_in != 8000 && Fs_Hz_in != 12000 && Fs_Hz_in != 16000 ) || + ( Fs_Hz_out != 8000 && Fs_Hz_out != 12000 && Fs_Hz_out != 16000 && Fs_Hz_out != 24000 && Fs_Hz_out != 48000 ) ) { + silk_assert( 0 ); + return -1; + } + S->inputDelay = delay_matrix_dec[ rateID( Fs_Hz_in ) ][ rateID( Fs_Hz_out ) ]; + } + + S->Fs_in_kHz = silk_DIV32_16( Fs_Hz_in, 1000 ); + S->Fs_out_kHz = silk_DIV32_16( Fs_Hz_out, 1000 ); + + /* Number of samples processed per batch */ + S->batchSize = S->Fs_in_kHz * RESAMPLER_MAX_BATCH_SIZE_MS; + + /* Find resampler with the right sampling ratio */ + up2x = 0; + if( Fs_Hz_out > Fs_Hz_in ) { + /* Upsample */ + if( Fs_Hz_out == silk_MUL( Fs_Hz_in, 2 ) ) { /* Fs_out : Fs_in = 2 : 1 */ + /* Special case: directly use 2x upsampler */ + S->resampler_function = USE_silk_resampler_private_up2_HQ_wrapper; + } else { + /* Default resampler */ + S->resampler_function = USE_silk_resampler_private_IIR_FIR; + up2x = 1; + } + } else if ( Fs_Hz_out < Fs_Hz_in ) { + /* Downsample */ + S->resampler_function = USE_silk_resampler_private_down_FIR; + if( silk_MUL( Fs_Hz_out, 4 ) == silk_MUL( Fs_Hz_in, 3 ) ) { /* Fs_out : Fs_in = 3 : 4 */ + S->FIR_Fracs = 3; + S->FIR_Order = RESAMPLER_DOWN_ORDER_FIR0; + S->Coefs = 
silk_Resampler_3_4_COEFS; + } else if( silk_MUL( Fs_Hz_out, 3 ) == silk_MUL( Fs_Hz_in, 2 ) ) { /* Fs_out : Fs_in = 2 : 3 */ + S->FIR_Fracs = 2; + S->FIR_Order = RESAMPLER_DOWN_ORDER_FIR0; + S->Coefs = silk_Resampler_2_3_COEFS; + } else if( silk_MUL( Fs_Hz_out, 2 ) == Fs_Hz_in ) { /* Fs_out : Fs_in = 1 : 2 */ + S->FIR_Fracs = 1; + S->FIR_Order = RESAMPLER_DOWN_ORDER_FIR1; + S->Coefs = silk_Resampler_1_2_COEFS; + } else if( silk_MUL( Fs_Hz_out, 3 ) == Fs_Hz_in ) { /* Fs_out : Fs_in = 1 : 3 */ + S->FIR_Fracs = 1; + S->FIR_Order = RESAMPLER_DOWN_ORDER_FIR2; + S->Coefs = silk_Resampler_1_3_COEFS; + } else if( silk_MUL( Fs_Hz_out, 4 ) == Fs_Hz_in ) { /* Fs_out : Fs_in = 1 : 4 */ + S->FIR_Fracs = 1; + S->FIR_Order = RESAMPLER_DOWN_ORDER_FIR2; + S->Coefs = silk_Resampler_1_4_COEFS; + } else if( silk_MUL( Fs_Hz_out, 6 ) == Fs_Hz_in ) { /* Fs_out : Fs_in = 1 : 6 */ + S->FIR_Fracs = 1; + S->FIR_Order = RESAMPLER_DOWN_ORDER_FIR2; + S->Coefs = silk_Resampler_1_6_COEFS; + } else { + /* None available */ + silk_assert( 0 ); + return -1; + } + } else { + /* Input and output sampling rates are equal: copy */ + S->resampler_function = USE_silk_resampler_copy; + } + + /* Ratio of input/output samples */ + S->invRatio_Q16 = silk_LSHIFT32( silk_DIV32( silk_LSHIFT32( Fs_Hz_in, 14 + up2x ), Fs_Hz_out ), 2 ); + /* Make sure the ratio is rounded up */ + while( silk_SMULWW( S->invRatio_Q16, Fs_Hz_out ) < silk_LSHIFT32( Fs_Hz_in, up2x ) ) { + S->invRatio_Q16++; + } + + return 0; +} + +/* Resampler: convert from one sampling rate to another */ +/* Input and output sampling rate are at most 48000 Hz */ +opus_int silk_resampler( + silk_resampler_state_struct *S, /* I/O Resampler state */ + opus_int16 out[], /* O Output signal */ + const opus_int16 in[], /* I Input signal */ + opus_int32 inLen /* I Number of input samples */ +) +{ + opus_int nSamples; + + /* Need at least 1 ms of input data */ + silk_assert( inLen >= S->Fs_in_kHz ); + /* Delay can't exceed the 1 ms of buffering */ + silk_assert( 
S->inputDelay <= S->Fs_in_kHz ); + + nSamples = S->Fs_in_kHz - S->inputDelay; + + /* Copy to delay buffer */ + silk_memcpy( &S->delayBuf[ S->inputDelay ], in, nSamples * sizeof( opus_int16 ) ); + + switch( S->resampler_function ) { + case USE_silk_resampler_private_up2_HQ_wrapper: + silk_resampler_private_up2_HQ_wrapper( S, out, S->delayBuf, S->Fs_in_kHz ); + silk_resampler_private_up2_HQ_wrapper( S, &out[ S->Fs_out_kHz ], &in[ nSamples ], inLen - S->Fs_in_kHz ); + break; + case USE_silk_resampler_private_IIR_FIR: + silk_resampler_private_IIR_FIR( S, out, S->delayBuf, S->Fs_in_kHz ); + silk_resampler_private_IIR_FIR( S, &out[ S->Fs_out_kHz ], &in[ nSamples ], inLen - S->Fs_in_kHz ); + break; + case USE_silk_resampler_private_down_FIR: + silk_resampler_private_down_FIR( S, out, S->delayBuf, S->Fs_in_kHz ); + silk_resampler_private_down_FIR( S, &out[ S->Fs_out_kHz ], &in[ nSamples ], inLen - S->Fs_in_kHz ); + break; + default: + silk_memcpy( out, S->delayBuf, S->Fs_in_kHz * sizeof( opus_int16 ) ); + silk_memcpy( &out[ S->Fs_out_kHz ], &in[ nSamples ], ( inLen - S->Fs_in_kHz ) * sizeof( opus_int16 ) ); + } + + /* Copy to delay buffer */ + silk_memcpy( S->delayBuf, &in[ inLen - S->inputDelay ], S->inputDelay * sizeof( opus_int16 ) ); + + return 0; +} diff --git a/drivers/opus/silk/resampler_down2.c b/drivers/opus/silk/resampler_down2.c new file mode 100644 index 0000000000..5c4b27759a --- /dev/null +++ b/drivers/opus/silk/resampler_down2.c @@ -0,0 +1,74 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. 
+- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "SigProc_FIX.h" +#include "resampler_rom.h" + +/* Downsample by a factor 2 */ +void silk_resampler_down2( + opus_int32 *S, /* I/O State vector [ 2 ] */ + opus_int16 *out, /* O Output signal [ floor(len/2) ] */ + const opus_int16 *in, /* I Input signal [ len ] */ + opus_int32 inLen /* I Number of input samples */ +) +{ + opus_int32 k, len2 = silk_RSHIFT32( inLen, 1 ); + opus_int32 in32, out32, Y, X; + + silk_assert( silk_resampler_down2_0 > 0 ); + silk_assert( silk_resampler_down2_1 < 0 ); + + /* Internal variables and state are in Q10 format */ + for( k = 0; k < len2; k++ ) { + /* Convert to Q10 */ + in32 = silk_LSHIFT( (opus_int32)in[ 2 * k ], 10 ); + + /* All-pass section for even input sample */ + Y = silk_SUB32( in32, S[ 0 ] ); + X = silk_SMLAWB( Y, Y, silk_resampler_down2_1 ); + out32 = silk_ADD32( S[ 0 ], X ); + S[ 0 ] = silk_ADD32( in32, X ); + + /* Convert to Q10 */ + in32 = silk_LSHIFT( (opus_int32)in[ 2 * k + 1 ], 10 ); + + /* All-pass section for odd input sample, and add to output of previous section */ + Y = silk_SUB32( in32, S[ 1 ] ); + X = silk_SMULWB( Y, silk_resampler_down2_0 ); + out32 = silk_ADD32( out32, S[ 1 ] ); + out32 = silk_ADD32( out32, X ); + S[ 1 ] = silk_ADD32( in32, X ); + + /* Add, convert back to int16 and store to output */ + out[ k ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( out32, 11 ) ); + } +} + diff --git a/drivers/opus/silk/resampler_down2_3.c b/drivers/opus/silk/resampler_down2_3.c new file mode 100644 index 0000000000..2733072fe6 --- /dev/null +++ b/drivers/opus/silk/resampler_down2_3.c @@ -0,0 +1,103 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. 
+Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "SigProc_FIX.h" +#include "resampler_private.h" +#include "stack_alloc.h" + +#define ORDER_FIR 4 + +/* Downsample by a factor 2/3, low quality */ +void silk_resampler_down2_3( + opus_int32 *S, /* I/O State vector [ 6 ] */ + opus_int16 *out, /* O Output signal [ floor(2*inLen/3) ] */ + const opus_int16 *in, /* I Input signal [ inLen ] */ + opus_int32 inLen /* I Number of input samples */ +) +{ + opus_int32 nSamplesIn, counter, res_Q6; + VARDECL( opus_int32, buf ); + opus_int32 *buf_ptr; + SAVE_STACK; + + ALLOC( buf, RESAMPLER_MAX_BATCH_SIZE_IN + ORDER_FIR, opus_int32 ); + + /* Copy buffered samples to start of buffer */ + silk_memcpy( buf, S, ORDER_FIR * sizeof( opus_int32 ) ); + + /* Iterate over blocks of frameSizeIn input samples */ + while( 1 ) { + nSamplesIn = silk_min( inLen, RESAMPLER_MAX_BATCH_SIZE_IN ); + + /* Second-order AR filter (output in Q8) */ + silk_resampler_private_AR2( &S[ ORDER_FIR ], &buf[ ORDER_FIR ], in, + silk_Resampler_2_3_COEFS_LQ, nSamplesIn ); + + /* Interpolate filtered signal */ + buf_ptr = buf; + counter = nSamplesIn; + while( counter > 2 ) { + /* Inner product */ + res_Q6 = silk_SMULWB( buf_ptr[ 0 ], silk_Resampler_2_3_COEFS_LQ[ 2 ] ); + res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 1 ], silk_Resampler_2_3_COEFS_LQ[ 3 ] ); + res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 2 ], silk_Resampler_2_3_COEFS_LQ[ 5 ] ); + res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 3 ], silk_Resampler_2_3_COEFS_LQ[ 4 ] ); + + /* Scale down, saturate and store in output array */ + *out++ = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( res_Q6, 6 ) ); + + res_Q6 = silk_SMULWB( buf_ptr[ 1 ], silk_Resampler_2_3_COEFS_LQ[ 4 ] ); + res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 2 ], silk_Resampler_2_3_COEFS_LQ[ 5 ] ); + res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 3 ], silk_Resampler_2_3_COEFS_LQ[ 3 ] ); + res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 4 ], 
silk_Resampler_2_3_COEFS_LQ[ 2 ] ); + + /* Scale down, saturate and store in output array */ + *out++ = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( res_Q6, 6 ) ); + + buf_ptr += 3; + counter -= 3; + } + + in += nSamplesIn; + inLen -= nSamplesIn; + + if( inLen > 0 ) { + /* More iterations to do; copy last part of filtered signal to beginning of buffer */ + silk_memcpy( buf, &buf[ nSamplesIn ], ORDER_FIR * sizeof( opus_int32 ) ); + } else { + break; + } + } + + /* Copy last part of filtered signal to the state for the next call */ + silk_memcpy( S, &buf[ nSamplesIn ], ORDER_FIR * sizeof( opus_int32 ) ); + RESTORE_STACK; +} diff --git a/drivers/opus/silk/resampler_private.h b/drivers/opus/silk/resampler_private.h new file mode 100644 index 0000000000..422a7d9d95 --- /dev/null +++ b/drivers/opus/silk/resampler_private.h @@ -0,0 +1,88 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
#ifndef SILK_RESAMPLER_PRIVATE_H
#define SILK_RESAMPLER_PRIVATE_H

#ifdef __cplusplus
extern "C" {
#endif

#include "SigProc_FIX.h"
#include "resampler_structs.h"
#include "resampler_rom.h"

/* Number of input samples to process in the inner loop:            */
/* 10 ms at the maximum supported rate of 48 kHz, i.e. 480 samples. */
#define RESAMPLER_MAX_BATCH_SIZE_MS             10
#define RESAMPLER_MAX_FS_KHZ                    48
#define RESAMPLER_MAX_BATCH_SIZE_IN             ( RESAMPLER_MAX_BATCH_SIZE_MS * RESAMPLER_MAX_FS_KHZ )

/* Description: Hybrid IIR/FIR polyphase implementation of resampling.            */
/* Upsamples using allpass-based 2x upsampling followed by FIR interpolation.     */
/* SS is passed as void* and is cast to silk_resampler_state_struct* internally.  */
void silk_resampler_private_IIR_FIR(
    void                            *SS,            /* I/O  Resampler state             */
    opus_int16                      out[],          /* O    Output signal               */
    const opus_int16                in[],           /* I    Input signal                */
    opus_int32                      inLen           /* I    Number of input samples     */
);

/* Description: Resample with a 2nd order AR filter followed by FIR interpolation. */
/* SS is passed as void* and is cast to silk_resampler_state_struct* internally.   */
void silk_resampler_private_down_FIR(
    void                            *SS,            /* I/O  Resampler state             */
    opus_int16                      out[],          /* O    Output signal               */
    const opus_int16                in[],           /* I    Input signal                */
    opus_int32                      inLen           /* I    Number of input samples     */
);

/* Upsample by a factor 2, high quality (wrapper taking the generic resampler state) */
void silk_resampler_private_up2_HQ_wrapper(
    void                            *SS,            /* I/O  Resampler state (unused)    */
    opus_int16                      *out,           /* O    Output signal [ 2 * len ]   */
    const opus_int16                *in,            /* I    Input signal [ len ]        */
    opus_int32                      len             /* I    Number of input samples     */
);

/* Upsample by a factor 2, high quality.                                  */
/* Uses 2nd order allpass filters for the 2x upsampling, followed by a    */
/* notch filter just above Nyquist.                                       */
void silk_resampler_private_up2_HQ(
    opus_int32                      *S,             /* I/O  Resampler state [ 6 ]       */
    opus_int16                      *out,           /* O    Output signal [ 2 * len ]   */
    const opus_int16                *in,            /* I    Input signal [ len ]        */
    opus_int32                      len             /* I    Number of input samples     */
);

/* Second order AR filter with single delay elements.                     */
/* Writes len filtered samples to out_Q8 and updates both state values.   */
void silk_resampler_private_AR2(
    opus_int32                      S[],            /* I/O  State vector [ 2 ]          */
    opus_int32                      out_Q8[],       /* O    Output signal               */
    const opus_int16                in[],           /* I    Input signal                */
    const opus_int16                A_Q14[],        /* I    AR coefficients, Q14        */
    opus_int32                      len             /* I    Signal length               */
);

#ifdef __cplusplus
}
#endif
#endif /* SILK_RESAMPLER_PRIVATE_H */
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "SigProc_FIX.h" +#include "resampler_private.h" + +/* Second order AR filter with single delay elements */ +void silk_resampler_private_AR2( + opus_int32 S[], /* I/O State vector [ 2 ] */ + opus_int32 out_Q8[], /* O Output signal */ + const opus_int16 in[], /* I Input signal */ + const opus_int16 A_Q14[], /* I AR coefficients, Q14 */ + opus_int32 len /* I Signal length */ +) +{ + opus_int32 k; + opus_int32 out32; + + for( k = 0; k < len; k++ ) { + out32 = silk_ADD_LSHIFT32( S[ 0 ], (opus_int32)in[ k ], 8 ); + out_Q8[ k ] = out32; + out32 = silk_LSHIFT( out32, 2 ); + S[ 0 ] = silk_SMLAWB( S[ 1 ], out32, A_Q14[ 0 ] ); + S[ 1 ] = silk_SMULWB( out32, A_Q14[ 1 ] ); + } +} + diff --git a/drivers/opus/silk/resampler_private_IIR_FIR.c b/drivers/opus/silk/resampler_private_IIR_FIR.c new file mode 100644 index 0000000000..f45c3e7413 --- /dev/null +++ b/drivers/opus/silk/resampler_private_IIR_FIR.c @@ -0,0 +1,107 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. 
+- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "SigProc_FIX.h" +#include "resampler_private.h" +#include "stack_alloc.h" + +static OPUS_INLINE opus_int16 *silk_resampler_private_IIR_FIR_INTERPOL( + opus_int16 *out, + opus_int16 *buf, + opus_int32 max_index_Q16, + opus_int32 index_increment_Q16 +) +{ + opus_int32 index_Q16, res_Q15; + opus_int16 *buf_ptr; + opus_int32 table_index; + + /* Interpolate upsampled signal and store in output array */ + for( index_Q16 = 0; index_Q16 < max_index_Q16; index_Q16 += index_increment_Q16 ) { + table_index = silk_SMULWB( index_Q16 & 0xFFFF, 12 ); + buf_ptr = &buf[ index_Q16 >> 16 ]; + + res_Q15 = silk_SMULBB( buf_ptr[ 0 ], silk_resampler_frac_FIR_12[ table_index ][ 0 ] ); + res_Q15 = silk_SMLABB( res_Q15, buf_ptr[ 1 ], silk_resampler_frac_FIR_12[ table_index ][ 1 ] ); + res_Q15 = silk_SMLABB( res_Q15, buf_ptr[ 2 ], silk_resampler_frac_FIR_12[ table_index ][ 2 ] ); + res_Q15 = silk_SMLABB( res_Q15, buf_ptr[ 3 ], silk_resampler_frac_FIR_12[ table_index ][ 3 ] ); + res_Q15 = silk_SMLABB( res_Q15, buf_ptr[ 4 ], silk_resampler_frac_FIR_12[ 11 - table_index ][ 3 ] ); + res_Q15 = silk_SMLABB( res_Q15, buf_ptr[ 5 ], silk_resampler_frac_FIR_12[ 11 - table_index ][ 2 ] ); + res_Q15 = silk_SMLABB( res_Q15, buf_ptr[ 6 ], silk_resampler_frac_FIR_12[ 11 - table_index ][ 1 ] ); + res_Q15 = silk_SMLABB( res_Q15, buf_ptr[ 7 ], silk_resampler_frac_FIR_12[ 11 - table_index ][ 0 ] ); + *out++ = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( res_Q15, 15 ) ); + } + return out; +} +/* Upsample using a combination of allpass-based 2x upsampling and FIR interpolation */ +void silk_resampler_private_IIR_FIR( + void *SS, /* I/O Resampler state */ + opus_int16 out[], /* O Output signal */ + const opus_int16 in[], /* I Input signal */ + opus_int32 inLen /* I Number of input samples */ +) +{ + silk_resampler_state_struct *S = (silk_resampler_state_struct *)SS; + 
opus_int32 nSamplesIn; + opus_int32 max_index_Q16, index_increment_Q16; + VARDECL( opus_int16, buf ); + SAVE_STACK; + + ALLOC( buf, 2 * S->batchSize + RESAMPLER_ORDER_FIR_12, opus_int16 ); + + /* Copy buffered samples to start of buffer */ + silk_memcpy( buf, S->sFIR.i16, RESAMPLER_ORDER_FIR_12 * sizeof( opus_int16 ) ); + + /* Iterate over blocks of frameSizeIn input samples */ + index_increment_Q16 = S->invRatio_Q16; + while( 1 ) { + nSamplesIn = silk_min( inLen, S->batchSize ); + + /* Upsample 2x */ + silk_resampler_private_up2_HQ( S->sIIR, &buf[ RESAMPLER_ORDER_FIR_12 ], in, nSamplesIn ); + + max_index_Q16 = silk_LSHIFT32( nSamplesIn, 16 + 1 ); /* + 1 because 2x upsampling */ + out = silk_resampler_private_IIR_FIR_INTERPOL( out, buf, max_index_Q16, index_increment_Q16 ); + in += nSamplesIn; + inLen -= nSamplesIn; + + if( inLen > 0 ) { + /* More iterations to do; copy last part of filtered signal to beginning of buffer */ + silk_memcpy( buf, &buf[ nSamplesIn << 1 ], RESAMPLER_ORDER_FIR_12 * sizeof( opus_int16 ) ); + } else { + break; + } + } + + /* Copy last part of filtered signal to the state for the next call */ + silk_memcpy( S->sFIR.i16, &buf[ nSamplesIn << 1 ], RESAMPLER_ORDER_FIR_12 * sizeof( opus_int16 ) ); + RESTORE_STACK; +} diff --git a/drivers/opus/silk/resampler_private_down_FIR.c b/drivers/opus/silk/resampler_private_down_FIR.c new file mode 100644 index 0000000000..f4de303546 --- /dev/null +++ b/drivers/opus/silk/resampler_private_down_FIR.c @@ -0,0 +1,194 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. 
+- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "SigProc_FIX.h" +#include "resampler_private.h" +#include "stack_alloc.h" + +static OPUS_INLINE opus_int16 *silk_resampler_private_down_FIR_INTERPOL( + opus_int16 *out, + opus_int32 *buf, + const opus_int16 *FIR_Coefs, + opus_int FIR_Order, + opus_int FIR_Fracs, + opus_int32 max_index_Q16, + opus_int32 index_increment_Q16 +) +{ + opus_int32 index_Q16, res_Q6; + opus_int32 *buf_ptr; + opus_int32 interpol_ind; + const opus_int16 *interpol_ptr; + + switch( FIR_Order ) { + case RESAMPLER_DOWN_ORDER_FIR0: + for( index_Q16 = 0; index_Q16 < max_index_Q16; index_Q16 += index_increment_Q16 ) { + /* Integer part gives pointer to buffered input */ + buf_ptr = buf + silk_RSHIFT( index_Q16, 16 ); + + /* Fractional part gives interpolation coefficients */ + interpol_ind = silk_SMULWB( index_Q16 & 0xFFFF, FIR_Fracs ); + + /* Inner product */ + interpol_ptr = &FIR_Coefs[ RESAMPLER_DOWN_ORDER_FIR0 / 2 * interpol_ind ]; + res_Q6 = silk_SMULWB( buf_ptr[ 0 ], interpol_ptr[ 0 ] ); + res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 1 ], interpol_ptr[ 1 ] ); + res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 2 ], interpol_ptr[ 2 ] ); + res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 3 ], interpol_ptr[ 3 ] ); + res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 4 ], interpol_ptr[ 4 ] ); + res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 5 ], interpol_ptr[ 5 ] ); + res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 6 ], interpol_ptr[ 6 ] ); + res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 7 ], interpol_ptr[ 7 ] ); + res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 8 ], interpol_ptr[ 8 ] ); + interpol_ptr = &FIR_Coefs[ RESAMPLER_DOWN_ORDER_FIR0 / 2 * ( FIR_Fracs - 1 - interpol_ind ) ]; + res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 17 ], interpol_ptr[ 0 ] ); + res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 16 ], interpol_ptr[ 1 ] ); + res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 15 ], interpol_ptr[ 2 ] ); + res_Q6 = silk_SMLAWB( res_Q6, 
buf_ptr[ 14 ], interpol_ptr[ 3 ] ); + res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 13 ], interpol_ptr[ 4 ] ); + res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 12 ], interpol_ptr[ 5 ] ); + res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 11 ], interpol_ptr[ 6 ] ); + res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 10 ], interpol_ptr[ 7 ] ); + res_Q6 = silk_SMLAWB( res_Q6, buf_ptr[ 9 ], interpol_ptr[ 8 ] ); + + /* Scale down, saturate and store in output array */ + *out++ = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( res_Q6, 6 ) ); + } + break; + case RESAMPLER_DOWN_ORDER_FIR1: + for( index_Q16 = 0; index_Q16 < max_index_Q16; index_Q16 += index_increment_Q16 ) { + /* Integer part gives pointer to buffered input */ + buf_ptr = buf + silk_RSHIFT( index_Q16, 16 ); + + /* Inner product */ + res_Q6 = silk_SMULWB( silk_ADD32( buf_ptr[ 0 ], buf_ptr[ 23 ] ), FIR_Coefs[ 0 ] ); + res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 1 ], buf_ptr[ 22 ] ), FIR_Coefs[ 1 ] ); + res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 2 ], buf_ptr[ 21 ] ), FIR_Coefs[ 2 ] ); + res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 3 ], buf_ptr[ 20 ] ), FIR_Coefs[ 3 ] ); + res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 4 ], buf_ptr[ 19 ] ), FIR_Coefs[ 4 ] ); + res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 5 ], buf_ptr[ 18 ] ), FIR_Coefs[ 5 ] ); + res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 6 ], buf_ptr[ 17 ] ), FIR_Coefs[ 6 ] ); + res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 7 ], buf_ptr[ 16 ] ), FIR_Coefs[ 7 ] ); + res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 8 ], buf_ptr[ 15 ] ), FIR_Coefs[ 8 ] ); + res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 9 ], buf_ptr[ 14 ] ), FIR_Coefs[ 9 ] ); + res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 10 ], buf_ptr[ 13 ] ), FIR_Coefs[ 10 ] ); + res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 11 ], buf_ptr[ 12 ] ), FIR_Coefs[ 11 ] ); + + /* Scale down, saturate and store in output array */ + *out++ = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( res_Q6, 6 ) ); + 
} + break; + case RESAMPLER_DOWN_ORDER_FIR2: + for( index_Q16 = 0; index_Q16 < max_index_Q16; index_Q16 += index_increment_Q16 ) { + /* Integer part gives pointer to buffered input */ + buf_ptr = buf + silk_RSHIFT( index_Q16, 16 ); + + /* Inner product */ + res_Q6 = silk_SMULWB( silk_ADD32( buf_ptr[ 0 ], buf_ptr[ 35 ] ), FIR_Coefs[ 0 ] ); + res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 1 ], buf_ptr[ 34 ] ), FIR_Coefs[ 1 ] ); + res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 2 ], buf_ptr[ 33 ] ), FIR_Coefs[ 2 ] ); + res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 3 ], buf_ptr[ 32 ] ), FIR_Coefs[ 3 ] ); + res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 4 ], buf_ptr[ 31 ] ), FIR_Coefs[ 4 ] ); + res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 5 ], buf_ptr[ 30 ] ), FIR_Coefs[ 5 ] ); + res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 6 ], buf_ptr[ 29 ] ), FIR_Coefs[ 6 ] ); + res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 7 ], buf_ptr[ 28 ] ), FIR_Coefs[ 7 ] ); + res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 8 ], buf_ptr[ 27 ] ), FIR_Coefs[ 8 ] ); + res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 9 ], buf_ptr[ 26 ] ), FIR_Coefs[ 9 ] ); + res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 10 ], buf_ptr[ 25 ] ), FIR_Coefs[ 10 ] ); + res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 11 ], buf_ptr[ 24 ] ), FIR_Coefs[ 11 ] ); + res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 12 ], buf_ptr[ 23 ] ), FIR_Coefs[ 12 ] ); + res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 13 ], buf_ptr[ 22 ] ), FIR_Coefs[ 13 ] ); + res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 14 ], buf_ptr[ 21 ] ), FIR_Coefs[ 14 ] ); + res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 15 ], buf_ptr[ 20 ] ), FIR_Coefs[ 15 ] ); + res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 16 ], buf_ptr[ 19 ] ), FIR_Coefs[ 16 ] ); + res_Q6 = silk_SMLAWB( res_Q6, silk_ADD32( buf_ptr[ 17 ], buf_ptr[ 18 ] ), FIR_Coefs[ 17 ] ); + + /* Scale down, saturate and store in output array */ + 
*out++ = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( res_Q6, 6 ) ); + } + break; + default: + silk_assert( 0 ); + } + return out; +} + +/* Resample with a 2nd order AR filter followed by FIR interpolation */ +void silk_resampler_private_down_FIR( + void *SS, /* I/O Resampler state */ + opus_int16 out[], /* O Output signal */ + const opus_int16 in[], /* I Input signal */ + opus_int32 inLen /* I Number of input samples */ +) +{ + silk_resampler_state_struct *S = (silk_resampler_state_struct *)SS; + opus_int32 nSamplesIn; + opus_int32 max_index_Q16, index_increment_Q16; + VARDECL( opus_int32, buf ); + const opus_int16 *FIR_Coefs; + SAVE_STACK; + + ALLOC( buf, S->batchSize + S->FIR_Order, opus_int32 ); + + /* Copy buffered samples to start of buffer */ + silk_memcpy( buf, S->sFIR.i32, S->FIR_Order * sizeof( opus_int32 ) ); + + FIR_Coefs = &S->Coefs[ 2 ]; + + /* Iterate over blocks of frameSizeIn input samples */ + index_increment_Q16 = S->invRatio_Q16; + while( 1 ) { + nSamplesIn = silk_min( inLen, S->batchSize ); + + /* Second-order AR filter (output in Q8) */ + silk_resampler_private_AR2( S->sIIR, &buf[ S->FIR_Order ], in, S->Coefs, nSamplesIn ); + + max_index_Q16 = silk_LSHIFT32( nSamplesIn, 16 ); + + /* Interpolate filtered signal */ + out = silk_resampler_private_down_FIR_INTERPOL( out, buf, FIR_Coefs, S->FIR_Order, + S->FIR_Fracs, max_index_Q16, index_increment_Q16 ); + + in += nSamplesIn; + inLen -= nSamplesIn; + + if( inLen > 1 ) { + /* More iterations to do; copy last part of filtered signal to beginning of buffer */ + silk_memcpy( buf, &buf[ nSamplesIn ], S->FIR_Order * sizeof( opus_int32 ) ); + } else { + break; + } + } + + /* Copy last part of filtered signal to the state for the next call */ + silk_memcpy( S->sFIR.i32, &buf[ nSamplesIn ], S->FIR_Order * sizeof( opus_int32 ) ); + RESTORE_STACK; +} diff --git a/drivers/opus/silk/resampler_private_up2_HQ.c b/drivers/opus/silk/resampler_private_up2_HQ.c new file mode 100644 index 0000000000..39f4818454 --- 
/dev/null +++ b/drivers/opus/silk/resampler_private_up2_HQ.c @@ -0,0 +1,113 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "SigProc_FIX.h" +#include "resampler_private.h" + +/* Upsample by a factor 2, high quality */ +/* Uses 2nd order allpass filters for the 2x upsampling, followed by a */ +/* notch filter just above Nyquist. */ +void silk_resampler_private_up2_HQ( + opus_int32 *S, /* I/O Resampler state [ 6 ] */ + opus_int16 *out, /* O Output signal [ 2 * len ] */ + const opus_int16 *in, /* I Input signal [ len ] */ + opus_int32 len /* I Number of input samples */ +) +{ + opus_int32 k; + opus_int32 in32, out32_1, out32_2, Y, X; + + silk_assert( silk_resampler_up2_hq_0[ 0 ] > 0 ); + silk_assert( silk_resampler_up2_hq_0[ 1 ] > 0 ); + silk_assert( silk_resampler_up2_hq_0[ 2 ] < 0 ); + silk_assert( silk_resampler_up2_hq_1[ 0 ] > 0 ); + silk_assert( silk_resampler_up2_hq_1[ 1 ] > 0 ); + silk_assert( silk_resampler_up2_hq_1[ 2 ] < 0 ); + + /* Internal variables and state are in Q10 format */ + for( k = 0; k < len; k++ ) { + /* Convert to Q10 */ + in32 = silk_LSHIFT( (opus_int32)in[ k ], 10 ); + + /* First all-pass section for even output sample */ + Y = silk_SUB32( in32, S[ 0 ] ); + X = silk_SMULWB( Y, silk_resampler_up2_hq_0[ 0 ] ); + out32_1 = silk_ADD32( S[ 0 ], X ); + S[ 0 ] = silk_ADD32( in32, X ); + + /* Second all-pass section for even output sample */ + Y = silk_SUB32( out32_1, S[ 1 ] ); + X = silk_SMULWB( Y, silk_resampler_up2_hq_0[ 1 ] ); + out32_2 = silk_ADD32( S[ 1 ], X ); + S[ 1 ] = silk_ADD32( out32_1, X ); + + /* Third all-pass section for even output sample */ + Y = silk_SUB32( out32_2, S[ 2 ] ); + X = silk_SMLAWB( Y, Y, silk_resampler_up2_hq_0[ 2 ] ); + out32_1 = silk_ADD32( S[ 2 ], X ); + S[ 2 ] = silk_ADD32( out32_2, X ); + + /* Apply gain in Q15, convert back to int16 and store to output */ + out[ 2 * k ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( out32_1, 10 ) ); + + /* First all-pass section for odd output sample */ + Y 
= silk_SUB32( in32, S[ 3 ] ); + X = silk_SMULWB( Y, silk_resampler_up2_hq_1[ 0 ] ); + out32_1 = silk_ADD32( S[ 3 ], X ); + S[ 3 ] = silk_ADD32( in32, X ); + + /* Second all-pass section for odd output sample */ + Y = silk_SUB32( out32_1, S[ 4 ] ); + X = silk_SMULWB( Y, silk_resampler_up2_hq_1[ 1 ] ); + out32_2 = silk_ADD32( S[ 4 ], X ); + S[ 4 ] = silk_ADD32( out32_1, X ); + + /* Third all-pass section for odd output sample */ + Y = silk_SUB32( out32_2, S[ 5 ] ); + X = silk_SMLAWB( Y, Y, silk_resampler_up2_hq_1[ 2 ] ); + out32_1 = silk_ADD32( S[ 5 ], X ); + S[ 5 ] = silk_ADD32( out32_2, X ); + + /* Apply gain in Q15, convert back to int16 and store to output */ + out[ 2 * k + 1 ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( out32_1, 10 ) ); + } +} + +void silk_resampler_private_up2_HQ_wrapper( + void *SS, /* I/O Resampler state (its sIIR array is the filter state) */ + opus_int16 *out, /* O Output signal [ 2 * len ] */ + const opus_int16 *in, /* I Input signal [ len ] */ + opus_int32 len /* I Number of input samples */ +) +{ + silk_resampler_state_struct *S = (silk_resampler_state_struct *)SS; + silk_resampler_private_up2_HQ( S->sIIR, out, in, len ); +} diff --git a/drivers/opus/silk/resampler_rom.c b/drivers/opus/silk/resampler_rom.c new file mode 100644 index 0000000000..0098e18ba8 --- /dev/null +++ b/drivers/opus/silk/resampler_rom.c @@ -0,0 +1,96 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +/* Filter coefficients for IIR/FIR polyphase resampling * + * Total size: 179 Words (358 Bytes) */ + +#include "resampler_private.h" + +/* Matlab code for the notch filter coefficients: */ +/* B = [1, 0.147, 1]; A = [1, 0.107, 0.89]; G = 0.93; freqz(G * B, A, 2^14, 16e3); axis([0, 8000, -10, 1]) */ +/* fprintf('\t%6d, %6d, %6d, %6d\n', round(B(2)*2^16), round(-A(2)*2^16), round((1-A(3))*2^16), round(G*2^15)) */ +/* const opus_int16 silk_resampler_up2_hq_notch[ 4 ] = { 9634, -7012, 7209, 30474 }; */ + +/* Tables with IIR and FIR coefficients for fractional downsamplers (123 Words) */ +silk_DWORD_ALIGN const opus_int16 silk_Resampler_3_4_COEFS[ 2 + 3 * RESAMPLER_DOWN_ORDER_FIR0 / 2 ] = { + -20694, -13867, + -49, 64, 17, -157, 353, -496, 163, 11047, 22205, + -39, 6, 91, -170, 186, 23, -896, 6336, 19928, + -19, -36, 102, -89, -24, 328, -951, 2568, 15909, +}; + 
+silk_DWORD_ALIGN const opus_int16 silk_Resampler_2_3_COEFS[ 2 + 2 * RESAMPLER_DOWN_ORDER_FIR0 / 2 ] = { + -14457, -14019, + 64, 128, -122, 36, 310, -768, 584, 9267, 17733, + 12, 128, 18, -142, 288, -117, -865, 4123, 14459, +}; + +silk_DWORD_ALIGN const opus_int16 silk_Resampler_1_2_COEFS[ 2 + RESAMPLER_DOWN_ORDER_FIR1 / 2 ] = { + 616, -14323, + -10, 39, 58, -46, -84, 120, 184, -315, -541, 1284, 5380, 9024, +}; + +silk_DWORD_ALIGN const opus_int16 silk_Resampler_1_3_COEFS[ 2 + RESAMPLER_DOWN_ORDER_FIR2 / 2 ] = { + 16102, -15162, + -13, 0, 20, 26, 5, -31, -43, -4, 65, 90, 7, -157, -248, -44, 593, 1583, 2612, 3271, +}; + +silk_DWORD_ALIGN const opus_int16 silk_Resampler_1_4_COEFS[ 2 + RESAMPLER_DOWN_ORDER_FIR2 / 2 ] = { + 22500, -15099, + 3, -14, -20, -15, 2, 25, 37, 25, -16, -71, -107, -79, 50, 292, 623, 982, 1288, 1464, +}; + +silk_DWORD_ALIGN const opus_int16 silk_Resampler_1_6_COEFS[ 2 + RESAMPLER_DOWN_ORDER_FIR2 / 2 ] = { + 27540, -15257, + 17, 12, 8, 1, -10, -22, -30, -32, -22, 3, 44, 100, 168, 243, 317, 381, 429, 455, +}; + +silk_DWORD_ALIGN const opus_int16 silk_Resampler_2_3_COEFS_LQ[ 2 + 2 * 2 ] = { + -2797, -6507, + 4697, 10739, + 1567, 8276, +}; + +/* Table with interpolation fractions of 1/24, 3/24, 5/24, ...
, 23/24 : 23/24 (46 Words) */ +silk_DWORD_ALIGN const opus_int16 silk_resampler_frac_FIR_12[ 12 ][ RESAMPLER_ORDER_FIR_12 / 2 ] = { + { 189, -600, 617, 30567 }, + { 117, -159, -1070, 29704 }, + { 52, 221, -2392, 28276 }, + { -4, 529, -3350, 26341 }, + { -48, 758, -3956, 23973 }, + { -80, 905, -4235, 21254 }, + { -99, 972, -4222, 18278 }, + { -107, 967, -3957, 15143 }, + { -103, 896, -3487, 11950 }, + { -91, 773, -2865, 8798 }, + { -71, 611, -2143, 5784 }, + { -46, 425, -1375, 2996 }, +}; diff --git a/drivers/opus/silk/resampler_rom.h b/drivers/opus/silk/resampler_rom.h new file mode 100644 index 0000000000..490b3388dc --- /dev/null +++ b/drivers/opus/silk/resampler_rom.h @@ -0,0 +1,68 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifndef SILK_FIX_RESAMPLER_ROM_H +#define SILK_FIX_RESAMPLER_ROM_H + +#ifdef __cplusplus +extern "C" +{ +#endif + +#include "typedef.h" +#include "resampler_structs.h" + +#define RESAMPLER_DOWN_ORDER_FIR0 18 +#define RESAMPLER_DOWN_ORDER_FIR1 24 +#define RESAMPLER_DOWN_ORDER_FIR2 36 +#define RESAMPLER_ORDER_FIR_12 8 + +/* Tables for 2x downsampler */ +static const opus_int16 silk_resampler_down2_0 = 9872; +static const opus_int16 silk_resampler_down2_1 = 39809 - 65536; + +/* Tables for 2x upsampler, high quality */ +static const opus_int16 silk_resampler_up2_hq_0[ 3 ] = { 1746, 14986, 39083 - 65536 }; +static const opus_int16 silk_resampler_up2_hq_1[ 3 ] = { 6854, 25769, 55542 - 65536 }; + +/* Tables with IIR and FIR coefficients for fractional downsamplers */ +extern const opus_int16 silk_Resampler_3_4_COEFS[ 2 + 3 * RESAMPLER_DOWN_ORDER_FIR0 / 2 ]; +extern const opus_int16 silk_Resampler_2_3_COEFS[ 2 + 2 * RESAMPLER_DOWN_ORDER_FIR0 / 2 ]; +extern const opus_int16 silk_Resampler_1_2_COEFS[ 2 + RESAMPLER_DOWN_ORDER_FIR1 / 2 ]; +extern const opus_int16 silk_Resampler_1_3_COEFS[ 2 + RESAMPLER_DOWN_ORDER_FIR2 / 2 ]; +extern const opus_int16 silk_Resampler_1_4_COEFS[ 2 + RESAMPLER_DOWN_ORDER_FIR2 / 2 ]; +extern const opus_int16 silk_Resampler_1_6_COEFS[ 2 + RESAMPLER_DOWN_ORDER_FIR2 / 2 ]; +extern const opus_int16 silk_Resampler_2_3_COEFS_LQ[ 2 + 2 * 2 ]; + +/* Table with 
interpolation fractions of 1/24, 3/24, ..., 23/24 */ +extern const opus_int16 silk_resampler_frac_FIR_12[ 12 ][ RESAMPLER_ORDER_FIR_12 / 2 ]; + +#ifdef __cplusplus +} +#endif + +#endif /* SILK_FIX_RESAMPLER_ROM_H */ diff --git a/drivers/opus/silk/resampler_structs.h b/drivers/opus/silk/resampler_structs.h new file mode 100644 index 0000000000..9e9457d11c --- /dev/null +++ b/drivers/opus/silk/resampler_structs.h @@ -0,0 +1,60 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifndef SILK_RESAMPLER_STRUCTS_H +#define SILK_RESAMPLER_STRUCTS_H + +#ifdef __cplusplus +extern "C" { +#endif + +#define SILK_RESAMPLER_MAX_FIR_ORDER 36 +#define SILK_RESAMPLER_MAX_IIR_ORDER 6 + +typedef struct _silk_resampler_state_struct{ + opus_int32 sIIR[ SILK_RESAMPLER_MAX_IIR_ORDER ]; /* this must be the first element of this struct */ + union{ + opus_int32 i32[ SILK_RESAMPLER_MAX_FIR_ORDER ]; + opus_int16 i16[ SILK_RESAMPLER_MAX_FIR_ORDER ]; + } sFIR; + opus_int16 delayBuf[ 48 ]; + opus_int resampler_function; + opus_int batchSize; + opus_int32 invRatio_Q16; + opus_int FIR_Order; + opus_int FIR_Fracs; + opus_int Fs_in_kHz; + opus_int Fs_out_kHz; + opus_int inputDelay; + const opus_int16 *Coefs; +} silk_resampler_state_struct; + +#ifdef __cplusplus +} +#endif +#endif /* SILK_RESAMPLER_STRUCTS_H */ + diff --git a/drivers/opus/silk/shell_coder.c b/drivers/opus/silk/shell_coder.c new file mode 100644 index 0000000000..79e392bd98 --- /dev/null +++ b/drivers/opus/silk/shell_coder.c @@ -0,0 +1,151 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. 
+Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "silk_main.h" + +/* shell coder; pulse-subframe length is hardcoded */ + +static OPUS_INLINE void combine_pulses( + opus_int *out, /* O combined pulses vector [len] */ + const opus_int *in, /* I input vector [2 * len] */ + const opus_int len /* I number of OUTPUT samples */ +) +{ + opus_int k; + for( k = 0; k < len; k++ ) { + out[ k ] = in[ 2 * k ] + in[ 2 * k + 1 ]; + } +} + +static OPUS_INLINE void encode_split( + ec_enc *psRangeEnc, /* I/O compressor data structure */ + const opus_int p_child1, /* I pulse amplitude of first child subframe */ + const opus_int p, /* I pulse amplitude of current subframe */ + const opus_uint8 *shell_table /* I table of shell cdfs */ +) +{ + if( p > 0 ) { + ec_enc_icdf( psRangeEnc, p_child1, &shell_table[ silk_shell_code_table_offsets[ p ] ], 8 ); + } +} + +static OPUS_INLINE void decode_split( + opus_int *p_child1, /* O pulse amplitude of first child subframe */ + opus_int *p_child2, /* O pulse amplitude of second child subframe */ + ec_dec *psRangeDec, /* I/O Compressor data structure */ + const opus_int p, /* I pulse amplitude of current subframe */ + const opus_uint8 *shell_table /* I table of shell cdfs */ +) +{ + if( p > 0 ) { + p_child1[ 0 ] = ec_dec_icdf( psRangeDec, &shell_table[ silk_shell_code_table_offsets[ p ] ], 8 ); + p_child2[ 0 ] = p - p_child1[ 0 ]; + } else { + p_child1[ 0 ] = 0; + p_child2[ 0 ] = 0; + } +} + +/* Shell encoder, operates on one shell code frame of 16 pulses */ +void silk_shell_encoder( + ec_enc *psRangeEnc, /* I/O compressor data structure */ + const opus_int *pulses0 /* I data: nonnegative pulse amplitudes */ +) +{ + opus_int pulses1[ 8 ], pulses2[ 4 ], pulses3[ 2 ], pulses4[ 1 ]; + + /* this function operates on one shell code frame of 16 pulses */ + silk_assert( SHELL_CODEC_FRAME_LENGTH == 16 ); + + /* tree representation per pulse-subframe */ + 
combine_pulses( pulses1, pulses0, 8 ); + combine_pulses( pulses2, pulses1, 4 ); + combine_pulses( pulses3, pulses2, 2 ); + combine_pulses( pulses4, pulses3, 1 ); + + encode_split( psRangeEnc, pulses3[ 0 ], pulses4[ 0 ], silk_shell_code_table3 ); + + encode_split( psRangeEnc, pulses2[ 0 ], pulses3[ 0 ], silk_shell_code_table2 ); + + encode_split( psRangeEnc, pulses1[ 0 ], pulses2[ 0 ], silk_shell_code_table1 ); + encode_split( psRangeEnc, pulses0[ 0 ], pulses1[ 0 ], silk_shell_code_table0 ); + encode_split( psRangeEnc, pulses0[ 2 ], pulses1[ 1 ], silk_shell_code_table0 ); + + encode_split( psRangeEnc, pulses1[ 2 ], pulses2[ 1 ], silk_shell_code_table1 ); + encode_split( psRangeEnc, pulses0[ 4 ], pulses1[ 2 ], silk_shell_code_table0 ); + encode_split( psRangeEnc, pulses0[ 6 ], pulses1[ 3 ], silk_shell_code_table0 ); + + encode_split( psRangeEnc, pulses2[ 2 ], pulses3[ 1 ], silk_shell_code_table2 ); + + encode_split( psRangeEnc, pulses1[ 4 ], pulses2[ 2 ], silk_shell_code_table1 ); + encode_split( psRangeEnc, pulses0[ 8 ], pulses1[ 4 ], silk_shell_code_table0 ); + encode_split( psRangeEnc, pulses0[ 10 ], pulses1[ 5 ], silk_shell_code_table0 ); + + encode_split( psRangeEnc, pulses1[ 6 ], pulses2[ 3 ], silk_shell_code_table1 ); + encode_split( psRangeEnc, pulses0[ 12 ], pulses1[ 6 ], silk_shell_code_table0 ); + encode_split( psRangeEnc, pulses0[ 14 ], pulses1[ 7 ], silk_shell_code_table0 ); +} + + +/* Shell decoder, operates on one shell code frame of 16 pulses */ +void silk_shell_decoder( + opus_int *pulses0, /* O data: nonnegative pulse amplitudes */ + ec_dec *psRangeDec, /* I/O Compressor data structure */ + const opus_int pulses4 /* I number of pulses per pulse-subframe */ +) +{ + opus_int pulses3[ 2 ], pulses2[ 4 ], pulses1[ 8 ]; + + /* this function operates on one shell code frame of 16 pulses */ + silk_assert( SHELL_CODEC_FRAME_LENGTH == 16 ); + + decode_split( &pulses3[ 0 ], &pulses3[ 1 ], psRangeDec, pulses4, silk_shell_code_table3 ); + + decode_split( 
&pulses2[ 0 ], &pulses2[ 1 ], psRangeDec, pulses3[ 0 ], silk_shell_code_table2 ); + + decode_split( &pulses1[ 0 ], &pulses1[ 1 ], psRangeDec, pulses2[ 0 ], silk_shell_code_table1 ); + decode_split( &pulses0[ 0 ], &pulses0[ 1 ], psRangeDec, pulses1[ 0 ], silk_shell_code_table0 ); + decode_split( &pulses0[ 2 ], &pulses0[ 3 ], psRangeDec, pulses1[ 1 ], silk_shell_code_table0 ); + + decode_split( &pulses1[ 2 ], &pulses1[ 3 ], psRangeDec, pulses2[ 1 ], silk_shell_code_table1 ); + decode_split( &pulses0[ 4 ], &pulses0[ 5 ], psRangeDec, pulses1[ 2 ], silk_shell_code_table0 ); + decode_split( &pulses0[ 6 ], &pulses0[ 7 ], psRangeDec, pulses1[ 3 ], silk_shell_code_table0 ); + + decode_split( &pulses2[ 2 ], &pulses2[ 3 ], psRangeDec, pulses3[ 1 ], silk_shell_code_table2 ); + + decode_split( &pulses1[ 4 ], &pulses1[ 5 ], psRangeDec, pulses2[ 2 ], silk_shell_code_table1 ); + decode_split( &pulses0[ 8 ], &pulses0[ 9 ], psRangeDec, pulses1[ 4 ], silk_shell_code_table0 ); + decode_split( &pulses0[ 10 ], &pulses0[ 11 ], psRangeDec, pulses1[ 5 ], silk_shell_code_table0 ); + + decode_split( &pulses1[ 6 ], &pulses1[ 7 ], psRangeDec, pulses2[ 3 ], silk_shell_code_table1 ); + decode_split( &pulses0[ 12 ], &pulses0[ 13 ], psRangeDec, pulses1[ 6 ], silk_shell_code_table0 ); + decode_split( &pulses0[ 14 ], &pulses0[ 15 ], psRangeDec, pulses1[ 7 ], silk_shell_code_table0 ); +} diff --git a/drivers/opus/silk/sigm_Q15.c b/drivers/opus/silk/sigm_Q15.c new file mode 100644 index 0000000000..2df5b9695c --- /dev/null +++ b/drivers/opus/silk/sigm_Q15.c @@ -0,0 +1,76 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. 
+- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +/* Approximate sigmoid function */ + +#include "SigProc_FIX.h" + +/* fprintf(1, '%d, ', round(1024 * ([1 ./ (1 + exp(-(1:5))), 1] - 1 ./ (1 + exp(-(0:5)))))); */ +static const opus_int32 sigm_LUT_slope_Q10[ 6 ] = { + 237, 153, 73, 30, 12, 7 +}; +/* fprintf(1, '%d, ', round(32767 * 1 ./ (1 + exp(-(0:5))))); */ +static const opus_int32 sigm_LUT_pos_Q15[ 6 ] = { + 16384, 23955, 28861, 31213, 32178, 32548 +}; +/* fprintf(1, '%d, ', round(32767 * 1 ./ (1 + exp((0:5))))); */ +static const opus_int32 sigm_LUT_neg_Q15[ 6 ] = { + 16384, 8812, 3906, 1554, 589, 219 +}; + +opus_int silk_sigm_Q15( + opus_int in_Q5 /* I */ +) +{ + opus_int ind; + + if( in_Q5 < 0 ) { + /* Negative input */ + in_Q5 = -in_Q5; + if( in_Q5 >= 6 * 32 ) { + return 0; /* Clip */ + } else { + /* Linear interpolation of look up table */ + ind = silk_RSHIFT( in_Q5, 5 ); + return( sigm_LUT_neg_Q15[ ind ] - silk_SMULBB( sigm_LUT_slope_Q10[ ind ], in_Q5 & 0x1F ) ); + } + } else { + /* Positive input */ + if( in_Q5 >= 6 * 32 ) { + return 32767; /* clip */ + } else { + /* Linear interpolation of look up table */ + ind = silk_RSHIFT( in_Q5, 5 ); + return( sigm_LUT_pos_Q15[ ind ] + silk_SMULBB( sigm_LUT_slope_Q10[ ind ], in_Q5 & 0x1F ) ); + } + } +} + diff --git a/drivers/opus/silk/silk_main.h b/drivers/opus/silk/silk_main.h new file mode 100644 index 0000000000..2bdf89784d --- /dev/null +++ b/drivers/opus/silk/silk_main.h @@ -0,0 +1,438 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. 
+- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+***********************************************************************/ + +#ifndef SILK_MAIN_H +#define SILK_MAIN_H + +#include "SigProc_FIX.h" +#include "define.h" +#include "structs.h" +#include "tables.h" +#include "PLC.h" +#include "control.h" +#include "debug.h" +#include "entenc.h" +#include "entdec.h" + +/* Convert Left/Right stereo signal to adaptive Mid/Side representation */ +void silk_stereo_LR_to_MS( + stereo_enc_state *state, /* I/O State */ + opus_int16 x1[], /* I/O Left input signal, becomes mid signal */ + opus_int16 x2[], /* I/O Right input signal, becomes side signal */ + opus_int8 ix[ 2 ][ 3 ], /* O Quantization indices */ + opus_int8 *mid_only_flag, /* O Flag: only mid signal coded */ + opus_int32 mid_side_rates_bps[], /* O Bitrates for mid and side signals */ + opus_int32 total_rate_bps, /* I Total bitrate */ + opus_int prev_speech_act_Q8, /* I Speech activity level in previous frame */ + opus_int toMono, /* I Last frame before a stereo->mono transition */ + opus_int fs_kHz, /* I Sample rate (kHz) */ + opus_int frame_length /* I Number of samples */ +); + +/* Convert adaptive Mid/Side representation to Left/Right stereo signal */ +void silk_stereo_MS_to_LR( + stereo_dec_state *state, /* I/O State */ + opus_int16 x1[], /* I/O Mid input signal, becomes left signal */ + opus_int16 x2[], /* I/O Side input signal, becomes right signal */ + const opus_int32 pred_Q13[], /* I Predictors */ + opus_int fs_kHz, /* I Sample rate (kHz) */ + opus_int frame_length /* I Number of samples */ +); + +/* Find least-squares prediction gain for one signal based on another and quantize it */ +opus_int32 silk_stereo_find_predictor( /* O Returns predictor in Q13 */ + opus_int32 *ratio_Q14, /* O Ratio of residual and mid energies */ + const opus_int16 x[], /* I Basis signal */ + const opus_int16 y[], /* I Target signal */ + opus_int32 mid_res_amp_Q0[], /* I/O Smoothed mid, residual norms */ + opus_int length, /* I Number of samples */ + opus_int smooth_coef_Q16 /* I
Smoothing coefficient */ +); + +/* Quantize mid/side predictors */ +void silk_stereo_quant_pred( + opus_int32 pred_Q13[], /* I/O Predictors (out: quantized) */ + opus_int8 ix[ 2 ][ 3 ] /* O Quantization indices */ +); + +/* Entropy code the mid/side quantization indices */ +void silk_stereo_encode_pred( + ec_enc *psRangeEnc, /* I/O Compressor data structure */ + opus_int8 ix[ 2 ][ 3 ] /* I Quantization indices */ +); + +/* Entropy code the mid-only flag */ +void silk_stereo_encode_mid_only( + ec_enc *psRangeEnc, /* I/O Compressor data structure */ + opus_int8 mid_only_flag +); + +/* Decode mid/side predictors */ +void silk_stereo_decode_pred( + ec_dec *psRangeDec, /* I/O Compressor data structure */ + opus_int32 pred_Q13[] /* O Predictors */ +); + +/* Decode mid-only flag */ +void silk_stereo_decode_mid_only( + ec_dec *psRangeDec, /* I/O Compressor data structure */ + opus_int *decode_only_mid /* O Flag that only mid channel has been coded */ +); + +/* Encodes signs of excitation */ +void silk_encode_signs( + ec_enc *psRangeEnc, /* I/O Compressor data structure */ + const opus_int8 pulses[], /* I pulse signal */ + opus_int length, /* I length of input */ + const opus_int signalType, /* I Signal type */ + const opus_int quantOffsetType, /* I Quantization offset type */ + const opus_int sum_pulses[ MAX_NB_SHELL_BLOCKS ] /* I Sum of absolute pulses per block */ +); + +/* Decodes signs of excitation */ +void silk_decode_signs( + ec_dec *psRangeDec, /* I/O Compressor data structure */ + opus_int pulses[], /* I/O pulse signal */ + opus_int length, /* I length of input */ + const opus_int signalType, /* I Signal type */ + const opus_int quantOffsetType, /* I Quantization offset type */ + const opus_int sum_pulses[ MAX_NB_SHELL_BLOCKS ] /* I Sum of absolute pulses per block */ +); + +/* Check encoder control struct */ +opus_int check_control_input( + silk_EncControlStruct *encControl /* I Control structure */ +); + +/* Control internal sampling rate */ +opus_int 
silk_control_audio_bandwidth( + silk_encoder_state *psEncC, /* I/O Pointer to Silk encoder state */ + silk_EncControlStruct *encControl /* I Control structure */ +); + +/* Control SNR of residual quantizer */ +opus_int silk_control_SNR( + silk_encoder_state *psEncC, /* I/O Pointer to Silk encoder state */ + opus_int32 TargetRate_bps /* I Target max bitrate (bps) */ +); + +/***************/ +/* Shell coder */ +/***************/ + +/* Encode quantization indices of excitation */ +void silk_encode_pulses( + ec_enc *psRangeEnc, /* I/O compressor data structure */ + const opus_int signalType, /* I Signal type */ + const opus_int quantOffsetType, /* I quantOffsetType */ + opus_int8 pulses[], /* I quantization indices */ + const opus_int frame_length /* I Frame length */ +); + +/* Shell encoder, operates on one shell code frame of 16 pulses */ +void silk_shell_encoder( + ec_enc *psRangeEnc, /* I/O compressor data structure */ + const opus_int *pulses0 /* I data: nonnegative pulse amplitudes */ +); + +/* Shell decoder, operates on one shell code frame of 16 pulses */ +void silk_shell_decoder( + opus_int *pulses0, /* O data: nonnegative pulse amplitudes */ + ec_dec *psRangeDec, /* I/O Compressor data structure */ + const opus_int pulses4 /* I number of pulses per pulse-subframe */ +); + +/* Gain scalar quantization with hysteresis, uniform on log scale */ +void silk_gains_quant( + opus_int8 ind[ MAX_NB_SUBFR ], /* O gain indices */ + opus_int32 gain_Q16[ MAX_NB_SUBFR ], /* I/O gains (quantized out) */ + opus_int8 *prev_ind, /* I/O last index in previous frame */ + const opus_int conditional, /* I first gain is delta coded if 1 */ + const opus_int nb_subfr /* I number of subframes */ +); + +/* Gains scalar dequantization, uniform on log scale */ +void silk_gains_dequant( + opus_int32 gain_Q16[ MAX_NB_SUBFR ], /* O quantized gains */ + const opus_int8 ind[ MAX_NB_SUBFR ], /* I gain indices */ + opus_int8 *prev_ind, /* I/O last index in previous frame */ + const opus_int
conditional, /* I first gain is delta coded if 1 */ + const opus_int nb_subfr /* I number of subframes */ +); + +/* Compute unique identifier of gain indices vector */ +opus_int32 silk_gains_ID( /* O returns unique identifier of gains */ + const opus_int8 ind[ MAX_NB_SUBFR ], /* I gain indices */ + const opus_int nb_subfr /* I number of subframes */ +); + +/* Interpolate two vectors */ +void silk_interpolate( + opus_int16 xi[ MAX_LPC_ORDER ], /* O interpolated vector */ + const opus_int16 x0[ MAX_LPC_ORDER ], /* I first vector */ + const opus_int16 x1[ MAX_LPC_ORDER ], /* I second vector */ + const opus_int ifact_Q2, /* I interp. factor, weight on 2nd vector */ + const opus_int d /* I number of parameters */ +); + +/* LTP tap quantizer */ +void silk_quant_LTP_gains( + opus_int16 B_Q14[ MAX_NB_SUBFR * LTP_ORDER ], /* I/O (un)quantized LTP gains */ + opus_int8 cbk_index[ MAX_NB_SUBFR ], /* O Codebook Index */ + opus_int8 *periodicity_index, /* O Periodicity Index */ + opus_int32 *sum_gain_dB_Q7, /* I/O Cumulative max prediction gain */ + const opus_int32 W_Q18[ MAX_NB_SUBFR*LTP_ORDER*LTP_ORDER ], /* I Error Weights in Q18 */ + opus_int mu_Q9, /* I Mu value (R/D tradeoff) */ + opus_int lowComplexity, /* I Flag for low complexity */ + const opus_int nb_subfr /* I number of subframes */ +); + +/* Entropy constrained matrix-weighted VQ, for a single input data vector */ +void silk_VQ_WMat_EC( + opus_int8 *ind, /* O index of best codebook vector */ + opus_int32 *rate_dist_Q14, /* O best weighted quant error + mu * rate */ + opus_int *gain_Q7, /* O sum of absolute LTP coefficients */ + const opus_int16 *in_Q14, /* I input vector to be quantized */ + const opus_int32 *W_Q18, /* I weighting matrix */ + const opus_int8 *cb_Q7, /* I codebook */ + const opus_uint8 *cb_gain_Q7, /* I codebook effective gain */ + const opus_uint8 *cl_Q5, /* I code length for each codebook vector */ + const opus_int mu_Q9, /* I tradeoff betw. 
weighted error and rate */ + const opus_int32 max_gain_Q7, /* I maximum sum of absolute LTP coefficients */ + opus_int L /* I number of vectors in codebook */ +); + +/************************************/ +/* Noise shaping quantization (NSQ) */ +/************************************/ +void silk_NSQ( + const silk_encoder_state *psEncC, /* I/O Encoder State */ + silk_nsq_state *NSQ, /* I/O NSQ state */ + SideInfoIndices *psIndices, /* I/O Quantization Indices */ + const opus_int32 x_Q3[], /* I Prefiltered input signal */ + opus_int8 pulses[], /* O Quantized pulse signal */ + const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ + const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ + const opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ + const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ + const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ + const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ + const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ + const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ + const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ + const opus_int LTP_scale_Q14 /* I LTP state scaling */ +); + +/* Noise shaping using delayed decision */ +void silk_NSQ_del_dec( + const silk_encoder_state *psEncC, /* I/O Encoder State */ + silk_nsq_state *NSQ, /* I/O NSQ state */ + SideInfoIndices *psIndices, /* I/O Quantization Indices */ + const opus_int32 x_Q3[], /* I Prefiltered input signal */ + opus_int8 pulses[], /* O Quantized pulse signal */ + const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */ + const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */ + const opus_int16 AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */ + const opus_int 
HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */ + const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */ + const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */ + const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */ + const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */ + const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */ + const opus_int LTP_scale_Q14 /* I LTP state scaling */ +); + +/************/ +/* Silk VAD */ +/************/ +/* Initialize the Silk VAD */ +opus_int silk_VAD_Init( /* O Return value, 0 if success */ + silk_VAD_state *psSilk_VAD /* I/O Pointer to Silk VAD state */ +); + +/* Get speech activity level in Q8 */ +opus_int silk_VAD_GetSA_Q8( /* O Return value, 0 if success */ + silk_encoder_state *psEncC, /* I/O Encoder state */ + const opus_int16 pIn[] /* I PCM input */ +); + +/* Low-pass filter with variable cutoff frequency based on */ +/* piece-wise linear interpolation between elliptic filters */ +/* Start by setting transition_frame_no = 1; */ +void silk_LP_variable_cutoff( + silk_LP_state *psLP, /* I/O LP filter state */ + opus_int16 *frame, /* I/O Low-pass filtered output signal */ + const opus_int frame_length /* I Frame length */ +); + +/******************/ +/* NLSF Quantizer */ +/******************/ +/* Limit, stabilize, convert and quantize NLSFs */ +void silk_process_NLSFs( + silk_encoder_state *psEncC, /* I/O Encoder state */ + opus_int16 PredCoef_Q12[ 2 ][ MAX_LPC_ORDER ], /* O Prediction coefficients */ + opus_int16 pNLSF_Q15[ MAX_LPC_ORDER ], /* I/O Normalized LSFs (quant out) (0 - (2^15-1)) */ + const opus_int16 prev_NLSFq_Q15[ MAX_LPC_ORDER ] /* I Previous Normalized LSFs (0 - (2^15-1)) */ +); + +opus_int32 silk_NLSF_encode( /* O Returns RD value in Q25 */ + opus_int8 *NLSFIndices, /* I Codebook path vector [ LPC_ORDER + 1 ] */ + opus_int16 *pNLSF_Q15, /* I/O Quantized NLSF vector [ LPC_ORDER ] */ + const silk_NLSF_CB_struct *psNLSF_CB, /* I 
Codebook object */ + const opus_int16 *pW_QW, /* I NLSF weight vector [ LPC_ORDER ] */ + const opus_int NLSF_mu_Q20, /* I Rate weight for the RD optimization */ + const opus_int nSurvivors, /* I Max survivors after first stage */ + const opus_int signalType /* I Signal type: 0/1/2 */ +); + +/* Compute quantization errors for an LPC_order element input vector for a VQ codebook */ +void silk_NLSF_VQ( + opus_int32 err_Q26[], /* O Quantization errors [K] */ + const opus_int16 in_Q15[], /* I Input vectors to be quantized [LPC_order] */ + const opus_uint8 pCB_Q8[], /* I Codebook vectors [K*LPC_order] */ + const opus_int K, /* I Number of codebook vectors */ + const opus_int LPC_order /* I Number of LPCs */ +); + +/* Delayed-decision quantizer for NLSF residuals */ +opus_int32 silk_NLSF_del_dec_quant( /* O Returns RD value in Q25 */ + opus_int8 indices[], /* O Quantization indices [ order ] */ + const opus_int16 x_Q10[], /* I Input [ order ] */ + const opus_int16 w_Q5[], /* I Weights [ order ] */ + const opus_uint8 pred_coef_Q8[], /* I Backward predictor coefs [ order ] */ + const opus_int16 ec_ix[], /* I Indices to entropy coding tables [ order ] */ + const opus_uint8 ec_rates_Q5[], /* I Rates [] */ + const opus_int quant_step_size_Q16, /* I Quantization step size */ + const opus_int16 inv_quant_step_size_Q6, /* I Inverse quantization step size */ + const opus_int32 mu_Q20, /* I R/D tradeoff */ + const opus_int16 order /* I Number of input values */ +); + +/* Unpack predictor values and indices for entropy coding tables */ +void silk_NLSF_unpack( + opus_int16 ec_ix[], /* O Indices to entropy tables [ LPC_ORDER ] */ + opus_uint8 pred_Q8[], /* O LSF predictor [ LPC_ORDER ] */ + const silk_NLSF_CB_struct *psNLSF_CB, /* I Codebook object */ + const opus_int CB1_index /* I Index of vector in first LSF codebook */ +); + +/***********************/ +/* NLSF vector decoder */ +/***********************/ +void silk_NLSF_decode( + opus_int16 *pNLSF_Q15, /* O Quantized NLSF vector [ 
LPC_ORDER ] */ + opus_int8 *NLSFIndices, /* I Codebook path vector [ LPC_ORDER + 1 ] */ + const silk_NLSF_CB_struct *psNLSF_CB /* I Codebook object */ +); + +/****************************************************/ +/* Decoder Functions */ +/****************************************************/ +opus_int silk_init_decoder( + silk_decoder_state *psDec /* I/O Decoder state pointer */ +); + +/* Set decoder sampling rate */ +opus_int silk_decoder_set_fs( + silk_decoder_state *psDec, /* I/O Decoder state pointer */ + opus_int fs_kHz, /* I Sampling frequency (kHz) */ + opus_int32 fs_API_Hz /* I API Sampling frequency (Hz) */ +); + +/****************/ +/* Decode frame */ +/****************/ +opus_int silk_decode_frame( + silk_decoder_state *psDec, /* I/O Pointer to Silk decoder state */ + ec_dec *psRangeDec, /* I/O Compressor data structure */ + opus_int16 pOut[], /* O Pointer to output speech frame */ + opus_int32 *pN, /* O Pointer to size of output frame */ + opus_int lostFlag, /* I 0: no loss, 1 loss, 2 decode fec */ + opus_int condCoding /* I The type of conditional coding to use */ +); + +/* Decode indices from bitstream */ +void silk_decode_indices( + silk_decoder_state *psDec, /* I/O State */ + ec_dec *psRangeDec, /* I/O Compressor data structure */ + opus_int FrameIndex, /* I Frame number */ + opus_int decode_LBRR, /* I Flag indicating LBRR data is being decoded */ + opus_int condCoding /* I The type of conditional coding to use */ +); + +/* Decode parameters from payload */ +void silk_decode_parameters( + silk_decoder_state *psDec, /* I/O State */ + silk_decoder_control *psDecCtrl, /* I/O Decoder control */ + opus_int condCoding /* I The type of conditional coding to use */ +); + +/* Core decoder. 
Performs inverse NSQ operation LTP + LPC */ +void silk_decode_core( + silk_decoder_state *psDec, /* I/O Decoder state */ + silk_decoder_control *psDecCtrl, /* I Decoder control */ + opus_int16 xq[], /* O Decoded speech */ + const opus_int pulses[ MAX_FRAME_LENGTH ] /* I Pulse signal */ +); + +/* Decode quantization indices of excitation (Shell coding) */ +void silk_decode_pulses( + ec_dec *psRangeDec, /* I/O Compressor data structure */ + opus_int pulses[], /* O Excitation signal */ + const opus_int signalType, /* I Sigtype */ + const opus_int quantOffsetType, /* I quantOffsetType */ + const opus_int frame_length /* I Frame length */ +); + +/******************/ +/* CNG */ +/******************/ + +/* Reset CNG */ +void silk_CNG_Reset( + silk_decoder_state *psDec /* I/O Decoder state */ +); + +/* Updates CNG estimate, and applies the CNG when packet was lost */ +void silk_CNG( + silk_decoder_state *psDec, /* I/O Decoder state */ + silk_decoder_control *psDecCtrl, /* I/O Decoder control */ + opus_int16 frame[], /* I/O Signal */ + opus_int length /* I Length of residual */ +); + +/* Encoding of various parameters */ +void silk_encode_indices( + silk_encoder_state *psEncC, /* I/O Encoder state */ + ec_enc *psRangeEnc, /* I/O Compressor data structure */ + opus_int FrameIndex, /* I Frame number */ + opus_int encode_LBRR, /* I Flag indicating LBRR data is being encoded */ + opus_int condCoding /* I The type of conditional coding to use */ +); + +#endif diff --git a/drivers/opus/silk/sort.c b/drivers/opus/silk/sort.c new file mode 100644 index 0000000000..5e9ba08616 --- /dev/null +++ b/drivers/opus/silk/sort.c @@ -0,0 +1,154 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. 
+Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +/* Insertion sort (fast for already almost sorted arrays): */ +/* Best case: O(n) for an already sorted array */ +/* Worst case: O(n^2) for an inversely sorted array */ +/* With K < L only a[ 0..K-1 ] and idx[ 0..K-1 ] are written; a[ K..L-1 ] is only read */ +/* Shell sort: http://en.wikipedia.org/wiki/Shell_sort */ + +#include "SigProc_FIX.h" + +void silk_insertion_sort_increasing( + opus_int32 *a, /* I/O Unsorted / Sorted vector */ + opus_int *idx, /* O Index vector for the sorted elements */ + const opus_int L, /* I Vector length */ + const opus_int K /* I Number of correctly sorted positions */ +) +{ + opus_int32 value; + opus_int i, j; + + /* Safety checks */ + silk_assert( K > 0 ); + silk_assert( L > 0 ); + silk_assert( L >= K ); + + /* Write start indices in index vector */ + for( i = 0; i < K; i++ ) { + idx[ i ] = i; + } + + /* Sort vector elements by value, increasing order */ + for( i = 1; i < K; i++ ) { + value = a[ i ]; + for( j = i - 1; ( j >= 0 ) && ( value < a[ j ] ); j-- ) { + a[ j + 1 ] = a[ j ]; /* Shift value */ + idx[ j + 1 ] = idx[ j ]; /* Shift index */ + } + a[ j + 1 ] = value; /* Write value */ + idx[ j + 1 ] = i; /* Write index */ + } + + /* If fewer than L values are asked for, check the remaining values, */ + /* but only spend CPU to ensure that the K first values are correct */ + for( i = K; i < L; i++ ) { + value = a[ i ]; + if( value < a[ K - 1 ] ) { + for( j = K - 2; ( j >= 0 ) && ( value < a[ j ] ); j-- ) { + a[ j + 1 ] = a[ j ]; /* Shift value */ + idx[ j + 1 ] = idx[ j ]; /* Shift index */ + } + a[ j + 1 ] = value; /* Write value */ + idx[ j + 1 ] = i; /* Write index */ + } + } +} + +#ifdef OPUS_FIXED_POINT +/* This function is only used by the fixed-point build */ +void silk_insertion_sort_decreasing_int16( + opus_int16 *a, /* I/O Unsorted / Sorted vector */ + opus_int *idx, /* O Index vector for the sorted elements */ + const opus_int L, /* I Vector length */ + const
opus_int K /* I Number of correctly sorted positions */ +) +{ + opus_int i, j; + opus_int value; + + /* Safety checks */ + silk_assert( K > 0 ); + silk_assert( L > 0 ); + silk_assert( L >= K ); + + /* Write start indices in index vector */ + for( i = 0; i < K; i++ ) { + idx[ i ] = i; + } + + /* Sort vector elements by value, decreasing order */ + for( i = 1; i < K; i++ ) { + value = a[ i ]; + for( j = i - 1; ( j >= 0 ) && ( value > a[ j ] ); j-- ) { + a[ j + 1 ] = a[ j ]; /* Shift value */ + idx[ j + 1 ] = idx[ j ]; /* Shift index */ + } + a[ j + 1 ] = value; /* Write value */ + idx[ j + 1 ] = i; /* Write index */ + } + + /* If fewer than L values are asked for, check the remaining values, */ + /* but only spend CPU to ensure that the K first values are correct */ + for( i = K; i < L; i++ ) { + value = a[ i ]; + if( value > a[ K - 1 ] ) { + for( j = K - 2; ( j >= 0 ) && ( value > a[ j ] ); j-- ) { + a[ j + 1 ] = a[ j ]; /* Shift value */ + idx[ j + 1 ] = idx[ j ]; /* Shift index */ + } + a[ j + 1 ] = value; /* Write value */ + idx[ j + 1 ] = i; /* Write index */ + } + } +} +#endif + +void silk_insertion_sort_increasing_all_values_int16( + opus_int16 *a, /* I/O Unsorted / Sorted vector */ + const opus_int L /* I Vector length */ +) +{ + opus_int value; + opus_int i, j; + + /* Safety checks */ + silk_assert( L > 0 ); + + /* Sort vector elements by value, increasing order */ + for( i = 1; i < L; i++ ) { + value = a[ i ]; + for( j = i - 1; ( j >= 0 ) && ( value < a[ j ] ); j-- ) { + a[ j + 1 ] = a[ j ]; /* Shift value */ + } + a[ j + 1 ] = value; /* Write value */ + } +} diff --git a/drivers/opus/silk/stereo_LR_to_MS.c b/drivers/opus/silk/stereo_LR_to_MS.c new file mode 100644 index 0000000000..678f46984b --- /dev/null +++ b/drivers/opus/silk/stereo_LR_to_MS.c @@ -0,0 +1,229 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "silk_main.h" +#include "stack_alloc.h" + +/* Convert Left/Right stereo signal to adaptive Mid/Side representation */ +void silk_stereo_LR_to_MS( + stereo_enc_state *state, /* I/O State */ + opus_int16 x1[], /* I/O Left input signal, becomes mid signal */ + opus_int16 x2[], /* I/O Right input signal, becomes side signal */ + opus_int8 ix[ 2 ][ 3 ], /* O Quantization indices */ + opus_int8 *mid_only_flag, /* O Flag: only mid signal coded */ + opus_int32 mid_side_rates_bps[], /* O Bitrates for mid and side signals */ + opus_int32 total_rate_bps, /* I Total bitrate */ + opus_int prev_speech_act_Q8, /* I Speech activity level in previous frame */ + opus_int toMono, /* I Last frame before a stereo->mono transition */ + opus_int fs_kHz, /* I Sample rate (kHz) */ + opus_int frame_length /* I Number of samples */ +) +{ + opus_int n, is10msFrame, denom_Q16, delta0_Q13, delta1_Q13; + opus_int32 sum, diff, smooth_coef_Q16, pred_Q13[ 2 ], pred0_Q13, pred1_Q13; + opus_int32 LP_ratio_Q14, HP_ratio_Q14, frac_Q16, frac_3_Q16, min_mid_rate_bps, width_Q14, w_Q24, deltaw_Q24; + VARDECL( opus_int16, side ); + VARDECL( opus_int16, LP_mid ); + VARDECL( opus_int16, HP_mid ); + VARDECL( opus_int16, LP_side ); + VARDECL( opus_int16, HP_side ); + opus_int16 *mid = &x1[ -2 ]; /* mid[] aliases x1[] starting 2 samples before x1[ 0 ]; those 2 samples are written below */ + SAVE_STACK; + + ALLOC( side, frame_length + 2, opus_int16 ); + /* Convert to basic mid/side signals */ + for( n = 0; n < frame_length + 2; n++ ) { + sum = x1[ n - 2 ] + (opus_int32)x2[ n - 2 ]; + diff = x1[ n - 2 ] - (opus_int32)x2[ n - 2 ]; + mid[ n ] = (opus_int16)silk_RSHIFT_ROUND( sum, 1 ); + side[ n ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( diff, 1 ) ); + } + + /* Buffering */ + silk_memcpy( mid, state->sMid, 2 * sizeof( opus_int16 ) ); + silk_memcpy( side, state->sSide, 2 * sizeof( opus_int16 ) ); + silk_memcpy( state->sMid, &mid[ frame_length ], 2 * sizeof(
opus_int16 ) ); + silk_memcpy( state->sSide, &side[ frame_length ], 2 * sizeof( opus_int16 ) ); + + /* LP and HP filter mid signal */ + ALLOC( LP_mid, frame_length, opus_int16 ); + ALLOC( HP_mid, frame_length, opus_int16 ); + for( n = 0; n < frame_length; n++ ) { + sum = silk_RSHIFT_ROUND( silk_ADD_LSHIFT( mid[ n ] + mid[ n + 2 ], mid[ n + 1 ], 1 ), 2 ); + LP_mid[ n ] = sum; + HP_mid[ n ] = mid[ n + 1 ] - sum; + } + + /* LP and HP filter side signal */ + ALLOC( LP_side, frame_length, opus_int16 ); + ALLOC( HP_side, frame_length, opus_int16 ); + for( n = 0; n < frame_length; n++ ) { + sum = silk_RSHIFT_ROUND( silk_ADD_LSHIFT( side[ n ] + side[ n + 2 ], side[ n + 1 ], 1 ), 2 ); + LP_side[ n ] = sum; + HP_side[ n ] = side[ n + 1 ] - sum; + } + + /* Find energies and predictors */ + is10msFrame = frame_length == 10 * fs_kHz; + smooth_coef_Q16 = is10msFrame ? + SILK_FIX_CONST( STEREO_RATIO_SMOOTH_COEF / 2, 16 ) : + SILK_FIX_CONST( STEREO_RATIO_SMOOTH_COEF, 16 ); + smooth_coef_Q16 = silk_SMULWB( silk_SMULBB( prev_speech_act_Q8, prev_speech_act_Q8 ), smooth_coef_Q16 ); + + pred_Q13[ 0 ] = silk_stereo_find_predictor( &LP_ratio_Q14, LP_mid, LP_side, &state->mid_side_amp_Q0[ 0 ], frame_length, smooth_coef_Q16 ); + pred_Q13[ 1 ] = silk_stereo_find_predictor( &HP_ratio_Q14, HP_mid, HP_side, &state->mid_side_amp_Q0[ 2 ], frame_length, smooth_coef_Q16 ); + /* Ratio of the norms of residual and mid signals */ + frac_Q16 = silk_SMLABB( HP_ratio_Q14, LP_ratio_Q14, 3 ); + frac_Q16 = silk_min( frac_Q16, SILK_FIX_CONST( 1, 16 ) ); + + /* Determine bitrate distribution between mid and side, and possibly reduce stereo width */ + total_rate_bps -= is10msFrame ? 1200 : 600; /* Subtract approximate bitrate for coding stereo parameters */ + if( total_rate_bps < 1 ) { + total_rate_bps = 1; + } + min_mid_rate_bps = silk_SMLABB( 2000, fs_kHz, 900 ); + silk_assert( min_mid_rate_bps < 32767 ); + /* Default bitrate distribution: 8 parts for Mid and (5+3*frac) parts for Side.
so: mid_rate = ( 8 / ( 13 + 3 * frac ) ) * total_rate */ + frac_3_Q16 = silk_MUL( 3, frac_Q16 ); + mid_side_rates_bps[ 0 ] = silk_DIV32_varQ( total_rate_bps, SILK_FIX_CONST( 8 + 5, 16 ) + frac_3_Q16, 16+3 ); + /* If Mid bitrate below minimum, reduce stereo width */ + if( mid_side_rates_bps[ 0 ] < min_mid_rate_bps ) { + mid_side_rates_bps[ 0 ] = min_mid_rate_bps; + mid_side_rates_bps[ 1 ] = total_rate_bps - mid_side_rates_bps[ 0 ]; + /* width = 4 * ( 2 * side_rate - min_rate ) / ( ( 1 + 3 * frac ) * min_rate ) */ + width_Q14 = silk_DIV32_varQ( silk_LSHIFT( mid_side_rates_bps[ 1 ], 1 ) - min_mid_rate_bps, + silk_SMULWB( SILK_FIX_CONST( 1, 16 ) + frac_3_Q16, min_mid_rate_bps ), 14+2 ); + width_Q14 = silk_LIMIT( width_Q14, 0, SILK_FIX_CONST( 1, 14 ) ); + } else { + mid_side_rates_bps[ 1 ] = total_rate_bps - mid_side_rates_bps[ 0 ]; + width_Q14 = SILK_FIX_CONST( 1, 14 ); + } + + /* Smoother */ + state->smth_width_Q14 = (opus_int16)silk_SMLAWB( state->smth_width_Q14, width_Q14 - state->smth_width_Q14, smooth_coef_Q16 ); + + /* At very low bitrates or for inputs that are nearly amplitude panned, switch to panned-mono coding */ + *mid_only_flag = 0; + if( toMono ) { + /* Last frame before stereo->mono transition; collapse stereo width */ + width_Q14 = 0; + pred_Q13[ 0 ] = 0; + pred_Q13[ 1 ] = 0; + silk_stereo_quant_pred( pred_Q13, ix ); + } else if( state->width_prev_Q14 == 0 && + ( 8 * total_rate_bps < 13 * min_mid_rate_bps || silk_SMULWB( frac_Q16, state->smth_width_Q14 ) < SILK_FIX_CONST( 0.05, 14 ) ) ) + { + /* Code as panned-mono; previous frame already had zero width */ + /* Scale down and quantize predictors */ + pred_Q13[ 0 ] = silk_RSHIFT( silk_SMULBB( state->smth_width_Q14, pred_Q13[ 0 ] ), 14 ); + pred_Q13[ 1 ] = silk_RSHIFT( silk_SMULBB( state->smth_width_Q14, pred_Q13[ 1 ] ), 14 ); + silk_stereo_quant_pred( pred_Q13, ix ); + /* Collapse stereo width */ + width_Q14 = 0; + pred_Q13[ 0 ] = 0; + pred_Q13[ 1 ] = 0; + mid_side_rates_bps[ 0 ] = total_rate_bps; +
mid_side_rates_bps[ 1 ] = 0; + *mid_only_flag = 1; + } else if( state->width_prev_Q14 != 0 && + ( 8 * total_rate_bps < 11 * min_mid_rate_bps || silk_SMULWB( frac_Q16, state->smth_width_Q14 ) < SILK_FIX_CONST( 0.02, 14 ) ) ) + { + /* Transition to zero-width stereo */ + /* Scale down and quantize predictors */ + pred_Q13[ 0 ] = silk_RSHIFT( silk_SMULBB( state->smth_width_Q14, pred_Q13[ 0 ] ), 14 ); + pred_Q13[ 1 ] = silk_RSHIFT( silk_SMULBB( state->smth_width_Q14, pred_Q13[ 1 ] ), 14 ); + silk_stereo_quant_pred( pred_Q13, ix ); + /* Collapse stereo width */ + width_Q14 = 0; + pred_Q13[ 0 ] = 0; + pred_Q13[ 1 ] = 0; + } else if( state->smth_width_Q14 > SILK_FIX_CONST( 0.95, 14 ) ) { + /* Full-width stereo coding */ + silk_stereo_quant_pred( pred_Q13, ix ); + width_Q14 = SILK_FIX_CONST( 1, 14 ); + } else { + /* Reduced-width stereo coding; scale down and quantize predictors */ + pred_Q13[ 0 ] = silk_RSHIFT( silk_SMULBB( state->smth_width_Q14, pred_Q13[ 0 ] ), 14 ); + pred_Q13[ 1 ] = silk_RSHIFT( silk_SMULBB( state->smth_width_Q14, pred_Q13[ 1 ] ), 14 ); + silk_stereo_quant_pred( pred_Q13, ix ); + width_Q14 = state->smth_width_Q14; + } + + /* Make sure to keep on encoding until the tapered output has been transmitted */ + if( *mid_only_flag == 1 ) { + state->silent_side_len += frame_length - STEREO_INTERP_LEN_MS * fs_kHz; + if( state->silent_side_len < LA_SHAPE_MS * fs_kHz ) { + *mid_only_flag = 0; + } else { + /* Limit to avoid wrapping around */ + state->silent_side_len = 10000; + } + } else { + state->silent_side_len = 0; + } + + if( *mid_only_flag == 0 && mid_side_rates_bps[ 1 ] < 1 ) { + mid_side_rates_bps[ 1 ] = 1; + mid_side_rates_bps[ 0 ] = silk_max_int( 1, total_rate_bps - mid_side_rates_bps[ 1 ]); + } + + /* Interpolate predictors and subtract prediction from side channel */ + pred0_Q13 = -state->pred_prev_Q13[ 0 ]; + pred1_Q13 = -state->pred_prev_Q13[ 1 ]; + w_Q24 = silk_LSHIFT( state->width_prev_Q14, 10 ); + denom_Q16 = silk_DIV32_16( (opus_int32)1 << 16,
STEREO_INTERP_LEN_MS * fs_kHz ); + delta0_Q13 = -silk_RSHIFT_ROUND( silk_SMULBB( pred_Q13[ 0 ] - state->pred_prev_Q13[ 0 ], denom_Q16 ), 16 ); + delta1_Q13 = -silk_RSHIFT_ROUND( silk_SMULBB( pred_Q13[ 1 ] - state->pred_prev_Q13[ 1 ], denom_Q16 ), 16 ); + deltaw_Q24 = silk_LSHIFT( silk_SMULWB( width_Q14 - state->width_prev_Q14, denom_Q16 ), 10 ); + for( n = 0; n < STEREO_INTERP_LEN_MS * fs_kHz; n++ ) { + pred0_Q13 += delta0_Q13; + pred1_Q13 += delta1_Q13; + w_Q24 += deltaw_Q24; + sum = silk_LSHIFT( silk_ADD_LSHIFT( mid[ n ] + mid[ n + 2 ], mid[ n + 1 ], 1 ), 9 ); /* Q11 */ + sum = silk_SMLAWB( silk_SMULWB( w_Q24, side[ n + 1 ] ), sum, pred0_Q13 ); /* Q8 */ + sum = silk_SMLAWB( sum, silk_LSHIFT( (opus_int32)mid[ n + 1 ], 11 ), pred1_Q13 ); /* Q8 */ + x2[ n - 1 ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( sum, 8 ) ); + } + + pred0_Q13 = -pred_Q13[ 0 ]; + pred1_Q13 = -pred_Q13[ 1 ]; + w_Q24 = silk_LSHIFT( width_Q14, 10 ); + for( n = STEREO_INTERP_LEN_MS * fs_kHz; n < frame_length; n++ ) { + sum = silk_LSHIFT( silk_ADD_LSHIFT( mid[ n ] + mid[ n + 2 ], mid[ n + 1 ], 1 ), 9 ); /* Q11 */ + sum = silk_SMLAWB( silk_SMULWB( w_Q24, side[ n + 1 ] ), sum, pred0_Q13 ); /* Q8 */ + sum = silk_SMLAWB( sum, silk_LSHIFT( (opus_int32)mid[ n + 1 ], 11 ), pred1_Q13 ); /* Q8 */ + x2[ n - 1 ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( sum, 8 ) ); + } + state->pred_prev_Q13[ 0 ] = (opus_int16)pred_Q13[ 0 ]; + state->pred_prev_Q13[ 1 ] = (opus_int16)pred_Q13[ 1 ]; + state->width_prev_Q14 = (opus_int16)width_Q14; + RESTORE_STACK; +} diff --git a/drivers/opus/silk/stereo_MS_to_LR.c b/drivers/opus/silk/stereo_MS_to_LR.c new file mode 100644 index 0000000000..34f43cf795 --- /dev/null +++ b/drivers/opus/silk/stereo_MS_to_LR.c @@ -0,0 +1,85 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "silk_main.h" + +/* Convert adaptive Mid/Side representation to Left/Right stereo signal */ +void silk_stereo_MS_to_LR( + stereo_dec_state *state, /* I/O State */ + opus_int16 x1[], /* I/O Mid input signal, becomes left signal */ + opus_int16 x2[], /* I/O Side input signal, becomes right signal */ + const opus_int32 pred_Q13[], /* I Predictors */ + opus_int fs_kHz, /* I Sample rate (kHz) */ + opus_int frame_length /* I Number of samples */ +) +{ + opus_int n, denom_Q16, delta0_Q13, delta1_Q13; + opus_int32 sum, diff, pred0_Q13, pred1_Q13; + + /* Buffering: x1[ 0..1 ] / x2[ 0..1 ] are loaded from the saved state, then the 2 samples at index frame_length are saved */ + silk_memcpy( x1, state->sMid, 2 * sizeof( opus_int16 ) ); + silk_memcpy( x2, state->sSide, 2 * sizeof( opus_int16 ) ); + silk_memcpy( state->sMid, &x1[ frame_length ], 2 * sizeof( opus_int16 ) ); + silk_memcpy( state->sSide, &x2[ frame_length ], 2 * sizeof( opus_int16 ) ); + + /* Interpolate predictors and add prediction to side channel */ + pred0_Q13 = state->pred_prev_Q13[ 0 ]; + pred1_Q13 = state->pred_prev_Q13[ 1 ]; + denom_Q16 = silk_DIV32_16( (opus_int32)1 << 16, STEREO_INTERP_LEN_MS * fs_kHz ); + delta0_Q13 = silk_RSHIFT_ROUND( silk_SMULBB( pred_Q13[ 0 ] - state->pred_prev_Q13[ 0 ], denom_Q16 ), 16 ); + delta1_Q13 = silk_RSHIFT_ROUND( silk_SMULBB( pred_Q13[ 1 ] - state->pred_prev_Q13[ 1 ], denom_Q16 ), 16 ); + for( n = 0; n < STEREO_INTERP_LEN_MS * fs_kHz; n++ ) { + pred0_Q13 += delta0_Q13; + pred1_Q13 += delta1_Q13; + sum = silk_LSHIFT( silk_ADD_LSHIFT( x1[ n ] + x1[ n + 2 ], x1[ n + 1 ], 1 ), 9 ); /* Q11 */ + sum = silk_SMLAWB( silk_LSHIFT( (opus_int32)x2[ n + 1 ], 8 ), sum, pred0_Q13 ); /* Q8 */ + sum = silk_SMLAWB( sum, silk_LSHIFT( (opus_int32)x1[ n + 1 ], 11 ), pred1_Q13 ); /* Q8 */ + x2[ n + 1 ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( sum, 8 ) ); + } + pred0_Q13 = pred_Q13[ 0 ]; + pred1_Q13 = pred_Q13[ 1 ]; + for( n = STEREO_INTERP_LEN_MS *
fs_kHz; n < frame_length; n++ ) { + sum = silk_LSHIFT( silk_ADD_LSHIFT( x1[ n ] + x1[ n + 2 ], x1[ n + 1 ], 1 ), 9 ); /* Q11 */ + sum = silk_SMLAWB( silk_LSHIFT( (opus_int32)x2[ n + 1 ], 8 ), sum, pred0_Q13 ); /* Q8 */ + sum = silk_SMLAWB( sum, silk_LSHIFT( (opus_int32)x1[ n + 1 ], 11 ), pred1_Q13 ); /* Q8 */ + x2[ n + 1 ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( sum, 8 ) ); + } + state->pred_prev_Q13[ 0 ] = pred_Q13[ 0 ]; + state->pred_prev_Q13[ 1 ] = pred_Q13[ 1 ]; + + /* Convert to left/right signals */ + for( n = 0; n < frame_length; n++ ) { + sum = x1[ n + 1 ] + (opus_int32)x2[ n + 1 ]; + diff = x1[ n + 1 ] - (opus_int32)x2[ n + 1 ]; + x1[ n + 1 ] = (opus_int16)silk_SAT16( sum ); + x2[ n + 1 ] = (opus_int16)silk_SAT16( diff ); + } +} diff --git a/drivers/opus/silk/stereo_decode_pred.c b/drivers/opus/silk/stereo_decode_pred.c new file mode 100644 index 0000000000..56d94e56fe --- /dev/null +++ b/drivers/opus/silk/stereo_decode_pred.c @@ -0,0 +1,73 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "silk_main.h" + +/* Decode mid/side predictors: reads one joint index and, for each of the two predictors, a 3-way and a 5-way sub-index from the range decoder, then dequantizes both predictors from silk_stereo_pred_quant_Q13. On return pred_Q13[ 0 ] already has pred_Q13[ 1 ] subtracted from it */ +void silk_stereo_decode_pred( + ec_dec *psRangeDec, /* I/O Compressor data structure */ + opus_int32 pred_Q13[] /* O Predictors (entries 0 and 1 are written) */ +) +{ + opus_int n, ix[ 2 ][ 3 ]; + opus_int32 low_Q13, step_Q13; + + /* Entropy decoding: the joint symbol n carries both ix[ . ][ 2 ] values as 5 * ix[ 0 ][ 2 ] + ix[ 1 ][ 2 ] */ + n = ec_dec_icdf( psRangeDec, silk_stereo_pred_joint_iCDF, 8 ); + ix[ 0 ][ 2 ] = silk_DIV32_16( n, 5 ); + ix[ 1 ][ 2 ] = n - 5 * ix[ 0 ][ 2 ]; + for( n = 0; n < 2; n++ ) { + ix[ n ][ 0 ] = ec_dec_icdf( psRangeDec, silk_uniform3_iCDF, 8 ); + ix[ n ][ 1 ] = ec_dec_icdf( psRangeDec, silk_uniform5_iCDF, 8 ); + } + + /* Dequantize: ix[ n ][ 0 ] + 3 * ix[ n ][ 2 ] selects the table interval, ix[ n ][ 1 ] the sub-step inside it */ + for( n = 0; n < 2; n++ ) { + ix[ n ][ 0 ] += 3 * ix[ n ][ 2 ]; + low_Q13 = silk_stereo_pred_quant_Q13[ ix[ n ][ 0 ] ]; + step_Q13 = silk_SMULWB( silk_stereo_pred_quant_Q13[ ix[ n ][ 0 ] + 1 ] - low_Q13, + SILK_FIX_CONST( 0.5 / STEREO_QUANT_SUB_STEPS, 16 ) ); + pred_Q13[ n ] = silk_SMLABB( low_Q13, step_Q13, 2 * ix[ n ][ 1 ] + 1 ); + } + + /* Subtract second from first predictor (helps when actually applying these) */ + pred_Q13[ 0 ] -= pred_Q13[ 1 ]; +} + +/* Decode 
mid-only flag */ +void silk_stereo_decode_mid_only( + ec_dec *psRangeDec, /* I/O Compressor data structure */ + opus_int *decode_only_mid /* O Flag that only mid channel has been coded */ +) +{ + /* Decode flag that only mid channel is coded: a single symbol read from the range decoder with the silk_stereo_only_code_mid_iCDF table */ + *decode_only_mid = ec_dec_icdf( psRangeDec, silk_stereo_only_code_mid_iCDF, 8 ); +} diff --git a/drivers/opus/silk/stereo_encode_pred.c b/drivers/opus/silk/stereo_encode_pred.c new file mode 100644 index 0000000000..bfe75b08e4 --- /dev/null +++ b/drivers/opus/silk/stereo_encode_pred.c @@ -0,0 +1,62 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "silk_main.h" + +/* Entropy code the mid/side quantization indices: one joint symbol for the two ix[ . ][ 2 ] values, then for each predictor a 3-way symbol (ix[ n ][ 0 ]) and a 5-way symbol (ix[ n ][ 1 ]) */ +void silk_stereo_encode_pred( + ec_enc *psRangeEnc, /* I/O Compressor data structure */ + opus_int8 ix[ 2 ][ 3 ] /* I Quantization indices */ +) +{ + opus_int n; + + /* Entropy coding: the two ix[ . ][ 2 ] values are combined as 5 * ix[ 0 ][ 2 ] + ix[ 1 ][ 2 ], asserted to be below 25 */ + n = 5 * ix[ 0 ][ 2 ] + ix[ 1 ][ 2 ]; + silk_assert( n < 25 ); + ec_enc_icdf( psRangeEnc, n, silk_stereo_pred_joint_iCDF, 8 ); + for( n = 0; n < 2; n++ ) { + silk_assert( ix[ n ][ 0 ] < 3 ); + silk_assert( ix[ n ][ 1 ] < STEREO_QUANT_SUB_STEPS ); + ec_enc_icdf( psRangeEnc, ix[ n ][ 0 ], silk_uniform3_iCDF, 8 ); + ec_enc_icdf( psRangeEnc, ix[ n ][ 1 ], silk_uniform5_iCDF, 8 ); + } +} + +/* Entropy code the mid-only flag (mid_only_flag is an input: the flag that only the mid channel is coded) */ +void silk_stereo_encode_mid_only( + ec_enc *psRangeEnc, /* I/O Compressor data structure */ + opus_int8 mid_only_flag +) +{ + /* Encode flag that only mid channel is coded */ + ec_enc_icdf( psRangeEnc, mid_only_flag, silk_stereo_only_code_mid_iCDF, 8 ); +} diff --git a/drivers/opus/silk/stereo_find_predictor.c b/drivers/opus/silk/stereo_find_predictor.c new file mode 100644 index 0000000000..266293dff3 --- /dev/null +++ b/drivers/opus/silk/stereo_find_predictor.c @@ -0,0 +1,79 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. 
+Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "silk_main.h" + +/* Find least-squares prediction gain for one signal based on another. The predictor is limited to [ -(1 << 14), 1 << 14 ] but is not quantized in this function; the smoothed mid and residual norms are updated and their ratio is returned through ratio_Q14 */ +opus_int32 silk_stereo_find_predictor( /* O Returns predictor in Q13 */ + opus_int32 *ratio_Q14, /* O Ratio of residual and mid energies */ + const opus_int16 x[], /* I Basis signal */ + const opus_int16 y[], /* I Target signal */ + opus_int32 mid_res_amp_Q0[], /* I/O Smoothed mid, residual norms */ + opus_int length, /* I Number of samples */ + opus_int smooth_coef_Q16 /* I Smoothing coefficient */ +) +{ + opus_int scale, scale1, scale2; + opus_int32 nrgx, nrgy, corr, pred_Q13, pred2_Q10; + + /* Find predictor: both energies are shifted to a common, even scale before the correlation and the division; nrgx is kept at 1 or more because it is the divisor */ + silk_sum_sqr_shift( &nrgx, &scale1, x, length ); + silk_sum_sqr_shift( &nrgy, &scale2, y, length ); + scale = silk_max_int( scale1, scale2 ); + scale = scale + ( scale & 1 ); /* make even */ + nrgy = silk_RSHIFT32( nrgy, scale - scale2 ); + nrgx = silk_RSHIFT32( nrgx, scale - scale1 ); + nrgx = silk_max_int( nrgx, 1 ); + corr = silk_inner_prod_aligned_scale( x, y, scale, length ); + pred_Q13 = silk_DIV32_varQ( corr, nrgx, 13 ); + pred_Q13 = silk_LIMIT( pred_Q13, -(1 << 14), 1 << 14 ); + pred2_Q10 = silk_SMULWB( pred_Q13, pred_Q13 ); + + /* Faster update for signals with large prediction parameters */ + smooth_coef_Q16 = (opus_int)silk_max_int( smooth_coef_Q16, silk_abs( pred2_Q10 ) ); + + /* Smoothed mid and residual norms (scale is halved because it is applied to a square root) */ + silk_assert( smooth_coef_Q16 < 32768 ); + scale = silk_RSHIFT( scale, 1 ); + mid_res_amp_Q0[ 0 ] = silk_SMLAWB( mid_res_amp_Q0[ 0 ], silk_LSHIFT( silk_SQRT_APPROX( nrgx ), scale ) - mid_res_amp_Q0[ 0 ], + smooth_coef_Q16 ); + /* Residual energy = nrgy - 2 * pred * corr + pred^2 * nrgx */ + nrgy = silk_SUB_LSHIFT32( nrgy, silk_SMULWB( corr, pred_Q13 ), 3 + 1 ); + nrgy = silk_ADD_LSHIFT32( nrgy, silk_SMULWB( nrgx, pred2_Q10 ), 6 ); + mid_res_amp_Q0[ 1 ] = silk_SMLAWB( 
mid_res_amp_Q0[ 1 ], silk_LSHIFT( silk_SQRT_APPROX( nrgy ), scale ) - mid_res_amp_Q0[ 1 ], + smooth_coef_Q16 ); + + /* Ratio of smoothed residual and mid norms, limited to the range 0..32767 */ + *ratio_Q14 = silk_DIV32_varQ( mid_res_amp_Q0[ 1 ], silk_max( mid_res_amp_Q0[ 0 ], 1 ), 14 ); + *ratio_Q14 = silk_LIMIT( *ratio_Q14, 0, 32767 ); + + return pred_Q13; +} diff --git a/drivers/opus/silk/stereo_quant_pred.c b/drivers/opus/silk/stereo_quant_pred.c new file mode 100644 index 0000000000..834020d715 --- /dev/null +++ b/drivers/opus/silk/stereo_quant_pred.c @@ -0,0 +1,73 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "silk_main.h" + +/* Quantize mid/side predictors: each of the two predictors is replaced by the closest level found in silk_stereo_pred_quant_Q13 (with STEREO_QUANT_SUB_STEPS sub-steps per table interval) and its indices are stored in ix; afterwards pred_Q13[ 1 ] is subtracted from pred_Q13[ 0 ] */ +void silk_stereo_quant_pred( + opus_int32 pred_Q13[], /* I/O Predictors (out: quantized) */ + opus_int8 ix[ 2 ][ 3 ] /* O Quantization indices */ +) +{ + opus_int i, j, n; + opus_int32 low_Q13, step_Q13, lvl_Q13, err_min_Q13, err_Q13, quant_pred_Q13 = 0; + + /* Quantize */ + for( n = 0; n < 2; n++ ) { + /* Brute-force search over quantization levels; the search stops at the first level whose error is not smaller than the best error so far */ + err_min_Q13 = silk_int32_MAX; + for( i = 0; i < STEREO_QUANT_TAB_SIZE - 1; i++ ) { + low_Q13 = silk_stereo_pred_quant_Q13[ i ]; + step_Q13 = silk_SMULWB( silk_stereo_pred_quant_Q13[ i + 1 ] - low_Q13, + SILK_FIX_CONST( 0.5 / STEREO_QUANT_SUB_STEPS, 16 ) ); + for( j = 0; j < STEREO_QUANT_SUB_STEPS; j++ ) { + lvl_Q13 = silk_SMLABB( low_Q13, step_Q13, 2 * j + 1 ); + err_Q13 = silk_abs( pred_Q13[ n ] - lvl_Q13 ); + if( err_Q13 < err_min_Q13 ) { + err_min_Q13 = err_Q13; + quant_pred_Q13 = lvl_Q13; + ix[ n ][ 0 ] = i; + ix[ n ][ 1 ] = j; + } else { + /* Error increasing, so we're past the optimum */ + goto done; + } + } + } + done: + ix[ n ][ 2 ] = silk_DIV32_16( ix[ n ][ 0 ], 3 ); + ix[ n ][ 0 ] -= ix[ n ][ 2 ] * 3; + pred_Q13[ n ] = quant_pred_Q13; + } + + /* Subtract second from first predictor (helps when actually applying these) */ + pred_Q13[ 0 ] -= pred_Q13[ 1 ]; +} diff --git 
a/drivers/opus/silk/structs.h b/drivers/opus/silk/structs.h new file mode 100644 index 0000000000..1826b36a80 --- /dev/null +++ b/drivers/opus/silk/structs.h @@ -0,0 +1,327 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+***********************************************************************/ + +#ifndef SILK_STRUCTS_H +#define SILK_STRUCTS_H + +#include "typedef.h" +#include "SigProc_FIX.h" +#include "define.h" +#include "entenc.h" +#include "entdec.h" + +#ifdef __cplusplus +extern "C" +{ +#endif + +/************************************/ +/* Noise shaping quantization state */ +/************************************/ +typedef struct { + opus_int16 xq[ 2 * MAX_FRAME_LENGTH ]; /* Buffer for quantized output signal */ + opus_int32 sLTP_shp_Q14[ 2 * MAX_FRAME_LENGTH ]; + opus_int32 sLPC_Q14[ MAX_SUB_FRAME_LENGTH + NSQ_LPC_BUF_LENGTH ]; + opus_int32 sAR2_Q14[ MAX_SHAPE_LPC_ORDER ]; + opus_int32 sLF_AR_shp_Q14; + opus_int lagPrev; + opus_int sLTP_buf_idx; + opus_int sLTP_shp_buf_idx; + opus_int32 rand_seed; + opus_int32 prev_gain_Q16; + opus_int rewhite_flag; +} silk_nsq_state; + +/********************************/ +/* VAD state */ +/********************************/ +typedef struct { + opus_int32 AnaState[ 2 ]; /* Analysis filterbank state: 0-8 kHz */ + opus_int32 AnaState1[ 2 ]; /* Analysis filterbank state: 0-4 kHz */ + opus_int32 AnaState2[ 2 ]; /* Analysis filterbank state: 0-2 kHz */ + opus_int32 XnrgSubfr[ VAD_N_BANDS ]; /* Subframe energies */ + opus_int32 NrgRatioSmth_Q8[ VAD_N_BANDS ]; /* Smoothed energy level in each band */ + opus_int16 HPstate; /* State of differentiator in the lowest band */ + opus_int32 NL[ VAD_N_BANDS ]; /* Noise energy level in each band */ + opus_int32 inv_NL[ VAD_N_BANDS ]; /* Inverse noise energy level in each band */ + opus_int32 NoiseLevelBias[ VAD_N_BANDS ]; /* Noise level estimator bias/offset */ + opus_int32 counter; /* Frame counter used in the initial phase */ +} silk_VAD_state; + +/* Variable cut-off low-pass filter state */ +typedef struct { + opus_int32 In_LP_State[ 2 ]; /* Low pass filter state */ + opus_int32 transition_frame_no; /* Counter which is mapped to a cut-off frequency */ + opus_int mode; /* Operating mode, <0: switch down, >0: 
switch up; 0: do nothing */ +} silk_LP_state; + +/* Structure containing NLSF codebook */ +typedef struct { + const opus_int16 nVectors; + const opus_int16 order; + const opus_int16 quantStepSize_Q16; + const opus_int16 invQuantStepSize_Q6; + const opus_uint8 *CB1_NLSF_Q8; + const opus_uint8 *CB1_iCDF; + const opus_uint8 *pred_Q8; + const opus_uint8 *ec_sel; + const opus_uint8 *ec_iCDF; + const opus_uint8 *ec_Rates_Q5; + const opus_int16 *deltaMin_Q15; +} silk_NLSF_CB_struct; + +typedef struct { + opus_int16 pred_prev_Q13[ 2 ]; + opus_int16 sMid[ 2 ]; + opus_int16 sSide[ 2 ]; + opus_int32 mid_side_amp_Q0[ 4 ]; + opus_int16 smth_width_Q14; + opus_int16 width_prev_Q14; + opus_int16 silent_side_len; + opus_int8 predIx[ MAX_FRAMES_PER_PACKET ][ 2 ][ 3 ]; + opus_int8 mid_only_flags[ MAX_FRAMES_PER_PACKET ]; +} stereo_enc_state; + +typedef struct { + opus_int16 pred_prev_Q13[ 2 ]; + opus_int16 sMid[ 2 ]; + opus_int16 sSide[ 2 ]; +} stereo_dec_state; + +typedef struct { + opus_int8 GainsIndices[ MAX_NB_SUBFR ]; + opus_int8 LTPIndex[ MAX_NB_SUBFR ]; + opus_int8 NLSFIndices[ MAX_LPC_ORDER + 1 ]; + opus_int16 lagIndex; + opus_int8 contourIndex; + opus_int8 signalType; + opus_int8 quantOffsetType; + opus_int8 NLSFInterpCoef_Q2; + opus_int8 PERIndex; + opus_int8 LTP_scaleIndex; + opus_int8 Seed; +} SideInfoIndices; + +/********************************/ +/* Encoder state */ +/********************************/ +typedef struct { + opus_int32 In_HP_State[ 2 ]; /* High pass filter state */ + opus_int32 variable_HP_smth1_Q15; /* State of first smoother */ + opus_int32 variable_HP_smth2_Q15; /* State of second smoother */ + silk_LP_state sLP; /* Low pass filter state */ + silk_VAD_state sVAD; /* Voice activity detector state */ + silk_nsq_state sNSQ; /* Noise Shape Quantizer State */ + opus_int16 prev_NLSFq_Q15[ MAX_LPC_ORDER ]; /* Previously quantized NLSF vector */ + opus_int speech_activity_Q8; /* Speech activity */ + opus_int allow_bandwidth_switch; /* Flag indicating that 
switching of internal bandwidth is allowed */ + opus_int8 LBRRprevLastGainIndex; + opus_int8 prevSignalType; + opus_int prevLag; + opus_int pitch_LPC_win_length; + opus_int max_pitch_lag; /* Highest possible pitch lag (samples) */ + opus_int32 API_fs_Hz; /* API sampling frequency (Hz) */ + opus_int32 prev_API_fs_Hz; /* Previous API sampling frequency (Hz) */ + opus_int maxInternal_fs_Hz; /* Maximum internal sampling frequency (Hz) */ + opus_int minInternal_fs_Hz; /* Minimum internal sampling frequency (Hz) */ + opus_int desiredInternal_fs_Hz; /* Soft request for internal sampling frequency (Hz) */ + opus_int fs_kHz; /* Internal sampling frequency (kHz) */ + opus_int nb_subfr; /* Number of 5 ms subframes in a frame */ + opus_int frame_length; /* Frame length (samples) */ + opus_int subfr_length; /* Subframe length (samples) */ + opus_int ltp_mem_length; /* Length of LTP memory */ + opus_int la_pitch; /* Look-ahead for pitch analysis (samples) */ + opus_int la_shape; /* Look-ahead for noise shape analysis (samples) */ + opus_int shapeWinLength; /* Window length for noise shape analysis (samples) */ + opus_int32 TargetRate_bps; /* Target bitrate (bps) */ + opus_int PacketSize_ms; /* Number of milliseconds to put in each packet */ + opus_int PacketLoss_perc; /* Packet loss rate measured by far end */ + opus_int32 frameCounter; + opus_int Complexity; /* Complexity setting */ + opus_int nStatesDelayedDecision; /* Number of states in delayed decision quantization */ + opus_int useInterpolatedNLSFs; /* Flag for using NLSF interpolation */ + opus_int shapingLPCOrder; /* Filter order for noise shaping filters */ + opus_int predictLPCOrder; /* Filter order for prediction filters */ + opus_int pitchEstimationComplexity; /* Complexity level for pitch estimator */ + opus_int pitchEstimationLPCOrder; /* Whitening filter order for pitch estimator */ + opus_int32 pitchEstimationThreshold_Q16; /* Threshold for pitch estimator */ + opus_int LTPQuantLowComplexity; /* Flag for low 
complexity LTP quantization */ + opus_int mu_LTP_Q9; /* Rate-distortion tradeoff in LTP quantization */ + opus_int32 sum_log_gain_Q7; /* Cumulative max prediction gain */ + opus_int NLSF_MSVQ_Survivors; /* Number of survivors in NLSF MSVQ */ + opus_int first_frame_after_reset; /* Flag for deactivating NLSF interpolation, pitch prediction */ + opus_int controlled_since_last_payload; /* Flag for ensuring codec_control only runs once per packet */ + opus_int warping_Q16; /* Warping parameter for warped noise shaping */ + opus_int useCBR; /* Flag to enable constant bitrate */ + opus_int prefillFlag; /* Flag to indicate that only buffers are prefilled, no coding */ + const opus_uint8 *pitch_lag_low_bits_iCDF; /* Pointer to iCDF table for low bits of pitch lag index */ + const opus_uint8 *pitch_contour_iCDF; /* Pointer to iCDF table for pitch contour index */ + const silk_NLSF_CB_struct *psNLSF_CB; /* Pointer to NLSF codebook */ + opus_int input_quality_bands_Q15[ VAD_N_BANDS ]; + opus_int input_tilt_Q15; + opus_int SNR_dB_Q7; /* Quality setting */ + + opus_int8 VAD_flags[ MAX_FRAMES_PER_PACKET ]; + opus_int8 LBRR_flag; + opus_int LBRR_flags[ MAX_FRAMES_PER_PACKET ]; + + SideInfoIndices indices; + opus_int8 pulses[ MAX_FRAME_LENGTH ]; + + int arch; + + /* Input/output buffering */ + opus_int16 inputBuf[ MAX_FRAME_LENGTH + 2 ]; /* Buffer containing input signal */ + opus_int inputBufIx; + opus_int nFramesPerPacket; + opus_int nFramesEncoded; /* Number of frames analyzed in current packet */ + + opus_int nChannelsAPI; + opus_int nChannelsInternal; + opus_int channelNb; + + /* Parameters For LTP scaling Control */ + opus_int frames_since_onset; + + /* Specifically for entropy coding */ + opus_int ec_prevSignalType; + opus_int16 ec_prevLagIndex; + + silk_resampler_state_struct resampler_state; + + /* DTX */ + opus_int useDTX; /* Flag to enable DTX */ + opus_int inDTX; /* Flag to signal DTX period */ + opus_int noSpeechCounter; /* Counts consecutive nonactive frames, used by 
DTX */ + + /* Inband Low Bitrate Redundancy (LBRR) data */ + opus_int useInBandFEC; /* Saves the API setting for query */ + opus_int LBRR_enabled; /* Depends on useInBandFEC, bitrate and packet loss rate */ + opus_int LBRR_GainIncreases; /* Gains increment for coding LBRR frames */ + SideInfoIndices indices_LBRR[ MAX_FRAMES_PER_PACKET ]; + opus_int8 pulses_LBRR[ MAX_FRAMES_PER_PACKET ][ MAX_FRAME_LENGTH ]; +} silk_encoder_state; + + +/* Struct for Packet Loss Concealment */ +typedef struct { + opus_int32 pitchL_Q8; /* Pitch lag to use for voiced concealment */ + opus_int16 LTPCoef_Q14[ LTP_ORDER ]; /* LTP coefficients to use for voiced concealment */ + opus_int16 prevLPC_Q12[ MAX_LPC_ORDER ]; + opus_int last_frame_lost; /* Was previous frame lost */ + opus_int32 rand_seed; /* Seed for unvoiced signal generation */ + opus_int16 randScale_Q14; /* Scaling of unvoiced random signal */ + opus_int32 conc_energy; + opus_int conc_energy_shift; + opus_int16 prevLTP_scale_Q14; + opus_int32 prevGain_Q16[ 2 ]; + opus_int fs_kHz; + opus_int nb_subfr; + opus_int subfr_length; +} silk_PLC_struct; + +/* Struct for CNG */ +typedef struct { + opus_int32 CNG_exc_buf_Q14[ MAX_FRAME_LENGTH ]; + opus_int16 CNG_smth_NLSF_Q15[ MAX_LPC_ORDER ]; + opus_int32 CNG_synth_state[ MAX_LPC_ORDER ]; + opus_int32 CNG_smth_Gain_Q16; + opus_int32 rand_seed; + opus_int fs_kHz; +} silk_CNG_struct; + +/********************************/ +/* Decoder state */ +/********************************/ +typedef struct { + opus_int32 prev_gain_Q16; + opus_int32 exc_Q14[ MAX_FRAME_LENGTH ]; + opus_int32 sLPC_Q14_buf[ MAX_LPC_ORDER ]; + opus_int16 outBuf[ MAX_FRAME_LENGTH + 2 * MAX_SUB_FRAME_LENGTH ]; /* Buffer for output signal */ + opus_int lagPrev; /* Previous Lag */ + opus_int8 LastGainIndex; /* Previous gain index */ + opus_int fs_kHz; /* Sampling frequency in kHz */ + opus_int32 fs_API_hz; /* API sample frequency (Hz) */ + opus_int nb_subfr; /* Number of 5 ms subframes in a frame */ + opus_int frame_length; /* 
Frame length (samples) */ + opus_int subfr_length; /* Subframe length (samples) */ + opus_int ltp_mem_length; /* Length of LTP memory */ + opus_int LPC_order; /* LPC order */ + opus_int16 prevNLSF_Q15[ MAX_LPC_ORDER ]; /* Used to interpolate LSFs */ + opus_int first_frame_after_reset; /* Flag for deactivating NLSF interpolation */ + const opus_uint8 *pitch_lag_low_bits_iCDF; /* Pointer to iCDF table for low bits of pitch lag index */ + const opus_uint8 *pitch_contour_iCDF; /* Pointer to iCDF table for pitch contour index */ + + /* For buffering payload in case of more frames per packet */ + opus_int nFramesDecoded; + opus_int nFramesPerPacket; + + /* Specifically for entropy coding */ + opus_int ec_prevSignalType; + opus_int16 ec_prevLagIndex; + + opus_int VAD_flags[ MAX_FRAMES_PER_PACKET ]; + opus_int LBRR_flag; + opus_int LBRR_flags[ MAX_FRAMES_PER_PACKET ]; + + silk_resampler_state_struct resampler_state; + + const silk_NLSF_CB_struct *psNLSF_CB; /* Pointer to NLSF codebook */ + + /* Quantization indices */ + SideInfoIndices indices; + + /* CNG state */ + silk_CNG_struct sCNG; + + /* Stuff used for PLC */ + opus_int lossCnt; + opus_int prevSignalType; + + silk_PLC_struct sPLC; + +} silk_decoder_state; + +/************************/ +/* Decoder control */ +/************************/ +typedef struct { + /* Prediction and coding parameters */ + opus_int pitchL[ MAX_NB_SUBFR ]; + opus_int32 Gains_Q16[ MAX_NB_SUBFR ]; + /* Holds interpolated and final coefficients, 4-byte aligned */ + silk_DWORD_ALIGN opus_int16 PredCoef_Q12[ 2 ][ MAX_LPC_ORDER ]; + opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ]; + opus_int LTP_scale_Q14; +} silk_decoder_control; + + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/drivers/opus/silk/sum_sqr_shift.c b/drivers/opus/silk/sum_sqr_shift.c new file mode 100644 index 0000000000..8ec27f8a03 --- /dev/null +++ b/drivers/opus/silk/sum_sqr_shift.c @@ -0,0 +1,85 @@ +/*********************************************************************** 
+Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "SigProc_FIX.h" + +/* Compute number of bits to right shift the sum of squares of a vector */ +/* of int16s to make it fit in an int32. Samples are accumulated two at a time; whenever the running sum turns negative it is shifted right by two bits as an unsigned value and the shift count grows by two */ +void silk_sum_sqr_shift( + opus_int32 *energy, /* O Energy of x, after shifting to the right */ + opus_int *shift, /* O Number of bits right shift applied to energy */ + const opus_int16 *x, /* I Input vector */ + opus_int len /* I Length of input vector */ +) +{ + opus_int i, shft; + opus_int32 nrg_tmp, nrg; + + nrg = 0; + shft = 0; + len--; + for( i = 0; i < len; i += 2 ) { + nrg = silk_SMLABB_ovflw( nrg, x[ i ], x[ i ] ); + nrg = silk_SMLABB_ovflw( nrg, x[ i + 1 ], x[ i + 1 ] ); + if( nrg < 0 ) { + /* Scale down, then leave this loop: the remaining pairs are added by the next loop, which applies the shift */ + nrg = (opus_int32)silk_RSHIFT_uint( (opus_uint32)nrg, 2 ); + shft = 2; + break; + } + } + for( ; i < len; i += 2 ) { + nrg_tmp = silk_SMULBB( x[ i ], x[ i ] ); + nrg_tmp = silk_SMLABB_ovflw( nrg_tmp, x[ i + 1 ], x[ i + 1 ] ); + nrg = (opus_int32)silk_ADD_RSHIFT_uint( nrg, (opus_uint32)nrg_tmp, shft ); + if( nrg < 0 ) { + /* Scale down */ + nrg = (opus_int32)silk_RSHIFT_uint( (opus_uint32)nrg, 2 ); + shft += 2; + } + } + if( i == len ) { + /* One sample left to process (len was decremented above, so this is the last element of x) */ + nrg_tmp = silk_SMULBB( x[ i ], x[ i ] ); + nrg = (opus_int32)silk_ADD_RSHIFT_uint( nrg, nrg_tmp, shft ); + } + + /* Make sure to have at least one extra leading zero (two leading zeros in total) */ + if( nrg & 0xC0000000 ) { + nrg = silk_RSHIFT_uint( (opus_uint32)nrg, 2 ); + shft += 2; + } + + /* Output arguments */ + *shift = shft; + *energy = nrg; +} + diff --git a/drivers/opus/silk/table_LSF_cos.c b/drivers/opus/silk/table_LSF_cos.c new file mode 100644 index 0000000000..674b6a03e6 --- /dev/null +++ b/drivers/opus/silk/table_LSF_cos.c @@ -0,0 +1,70 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. 
+Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "tables.h" + +/* Cosine approximation table for LSF conversion */ +/* Q12 values (even): LSF_COS_TAB_SZ_FIX + 1 entries, decreasing from 8192 through 0 to -8192 */ +const opus_int16 silk_LSFCosTab_FIX_Q12[ LSF_COS_TAB_SZ_FIX + 1 ] = { + 8192, 8190, 8182, 8170, + 8152, 8130, 8104, 8072, + 8034, 7994, 7946, 7896, + 7840, 7778, 7714, 7644, + 7568, 7490, 7406, 7318, + 7226, 7128, 7026, 6922, + 6812, 6698, 6580, 6458, + 6332, 6204, 6070, 5934, + 5792, 5648, 5502, 5352, + 5198, 5040, 4880, 4718, + 4552, 4382, 4212, 4038, + 3862, 3684, 3502, 3320, + 3136, 2948, 2760, 2570, + 2378, 2186, 1990, 1794, + 1598, 1400, 1202, 1002, + 802, 602, 402, 202, + 0, -202, -402, -602, + -802, -1002, -1202, -1400, + -1598, -1794, -1990, -2186, + -2378, -2570, -2760, -2948, + -3136, -3320, -3502, -3684, + -3862, -4038, -4212, -4382, + -4552, -4718, -4880, -5040, + -5198, -5352, -5502, -5648, + -5792, -5934, -6070, -6204, + -6332, -6458, -6580, -6698, + -6812, -6922, -7026, -7128, + -7226, -7318, -7406, -7490, + -7568, -7644, -7714, -7778, + -7840, -7896, -7946, -7994, + -8034, -8072, -8104, -8130, + -8152, -8170, -8182, -8190, + -8192 +}; diff --git a/drivers/opus/silk/tables.h b/drivers/opus/silk/tables.h new file mode 100644 index 0000000000..a91431e854 --- /dev/null +++ b/drivers/opus/silk/tables.h @@ -0,0 +1,122 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. 
+- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+***********************************************************************/ + +#ifndef SILK_TABLES_H +#define SILK_TABLES_H + +#include "define.h" +#include "structs.h" + +#ifdef __cplusplus +extern "C" +{ +#endif + +/* Entropy coding tables (with size in bytes indicated) */ +extern const opus_uint8 silk_gain_iCDF[ 3 ][ N_LEVELS_QGAIN / 8 ]; /* 24 */ +extern const opus_uint8 silk_delta_gain_iCDF[ MAX_DELTA_GAIN_QUANT - MIN_DELTA_GAIN_QUANT + 1 ]; /* 41 */ + +extern const opus_uint8 silk_pitch_lag_iCDF[ 2 * ( PITCH_EST_MAX_LAG_MS - PITCH_EST_MIN_LAG_MS ) ];/* 32 */ +extern const opus_uint8 silk_pitch_delta_iCDF[ 21 ]; /* 21 */ +extern const opus_uint8 silk_pitch_contour_iCDF[ 34 ]; /* 34 */ +extern const opus_uint8 silk_pitch_contour_NB_iCDF[ 11 ]; /* 11 */ +extern const opus_uint8 silk_pitch_contour_10_ms_iCDF[ 12 ]; /* 12 */ +extern const opus_uint8 silk_pitch_contour_10_ms_NB_iCDF[ 3 ]; /* 3 */ + +extern const opus_uint8 silk_pulses_per_block_iCDF[ N_RATE_LEVELS ][ MAX_PULSES + 2 ]; /* 180 */ +extern const opus_uint8 silk_pulses_per_block_BITS_Q5[ N_RATE_LEVELS - 1 ][ MAX_PULSES + 2 ]; /* 162 */ + +extern const opus_uint8 silk_rate_levels_iCDF[ 2 ][ N_RATE_LEVELS - 1 ]; /* 18 */ +extern const opus_uint8 silk_rate_levels_BITS_Q5[ 2 ][ N_RATE_LEVELS - 1 ]; /* 18 */ + +extern const opus_uint8 silk_max_pulses_table[ 4 ]; /* 4 */ + +extern const opus_uint8 silk_shell_code_table0[ 152 ]; /* 152 */ +extern const opus_uint8 silk_shell_code_table1[ 152 ]; /* 152 */ +extern const opus_uint8 silk_shell_code_table2[ 152 ]; /* 152 */ +extern const opus_uint8 silk_shell_code_table3[ 152 ]; /* 152 */ +extern const opus_uint8 silk_shell_code_table_offsets[ MAX_PULSES + 1 ]; /* 17 */ + +extern const opus_uint8 silk_lsb_iCDF[ 2 ]; /* 2 */ + +extern const opus_uint8 silk_sign_iCDF[ 42 ]; /* 42 */ + +extern const opus_uint8 silk_uniform3_iCDF[ 3 ]; /* 3 */ +extern const opus_uint8 silk_uniform4_iCDF[ 4 ]; /* 4 */ +extern const opus_uint8 silk_uniform5_iCDF[ 5 ]; /* 5 */ +extern const 
opus_uint8 silk_uniform6_iCDF[ 6 ]; /* 6 */ +extern const opus_uint8 silk_uniform8_iCDF[ 8 ]; /* 8 */ + +extern const opus_uint8 silk_NLSF_EXT_iCDF[ 7 ]; /* 7 */ + +extern const opus_uint8 silk_LTP_per_index_iCDF[ 3 ]; /* 3 */ +extern const opus_uint8 * const silk_LTP_gain_iCDF_ptrs[ NB_LTP_CBKS ]; /* 3 */ +extern const opus_uint8 * const silk_LTP_gain_BITS_Q5_ptrs[ NB_LTP_CBKS ]; /* 3 */ +extern const opus_int16 silk_LTP_gain_middle_avg_RD_Q14; +extern const opus_int8 * const silk_LTP_vq_ptrs_Q7[ NB_LTP_CBKS ]; /* 168 */ +extern const opus_uint8 * const silk_LTP_vq_gain_ptrs_Q7[NB_LTP_CBKS]; + +extern const opus_int8 silk_LTP_vq_sizes[ NB_LTP_CBKS ]; /* 3 */ + +extern const opus_uint8 silk_LTPscale_iCDF[ 3 ]; /* 3 */ +extern const opus_int16 silk_LTPScales_table_Q14[ 3 ]; /* 6 */ + +extern const opus_uint8 silk_type_offset_VAD_iCDF[ 4 ]; /* 4 */ +extern const opus_uint8 silk_type_offset_no_VAD_iCDF[ 2 ]; /* 2 */ + +extern const opus_int16 silk_stereo_pred_quant_Q13[ STEREO_QUANT_TAB_SIZE ]; /* 32 */ +extern const opus_uint8 silk_stereo_pred_joint_iCDF[ 25 ]; /* 25 */ +extern const opus_uint8 silk_stereo_only_code_mid_iCDF[ 2 ]; /* 2 */ + +extern const opus_uint8 * const silk_LBRR_flags_iCDF_ptr[ 2 ]; /* 10 */ + +extern const opus_uint8 silk_NLSF_interpolation_factor_iCDF[ 5 ]; /* 5 */ + +extern const silk_NLSF_CB_struct silk_NLSF_CB_WB; /* 1040 */ +extern const silk_NLSF_CB_struct silk_NLSF_CB_NB_MB; /* 728 */ + +/* Piece-wise linear mapping from bitrate in kbps to coding quality in dB SNR */ +extern const opus_int32 silk_TargetRate_table_NB[ TARGET_RATE_TAB_SZ ]; /* 32 */ +extern const opus_int32 silk_TargetRate_table_MB[ TARGET_RATE_TAB_SZ ]; /* 32 */ +extern const opus_int32 silk_TargetRate_table_WB[ TARGET_RATE_TAB_SZ ]; /* 32 */ +extern const opus_int16 silk_SNR_table_Q1[ TARGET_RATE_TAB_SZ ]; /* 16 */ + +/* Quantization offsets */ +extern const opus_int16 silk_Quantization_Offsets_Q10[ 2 ][ 2 ]; /* 8 */ + +/* Interpolation points for filter coefficients used 
in the bandwidth transition smoother */ +extern const opus_int32 silk_Transition_LP_B_Q28[ TRANSITION_INT_NUM ][ TRANSITION_NB ]; /* 60 */ +extern const opus_int32 silk_Transition_LP_A_Q28[ TRANSITION_INT_NUM ][ TRANSITION_NA ]; /* 40 */ + +/* Rom table with cosine values */ +extern const opus_int16 silk_LSFCosTab_FIX_Q12[ LSF_COS_TAB_SZ_FIX + 1 ]; /* 258 */ + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/drivers/opus/silk/tables_LTP.c b/drivers/opus/silk/tables_LTP.c new file mode 100644 index 0000000000..56b672db8b --- /dev/null +++ b/drivers/opus/silk/tables_LTP.c @@ -0,0 +1,296 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "tables.h" + +const opus_uint8 silk_LTP_per_index_iCDF[3] = { + 179, 99, 0 +}; + +static const opus_uint8 silk_LTP_gain_iCDF_0[8] = { + 71, 56, 43, 30, 21, 12, 6, 0 +}; + +static const opus_uint8 silk_LTP_gain_iCDF_1[16] = { + 199, 165, 144, 124, 109, 96, 84, 71, + 61, 51, 42, 32, 23, 15, 8, 0 +}; + +static const opus_uint8 silk_LTP_gain_iCDF_2[32] = { + 241, 225, 211, 199, 187, 175, 164, 153, + 142, 132, 123, 114, 105, 96, 88, 80, + 72, 64, 57, 50, 44, 38, 33, 29, + 24, 20, 16, 12, 9, 5, 2, 0 +}; + +const opus_int16 silk_LTP_gain_middle_avg_RD_Q14 = 12304; + +static const opus_uint8 silk_LTP_gain_BITS_Q5_0[8] = { + 15, 131, 138, 138, 155, 155, 173, 173 +}; + +static const opus_uint8 silk_LTP_gain_BITS_Q5_1[16] = { + 69, 93, 115, 118, 131, 138, 141, 138, + 150, 150, 155, 150, 155, 160, 166, 160 +}; + +static const opus_uint8 silk_LTP_gain_BITS_Q5_2[32] = { + 131, 128, 134, 141, 141, 141, 145, 145, + 145, 150, 155, 155, 155, 155, 160, 160, + 160, 160, 166, 166, 173, 173, 182, 192, + 182, 192, 192, 192, 205, 192, 205, 224 +}; + +const opus_uint8 * const silk_LTP_gain_iCDF_ptrs[NB_LTP_CBKS] = { + silk_LTP_gain_iCDF_0, + silk_LTP_gain_iCDF_1, + silk_LTP_gain_iCDF_2 +}; + +const opus_uint8 * const silk_LTP_gain_BITS_Q5_ptrs[NB_LTP_CBKS] = { + silk_LTP_gain_BITS_Q5_0, + silk_LTP_gain_BITS_Q5_1, + 
silk_LTP_gain_BITS_Q5_2 +}; + +static const opus_int8 silk_LTP_gain_vq_0[8][5] = +{ +{ + 4, 6, 24, 7, 5 +}, +{ + 0, 0, 2, 0, 0 +}, +{ + 12, 28, 41, 13, -4 +}, +{ + -9, 15, 42, 25, 14 +}, +{ + 1, -2, 62, 41, -9 +}, +{ + -10, 37, 65, -4, 3 +}, +{ + -6, 4, 66, 7, -8 +}, +{ + 16, 14, 38, -3, 33 +} +}; + +static const opus_int8 silk_LTP_gain_vq_1[16][5] = +{ +{ + 13, 22, 39, 23, 12 +}, +{ + -1, 36, 64, 27, -6 +}, +{ + -7, 10, 55, 43, 17 +}, +{ + 1, 1, 8, 1, 1 +}, +{ + 6, -11, 74, 53, -9 +}, +{ + -12, 55, 76, -12, 8 +}, +{ + -3, 3, 93, 27, -4 +}, +{ + 26, 39, 59, 3, -8 +}, +{ + 2, 0, 77, 11, 9 +}, +{ + -8, 22, 44, -6, 7 +}, +{ + 40, 9, 26, 3, 9 +}, +{ + -7, 20, 101, -7, 4 +}, +{ + 3, -8, 42, 26, 0 +}, +{ + -15, 33, 68, 2, 23 +}, +{ + -2, 55, 46, -2, 15 +}, +{ + 3, -1, 21, 16, 41 +} +}; + +static const opus_int8 silk_LTP_gain_vq_2[32][5] = +{ +{ + -6, 27, 61, 39, 5 +}, +{ + -11, 42, 88, 4, 1 +}, +{ + -2, 60, 65, 6, -4 +}, +{ + -1, -5, 73, 56, 1 +}, +{ + -9, 19, 94, 29, -9 +}, +{ + 0, 12, 99, 6, 4 +}, +{ + 8, -19, 102, 46, -13 +}, +{ + 3, 2, 13, 3, 2 +}, +{ + 9, -21, 84, 72, -18 +}, +{ + -11, 46, 104, -22, 8 +}, +{ + 18, 38, 48, 23, 0 +}, +{ + -16, 70, 83, -21, 11 +}, +{ + 5, -11, 117, 22, -8 +}, +{ + -6, 23, 117, -12, 3 +}, +{ + 3, -8, 95, 28, 4 +}, +{ + -10, 15, 77, 60, -15 +}, +{ + -1, 4, 124, 2, -4 +}, +{ + 3, 38, 84, 24, -25 +}, +{ + 2, 13, 42, 13, 31 +}, +{ + 21, -4, 56, 46, -1 +}, +{ + -1, 35, 79, -13, 19 +}, +{ + -7, 65, 88, -9, -14 +}, +{ + 20, 4, 81, 49, -29 +}, +{ + 20, 0, 75, 3, -17 +}, +{ + 5, -9, 44, 92, -8 +}, +{ + 1, -3, 22, 69, 31 +}, +{ + -6, 95, 41, -12, 5 +}, +{ + 39, 67, 16, -4, 1 +}, +{ + 0, -6, 120, 55, -36 +}, +{ + -13, 44, 122, 4, -24 +}, +{ + 81, 5, 11, 3, 7 +}, +{ + 2, 0, 9, 10, 88 +} +}; + +const opus_int8 * const silk_LTP_vq_ptrs_Q7[NB_LTP_CBKS] = { /* pointers to the first element of each [N][5] codebook above; the element address is already const-qualified, so no cast is applied (the old (opus_int8 *) cast dropped const) */ + &silk_LTP_gain_vq_0[0][0], + &silk_LTP_gain_vq_1[0][0], + &silk_LTP_gain_vq_2[0][0] +}; + +/* Maximum frequency-dependent response of the pitch taps above, + 
computed as max(abs(freqz(taps))) */ +static const opus_uint8 silk_LTP_gain_vq_0_gain[8] = { + 46, 2, 90, 87, 93, 91, 82, 98 +}; + +static const opus_uint8 silk_LTP_gain_vq_1_gain[16] = { + 109, 120, 118, 12, 113, 115, 117, 119, + 99, 59, 87, 111, 63, 111, 112, 80 +}; + +static const opus_uint8 silk_LTP_gain_vq_2_gain[32] = { + 126, 124, 125, 124, 129, 121, 126, 23, + 132, 127, 127, 127, 126, 127, 122, 133, + 130, 134, 101, 118, 119, 145, 126, 86, + 124, 120, 123, 119, 170, 173, 107, 109 +}; + +const opus_uint8 * const silk_LTP_vq_gain_ptrs_Q7[NB_LTP_CBKS] = { + &silk_LTP_gain_vq_0_gain[0], + &silk_LTP_gain_vq_1_gain[0], + &silk_LTP_gain_vq_2_gain[0] +}; + +const opus_int8 silk_LTP_vq_sizes[NB_LTP_CBKS] = { + 8, 16, 32 +}; diff --git a/drivers/opus/silk/tables_NLSF_CB_NB_MB.c b/drivers/opus/silk/tables_NLSF_CB_NB_MB.c new file mode 100644 index 0000000000..ded35eee74 --- /dev/null +++ b/drivers/opus/silk/tables_NLSF_CB_NB_MB.c @@ -0,0 +1,159 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. 
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "tables.h" + +static const opus_uint8 silk_NLSF_CB1_NB_MB_Q8[ 320 ] = { + 12, 35, 60, 83, 108, 132, 157, 180, + 206, 228, 15, 32, 55, 77, 101, 125, + 151, 175, 201, 225, 19, 42, 66, 89, + 114, 137, 162, 184, 209, 230, 12, 25, + 50, 72, 97, 120, 147, 172, 200, 223, + 26, 44, 69, 90, 114, 135, 159, 180, + 205, 225, 13, 22, 53, 80, 106, 130, + 156, 180, 205, 228, 15, 25, 44, 64, + 90, 115, 142, 168, 196, 222, 19, 24, + 62, 82, 100, 120, 145, 168, 190, 214, + 22, 31, 50, 79, 103, 120, 151, 170, + 203, 227, 21, 29, 45, 65, 106, 124, + 150, 171, 196, 224, 30, 49, 75, 97, + 121, 142, 165, 186, 209, 229, 19, 25, + 52, 70, 93, 116, 143, 166, 192, 219, + 26, 34, 62, 75, 97, 118, 145, 167, + 194, 217, 25, 33, 56, 70, 91, 113, + 143, 165, 196, 223, 21, 34, 51, 72, + 97, 117, 145, 171, 196, 222, 20, 29, + 50, 67, 90, 117, 144, 168, 197, 221, + 22, 31, 48, 66, 95, 117, 146, 168, + 196, 222, 24, 33, 51, 77, 116, 134, + 158, 180, 200, 224, 21, 28, 70, 87, + 106, 124, 149, 170, 194, 217, 26, 33, + 53, 64, 83, 117, 152, 173, 204, 225, + 27, 34, 65, 95, 108, 129, 155, 174, + 210, 225, 
20, 26, 72, 99, 113, 131, + 154, 176, 200, 219, 34, 43, 61, 78, + 93, 114, 155, 177, 205, 229, 23, 29, + 54, 97, 124, 138, 163, 179, 209, 229, + 30, 38, 56, 89, 118, 129, 158, 178, + 200, 231, 21, 29, 49, 63, 85, 111, + 142, 163, 193, 222, 27, 48, 77, 103, + 133, 158, 179, 196, 215, 232, 29, 47, + 74, 99, 124, 151, 176, 198, 220, 237, + 33, 42, 61, 76, 93, 121, 155, 174, + 207, 225, 29, 53, 87, 112, 136, 154, + 170, 188, 208, 227, 24, 30, 52, 84, + 131, 150, 166, 186, 203, 229, 37, 48, + 64, 84, 104, 118, 156, 177, 201, 230 +}; + +static const opus_uint8 silk_NLSF_CB1_iCDF_NB_MB[ 64 ] = { + 212, 178, 148, 129, 108, 96, 85, 82, + 79, 77, 61, 59, 57, 56, 51, 49, + 48, 45, 42, 41, 40, 38, 36, 34, + 31, 30, 21, 12, 10, 3, 1, 0, + 255, 245, 244, 236, 233, 225, 217, 203, + 190, 176, 175, 161, 149, 136, 125, 114, + 102, 91, 81, 71, 60, 52, 43, 35, + 28, 20, 19, 18, 12, 11, 5, 0 +}; + +static const opus_uint8 silk_NLSF_CB2_SELECT_NB_MB[ 160 ] = { + 16, 0, 0, 0, 0, 99, 66, 36, + 36, 34, 36, 34, 34, 34, 34, 83, + 69, 36, 52, 34, 116, 102, 70, 68, + 68, 176, 102, 68, 68, 34, 65, 85, + 68, 84, 36, 116, 141, 152, 139, 170, + 132, 187, 184, 216, 137, 132, 249, 168, + 185, 139, 104, 102, 100, 68, 68, 178, + 218, 185, 185, 170, 244, 216, 187, 187, + 170, 244, 187, 187, 219, 138, 103, 155, + 184, 185, 137, 116, 183, 155, 152, 136, + 132, 217, 184, 184, 170, 164, 217, 171, + 155, 139, 244, 169, 184, 185, 170, 164, + 216, 223, 218, 138, 214, 143, 188, 218, + 168, 244, 141, 136, 155, 170, 168, 138, + 220, 219, 139, 164, 219, 202, 216, 137, + 168, 186, 246, 185, 139, 116, 185, 219, + 185, 138, 100, 100, 134, 100, 102, 34, + 68, 68, 100, 68, 168, 203, 221, 218, + 168, 167, 154, 136, 104, 70, 164, 246, + 171, 137, 139, 137, 155, 218, 219, 139 +}; + +static const opus_uint8 silk_NLSF_CB2_iCDF_NB_MB[ 72 ] = { + 255, 254, 253, 238, 14, 3, 2, 1, + 0, 255, 254, 252, 218, 35, 3, 2, + 1, 0, 255, 254, 250, 208, 59, 4, + 2, 1, 0, 255, 254, 246, 194, 71, + 10, 2, 1, 0, 255, 252, 236, 183, + 82, 8, 
2, 1, 0, 255, 252, 235, + 180, 90, 17, 2, 1, 0, 255, 248, + 224, 171, 97, 30, 4, 1, 0, 255, + 254, 236, 173, 95, 37, 7, 1, 0 +}; + +static const opus_uint8 silk_NLSF_CB2_BITS_NB_MB_Q5[ 72 ] = { + 255, 255, 255, 131, 6, 145, 255, 255, + 255, 255, 255, 236, 93, 15, 96, 255, + 255, 255, 255, 255, 194, 83, 25, 71, + 221, 255, 255, 255, 255, 162, 73, 34, + 66, 162, 255, 255, 255, 210, 126, 73, + 43, 57, 173, 255, 255, 255, 201, 125, + 71, 48, 58, 130, 255, 255, 255, 166, + 110, 73, 57, 62, 104, 210, 255, 255, + 251, 123, 65, 55, 68, 100, 171, 255 +}; + +static const opus_uint8 silk_NLSF_PRED_NB_MB_Q8[ 18 ] = { + 179, 138, 140, 148, 151, 149, 153, 151, + 163, 116, 67, 82, 59, 92, 72, 100, + 89, 92 +}; + +static const opus_int16 silk_NLSF_DELTA_MIN_NB_MB_Q15[ 11 ] = { + 250, 3, 6, 3, 3, 3, 4, 3, + 3, 3, 461 +}; + +const silk_NLSF_CB_struct silk_NLSF_CB_NB_MB = +{ + 32, + 10, + SILK_FIX_CONST( 0.18, 16 ), + SILK_FIX_CONST( 1.0 / 0.18, 6 ), + silk_NLSF_CB1_NB_MB_Q8, + silk_NLSF_CB1_iCDF_NB_MB, + silk_NLSF_PRED_NB_MB_Q8, + silk_NLSF_CB2_SELECT_NB_MB, + silk_NLSF_CB2_iCDF_NB_MB, + silk_NLSF_CB2_BITS_NB_MB_Q5, + silk_NLSF_DELTA_MIN_NB_MB_Q15, +}; diff --git a/drivers/opus/silk/tables_NLSF_CB_WB.c b/drivers/opus/silk/tables_NLSF_CB_WB.c new file mode 100644 index 0000000000..d83567ea6f --- /dev/null +++ b/drivers/opus/silk/tables_NLSF_CB_WB.c @@ -0,0 +1,198 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. 
+- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "tables.h" + +static const opus_uint8 silk_NLSF_CB1_WB_Q8[ 512 ] = { + 7, 23, 38, 54, 69, 85, 100, 116, + 131, 147, 162, 178, 193, 208, 223, 239, + 13, 25, 41, 55, 69, 83, 98, 112, + 127, 142, 157, 171, 187, 203, 220, 236, + 15, 21, 34, 51, 61, 78, 92, 106, + 126, 136, 152, 167, 185, 205, 225, 240, + 10, 21, 36, 50, 63, 79, 95, 110, + 126, 141, 157, 173, 189, 205, 221, 237, + 17, 20, 37, 51, 59, 78, 89, 107, + 123, 134, 150, 164, 184, 205, 224, 240, + 10, 15, 32, 51, 67, 81, 96, 112, + 129, 142, 158, 173, 189, 204, 220, 236, + 8, 21, 37, 51, 65, 79, 98, 113, + 126, 138, 155, 168, 179, 192, 209, 218, + 12, 15, 34, 55, 63, 78, 87, 108, + 118, 131, 148, 167, 185, 203, 219, 236, + 16, 19, 32, 36, 56, 79, 91, 108, + 118, 136, 154, 171, 186, 204, 220, 237, + 11, 28, 43, 58, 74, 89, 105, 120, + 135, 150, 165, 180, 196, 211, 226, 241, + 6, 16, 33, 46, 60, 75, 92, 
107, + 123, 137, 156, 169, 185, 199, 214, 225, + 11, 19, 30, 44, 57, 74, 89, 105, + 121, 135, 152, 169, 186, 202, 218, 234, + 12, 19, 29, 46, 57, 71, 88, 100, + 120, 132, 148, 165, 182, 199, 216, 233, + 17, 23, 35, 46, 56, 77, 92, 106, + 123, 134, 152, 167, 185, 204, 222, 237, + 14, 17, 45, 53, 63, 75, 89, 107, + 115, 132, 151, 171, 188, 206, 221, 240, + 9, 16, 29, 40, 56, 71, 88, 103, + 119, 137, 154, 171, 189, 205, 222, 237, + 16, 19, 36, 48, 57, 76, 87, 105, + 118, 132, 150, 167, 185, 202, 218, 236, + 12, 17, 29, 54, 71, 81, 94, 104, + 126, 136, 149, 164, 182, 201, 221, 237, + 15, 28, 47, 62, 79, 97, 115, 129, + 142, 155, 168, 180, 194, 208, 223, 238, + 8, 14, 30, 45, 62, 78, 94, 111, + 127, 143, 159, 175, 192, 207, 223, 239, + 17, 30, 49, 62, 79, 92, 107, 119, + 132, 145, 160, 174, 190, 204, 220, 235, + 14, 19, 36, 45, 61, 76, 91, 108, + 121, 138, 154, 172, 189, 205, 222, 238, + 12, 18, 31, 45, 60, 76, 91, 107, + 123, 138, 154, 171, 187, 204, 221, 236, + 13, 17, 31, 43, 53, 70, 83, 103, + 114, 131, 149, 167, 185, 203, 220, 237, + 17, 22, 35, 42, 58, 78, 93, 110, + 125, 139, 155, 170, 188, 206, 224, 240, + 8, 15, 34, 50, 67, 83, 99, 115, + 131, 146, 162, 178, 193, 209, 224, 239, + 13, 16, 41, 66, 73, 86, 95, 111, + 128, 137, 150, 163, 183, 206, 225, 241, + 17, 25, 37, 52, 63, 75, 92, 102, + 119, 132, 144, 160, 175, 191, 212, 231, + 19, 31, 49, 65, 83, 100, 117, 133, + 147, 161, 174, 187, 200, 213, 227, 242, + 18, 31, 52, 68, 88, 103, 117, 126, + 138, 149, 163, 177, 192, 207, 223, 239, + 16, 29, 47, 61, 76, 90, 106, 119, + 133, 147, 161, 176, 193, 209, 224, 240, + 15, 21, 35, 50, 61, 73, 86, 97, + 110, 119, 129, 141, 175, 198, 218, 237 +}; + +static const opus_uint8 silk_NLSF_CB1_iCDF_WB[ 64 ] = { + 225, 204, 201, 184, 183, 175, 158, 154, + 153, 135, 119, 115, 113, 110, 109, 99, + 98, 95, 79, 68, 52, 50, 48, 45, + 43, 32, 31, 27, 18, 10, 3, 0, + 255, 251, 235, 230, 212, 201, 196, 182, + 167, 166, 163, 151, 138, 124, 110, 104, + 90, 78, 76, 70, 69, 57, 45, 34, + 
24, 21, 11, 6, 5, 4, 3, 0 +}; + +static const opus_uint8 silk_NLSF_CB2_SELECT_WB[ 256 ] = { + 0, 0, 0, 0, 0, 0, 0, 1, + 100, 102, 102, 68, 68, 36, 34, 96, + 164, 107, 158, 185, 180, 185, 139, 102, + 64, 66, 36, 34, 34, 0, 1, 32, + 208, 139, 141, 191, 152, 185, 155, 104, + 96, 171, 104, 166, 102, 102, 102, 132, + 1, 0, 0, 0, 0, 16, 16, 0, + 80, 109, 78, 107, 185, 139, 103, 101, + 208, 212, 141, 139, 173, 153, 123, 103, + 36, 0, 0, 0, 0, 0, 0, 1, + 48, 0, 0, 0, 0, 0, 0, 32, + 68, 135, 123, 119, 119, 103, 69, 98, + 68, 103, 120, 118, 118, 102, 71, 98, + 134, 136, 157, 184, 182, 153, 139, 134, + 208, 168, 248, 75, 189, 143, 121, 107, + 32, 49, 34, 34, 34, 0, 17, 2, + 210, 235, 139, 123, 185, 137, 105, 134, + 98, 135, 104, 182, 100, 183, 171, 134, + 100, 70, 68, 70, 66, 66, 34, 131, + 64, 166, 102, 68, 36, 2, 1, 0, + 134, 166, 102, 68, 34, 34, 66, 132, + 212, 246, 158, 139, 107, 107, 87, 102, + 100, 219, 125, 122, 137, 118, 103, 132, + 114, 135, 137, 105, 171, 106, 50, 34, + 164, 214, 141, 143, 185, 151, 121, 103, + 192, 34, 0, 0, 0, 0, 0, 1, + 208, 109, 74, 187, 134, 249, 159, 137, + 102, 110, 154, 118, 87, 101, 119, 101, + 0, 2, 0, 36, 36, 66, 68, 35, + 96, 164, 102, 100, 36, 0, 2, 33, + 167, 138, 174, 102, 100, 84, 2, 2, + 100, 107, 120, 119, 36, 197, 24, 0 +}; + +static const opus_uint8 silk_NLSF_CB2_iCDF_WB[ 72 ] = { + 255, 254, 253, 244, 12, 3, 2, 1, + 0, 255, 254, 252, 224, 38, 3, 2, + 1, 0, 255, 254, 251, 209, 57, 4, + 2, 1, 0, 255, 254, 244, 195, 69, + 4, 2, 1, 0, 255, 251, 232, 184, + 84, 7, 2, 1, 0, 255, 254, 240, + 186, 86, 14, 2, 1, 0, 255, 254, + 239, 178, 91, 30, 5, 1, 0, 255, + 248, 227, 177, 100, 19, 2, 1, 0 +}; + +static const opus_uint8 silk_NLSF_CB2_BITS_WB_Q5[ 72 ] = { + 255, 255, 255, 156, 4, 154, 255, 255, + 255, 255, 255, 227, 102, 15, 92, 255, + 255, 255, 255, 255, 213, 83, 24, 72, + 236, 255, 255, 255, 255, 150, 76, 33, + 63, 214, 255, 255, 255, 190, 121, 77, + 43, 55, 185, 255, 255, 255, 245, 137, + 71, 43, 59, 139, 255, 255, 255, 255, + 131, 
66, 50, 66, 107, 194, 255, 255, + 166, 116, 76, 55, 53, 125, 255, 255 +}; + +static const opus_uint8 silk_NLSF_PRED_WB_Q8[ 30 ] = { + 175, 148, 160, 176, 178, 173, 174, 164, + 177, 174, 196, 182, 198, 192, 182, 68, + 62, 66, 60, 72, 117, 85, 90, 118, + 136, 151, 142, 160, 142, 155 +}; + +static const opus_int16 silk_NLSF_DELTA_MIN_WB_Q15[ 17 ] = { + 100, 3, 40, 3, 3, 3, 5, 14, + 14, 10, 11, 3, 8, 9, 7, 3, + 347 +}; + +const silk_NLSF_CB_struct silk_NLSF_CB_WB = +{ + 32, + 16, + SILK_FIX_CONST( 0.15, 16 ), + SILK_FIX_CONST( 1.0 / 0.15, 6 ), + silk_NLSF_CB1_WB_Q8, + silk_NLSF_CB1_iCDF_WB, + silk_NLSF_PRED_WB_Q8, + silk_NLSF_CB2_SELECT_WB, + silk_NLSF_CB2_iCDF_WB, + silk_NLSF_CB2_BITS_WB_Q5, + silk_NLSF_DELTA_MIN_WB_Q15, +}; + diff --git a/drivers/opus/silk/tables_gain.c b/drivers/opus/silk/tables_gain.c new file mode 100644 index 0000000000..6df980616b --- /dev/null +++ b/drivers/opus/silk/tables_gain.c @@ -0,0 +1,63 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. 
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "tables.h" + +#ifdef __cplusplus +extern "C" +{ +#endif + +const opus_uint8 silk_gain_iCDF[ 3 ][ N_LEVELS_QGAIN / 8 ] = +{ +{ + 224, 112, 44, 15, 3, 2, 1, 0 +}, +{ + 254, 237, 192, 132, 70, 23, 4, 0 +}, +{ + 255, 252, 226, 155, 61, 11, 2, 0 +} +}; + +const opus_uint8 silk_delta_gain_iCDF[ MAX_DELTA_GAIN_QUANT - MIN_DELTA_GAIN_QUANT + 1 ] = { + 250, 245, 234, 203, 71, 50, 42, 38, + 35, 33, 31, 29, 28, 27, 26, 25, + 24, 23, 22, 21, 20, 19, 18, 17, + 16, 15, 14, 13, 12, 11, 10, 9, + 8, 7, 6, 5, 4, 3, 2, 1, + 0 +}; + +#ifdef __cplusplus +} +#endif diff --git a/drivers/opus/silk/tables_other.c b/drivers/opus/silk/tables_other.c new file mode 100644 index 0000000000..246e960fa4 --- /dev/null +++ b/drivers/opus/silk/tables_other.c @@ -0,0 +1,138 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. 
+Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "structs.h" +#include "define.h" +#include "tables.h" + +#ifdef __cplusplus +extern "C" +{ +#endif + +/* Piece-wise linear mapping from bitrate in kbps to coding quality in dB SNR */ +const opus_int32 silk_TargetRate_table_NB[ TARGET_RATE_TAB_SZ ] = { + 0, 8000, 9400, 11500, 13500, 17500, 25000, MAX_TARGET_RATE_BPS +}; +const opus_int32 silk_TargetRate_table_MB[ TARGET_RATE_TAB_SZ ] = { + 0, 9000, 12000, 14500, 18500, 24500, 35500, MAX_TARGET_RATE_BPS +}; +const opus_int32 silk_TargetRate_table_WB[ TARGET_RATE_TAB_SZ ] = { + 0, 10500, 14000, 17000, 21500, 28500, 42000, MAX_TARGET_RATE_BPS +}; +const opus_int16 silk_SNR_table_Q1[ TARGET_RATE_TAB_SZ ] = { + 18, 29, 38, 40, 46, 52, 62, 84 +}; + +/* Tables for stereo predictor coding */ +const opus_int16 silk_stereo_pred_quant_Q13[ STEREO_QUANT_TAB_SIZE ] = { + -13732, -10050, -8266, -7526, -6500, -5000, -2950, -820, + 820, 2950, 5000, 6500, 7526, 8266, 10050, 13732 +}; +const opus_uint8 silk_stereo_pred_joint_iCDF[ 25 ] = { + 249, 247, 246, 245, 244, + 234, 210, 202, 201, 200, + 197, 174, 82, 59, 56, + 55, 54, 46, 22, 12, + 11, 10, 9, 7, 0 +}; +const opus_uint8 silk_stereo_only_code_mid_iCDF[ 2 ] = { 64, 0 }; + +/* Tables for LBRR flags */ +static const opus_uint8 silk_LBRR_flags_2_iCDF[ 3 ] = { 203, 150, 0 }; +static const opus_uint8 silk_LBRR_flags_3_iCDF[ 7 ] = { 215, 195, 166, 125, 110, 82, 0 }; +const opus_uint8 * const silk_LBRR_flags_iCDF_ptr[ 2 ] = { + silk_LBRR_flags_2_iCDF, + silk_LBRR_flags_3_iCDF +}; + +/* Table for LSB coding */ +const opus_uint8 silk_lsb_iCDF[ 2 ] = { 120, 0 }; + +/* Tables for LTPScale */ +const opus_uint8 silk_LTPscale_iCDF[ 3 ] = { 128, 64, 0 }; + +/* Tables for signal type and offset coding */ +const opus_uint8 silk_type_offset_VAD_iCDF[ 4 ] = { + 232, 158, 10, 0 +}; +const opus_uint8 silk_type_offset_no_VAD_iCDF[ 2 ] = { + 230, 0 +}; + 
+/* Tables for NLSF interpolation factor */ +const opus_uint8 silk_NLSF_interpolation_factor_iCDF[ 5 ] = { 243, 221, 192, 181, 0 }; + +/* Quantization offsets */ +const opus_int16 silk_Quantization_Offsets_Q10[ 2 ][ 2 ] = { + { OFFSET_UVL_Q10, OFFSET_UVH_Q10 }, { OFFSET_VL_Q10, OFFSET_VH_Q10 } +}; + +/* Table for LTPScale */ +const opus_int16 silk_LTPScales_table_Q14[ 3 ] = { 15565, 12288, 8192 }; + +/* Uniform entropy tables */ +const opus_uint8 silk_uniform3_iCDF[ 3 ] = { 171, 85, 0 }; +const opus_uint8 silk_uniform4_iCDF[ 4 ] = { 192, 128, 64, 0 }; +const opus_uint8 silk_uniform5_iCDF[ 5 ] = { 205, 154, 102, 51, 0 }; +const opus_uint8 silk_uniform6_iCDF[ 6 ] = { 213, 171, 128, 85, 43, 0 }; +const opus_uint8 silk_uniform8_iCDF[ 8 ] = { 224, 192, 160, 128, 96, 64, 32, 0 }; + +const opus_uint8 silk_NLSF_EXT_iCDF[ 7 ] = { 100, 40, 16, 7, 3, 1, 0 }; + +/* Elliptic/Cauer filters designed with 0.1 dB passband ripple, + 80 dB minimum stopband attenuation, and + [0.95 : 0.15 : 0.35] normalized cut off frequencies. 
*/ + +/* Interpolation points for filter coefficients used in the bandwidth transition smoother */ +const opus_int32 silk_Transition_LP_B_Q28[ TRANSITION_INT_NUM ][ TRANSITION_NB ] = +{ +{ 250767114, 501534038, 250767114 }, +{ 209867381, 419732057, 209867381 }, +{ 170987846, 341967853, 170987846 }, +{ 131531482, 263046905, 131531482 }, +{ 89306658, 178584282, 89306658 } +}; + +/* Interpolation points for filter coefficients used in the bandwidth transition smoother */ +const opus_int32 silk_Transition_LP_A_Q28[ TRANSITION_INT_NUM ][ TRANSITION_NA ] = +{ +{ 506393414, 239854379 }, +{ 411067935, 169683996 }, +{ 306733530, 116694253 }, +{ 185807084, 77959395 }, +{ 35497197, 57401098 } +}; + +#ifdef __cplusplus +} +#endif + diff --git a/drivers/opus/silk/tables_pitch_lag.c b/drivers/opus/silk/tables_pitch_lag.c new file mode 100644 index 0000000000..0af5c5ace7 --- /dev/null +++ b/drivers/opus/silk/tables_pitch_lag.c @@ -0,0 +1,69 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. 
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "tables.h" + +const opus_uint8 silk_pitch_lag_iCDF[ 2 * ( PITCH_EST_MAX_LAG_MS - PITCH_EST_MIN_LAG_MS ) ] = { + 253, 250, 244, 233, 212, 182, 150, 131, + 120, 110, 98, 85, 72, 60, 49, 40, + 32, 25, 19, 15, 13, 11, 9, 8, + 7, 6, 5, 4, 3, 2, 1, 0 +}; + +const opus_uint8 silk_pitch_delta_iCDF[21] = { + 210, 208, 206, 203, 199, 193, 183, 168, + 142, 104, 74, 52, 37, 27, 20, 14, + 10, 6, 4, 2, 0 +}; + +const opus_uint8 silk_pitch_contour_iCDF[34] = { + 223, 201, 183, 167, 152, 138, 124, 111, + 98, 88, 79, 70, 62, 56, 50, 44, + 39, 35, 31, 27, 24, 21, 18, 16, + 14, 12, 10, 8, 6, 4, 3, 2, + 1, 0 +}; + +const opus_uint8 silk_pitch_contour_NB_iCDF[11] = { + 188, 176, 155, 138, 119, 97, 67, 43, + 26, 10, 0 +}; + +const opus_uint8 silk_pitch_contour_10_ms_iCDF[12] = { + 165, 119, 80, 61, 47, 35, 27, 20, + 14, 9, 4, 0 +}; + +const opus_uint8 silk_pitch_contour_10_ms_NB_iCDF[3] = { + 113, 63, 0 +}; + + diff --git a/drivers/opus/silk/tables_pulses_per_block.c b/drivers/opus/silk/tables_pulses_per_block.c new file mode 100644 index 0000000000..05ba2318f8 --- /dev/null +++ 
b/drivers/opus/silk/tables_pulses_per_block.c @@ -0,0 +1,264 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+***********************************************************************/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "tables.h" + +const opus_uint8 silk_max_pulses_table[ 4 ] = { + 8, 10, 12, 16 +}; + +const opus_uint8 silk_pulses_per_block_iCDF[ 10 ][ 18 ] = { +{ + 125, 51, 26, 18, 15, 12, 11, 10, + 9, 8, 7, 6, 5, 4, 3, 2, + 1, 0 +}, +{ + 198, 105, 45, 22, 15, 12, 11, 10, + 9, 8, 7, 6, 5, 4, 3, 2, + 1, 0 +}, +{ + 213, 162, 116, 83, 59, 43, 32, 24, + 18, 15, 12, 9, 7, 6, 5, 3, + 2, 0 +}, +{ + 239, 187, 116, 59, 28, 16, 11, 10, + 9, 8, 7, 6, 5, 4, 3, 2, + 1, 0 +}, +{ + 250, 229, 188, 135, 86, 51, 30, 19, + 13, 10, 8, 6, 5, 4, 3, 2, + 1, 0 +}, +{ + 249, 235, 213, 185, 156, 128, 103, 83, + 66, 53, 42, 33, 26, 21, 17, 13, + 10, 0 +}, +{ + 254, 249, 235, 206, 164, 118, 77, 46, + 27, 16, 10, 7, 5, 4, 3, 2, + 1, 0 +}, +{ + 255, 253, 249, 239, 220, 191, 156, 119, + 85, 57, 37, 23, 15, 10, 6, 4, + 2, 0 +}, +{ + 255, 253, 251, 246, 237, 223, 203, 179, + 152, 124, 98, 75, 55, 40, 29, 21, + 15, 0 +}, +{ + 255, 254, 253, 247, 220, 162, 106, 67, + 42, 28, 18, 12, 9, 6, 4, 3, + 2, 0 +} +}; + +const opus_uint8 silk_pulses_per_block_BITS_Q5[ 9 ][ 18 ] = { +{ + 31, 57, 107, 160, 205, 205, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255 +}, +{ + 69, 47, 67, 111, 166, 205, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255 +}, +{ + 82, 74, 79, 95, 109, 128, 145, 160, + 173, 205, 205, 205, 224, 255, 255, 224, + 255, 224 +}, +{ + 125, 74, 59, 69, 97, 141, 182, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255 +}, +{ + 173, 115, 85, 73, 76, 92, 115, 145, + 173, 205, 224, 224, 255, 255, 255, 255, + 255, 255 +}, +{ + 166, 134, 113, 102, 101, 102, 107, 118, + 125, 138, 145, 155, 166, 182, 192, 192, + 205, 150 +}, +{ + 224, 182, 134, 101, 83, 79, 85, 97, + 120, 145, 173, 205, 224, 255, 255, 255, + 255, 255 +}, +{ + 255, 224, 192, 150, 120, 101, 92, 89, + 93, 102, 118, 134, 160, 182, 192, 224, + 224, 224 +}, +{ + 255, 224, 
224, 182, 155, 134, 118, 109, + 104, 102, 106, 111, 118, 131, 145, 160, + 173, 131 +} +}; + +const opus_uint8 silk_rate_levels_iCDF[ 2 ][ 9 ] = +{ +{ + 241, 190, 178, 132, 87, 74, 41, 14, + 0 +}, +{ + 223, 193, 157, 140, 106, 57, 39, 18, + 0 +} +}; + +const opus_uint8 silk_rate_levels_BITS_Q5[ 2 ][ 9 ] = +{ +{ + 131, 74, 141, 79, 80, 138, 95, 104, + 134 +}, +{ + 95, 99, 91, 125, 93, 76, 123, 115, + 123 +} +}; + +const opus_uint8 silk_shell_code_table0[ 152 ] = { + 128, 0, 214, 42, 0, 235, 128, 21, + 0, 244, 184, 72, 11, 0, 248, 214, + 128, 42, 7, 0, 248, 225, 170, 80, + 25, 5, 0, 251, 236, 198, 126, 54, + 18, 3, 0, 250, 238, 211, 159, 82, + 35, 15, 5, 0, 250, 231, 203, 168, + 128, 88, 53, 25, 6, 0, 252, 238, + 216, 185, 148, 108, 71, 40, 18, 4, + 0, 253, 243, 225, 199, 166, 128, 90, + 57, 31, 13, 3, 0, 254, 246, 233, + 212, 183, 147, 109, 73, 44, 23, 10, + 2, 0, 255, 250, 240, 223, 198, 166, + 128, 90, 58, 33, 16, 6, 1, 0, + 255, 251, 244, 231, 210, 181, 146, 110, + 75, 46, 25, 12, 5, 1, 0, 255, + 253, 248, 238, 221, 196, 164, 128, 92, + 60, 35, 18, 8, 3, 1, 0, 255, + 253, 249, 242, 229, 208, 180, 146, 110, + 76, 48, 27, 14, 7, 3, 1, 0 +}; + +const opus_uint8 silk_shell_code_table1[ 152 ] = { + 129, 0, 207, 50, 0, 236, 129, 20, + 0, 245, 185, 72, 10, 0, 249, 213, + 129, 42, 6, 0, 250, 226, 169, 87, + 27, 4, 0, 251, 233, 194, 130, 62, + 20, 4, 0, 250, 236, 207, 160, 99, + 47, 17, 3, 0, 255, 240, 217, 182, + 131, 81, 41, 11, 1, 0, 255, 254, + 233, 201, 159, 107, 61, 20, 2, 1, + 0, 255, 249, 233, 206, 170, 128, 86, + 50, 23, 7, 1, 0, 255, 250, 238, + 217, 186, 148, 108, 70, 39, 18, 6, + 1, 0, 255, 252, 243, 226, 200, 166, + 128, 90, 56, 30, 13, 4, 1, 0, + 255, 252, 245, 231, 209, 180, 146, 110, + 76, 47, 25, 11, 4, 1, 0, 255, + 253, 248, 237, 219, 194, 163, 128, 93, + 62, 37, 19, 8, 3, 1, 0, 255, + 254, 250, 241, 226, 205, 177, 145, 111, + 79, 51, 30, 15, 6, 2, 1, 0 +}; + +const opus_uint8 silk_shell_code_table2[ 152 ] = { + 129, 0, 203, 54, 0, 234, 129, 23, + 0, 245, 
184, 73, 10, 0, 250, 215, + 129, 41, 5, 0, 252, 232, 173, 86, + 24, 3, 0, 253, 240, 200, 129, 56, + 15, 2, 0, 253, 244, 217, 164, 94, + 38, 10, 1, 0, 253, 245, 226, 189, + 132, 71, 27, 7, 1, 0, 253, 246, + 231, 203, 159, 105, 56, 23, 6, 1, + 0, 255, 248, 235, 213, 179, 133, 85, + 47, 19, 5, 1, 0, 255, 254, 243, + 221, 194, 159, 117, 70, 37, 12, 2, + 1, 0, 255, 254, 248, 234, 208, 171, + 128, 85, 48, 22, 8, 2, 1, 0, + 255, 254, 250, 240, 220, 189, 149, 107, + 67, 36, 16, 6, 2, 1, 0, 255, + 254, 251, 243, 227, 201, 166, 128, 90, + 55, 29, 13, 5, 2, 1, 0, 255, + 254, 252, 246, 234, 213, 183, 147, 109, + 73, 43, 22, 10, 4, 2, 1, 0 +}; + +const opus_uint8 silk_shell_code_table3[ 152 ] = { + 130, 0, 200, 58, 0, 231, 130, 26, + 0, 244, 184, 76, 12, 0, 249, 214, + 130, 43, 6, 0, 252, 232, 173, 87, + 24, 3, 0, 253, 241, 203, 131, 56, + 14, 2, 0, 254, 246, 221, 167, 94, + 35, 8, 1, 0, 254, 249, 232, 193, + 130, 65, 23, 5, 1, 0, 255, 251, + 239, 211, 162, 99, 45, 15, 4, 1, + 0, 255, 251, 243, 223, 186, 131, 74, + 33, 11, 3, 1, 0, 255, 252, 245, + 230, 202, 158, 105, 57, 24, 8, 2, + 1, 0, 255, 253, 247, 235, 214, 179, + 132, 84, 44, 19, 7, 2, 1, 0, + 255, 254, 250, 240, 223, 196, 159, 112, + 69, 36, 15, 6, 2, 1, 0, 255, + 254, 253, 245, 231, 209, 176, 136, 93, + 55, 27, 11, 3, 2, 1, 0, 255, + 254, 253, 252, 239, 221, 194, 158, 117, + 76, 42, 18, 4, 3, 2, 1, 0 +}; + +const opus_uint8 silk_shell_code_table_offsets[ 17 ] = { + 0, 0, 2, 5, 9, 14, 20, 27, + 35, 44, 54, 65, 77, 90, 104, 119, + 135 +}; + +const opus_uint8 silk_sign_iCDF[ 42 ] = { + 254, 49, 67, 77, 82, 93, 99, + 198, 11, 18, 24, 31, 36, 45, + 255, 46, 66, 78, 87, 94, 104, + 208, 14, 21, 32, 42, 51, 66, + 255, 94, 104, 109, 112, 115, 118, + 248, 53, 69, 80, 88, 95, 102 +}; diff --git a/drivers/opus/silk/tuning_parameters.h b/drivers/opus/silk/tuning_parameters.h new file mode 100644 index 0000000000..e1057bbaae --- /dev/null +++ b/drivers/opus/silk/tuning_parameters.h @@ -0,0 +1,171 @@ 
+/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+***********************************************************************/ + +#ifndef SILK_TUNING_PARAMETERS_H +#define SILK_TUNING_PARAMETERS_H + +#ifdef __cplusplus +extern "C" +{ +#endif + +/* Decay time for bitreservoir */ +#define BITRESERVOIR_DECAY_TIME_MS 500 + +/*******************/ +/* Pitch estimator */ +/*******************/ + +/* Level of noise floor for whitening filter LPC analysis in pitch analysis */ +#define FIND_PITCH_WHITE_NOISE_FRACTION 1e-3f + +/* Bandwidth expansion for whitening filter in pitch analysis */ +#define FIND_PITCH_BANDWIDTH_EXPANSION 0.99f + +/*********************/ +/* Linear prediction */ +/*********************/ + +/* LPC analysis regularization */ +#define FIND_LPC_COND_FAC 1e-5f + +/* LTP analysis defines */ +#define FIND_LTP_COND_FAC 1e-5f +#define LTP_DAMPING 0.05f +#define LTP_SMOOTHING 0.1f + +/* LTP quantization settings */ +#define MU_LTP_QUANT_NB 0.03f +#define MU_LTP_QUANT_MB 0.025f +#define MU_LTP_QUANT_WB 0.02f + +/* Max cumulative LTP gain */ +#define MAX_SUM_LOG_GAIN_DB 250.0f + +/***********************/ +/* High pass filtering */ +/***********************/ + +/* Smoothing parameters for low end of pitch frequency range estimation */ +#define VARIABLE_HP_SMTH_COEF1 0.1f +#define VARIABLE_HP_SMTH_COEF2 0.015f +#define VARIABLE_HP_MAX_DELTA_FREQ 0.4f + +/* Min and max cut-off frequency values (-3 dB points) */ +#define VARIABLE_HP_MIN_CUTOFF_HZ 60 +#define VARIABLE_HP_MAX_CUTOFF_HZ 100 + +/***********/ +/* Various */ +/***********/ + +/* VAD threshold */ +#define SPEECH_ACTIVITY_DTX_THRES 0.05f + +/* Speech Activity LBRR enable threshold */ +#define LBRR_SPEECH_ACTIVITY_THRES 0.3f + +/*************************/ +/* Perceptual parameters */ +/*************************/ + +/* reduction in coding SNR during low speech activity */ +#define BG_SNR_DECR_dB 2.0f + +/* factor for reducing quantization noise during voiced speech */ +#define HARM_SNR_INCR_dB 2.0f + +/* factor for reducing quantization noise for unvoiced 
sparse signals */ +#define SPARSE_SNR_INCR_dB 2.0f + +/* threshold for sparseness measure above which to use lower quantization offset during unvoiced */ +#define SPARSENESS_THRESHOLD_QNT_OFFSET 0.75f + +/* warping control */ +#define WARPING_MULTIPLIER 0.015f + +/* fraction added to first autocorrelation value */ +#define SHAPE_WHITE_NOISE_FRACTION 5e-5f + +/* noise shaping filter chirp factor */ +#define BANDWIDTH_EXPANSION 0.95f + +/* difference between chirp factors for analysis and synthesis noise shaping filters at low bitrates */ +#define LOW_RATE_BANDWIDTH_EXPANSION_DELTA 0.01f + +/* extra harmonic boosting (signal shaping) at low bitrates */ +#define LOW_RATE_HARMONIC_BOOST 0.1f + +/* extra harmonic boosting (signal shaping) for noisy input signals */ +#define LOW_INPUT_QUALITY_HARMONIC_BOOST 0.1f + +/* harmonic noise shaping */ +#define HARMONIC_SHAPING 0.3f + +/* extra harmonic noise shaping for high bitrates or noisy input */ +#define HIGH_RATE_OR_LOW_QUALITY_HARMONIC_SHAPING 0.2f + +/* parameter for shaping noise towards higher frequencies */ +#define HP_NOISE_COEF 0.25f + +/* parameter for shaping noise even more towards higher frequencies during voiced speech */ +#define HARM_HP_NOISE_COEF 0.35f + +/* parameter for applying a high-pass tilt to the input signal */ +#define INPUT_TILT 0.05f + +/* parameter for extra high-pass tilt to the input signal at high rates */ +#define HIGH_RATE_INPUT_TILT 0.1f + +/* parameter for reducing noise at the very low frequencies */ +#define LOW_FREQ_SHAPING 4.0f + +/* less reduction of noise at the very low frequencies for signals with low SNR at low frequencies */ +#define LOW_QUALITY_LOW_FREQ_SHAPING_DECR 0.5f + +/* subframe smoothing coefficient for HarmBoost, HarmShapeGain, Tilt (lower -> more smoothing) */ +#define SUBFR_SMTH_COEF 0.4f + +/* parameters defining the R/D tradeoff in the residual quantizer */ +#define LAMBDA_OFFSET 1.2f +#define LAMBDA_SPEECH_ACT -0.2f +#define LAMBDA_DELAYED_DECISIONS -0.05f 
+#define LAMBDA_INPUT_QUALITY -0.1f +#define LAMBDA_CODING_QUALITY -0.2f +#define LAMBDA_QUANT_OFFSET 0.8f + +/* Compensation in bitrate calculations for 10 ms modes */ +#define REDUCE_BITRATE_10_MS_BPS 2200 + +/* Maximum time before allowing a bandwidth transition */ +#define MAX_BANDWIDTH_SWITCH_DELAY_MS 5000 + +#ifdef __cplusplus +} +#endif + +#endif /* SILK_TUNING_PARAMETERS_H */ diff --git a/drivers/opus/silk/typedef.h b/drivers/opus/silk/typedef.h new file mode 100644 index 0000000000..ca2361bc82 --- /dev/null +++ b/drivers/opus/silk/typedef.h @@ -0,0 +1,78 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifndef SILK_TYPEDEF_H +#define SILK_TYPEDEF_H + +#include "opus_types.h" +#include "opus_defines.h" + +#ifndef OPUS_FIXED_POINT +# include <float.h> +# define silk_float float +# define silk_float_MAX FLT_MAX +#endif + +#define silk_int64_MAX ((opus_int64)0x7FFFFFFFFFFFFFFFLL) /* 2^63 - 1 */ +#define silk_int64_MIN ((opus_int64)0x8000000000000000LL) /* -2^63 */ +#define silk_int32_MAX 0x7FFFFFFF /* 2^31 - 1 = 2147483647 */ +#define silk_int32_MIN ((opus_int32)0x80000000) /* -2^31 = -2147483648 */ +#define silk_int16_MAX 0x7FFF /* 2^15 - 1 = 32767 */ +#define silk_int16_MIN ((opus_int16)0x8000) /* -2^15 = -32768 */ +#define silk_int8_MAX 0x7F /* 2^7 - 1 = 127 */ +#define silk_int8_MIN ((opus_int8)0x80) /* -2^7 = -128 */ +#define silk_uint8_MAX 0xFF /* 2^8 - 1 = 255 */ + +#define silk_TRUE 1 +#define silk_FALSE 0 + +/* assertions */ +#if (defined _WIN32 && !defined _WINCE && !defined(__GNUC__) && !defined(NO_ASSERTS)) +# ifndef silk_assert +# include <crtdbg.h> /* ASSERTE() */ +# define silk_assert(COND) _ASSERTE(COND) +# endif +#else +# ifdef ENABLE_ASSERTIONS +# include <stdio.h> +# include <stdlib.h> +#define silk_fatal(str) _silk_fatal(str, __FILE__, __LINE__); +#ifdef __GNUC__ +__attribute__((noreturn)) +#endif +static OPUS_INLINE void _silk_fatal(const char *str, const char *file, int line) +{ + fprintf (stderr, "Fatal (internal) error in %s, line %d: %s\n", file, line, 
str); + abort(); +} +# define silk_assert(COND) {if (!(COND)) {silk_fatal("assertion failed: " #COND);}} +# else +# define silk_assert(COND) +# endif +#endif + +#endif /* SILK_TYPEDEF_H */ diff --git a/drivers/opus/stream.c b/drivers/opus/stream.c new file mode 100644 index 0000000000..17293f2bca --- /dev/null +++ b/drivers/opus/stream.c @@ -0,0 +1,366 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE libopusfile SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * + * * + * THE libopusfile SOURCE CODE IS (C) COPYRIGHT 1994-2012 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: stdio-based convenience library for opening/seeking/decoding + last mod: $Id: vorbisfile.c 17573 2010-10-27 14:53:59Z xiphmont $ + + ********************************************************************/ +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "internal.h" +#include <sys/types.h> +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include <string.h> +#if defined(_WIN32) +# include <io.h> +#endif + +typedef struct OpusMemStream OpusMemStream; + +#define OP_MEM_SIZE_MAX (~(size_t)0>>1) +#define OP_MEM_DIFF_MAX ((ptrdiff_t)OP_MEM_SIZE_MAX) + +/*The context information needed to read from a block of memory as if it were a + file.*/ +struct OpusMemStream{ + /*The block of memory to read from.*/ + const unsigned char *data; + /*The total size of the block. + This must be at most OP_MEM_SIZE_MAX to prevent signed overflow while + seeking.*/ + ptrdiff_t size; + /*The current file position. 
+ This is allowed to be set arbitrarily greater than size (i.e., past the end + of the block, though we will not read data past the end of the block), but + is not allowed to be negative (i.e., before the beginning of the block).*/ + ptrdiff_t pos; +}; +/*Reads up to _buf_size bytes from the FILE * passed in _stream into _ptr. + Returns the number of bytes read (0 for an empty request or at EOF), or + OP_EREAD if fread() returned 0 without the EOF indicator being set.*/ +static int op_fread(void *_stream,unsigned char *_ptr,int _buf_size){ + FILE *stream; + size_t ret; + /*Check for empty read.*/ + if(_buf_size<=0)return 0; + stream=(FILE *)_stream; + ret=fread(_ptr,1,_buf_size,stream); + OP_ASSERT(ret<=(size_t)_buf_size); + /*If ret==0 and !feof(stream), there was a read error.*/ + return ret>0||feof(stream)?(int)ret:OP_EREAD; +} + +static int op_fseek(void *_stream,opus_int64 _offset,int _whence){ +#if defined(_WIN32) + /*_fseeki64() is not exposed until MSVCRT80. + This is the default starting with MSVC 2005 (_MSC_VER>=1400), but we want + to allow linking against older MSVCRT versions for compatibility back to + XP without installing extra runtime libraries. + i686-pc-mingw32 does not have fseeko() and requires + __MSVCRT_VERSION__>=0x800 for _fseeki64(), which screws up linking with + other libraries (that don't use MSVCRT80 from MSVC 2005 by default). + i686-w64-mingw32 does have fseeko() and respects _FILE_OFFSET_BITS, but I + don't know how to detect that at compile time. + We could just use fseeko64() (which is available in both), but it's + implemented using fgetpos()/fsetpos() just like this code, except without + the overflow checking, so we prefer our version.*/ + opus_int64 pos; + /*We don't use fpos_t directly because it might be a struct if __STDC__ is + non-zero or _INTEGRAL_MAX_BITS < 64. + I'm not certain when the latter is true, but someone could in theory set + the former. 
+ Either way, it should be binary compatible with a normal 64-bit int (this + assumption is not portable, but I believe it is true for MSVCRT).*/ + OP_ASSERT(sizeof(pos)==sizeof(fpos_t)); + /*Translate the seek to an absolute one.*/ + if(_whence==SEEK_CUR){ + int ret; + ret=fgetpos((FILE *)_stream,(fpos_t *)&pos); + if(ret)return ret; + } + else if(_whence==SEEK_END)pos=_filelengthi64(_fileno((FILE *)_stream)); + else if(_whence==SEEK_SET)pos=0; + else return -1; + /*Check for errors or overflow.*/ + if(pos<0||_offset<-pos||_offset>OP_INT64_MAX-pos)return -1; + pos+=_offset; + return fsetpos((FILE *)_stream,(fpos_t *)&pos); +#else + /*This function actually conforms to the SUSv2 and POSIX.1-2001, so we prefer + it except on Windows.*/ + return fseeko((FILE *)_stream,(off_t)_offset,_whence); +#endif +} +/*Returns the current position of the FILE * passed in _stream. + On Windows this is read back through fgetpos() and -1 is returned if that + call fails; elsewhere the result of ftello() is returned as-is.*/ +static opus_int64 op_ftell(void *_stream){ +#if defined(_WIN32) + /*_ftelli64() is not exposed until MSVCRT80, and ftello()/ftello64() have + the same problems as fseeko()/fseeko64() in MinGW. + See above for a more detailed explanation.*/ + opus_int64 pos; + OP_ASSERT(sizeof(pos)==sizeof(fpos_t)); + return fgetpos((FILE *)_stream,(fpos_t *)&pos)?-1:pos; +#else + /*This function actually conforms to the SUSv2 and POSIX.1-2001, so we prefer + it except on Windows.*/ + return ftello((FILE *)_stream); +#endif +} + +static const OpusFileCallbacks OP_FILE_CALLBACKS={ + op_fread, + op_fseek, + op_ftell, + (op_close_func)fclose +}; + +#if defined(_WIN32) +# include <stddef.h> +# include <errno.h> + +/*Windows doesn't accept UTF-8 by default, and we don't have a wchar_t API, + so if we just pass the path to fopen(), then there'd be no way for a user + of our API to open a Unicode filename. + Instead, we translate from UTF-8 to UTF-16 and use Windows' wchar_t API. 
+ This makes this API more consistent with platforms where the character set + used by fopen is the same as used on disk, which is generally UTF-8, and + with our metadata API, which always uses UTF-8. + Returns a newly allocated, NUL-terminated UTF-16 string that the caller must + release with _ogg_free(), or NULL if allocation fails or _src contains an + illegal UTF-8 sequence.*/ +static wchar_t *op_utf8_to_utf16(const char *_src){ + wchar_t *dst; + size_t len; + len=strlen(_src); + /*Worst-case output is 1 wide character per 1 input character.*/ + dst=(wchar_t *)_ogg_malloc(sizeof(*dst)*(len+1)); + if(dst!=NULL){ + size_t si; + size_t di; + for(di=si=0;si<len;si++){ + int c0; + c0=(unsigned char)_src[si]; + if(!(c0&0x80)){ + /*Start byte says this is a 1-byte sequence.*/ + dst[di++]=(wchar_t)c0; + continue; + } + else{ + int c1; + /*This is safe, because c0 was not 0 and _src is NUL-terminated.*/ + c1=(unsigned char)_src[si+1]; + if((c1&0xC0)==0x80){ + /*Found at least one continuation byte.*/ + if((c0&0xE0)==0xC0){ + wchar_t w; + /*Start byte says this is a 2-byte sequence.*/ + w=(c0&0x1F)<<6|c1&0x3F; + if(w>=0x80U){ + /*This is a 2-byte sequence that is not overlong.*/ + dst[di++]=w; + si++; + continue; + } + } + else{ + int c2; + /*This is safe, because c1 was not 0 and _src is NUL-terminated.*/ + c2=(unsigned char)_src[si+2]; + if((c2&0xC0)==0x80){ + /*Found at least two continuation bytes.*/ + if((c0&0xF0)==0xE0){ + wchar_t w; + /*Start byte says this is a 3-byte sequence.*/ + w=(c0&0xF)<<12|(c1&0x3F)<<6|c2&0x3F; + if(w>=0x800U&&(w<0xD800||w>=0xE000)&&w<0xFFFE){ + /*This is a 3-byte sequence that is not overlong, not a + UTF-16 surrogate pair value, and not a 'not a character' + value.*/ + dst[di++]=w; + si+=2; + continue; + } + } + else{ + int c3; + /*This is safe, because c2 was not 0 and _src is + NUL-terminated.*/ + c3=(unsigned char)_src[si+3]; + if((c3&0xC0)==0x80){ + /*Found at least three continuation bytes.*/ + if((c0&0xF8)==0xF0){ + opus_uint32 w; + /*Start byte says this is a 4-byte sequence. + Assemble the 21-bit code point from the 3+6+6+6 payload bits; + the fields occupy disjoint bit ranges, so every one of them + has to be OR'd in.*/ + w=(c0&7)<<18|(c1&0x3F)<<12|(c2&0x3F)<<6|(c3&0x3F); + if(w>=0x10000U&&w<0x110000U){ + /*This is a 4-byte sequence that is not 
overlong and not + greater than the largest valid Unicode code point. + Convert it to a surrogate pair.*/ + w-=0x10000; + dst[di++]=(wchar_t)(0xD800+(w>>10)); + dst[di++]=(wchar_t)(0xDC00+(w&0x3FF)); + si+=3; + continue; + } + } + } + } + } + } + } + } + /*If we got here, we encountered an illegal UTF-8 sequence.*/ + _ogg_free(dst); + return NULL; + } + OP_ASSERT(di<=len); + dst[di]='\0'; + } + return dst; +} + +#endif +/*Opens _path with the given _mode and, on success, copies the stdio-based + callbacks into *_cb. + On Windows, _path and _mode are converted from UTF-8 to UTF-16 and passed to + _wfopen(); errno is set to EINVAL if either argument is NULL or _mode cannot + be converted, and to ENOENT if _path cannot be converted. + Returns the FILE * as a void *, or NULL on failure, in which case *_cb is not + modified.*/ +void *op_fopen(OpusFileCallbacks *_cb,const char *_path,const char *_mode){ + FILE *fp; +#if !defined(_WIN32) + fp=fopen(_path,_mode); +#else + fp=NULL; + if(_path==NULL||_mode==NULL)errno=EINVAL; + else{ + wchar_t *wpath; + wchar_t *wmode; + wpath=op_utf8_to_utf16(_path); + wmode=op_utf8_to_utf16(_mode); + if(wmode==NULL)errno=EINVAL; + else if(wpath==NULL)errno=ENOENT; + else fp=_wfopen(wpath,wmode); + _ogg_free(wmode); + _ogg_free(wpath); + } +#endif + if(fp!=NULL)*_cb=*&OP_FILE_CALLBACKS; + return fp; +} +/*Wraps the file descriptor _fd in a FILE * using fdopen() and, on success, + copies the stdio-based callbacks into *_cb. + Returns the FILE * as a void *, or NULL on failure.*/ +void *op_fdopen(OpusFileCallbacks *_cb,int _fd,const char *_mode){ + FILE *fp; + fp=fdopen(_fd,_mode); + if(fp!=NULL)*_cb=*&OP_FILE_CALLBACKS; + return fp; +} +/*Re-opens the FILE * passed in _stream on _path with the given _mode and, on + success, copies the stdio-based callbacks into *_cb. + The Windows branch performs the same UTF-8 to UTF-16 conversion and errno + reporting as op_fopen(), but calls _wfreopen(). + Returns the FILE * as a void *, or NULL on failure.*/ +void *op_freopen(OpusFileCallbacks *_cb,const char *_path,const char *_mode, + void *_stream){ + FILE *fp; +#if !defined(_WIN32) + fp=freopen(_path,_mode,(FILE *)_stream); +#else + fp=NULL; + if(_path==NULL||_mode==NULL)errno=EINVAL; + else{ + wchar_t *wpath; + wchar_t *wmode; + wpath=op_utf8_to_utf16(_path); + wmode=op_utf8_to_utf16(_mode); + if(wmode==NULL)errno=EINVAL; + else if(wpath==NULL)errno=ENOENT; + else fp=_wfreopen(wpath,wmode,(FILE *)_stream); + _ogg_free(wmode); + _ogg_free(wpath); + } +#endif + if(fp!=NULL)*_cb=*&OP_FILE_CALLBACKS; + return fp; +} + +static int op_mem_read(void *_stream,unsigned char *_ptr,int _buf_size){ + OpusMemStream *stream; + ptrdiff_t size; + ptrdiff_t pos; + stream=(OpusMemStream *)_stream; + /*Check for empty read.*/ + if(_buf_size<=0)return 0; + size=stream->size; + pos=stream->pos; + /*Check for EOF.*/ + if(pos>=size)return 0; + /*Check 
for a short read.*/ + _buf_size=(int)OP_MIN(size-pos,_buf_size); + memcpy(_ptr,stream->data+pos,_buf_size); + pos+=_buf_size; + stream->pos=pos; + return _buf_size; +} +/*Repositions the memory stream using fseek()-style semantics: _offset is + relative to the start (SEEK_SET), the current position (SEEK_CUR), or the + end of the block (SEEK_END). + The resulting position may lie past the end of the block, but never before + its beginning. + Returns 0 on success, or -1 if _whence is invalid or the new position would + be negative or exceed OP_MEM_DIFF_MAX (the stored position is then left + unchanged).*/ +static int op_mem_seek(void *_stream,opus_int64 _offset,int _whence){ + OpusMemStream *stream; + ptrdiff_t pos; + stream=(OpusMemStream *)_stream; + pos=stream->pos; + OP_ASSERT(pos>=0); + switch(_whence){ + case SEEK_SET:{ + /*Check for overflow:*/ + if(_offset<0||_offset>OP_MEM_DIFF_MAX)return -1; + pos=(ptrdiff_t)_offset; + }break; + case SEEK_CUR:{ + /*Check for overflow:*/ + if(_offset<-pos||_offset>OP_MEM_DIFF_MAX-pos)return -1; + pos=(ptrdiff_t)(pos+_offset); + }break; + case SEEK_END:{ + ptrdiff_t size; + size=stream->size; + OP_ASSERT(size>=0); + /*As with fseek(), _offset is added to the end position, so a negative + _offset moves back into the block. + Check for overflow:*/ + if(_offset<-size||_offset>OP_MEM_DIFF_MAX-size)return -1; + pos=(ptrdiff_t)(size+_offset); + }break; + default:return -1; + } + stream->pos=pos; + return 0; +} + +static opus_int64 op_mem_tell(void *_stream){ + OpusMemStream *stream; + stream=(OpusMemStream *)_stream; + return (opus_int64)stream->pos; +} + +static int op_mem_close(void *_stream){ + _ogg_free(_stream); + return 0; +} + +static const OpusFileCallbacks OP_MEM_CALLBACKS={ + op_mem_read, + op_mem_seek, + op_mem_tell, + op_mem_close +}; + +void *op_mem_stream_create(OpusFileCallbacks *_cb, + const unsigned char *_data,size_t _size){ + OpusMemStream *stream; + if(_size>OP_MEM_SIZE_MAX)return NULL; + stream=(OpusMemStream *)_ogg_malloc(sizeof(*stream)); + if(stream!=NULL){ + *_cb=*&OP_MEM_CALLBACKS; + stream->data=_data; + stream->size=_size; + stream->pos=0; + } + return stream; +} diff --git a/drivers/opus/tansig_table.h b/drivers/opus/tansig_table.h new file mode 100644 index 0000000000..c76f844a72 --- /dev/null +++ b/drivers/opus/tansig_table.h @@ -0,0 +1,45 @@ +/* This file is auto-generated by gen_tables */ + +static const float tansig_table[201] = { +0.000000f, 0.039979f, 0.079830f, 0.119427f, 0.158649f, +0.197375f, 0.235496f, 0.272905f, 0.309507f, 
0.345214f, +0.379949f, 0.413644f, 0.446244f, 0.477700f, 0.507977f, +0.537050f, 0.564900f, 0.591519f, 0.616909f, 0.641077f, +0.664037f, 0.685809f, 0.706419f, 0.725897f, 0.744277f, +0.761594f, 0.777888f, 0.793199f, 0.807569f, 0.821040f, +0.833655f, 0.845456f, 0.856485f, 0.866784f, 0.876393f, +0.885352f, 0.893698f, 0.901468f, 0.908698f, 0.915420f, +0.921669f, 0.927473f, 0.932862f, 0.937863f, 0.942503f, +0.946806f, 0.950795f, 0.954492f, 0.957917f, 0.961090f, +0.964028f, 0.966747f, 0.969265f, 0.971594f, 0.973749f, +0.975743f, 0.977587f, 0.979293f, 0.980869f, 0.982327f, +0.983675f, 0.984921f, 0.986072f, 0.987136f, 0.988119f, +0.989027f, 0.989867f, 0.990642f, 0.991359f, 0.992020f, +0.992631f, 0.993196f, 0.993718f, 0.994199f, 0.994644f, +0.995055f, 0.995434f, 0.995784f, 0.996108f, 0.996407f, +0.996682f, 0.996937f, 0.997172f, 0.997389f, 0.997590f, +0.997775f, 0.997946f, 0.998104f, 0.998249f, 0.998384f, +0.998508f, 0.998623f, 0.998728f, 0.998826f, 0.998916f, +0.999000f, 0.999076f, 0.999147f, 0.999213f, 0.999273f, +0.999329f, 0.999381f, 0.999428f, 0.999472f, 0.999513f, +0.999550f, 0.999585f, 0.999617f, 0.999646f, 0.999673f, +0.999699f, 0.999722f, 0.999743f, 0.999763f, 0.999781f, +0.999798f, 0.999813f, 0.999828f, 0.999841f, 0.999853f, +0.999865f, 0.999875f, 0.999885f, 0.999893f, 0.999902f, +0.999909f, 0.999916f, 0.999923f, 0.999929f, 0.999934f, +0.999939f, 0.999944f, 0.999948f, 0.999952f, 0.999956f, +0.999959f, 0.999962f, 0.999965f, 0.999968f, 0.999970f, +0.999973f, 0.999975f, 0.999977f, 0.999978f, 0.999980f, +0.999982f, 0.999983f, 0.999984f, 0.999986f, 0.999987f, +0.999988f, 0.999989f, 0.999990f, 0.999990f, 0.999991f, +0.999992f, 0.999992f, 0.999993f, 0.999994f, 0.999994f, +0.999994f, 0.999995f, 0.999995f, 0.999996f, 0.999996f, +0.999996f, 0.999997f, 0.999997f, 0.999997f, 0.999997f, +0.999997f, 0.999998f, 0.999998f, 0.999998f, 0.999998f, +0.999998f, 0.999998f, 0.999999f, 0.999999f, 0.999999f, +0.999999f, 0.999999f, 0.999999f, 0.999999f, 0.999999f, +0.999999f, 0.999999f, 
0.999999f, 0.999999f, 0.999999f, +1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, +1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, +1.000000f, +}; diff --git a/drivers/opus/wincerts.c b/drivers/opus/wincerts.c new file mode 100644 index 0000000000..568a085e43 --- /dev/null +++ b/drivers/opus/wincerts.c @@ -0,0 +1,171 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE libopusfile SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * + * * + * THE libopusfile SOURCE CODE IS (C) COPYRIGHT 2013 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ********************************************************************/ + +/*This should really be part of OpenSSL, but there's been a patch [1] sitting + in their bugtracker for over two years that implements this, without any + action, so I'm giving up and re-implementing it locally. 
+ + [1] <http://rt.openssl.org/Ticket/Display.html?id=2158>*/ + +#ifdef OPUS_HAVE_CONFIG_H +#include "opus_config.h" +#endif + +#include "internal.h" +#if defined(OP_ENABLE_HTTP)&&defined(_WIN32) +/*You must include windows.h before wincrypt.h and x509.h.*/ +# define WIN32_LEAN_AND_MEAN +# define WIN32_EXTRA_LEAN +# include <windows.h> +/*You must include wincrypt.h before x509.h, too, or X509_NAME doesn't get + defined properly.*/ +# include <wincrypt.h> +# include <openssl/ssl.h> +# include <openssl/err.h> +# include <openssl/x509.h> + +static int op_capi_new(X509_LOOKUP *_lu){ + HCERTSTORE h_store; + h_store=CertOpenStore(CERT_STORE_PROV_SYSTEM_A,0,0, + CERT_STORE_OPEN_EXISTING_FLAG|CERT_STORE_READONLY_FLAG| + CERT_SYSTEM_STORE_CURRENT_USER|CERT_STORE_SHARE_CONTEXT_FLAG,"ROOT"); + if(h_store!=NULL){ + _lu->method_data=(char *)h_store; + return 1; + } + return 0; +} + +static void op_capi_free(X509_LOOKUP *_lu){ + HCERTSTORE h_store; + h_store=(HCERTSTORE)_lu->method_data; +# if defined(OP_ENABLE_ASSERTIONS) + OP_ALWAYS_TRUE(CertCloseStore(h_store,CERT_CLOSE_STORE_CHECK_FLAG)); +# else + CertCloseStore(h_store,0); +# endif +} + +static int op_capi_retrieve_by_subject(X509_LOOKUP *_lu,int _type, + X509_NAME *_name,X509_OBJECT *_ret){ + X509_OBJECT *obj; + CRYPTO_w_lock(CRYPTO_LOCK_X509_STORE); + obj=X509_OBJECT_retrieve_by_subject(_lu->store_ctx->objs,_type,_name); + CRYPTO_w_unlock(CRYPTO_LOCK_X509_STORE); + if(obj!=NULL){ + _ret->type=obj->type; + memcpy(&_ret->data,&obj->data,sizeof(_ret->data)); + return 1; + } + return 0; +} + +static int op_capi_get_by_subject(X509_LOOKUP *_lu,int _type,X509_NAME *_name, + X509_OBJECT *_ret){ + HCERTSTORE h_store; + if(_name==NULL)return 0; + if(_name->bytes==NULL||_name->bytes->length<=0||_name->modified){ + if(i2d_X509_NAME(_name,NULL)<0)return 0; + OP_ASSERT(_name->bytes->length>0); + } + h_store=(HCERTSTORE)_lu->method_data; + switch(_type){ + case X509_LU_X509:{ + CERT_NAME_BLOB find_para; + PCCERT_CONTEXT cert; + X509 
*x; + int ret; + /*Although X509_NAME contains a canon_enc field, that "canonical" [1] + encoding was just made up by OpenSSL. + It doesn't correspond to any actual standard, and since it drops the + initial sequence header, won't be recognized by the Crypto API. + The assumption here is that CertFindCertificateInStore() will allow any + appropriate variations in the encoding when it does its comparison. + This is, however, emphatically not true under Wine, which just compares + the encodings with memcmp(). + Most of the time things work anyway, though, and there isn't really + anything we can do to make the situation better. + + [1] A "canonical form" is defined as the one where, if you locked 10 + mathematicians in a room and asked them to come up with a + representation for something, it's the answer that 9 of them would + give you back. + I don't think OpenSSL's encoding qualifies.*/ + find_para.cbData=_name->bytes->length; + find_para.pbData=(unsigned char *)_name->bytes->data; + cert=CertFindCertificateInStore(h_store,X509_ASN_ENCODING,0, + CERT_FIND_SUBJECT_NAME,&find_para,NULL); + if(cert==NULL)return 0; + x=d2i_X509(NULL,(const unsigned char **)&cert->pbCertEncoded, + cert->cbCertEncoded); + CertFreeCertificateContext(cert); + if(x==NULL)return 0; + ret=X509_STORE_add_cert(_lu->store_ctx,x); + X509_free(x); + if(ret)return op_capi_retrieve_by_subject(_lu,_type,_name,_ret); + }break; + case X509_LU_CRL:{ + CERT_INFO cert_info; + CERT_CONTEXT find_para; + PCCRL_CONTEXT crl; + X509_CRL *x; + int ret; + ret=op_capi_retrieve_by_subject(_lu,_type,_name,_ret); + if(ret>0)return ret; + memset(&cert_info,0,sizeof(cert_info)); + cert_info.Issuer.cbData=_name->bytes->length; + cert_info.Issuer.pbData=(unsigned char *)_name->bytes->data; + memset(&find_para,0,sizeof(find_para)); + find_para.pCertInfo=&cert_info; + crl=CertFindCRLInStore(h_store,0,0,CRL_FIND_ISSUED_BY,&find_para,NULL); + if(crl==NULL)return 0; + x=d2i_X509_CRL(NULL,(const unsigned char 
**)&crl->pbCrlEncoded, + crl->cbCrlEncoded); + CertFreeCRLContext(crl); + if(x==NULL)return 0; + ret=X509_STORE_add_crl(_lu->store_ctx,x); + X509_CRL_free(x); + if(ret)return op_capi_retrieve_by_subject(_lu,_type,_name,_ret); + }break; + } + return 0; +} + +/*This is not const because OpenSSL doesn't allow it, even though it won't + write to it.*/ +static X509_LOOKUP_METHOD X509_LOOKUP_CAPI={ + "Load Crypto API store into cache", + op_capi_new, + op_capi_free, + NULL, + NULL, + NULL, + op_capi_get_by_subject, + NULL, + NULL, + NULL +}; + +int SSL_CTX_set_default_verify_paths_win32(SSL_CTX *_ssl_ctx){ + X509_STORE *store; + X509_LOOKUP *lu; + /*We intentionally do not add the normal default paths, as they are usually + wrong, and are just asking to be used as an exploit vector.*/ + store=SSL_CTX_get_cert_store(_ssl_ctx); + OP_ASSERT(store!=NULL); + lu=X509_STORE_add_lookup(store,&X509_LOOKUP_CAPI); + if(lu==NULL)return 0; + ERR_clear_error(); + return 1; +} + +#endif diff --git a/drivers/opus/winerrno.h b/drivers/opus/winerrno.h new file mode 100644 index 0000000000..32a90b4ee1 --- /dev/null +++ b/drivers/opus/winerrno.h @@ -0,0 +1,90 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE libopusfile SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * + * * + * THE libopusfile SOURCE CODE IS (C) COPYRIGHT 2012 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ********************************************************************/ +#if !defined(_opusfile_winerrno_h) +# define _opusfile_winerrno_h (1) + +# include <errno.h> +# include <winerror.h> + +/*These conflict with the MSVC errno.h definitions, but we don't need to use + the original ones in any file that deals with sockets. 
+ We could map the WSA errors to the errno.h ones (most of which are only + available on sufficiently new versions of MSVC), but they aren't ordered the + same, and given how rarely we actually look at the values, I don't think + it's worth a lookup table.*/ +# undef EWOULDBLOCK +# undef EINPROGRESS +# undef EALREADY +# undef ENOTSOCK +# undef EDESTADDRREQ +# undef EMSGSIZE +# undef EPROTOTYPE +# undef ENOPROTOOPT +# undef EPROTONOSUPPORT +# undef EOPNOTSUPP +# undef EAFNOSUPPORT +# undef EADDRINUSE +# undef EADDRNOTAVAIL +# undef ENETDOWN +# undef ENETUNREACH +# undef ENETRESET +# undef ECONNABORTED +# undef ECONNRESET +# undef ENOBUFS +# undef EISCONN +# undef ENOTCONN +# undef ETIMEDOUT +# undef ECONNREFUSED +# undef ELOOP +# undef ENAMETOOLONG +# undef EHOSTUNREACH +# undef ENOTEMPTY + +# define EWOULDBLOCK (WSAEWOULDBLOCK-WSABASEERR) +# define EINPROGRESS (WSAEINPROGRESS-WSABASEERR) +# define EALREADY (WSAEALREADY-WSABASEERR) +# define ENOTSOCK (WSAENOTSOCK-WSABASEERR) +# define EDESTADDRREQ (WSAEDESTADDRREQ-WSABASEERR) +# define EMSGSIZE (WSAEMSGSIZE-WSABASEERR) +# define EPROTOTYPE (WSAEPROTOTYPE-WSABASEERR) +# define ENOPROTOOPT (WSAENOPROTOOPT-WSABASEERR) +# define EPROTONOSUPPORT (WSAEPROTONOSUPPORT-WSABASEERR) +# define ESOCKTNOSUPPORT (WSAESOCKTNOSUPPORT-WSABASEERR) +# define EOPNOTSUPP (WSAEOPNOTSUPP-WSABASEERR) +# define EPFNOSUPPORT (WSAEPFNOSUPPORT-WSABASEERR) +# define EAFNOSUPPORT (WSAEAFNOSUPPORT-WSABASEERR) +# define EADDRINUSE (WSAEADDRINUSE-WSABASEERR) +# define EADDRNOTAVAIL (WSAEADDRNOTAVAIL-WSABASEERR) +# define ENETDOWN (WSAENETDOWN-WSABASEERR) +# define ENETUNREACH (WSAENETUNREACH-WSABASEERR) +# define ENETRESET (WSAENETRESET-WSABASEERR) +# define ECONNABORTED (WSAECONNABORTED-WSABASEERR) +# define ECONNRESET (WSAECONNRESET-WSABASEERR) +# define ENOBUFS (WSAENOBUFS-WSABASEERR) +# define EISCONN (WSAEISCONN-WSABASEERR) +# define ENOTCONN (WSAENOTCONN-WSABASEERR) +# define ESHUTDOWN (WSAESHUTDOWN-WSABASEERR) +# define ETOOMANYREFS 
(WSAETOOMANYREFS-WSABASEERR) +# define ETIMEDOUT (WSAETIMEDOUT-WSABASEERR) +# define ECONNREFUSED (WSAECONNREFUSED-WSABASEERR) +# define ELOOP (WSAELOOP-WSABASEERR) +# define ENAMETOOLONG (WSAENAMETOOLONG-WSABASEERR) +# define EHOSTDOWN (WSAEHOSTDOWN-WSABASEERR) +# define EHOSTUNREACH (WSAEHOSTUNREACH-WSABASEERR) +# define ENOTEMPTY (WSAENOTEMPTY-WSABASEERR) +# define EPROCLIM (WSAEPROCLIM-WSABASEERR) +# define EUSERS (WSAEUSERS-WSABASEERR) +# define EDQUOT (WSAEDQUOT-WSABASEERR) +# define ESTALE (WSAESTALE-WSABASEERR) +# define EREMOTE (WSAEREMOTE-WSABASEERR) + +#endif diff --git a/drivers/pulseaudio/audio_driver_pulseaudio.cpp b/drivers/pulseaudio/audio_driver_pulseaudio.cpp index dfe9ddc55f..4cda141f92 100644 --- a/drivers/pulseaudio/audio_driver_pulseaudio.cpp +++ b/drivers/pulseaudio/audio_driver_pulseaudio.cpp @@ -82,6 +82,17 @@ Error AudioDriverPulseAudio::init() { return OK; } +float AudioDriverPulseAudio::get_latency() { + + if (latency==0) { //only do this once since it's approximate anyway + int error_code; + pa_usec_t palat = pa_simple_get_latency( pulse,&error_code); + latency=double(palat)/1000000.0; + } + + return latency; +} + void AudioDriverPulseAudio::thread_func(void* p_udata) { AudioDriverPulseAudio* ad = (AudioDriverPulseAudio*)p_udata; @@ -121,6 +132,7 @@ void AudioDriverPulseAudio::thread_func(void* p_udata) { ad->exit_thread = true; break; } + } ad->thread_exited = true; @@ -185,6 +197,7 @@ AudioDriverPulseAudio::AudioDriverPulseAudio() { mutex = NULL; thread = NULL; pulse = NULL; + latency=0; } AudioDriverPulseAudio::~AudioDriverPulseAudio() { diff --git a/drivers/pulseaudio/audio_driver_pulseaudio.h b/drivers/pulseaudio/audio_driver_pulseaudio.h index e82e0c24be..e7c8bcce36 100644 --- a/drivers/pulseaudio/audio_driver_pulseaudio.h +++ b/drivers/pulseaudio/audio_driver_pulseaudio.h @@ -58,6 +58,8 @@ class AudioDriverPulseAudio : public AudioDriverSW { mutable bool exit_thread; bool pcm_open; + float latency; + public: const char* 
get_name() const { @@ -72,6 +74,9 @@ public: virtual void unlock(); virtual void finish(); + virtual float get_latency(); + + AudioDriverPulseAudio(); ~AudioDriverPulseAudio(); }; diff --git a/drivers/register_driver_types.cpp b/drivers/register_driver_types.cpp index 01f6a8b5b0..2647d23011 100644 --- a/drivers/register_driver_types.cpp +++ b/drivers/register_driver_types.cpp @@ -37,13 +37,19 @@ #include "vorbis/audio_stream_ogg_vorbis.h" #endif +#ifdef OPUS_ENABLED +#include "opus/audio_stream_opus.h" +#endif #ifdef SPEEX_ENABLED #include "speex/audio_stream_speex.h" #endif #ifdef THEORA_ENABLED -//#include "theora/video_stream_theora.h" +#include "theora/video_stream_theora.h" +#endif + +#ifdef THEORAPLAYER_ENABLED #include "theoraplayer/video_stream_theoraplayer.h" #endif @@ -85,12 +91,19 @@ static ResourceFormatLoaderAudioStreamOGG *vorbis_stream_loader=NULL; static ResourceFormatLoaderAudioStreamOGGVorbis *vorbis_stream_loader=NULL; #endif +#ifdef OPUS_ENABLED +static ResourceFormatLoaderAudioStreamOpus *opus_stream_loader=NULL; +#endif + #ifdef SPEEX_ENABLED static ResourceFormatLoaderAudioStreamSpeex *speex_stream_loader=NULL; #endif #ifdef THEORA_ENABLED -//static ResourceFormatLoaderVideoStreamTheora* theora_stream_loader = NULL; +static ResourceFormatLoaderVideoStreamTheora* theora_stream_loader = NULL; +#endif + +#ifdef THEORAPLAYER_ENABLED static ResourceFormatLoaderVideoStreamTheoraplayer* theoraplayer_stream_loader = NULL; #endif @@ -169,6 +182,11 @@ void register_driver_types() { ObjectTypeDB::register_type<AudioStreamOGGVorbis>(); #endif +#ifdef OPUS_ENABLED + opus_stream_loader=memnew( ResourceFormatLoaderAudioStreamOpus ); + ResourceLoader::add_resource_format_loader( opus_stream_loader ); + ObjectTypeDB::register_type<AudioStreamOpus>(); +#endif #ifdef DDS_ENABLED resource_loader_dds = memnew( ResourceFormatDDS ); @@ -205,9 +223,12 @@ void register_driver_types() { #endif #ifdef THEORA_ENABLED - //theora_stream_loader = memnew( 
ResourceFormatLoaderVideoStreamTheora ); - //ResourceLoader::add_resource_format_loader(theora_stream_loader); - //ObjectTypeDB::register_type<VideoStreamTheora>(); + theora_stream_loader = memnew( ResourceFormatLoaderVideoStreamTheora ); + ResourceLoader::add_resource_format_loader(theora_stream_loader); + ObjectTypeDB::register_type<VideoStreamTheora>(); +#endif + +#ifdef THEORAPLAYER_ENABLED theoraplayer_stream_loader = memnew( ResourceFormatLoaderVideoStreamTheoraplayer ); ResourceLoader::add_resource_format_loader(theoraplayer_stream_loader); ObjectTypeDB::register_type<VideoStreamTheoraplayer>(); @@ -239,12 +260,19 @@ void unregister_driver_types() { memdelete( vorbis_stream_loader ); #endif +#ifdef OPUS_ENABLED + memdelete( opus_stream_loader ); +#endif + #ifdef SPEEX_ENABLED memdelete( speex_stream_loader ); #endif #ifdef THEORA_ENABLED - //memdelete (theora_stream_loader); + memdelete (theora_stream_loader); +#endif + +#ifdef THEORAPLAYER_ENABLED memdelete (theoraplayer_stream_loader); #endif diff --git a/drivers/rtaudio/RtAudio.cpp b/drivers/rtaudio/RtAudio.cpp index 8876f72e21..72ca836907 100644 --- a/drivers/rtaudio/RtAudio.cpp +++ b/drivers/rtaudio/RtAudio.cpp @@ -1,10234 +1,10234 @@ -#ifdef RTAUDIO_ENABLED
-/************************************************************************/
-/*! \class RtAudio
- \brief Realtime audio i/o C++ classes.
-
- RtAudio provides a common API (Application Programming Interface)
- for realtime audio input/output across Linux (native ALSA, Jack,
- and OSS), Macintosh OS X (CoreAudio and Jack), and Windows
- (DirectSound, ASIO and WASAPI) operating systems.
-
- RtAudio WWW site: http://www.music.mcgill.ca/~gary/rtaudio/
-
- RtAudio: realtime audio i/o C++ classes
- Copyright (c) 2001-2014 Gary P. Scavone
-
- Permission is hereby granted, free of charge, to any person
- obtaining a copy of this software and associated documentation files
- (the "Software"), to deal in the Software without restriction,
- including without limitation the rights to use, copy, modify, merge,
- publish, distribute, sublicense, and/or sell copies of the Software,
- and to permit persons to whom the Software is furnished to do so,
- subject to the following conditions:
-
- The above copyright notice and this permission notice shall be
- included in all copies or substantial portions of the Software.
-
- Any person wishing to distribute modifications to the Software is
- asked to send the modifications to the original developer so that
- they can be incorporated into the canonical version. This is,
- however, not a binding provision of this license.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR
- ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
- CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-*/
-/************************************************************************/
-
-// RtAudio: Version 4.1.1
-
-#include "RtAudio.h"
-#include <iostream>
-#include <cstdlib>
-#include <cstring>
-#include <climits>
-#include <algorithm>
-
-// Static variable definitions.
-// MAX_SAMPLE_RATES is the number of entries in SAMPLE_RATES below (14); the
-// two are maintained by hand and must be updated together.
-const unsigned int RtApi::MAX_SAMPLE_RATES = 14;
-// Table of sample-rate values in ascending order (presumably Hz -- confirm
-// against the code that walks this table).
-const unsigned int RtApi::SAMPLE_RATES[] = {
- 4000, 5512, 8000, 9600, 11025, 16000, 22050,
- 32000, 44100, 48000, 88200, 96000, 176400, 192000
-};
-
-// Platform mutex abstraction. When any Windows audio API is compiled in, the
-// MUTEX_* macros map onto the Win32 critical-section functions.
-#if defined(__WINDOWS_DS__) || defined(__WINDOWS_ASIO__) || defined(__WINDOWS_WASAPI__)
-#ifdef WINRT_ENABLED
- // WINRT_ENABLED builds use the ...Ex initializer with spin count 0 and flags 0.
- #define MUTEX_INITIALIZE(A) InitializeCriticalSectionEx(A, 0, 0)
-#else
- #define MUTEX_INITIALIZE(A) InitializeCriticalSection(A)
-#endif
- #define MUTEX_DESTROY(A) DeleteCriticalSection(A)
- #define MUTEX_LOCK(A) EnterCriticalSection(A)
- #define MUTEX_UNLOCK(A) LeaveCriticalSection(A)
-
- #include "tchar.h"
-
- // Narrow-character overload: copies the NUL-terminated 'text' into a
- // std::string without any re-encoding.
- static std::string convertCharPointerToStdString(const char *text)
- {
-   std::string converted( text );
-   return converted;
- }
-
- // Wide-character overload: converts the NUL-terminated 'text' to a UTF-8
- // encoded std::string. Returns an empty string if the conversion fails.
- static std::string convertCharPointerToStdString(const wchar_t *text)
- {
-   // First pass: ask for the required size in bytes. Because the input
-   // length is given as -1, the count includes the terminating NUL.
-   int length = WideCharToMultiByte(CP_UTF8, 0, text, -1, NULL, 0, NULL, NULL);
-
-   // WideCharToMultiByte returns 0 on failure. Without this guard, length-1
-   // would be -1 and wrap to an enormous size in the std::string constructor.
-   // A length of 1 means "just the terminator", i.e. an empty input string.
-   if ( length <= 1 ) return std::string();
-
-   std::string s( length-1, '\0' );
-   // Second pass: convert into the string's storage. The final byte written
-   // is the NUL that lands on the string's own terminator position.
-   if ( WideCharToMultiByte(CP_UTF8, 0, text, -1, &s[0], length, NULL, NULL) == 0 )
-     return std::string();
-   return s;
- }
-
-#elif defined(__LINUX_ALSA__) || defined(__LINUX_PULSE__) || defined(__UNIX_JACK__) || defined(__LINUX_OSS__) || defined(__MACOSX_CORE__)
- // pthread API
- // The mutex is created with default attributes (NULL attribute pointer).
- #define MUTEX_INITIALIZE(A) pthread_mutex_init(A, NULL)
- #define MUTEX_DESTROY(A) pthread_mutex_destroy(A)
- #define MUTEX_LOCK(A) pthread_mutex_lock(A)
- #define MUTEX_UNLOCK(A) pthread_mutex_unlock(A)
-#else
- // Fallback when no platform audio API is selected: the macros expand to a
- // harmless expression so the RtApi constructor/destructor still compile.
- // NOTE(review): MUTEX_LOCK and MUTEX_UNLOCK are not defined in this branch;
- // any code compiled here that uses them will fail to build.
- #define MUTEX_INITIALIZE(A) abs(*A) // dummy definitions
- #define MUTEX_DESTROY(A) abs(*A) // dummy definitions
-#endif
-
-// *************************************************** //
-//
-// RtAudio definitions.
-//
-// *************************************************** //
-
-// Return the RtAudio version as a string built from RTAUDIO_VERSION.
-std::string RtAudio :: getVersion( void ) throw()
-{
-  return std::string( RTAUDIO_VERSION );
-}
-
-// Fill 'apis' with one entry per audio API that was enabled at compile time.
-// The vector is cleared first, so any previous contents are discarded.
-// NOTE(review): push_back may throw std::bad_alloc, which the throw()
-// specification would turn into program termination.
-void RtAudio :: getCompiledApi( std::vector<RtAudio::Api> &apis ) throw()
-{
- apis.clear();
-
- // The order here will control the order of RtAudio's API search in
- // the constructor.
-#if defined(__UNIX_JACK__)
- apis.push_back( UNIX_JACK );
-#endif
-#if defined(__LINUX_ALSA__)
- apis.push_back( LINUX_ALSA );
-#endif
-#if defined(__LINUX_PULSE__)
- apis.push_back( LINUX_PULSE );
-#endif
-#if defined(__LINUX_OSS__)
- apis.push_back( LINUX_OSS );
-#endif
-#if defined(__WINDOWS_ASIO__)
- apis.push_back( WINDOWS_ASIO );
-#endif
-#if defined(__WINDOWS_WASAPI__)
- apis.push_back( WINDOWS_WASAPI );
-#endif
-#if defined(__WINDOWS_DS__)
- apis.push_back( WINDOWS_DS );
-#endif
-#if defined(__MACOSX_CORE__)
- apis.push_back( MACOSX_CORE );
-#endif
-#if defined(__RTAUDIO_DUMMY__)
- // The dummy backend is listed last.
- apis.push_back( RTAUDIO_DUMMY );
-#endif
-}
-
-// Replace the current backend object with a new one for 'api'. If 'api' was
-// not compiled in, rtapi_ is left null so the caller can detect the failure.
-void RtAudio :: openRtApi( RtAudio::Api api )
-{
-  // Drop any previously opened backend (deleting a null pointer is a no-op).
-  delete rtapi_;
-  rtapi_ = 0;
-
-  // Exactly one of the following branches can match; each exists only when
-  // the corresponding API was enabled at compile time.
-#if defined(__UNIX_JACK__)
-  if ( api == UNIX_JACK ) { rtapi_ = new RtApiJack(); }
-#endif
-#if defined(__LINUX_ALSA__)
-  if ( api == LINUX_ALSA ) { rtapi_ = new RtApiAlsa(); }
-#endif
-#if defined(__LINUX_PULSE__)
-  if ( api == LINUX_PULSE ) { rtapi_ = new RtApiPulse(); }
-#endif
-#if defined(__LINUX_OSS__)
-  if ( api == LINUX_OSS ) { rtapi_ = new RtApiOss(); }
-#endif
-#if defined(__WINDOWS_ASIO__)
-  if ( api == WINDOWS_ASIO ) { rtapi_ = new RtApiAsio(); }
-#endif
-#if defined(__WINDOWS_WASAPI__)
-  if ( api == WINDOWS_WASAPI ) { rtapi_ = new RtApiWasapi(); }
-#endif
-#if defined(__WINDOWS_DS__)
-  if ( api == WINDOWS_DS ) { rtapi_ = new RtApiDs(); }
-#endif
-#if defined(__MACOSX_CORE__)
-  if ( api == MACOSX_CORE ) { rtapi_ = new RtApiCore(); }
-#endif
-#if defined(__RTAUDIO_DUMMY__)
-  if ( api == RTAUDIO_DUMMY ) { rtapi_ = new RtApiDummy(); }
-#endif
-}
-
-// Construct an RtAudio instance bound to a backend. A specific 'api' is tried
-// first; otherwise (or if it is unavailable) the compiled-in APIs are tried in
-// getCompiledApi() order. Throws RtAudioError if no backend could be created.
-RtAudio :: RtAudio( RtAudio::Api api )
-{
-  rtapi_ = 0;
-
-  if ( api != UNSPECIFIED ) {
-    // The caller asked for a particular API: use it if it was compiled in.
-    openRtApi( api );
-    if ( rtapi_ ) return;
-
-    // Not compiled in. Warn, then fall through to the automatic search as
-    // though no API had been requested.
-    std::cerr << "\nRtAudio: no compiled support for specified API argument!\n" << std::endl;
-  }
-
-  // Automatic search: stop at the first API that reports at least one
-  // device. If none does, the last API opened stays selected.
-  std::vector< RtAudio::Api > compiledApis;
-  getCompiledApi( compiledApis );
-  for ( std::vector< RtAudio::Api >::size_type n=0; n<compiledApis.size(); ++n ) {
-    openRtApi( compiledApis[n] );
-    if ( rtapi_ && rtapi_->getDeviceCount() ) break;
-  }
-
-  if ( !rtapi_ ) {
-    // It should not be possible to get here because the preprocessor
-    // definition __RTAUDIO_DUMMY__ is automatically defined if no
-    // API-specific definitions are passed to the compiler. But just in
-    // case something weird happens, we'll throw an error.
-    std::string errorText = "\nRtAudio: no compiled API support found ... critical error!!\n\n";
-    throw( RtAudioError( errorText, RtAudioError::UNSPECIFIED ) );
-  }
-}
-
-// Destroy the backend object owned by this instance, if any.
-RtAudio :: ~RtAudio() throw()
-{
-  // delete on a null pointer is a no-op, so no explicit check is needed.
-  delete rtapi_;
-}
-
-// Thin forwarder: hands every argument, unchanged and in the same order, to
-// the active backend's openStream().
-void RtAudio :: openStream( RtAudio::StreamParameters *outputParameters,
-                            RtAudio::StreamParameters *inputParameters,
-                            RtAudioFormat format, unsigned int sampleRate,
-                            unsigned int *bufferFrames,
-                            RtAudioCallback callback, void *userData,
-                            RtAudio::StreamOptions *options,
-                            RtAudioErrorCallback errorCallback )
-{
-  rtapi_->openStream( outputParameters, inputParameters,
-                      format, sampleRate, bufferFrames,
-                      callback, userData,
-                      options, errorCallback );
-}
-
-// *************************************************** //
-//
-// Public RtApi definitions (see end of file for
-// private or protected utility functions).
-//
-// *************************************************** //
-
-// Put the stream record into its "closed, nothing allocated" state, create
-// the stream mutex and reset the error-reporting flags.
-RtApi :: RtApi()
-{
-  // Error-reporting defaults.
-  showWarnings_ = true;
-  firstErrorOccurred_ = false;
-
-  // No stream is open and no buffers or API handle exist yet.
-  stream_.state = STREAM_CLOSED;
-  stream_.mode = UNINITIALIZED;
-  stream_.apiHandle = 0;
-  for ( int n=0; n<2; n++ )
-    stream_.userBuffer[n] = 0;
-
-  MUTEX_INITIALIZE( &stream_.mutex );
-}
-
-// Release the stream mutex created by the constructor.
-RtApi :: ~RtApi()
-{
-  MUTEX_DESTROY( &stream_.mutex );
-}
-
-// Validate the request, open the requested output and/or input device via
-// probeDeviceOpen(), record the callbacks and leave the stream in the
-// STREAM_STOPPED state. Every failure is reported through error() and the
-// function then returns without changing the stream state to stopped.
-void RtApi :: openStream( RtAudio::StreamParameters *oParams,
-                          RtAudio::StreamParameters *iParams,
-                          RtAudioFormat format, unsigned int sampleRate,
-                          unsigned int *bufferFrames,
-                          RtAudioCallback callback, void *userData,
-                          RtAudio::StreamOptions *options,
-                          RtAudioErrorCallback errorCallback )
-{
-  // A stream may only be opened while none is currently open.
-  if ( stream_.state != STREAM_CLOSED ) {
-    errorText_ = "RtApi::openStream: a stream is already open!";
-    error( RtAudioError::INVALID_USE );
-    return;
-  }
-
-  // Clear stream information potentially left from a previously open stream.
-  clearStreamInfo();
-
-  // ---- Argument validation ------------------------------------------
-  if ( oParams && oParams->nChannels < 1 ) {
-    errorText_ = "RtApi::openStream: a non-NULL output StreamParameters structure cannot have an nChannels value less than one.";
-    error( RtAudioError::INVALID_USE );
-    return;
-  }
-
-  if ( iParams && iParams->nChannels < 1 ) {
-    errorText_ = "RtApi::openStream: a non-NULL input StreamParameters structure cannot have an nChannels value less than one.";
-    error( RtAudioError::INVALID_USE );
-    return;
-  }
-
-  if ( !oParams && !iParams ) {
-    errorText_ = "RtApi::openStream: input and output StreamParameters structures are both NULL!";
-    error( RtAudioError::INVALID_USE );
-    return;
-  }
-
-  if ( formatBytes(format) == 0 ) {
-    errorText_ = "RtApi::openStream: 'format' parameter value is undefined.";
-    error( RtAudioError::INVALID_USE );
-    return;
-  }
-
-  // Device ids must index into the list reported by the backend.
-  const unsigned int deviceCount = getDeviceCount();
-
-  unsigned int outChannels = 0;
-  if ( oParams ) {
-    if ( oParams->deviceId >= deviceCount ) {
-      errorText_ = "RtApi::openStream: output device parameter value is invalid.";
-      error( RtAudioError::INVALID_USE );
-      return;
-    }
-    outChannels = oParams->nChannels;
-  }
-
-  unsigned int inChannels = 0;
-  if ( iParams ) {
-    if ( iParams->deviceId >= deviceCount ) {
-      errorText_ = "RtApi::openStream: input device parameter value is invalid.";
-      error( RtAudioError::INVALID_USE );
-      return;
-    }
-    inChannels = iParams->nChannels;
-  }
-
-  // ---- Device probing: output first, then input ---------------------
-  if ( outChannels > 0 &&
-       !probeDeviceOpen( oParams->deviceId, OUTPUT, outChannels, oParams->firstChannel,
-                         sampleRate, format, bufferFrames, options ) ) {
-    error( RtAudioError::SYSTEM_ERROR );
-    return;
-  }
-
-  if ( inChannels > 0 &&
-       !probeDeviceOpen( iParams->deviceId, INPUT, inChannels, iParams->firstChannel,
-                         sampleRate, format, bufferFrames, options ) ) {
-    // Undo the output side that was already opened above before reporting.
-    if ( outChannels > 0 ) closeStream();
-    error( RtAudioError::SYSTEM_ERROR );
-    return;
-  }
-
-  // ---- Success: remember the callbacks and publish the result -------
-  stream_.callbackInfo.callback = (void *) callback;
-  stream_.callbackInfo.userData = userData;
-  stream_.callbackInfo.errorCallback = (void *) errorCallback;
-
-  // Report the buffer count that is actually in use back to the caller.
-  if ( options ) options->numberOfBuffers = stream_.nBuffers;
-  stream_.state = STREAM_STOPPED;
-}
-
-// Base-class fallback: always reports device index 0 as the default input.
-// Subclasses should provide a real implementation where possible.
-unsigned int RtApi :: getDefaultInputDevice( void )
-{
-  const unsigned int fallbackIndex = 0;
-  return fallbackIndex;
-}
-
-// Base-class fallback: always reports device index 0 as the default output.
-// Subclasses should provide a real implementation where possible.
-unsigned int RtApi :: getDefaultOutputDevice( void )
-{
-  const unsigned int fallbackIndex = 0;
-  return fallbackIndex;
-}
-
-// Base-class placeholder that does nothing.
-// Every subclass MUST supply its own implementation.
-void RtApi :: closeStream( void )
-{
-}
-
-// Base-class placeholder that ignores all of its arguments and always
-// reports FAILURE. Every subclass MUST supply its own implementation.
-bool RtApi :: probeDeviceOpen( unsigned int /*device*/,
-                               StreamMode /*mode*/,
-                               unsigned int /*channels*/,
-                               unsigned int /*firstChannel*/,
-                               unsigned int /*sampleRate*/,
-                               RtAudioFormat /*format*/,
-                               unsigned int * /*bufferSize*/,
-                               RtAudio::StreamOptions * /*options*/ )
-{
-  return FAILURE;
-}
-
-// Advance the stream clock by the duration of one buffer. Subclasses that
-// do not implement getStreamTime themselves should call this once per
-// buffer of I/O to get basic stream-time support.
-void RtApi :: tickStreamTime( void )
-{
-  // Duration of one buffer: frames per buffer divided by frames per second
-  // (the 1.0 factor forces floating-point division).
-  stream_.streamTime += ( stream_.bufferSize * 1.0 / stream_.sampleRate );
-
-#if defined( HAVE_GETTIMEOFDAY )
-  // Remember when this tick happened; getStreamTime() reads this timestamp.
-  gettimeofday( &stream_.lastTickTimestamp, NULL );
-#endif
-}
-
-// Return the latency of the open stream: latency[0] when the stream has an
-// output side, plus latency[1] when it has an input side.
-long RtApi :: getStreamLatency( void )
-{
-  verifyStream();
-
-  const bool hasOutput = ( stream_.mode == OUTPUT || stream_.mode == DUPLEX );
-  const bool hasInput = ( stream_.mode == INPUT || stream_.mode == DUPLEX );
-
-  long latencySum = 0;
-  if ( hasOutput ) latencySum = stream_.latency[0];
-  if ( hasInput ) latencySum += stream_.latency[1];
-
-  return latencySum;
-}
-
-// Return the stream time accumulated by tickStreamTime(). When
-// gettimeofday() is available and the stream is running, the wall-clock
-// time elapsed since the last tick is added for a finer estimate.
-double RtApi :: getStreamTime( void )
-{
-  verifyStream();
-
-#if defined( HAVE_GETTIMEOFDAY )
-  // Nothing to interpolate if the stream is not running or has not ticked yet.
-  if ( stream_.state != STREAM_RUNNING || stream_.streamTime == 0.0 )
-    return stream_.streamTime;
-
-  struct timeval now;
-  gettimeofday( &now, NULL );
-  const struct timeval lastTick = stream_.lastTickTimestamp;
-
-  // Convert both timestamps to seconds (tv_usec is in microseconds).
-  return stream_.streamTime +
-    ((now.tv_sec + 0.000001 * now.tv_usec) -
-     (lastTick.tv_sec + 0.000001 * lastTick.tv_usec));
-#else
-  return stream_.streamTime;
-#endif
-}
-
-// Overwrite the stream clock with 'time'. Values that do not satisfy
-// time >= 0.0 (negative numbers and NaN) are silently ignored.
-void RtApi :: setStreamTime( double time )
-{
-  verifyStream();
-
-  // Written as a negated >= so that NaN is rejected exactly as before.
-  if ( !( time >= 0.0 ) ) return;
-
-  stream_.streamTime = time;
-}
-
-// Return the sample rate stored for the stream, after verifyStream() has
-// checked the stream.
-unsigned int RtApi :: getStreamSampleRate( void )
-{
-  verifyStream();
-
-  const unsigned int rate = stream_.sampleRate;
-  return rate;
-}
-
-
-// *************************************************** //
-//
-// OS/API-specific methods.
-//
-// *************************************************** //
-
-#if defined(__MACOSX_CORE__)
-
-// The OS X CoreAudio API is designed to use a separate callback
-// procedure for each of its audio devices. A single RtAudio duplex
-// stream using two different devices is supported here, though it
-// cannot be guaranteed to always behave correctly because we cannot
-// synchronize these two callbacks.
-//
-// A property listener is installed for over/underrun information.
-// However, no functionality is currently provided to allow property
-// listeners to trigger user handlers because it is unclear what could
-// be done if a critical stream parameter (buffer size, sample rate,
-// device disconnect) notification arrived. The listeners entail
-// quite a bit of extra code and most likely, a user program wouldn't
-// be prepared for the result anyway. However, we do provide a flag
-// to the client callback function to inform of an over/underrun.
-
-// A structure to hold various information related to the CoreAudio API
-// implementation.
-struct CoreHandle {
-  AudioDeviceID id[2];          // device ids
-#if defined( MAC_OS_X_VERSION_10_5 ) && ( MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_5 )
-  AudioDeviceIOProcID procId[2];
-#endif
-  UInt32 iStream[2];            // device stream index (or first if using multiple)
-  UInt32 nStreams[2];           // number of streams to use
-  bool xrun[2];
-  char *deviceBuffer;
-  pthread_cond_t condition;
-  int drainCounter;             // Tracks callback counts when draining
-  bool internalDrain;           // Indicates if stop is initiated from callback or not.
-
-  // Defaults: no device buffer, not draining, one stream per slot, device
-  // ids zeroed and xrun flags cleared. procId, iStream and condition are
-  // not initialised here.
-  CoreHandle()
-    : deviceBuffer(0), drainCounter(0), internalDrain(false)
-  {
-    for ( int slot=0; slot<2; slot++ ) {
-      nStreams[slot] = 1;
-      id[slot] = 0;
-      xrun[slot] = false;
-    }
-  }
-};
-
-// On OS X 10.6 and later, set the audio hardware run-loop property to NULL.
-// A failure is reported as a warning only.
-RtApiCore:: RtApiCore()
-{
-#if defined( AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER )
-  // This is a largely undocumented but absolutely necessary
-  // requirement starting with OS-X 10.6. If not called, queries and
-  // updates to various audio device properties are not handled
-  // correctly.
-  AudioObjectPropertyAddress runLoopAddress = { kAudioHardwarePropertyRunLoop,
-                                                kAudioObjectPropertyScopeGlobal,
-                                                kAudioObjectPropertyElementMaster };
-  CFRunLoopRef runLoop = NULL;
-  const OSStatus status = AudioObjectSetPropertyData( kAudioObjectSystemObject, &runLoopAddress,
-                                                      0, NULL, sizeof(CFRunLoopRef), &runLoop );
-  if ( status != noErr ) {
-    errorText_ = "RtApiCore::RtApiCore: error setting run loop property!";
-    error( RtAudioError::WARNING );
-  }
-#endif
-}
-
-// Close any stream that is still open before the base class is destroyed.
-RtApiCore :: ~RtApiCore()
-{
-  // The subclass destructor gets called before the base class
-  // destructor, so close an existing stream before deallocating
-  // apiDeviceId memory.
-  if ( stream_.state == STREAM_CLOSED ) return;
-  closeStream();
-}
-
-// Return the number of audio devices known to the system, or 0 (after
-// issuing a warning) if the query fails.
-unsigned int RtApiCore :: getDeviceCount( void )
-{
-  // Ask for the byte size of the system device-ID list; the device count
-  // is that size divided by the size of one AudioDeviceID.
-  AudioObjectPropertyAddress devicesAddress = { kAudioHardwarePropertyDevices,
-                                                kAudioObjectPropertyScopeGlobal,
-                                                kAudioObjectPropertyElementMaster };
-  UInt32 listBytes;
-  const OSStatus status = AudioObjectGetPropertyDataSize( kAudioObjectSystemObject, &devicesAddress,
-                                                          0, NULL, &listBytes );
-  if ( status != noErr ) {
-    errorText_ = "RtApiCore::getDeviceCount: OS-X error getting device info!";
-    error( RtAudioError::WARNING );
-    return 0;
-  }
-
-  return listBytes / sizeof( AudioDeviceID );
-}
-
-unsigned int RtApiCore :: getDefaultInputDevice( void )
-{
- unsigned int nDevices = getDeviceCount();
- if ( nDevices <= 1 ) return 0;
-
- AudioDeviceID id;
- UInt32 dataSize = sizeof( AudioDeviceID );
- AudioObjectPropertyAddress property = { kAudioHardwarePropertyDefaultInputDevice, kAudioObjectPropertyScopeGlobal, kAudioObjectPropertyElementMaster };
- OSStatus result = AudioObjectGetPropertyData( kAudioObjectSystemObject, &property, 0, NULL, &dataSize, &id );
- if ( result != noErr ) {
- errorText_ = "RtApiCore::getDefaultInputDevice: OS-X system error getting device.";
- error( RtAudioError::WARNING );
- return 0;
- }
-
- dataSize *= nDevices;
- AudioDeviceID deviceList[ nDevices ];
- property.mSelector = kAudioHardwarePropertyDevices;
- result = AudioObjectGetPropertyData( kAudioObjectSystemObject, &property, 0, NULL, &dataSize, (void *) &deviceList );
- if ( result != noErr ) {
- errorText_ = "RtApiCore::getDefaultInputDevice: OS-X system error getting device IDs.";
- error( RtAudioError::WARNING );
- return 0;
- }
-
- for ( unsigned int i=0; i<nDevices; i++ )
- if ( id == deviceList[i] ) return i;
-
- errorText_ = "RtApiCore::getDefaultInputDevice: No default device found!";
- error( RtAudioError::WARNING );
- return 0;
-}
-
-unsigned int RtApiCore :: getDefaultOutputDevice( void )
-{
- unsigned int nDevices = getDeviceCount();
- if ( nDevices <= 1 ) return 0;
-
- AudioDeviceID id;
- UInt32 dataSize = sizeof( AudioDeviceID );
- AudioObjectPropertyAddress property = { kAudioHardwarePropertyDefaultOutputDevice, kAudioObjectPropertyScopeGlobal, kAudioObjectPropertyElementMaster };
- OSStatus result = AudioObjectGetPropertyData( kAudioObjectSystemObject, &property, 0, NULL, &dataSize, &id );
- if ( result != noErr ) {
- errorText_ = "RtApiCore::getDefaultOutputDevice: OS-X system error getting device.";
- error( RtAudioError::WARNING );
- return 0;
- }
-
- dataSize = sizeof( AudioDeviceID ) * nDevices;
- AudioDeviceID deviceList[ nDevices ];
- property.mSelector = kAudioHardwarePropertyDevices;
- result = AudioObjectGetPropertyData( kAudioObjectSystemObject, &property, 0, NULL, &dataSize, (void *) &deviceList );
- if ( result != noErr ) {
- errorText_ = "RtApiCore::getDefaultOutputDevice: OS-X system error getting device IDs.";
- error( RtAudioError::WARNING );
- return 0;
- }
-
- for ( unsigned int i=0; i<nDevices; i++ )
- if ( id == deviceList[i] ) return i;
-
- errorText_ = "RtApiCore::getDefaultOutputDevice: No default device found!";
- error( RtAudioError::WARNING );
- return 0;
-}
-
-RtAudio::DeviceInfo RtApiCore :: getDeviceInfo( unsigned int device )
-{
- RtAudio::DeviceInfo info;
- info.probed = false;
-
- // Get device ID
- unsigned int nDevices = getDeviceCount();
- if ( nDevices == 0 ) {
- errorText_ = "RtApiCore::getDeviceInfo: no devices found!";
- error( RtAudioError::INVALID_USE );
- return info;
- }
-
- if ( device >= nDevices ) {
- errorText_ = "RtApiCore::getDeviceInfo: device ID is invalid!";
- error( RtAudioError::INVALID_USE );
- return info;
- }
-
- AudioDeviceID deviceList[ nDevices ];
- UInt32 dataSize = sizeof( AudioDeviceID ) * nDevices;
- AudioObjectPropertyAddress property = { kAudioHardwarePropertyDevices,
- kAudioObjectPropertyScopeGlobal,
- kAudioObjectPropertyElementMaster };
- OSStatus result = AudioObjectGetPropertyData( kAudioObjectSystemObject, &property,
- 0, NULL, &dataSize, (void *) &deviceList );
- if ( result != noErr ) {
- errorText_ = "RtApiCore::getDeviceInfo: OS-X system error getting device IDs.";
- error( RtAudioError::WARNING );
- return info;
- }
-
- AudioDeviceID id = deviceList[ device ];
-
- // Get the device name.
- info.name.erase();
- CFStringRef cfname;
- dataSize = sizeof( CFStringRef );
- property.mSelector = kAudioObjectPropertyManufacturer;
- result = AudioObjectGetPropertyData( id, &property, 0, NULL, &dataSize, &cfname );
- if ( result != noErr ) {
- errorStream_ << "RtApiCore::probeDeviceInfo: system error (" << getErrorCode( result ) << ") getting device manufacturer.";
- errorText_ = errorStream_.str();
- error( RtAudioError::WARNING );
- return info;
- }
-
- //const char *mname = CFStringGetCStringPtr( cfname, CFStringGetSystemEncoding() );
- int length = CFStringGetLength(cfname);
- char *mname = (char *)malloc(length * 3 + 1);
-#if defined( UNICODE ) || defined( _UNICODE )
- CFStringGetCString(cfname, mname, length * 3 + 1, kCFStringEncodingUTF8);
-#else
- CFStringGetCString(cfname, mname, length * 3 + 1, CFStringGetSystemEncoding());
-#endif
- info.name.append( (const char *)mname, strlen(mname) );
- info.name.append( ": " );
- CFRelease( cfname );
- free(mname);
-
- property.mSelector = kAudioObjectPropertyName;
- result = AudioObjectGetPropertyData( id, &property, 0, NULL, &dataSize, &cfname );
- if ( result != noErr ) {
- errorStream_ << "RtApiCore::probeDeviceInfo: system error (" << getErrorCode( result ) << ") getting device name.";
- errorText_ = errorStream_.str();
- error( RtAudioError::WARNING );
- return info;
- }
-
- //const char *name = CFStringGetCStringPtr( cfname, CFStringGetSystemEncoding() );
- length = CFStringGetLength(cfname);
- char *name = (char *)malloc(length * 3 + 1);
-#if defined( UNICODE ) || defined( _UNICODE )
- CFStringGetCString(cfname, name, length * 3 + 1, kCFStringEncodingUTF8);
-#else
- CFStringGetCString(cfname, name, length * 3 + 1, CFStringGetSystemEncoding());
-#endif
- info.name.append( (const char *)name, strlen(name) );
- CFRelease( cfname );
- free(name);
-
- // Get the output stream "configuration".
- AudioBufferList *bufferList = nil;
- property.mSelector = kAudioDevicePropertyStreamConfiguration;
- property.mScope = kAudioDevicePropertyScopeOutput;
- // property.mElement = kAudioObjectPropertyElementWildcard;
- dataSize = 0;
- result = AudioObjectGetPropertyDataSize( id, &property, 0, NULL, &dataSize );
- if ( result != noErr || dataSize == 0 ) {
- errorStream_ << "RtApiCore::getDeviceInfo: system error (" << getErrorCode( result ) << ") getting output stream configuration info for device (" << device << ").";
- errorText_ = errorStream_.str();
- error( RtAudioError::WARNING );
- return info;
- }
-
- // Allocate the AudioBufferList.
- bufferList = (AudioBufferList *) malloc( dataSize );
- if ( bufferList == NULL ) {
- errorText_ = "RtApiCore::getDeviceInfo: memory error allocating output AudioBufferList.";
- error( RtAudioError::WARNING );
- return info;
- }
-
- result = AudioObjectGetPropertyData( id, &property, 0, NULL, &dataSize, bufferList );
- if ( result != noErr || dataSize == 0 ) {
- free( bufferList );
- errorStream_ << "RtApiCore::getDeviceInfo: system error (" << getErrorCode( result ) << ") getting output stream configuration for device (" << device << ").";
- errorText_ = errorStream_.str();
- error( RtAudioError::WARNING );
- return info;
- }
-
- // Get output channel information.
- unsigned int i, nStreams = bufferList->mNumberBuffers;
- for ( i=0; i<nStreams; i++ )
- info.outputChannels += bufferList->mBuffers[i].mNumberChannels;
- free( bufferList );
-
- // Get the input stream "configuration".
- property.mScope = kAudioDevicePropertyScopeInput;
- result = AudioObjectGetPropertyDataSize( id, &property, 0, NULL, &dataSize );
- if ( result != noErr || dataSize == 0 ) {
- errorStream_ << "RtApiCore::getDeviceInfo: system error (" << getErrorCode( result ) << ") getting input stream configuration info for device (" << device << ").";
- errorText_ = errorStream_.str();
- error( RtAudioError::WARNING );
- return info;
- }
-
- // Allocate the AudioBufferList.
- bufferList = (AudioBufferList *) malloc( dataSize );
- if ( bufferList == NULL ) {
- errorText_ = "RtApiCore::getDeviceInfo: memory error allocating input AudioBufferList.";
- error( RtAudioError::WARNING );
- return info;
- }
-
- result = AudioObjectGetPropertyData( id, &property, 0, NULL, &dataSize, bufferList );
- if (result != noErr || dataSize == 0) {
- free( bufferList );
- errorStream_ << "RtApiCore::getDeviceInfo: system error (" << getErrorCode( result ) << ") getting input stream configuration for device (" << device << ").";
- errorText_ = errorStream_.str();
- error( RtAudioError::WARNING );
- return info;
- }
-
- // Get input channel information.
- nStreams = bufferList->mNumberBuffers;
- for ( i=0; i<nStreams; i++ )
- info.inputChannels += bufferList->mBuffers[i].mNumberChannels;
- free( bufferList );
-
- // If device opens for both playback and capture, we determine the channels.
- if ( info.outputChannels > 0 && info.inputChannels > 0 )
- info.duplexChannels = (info.outputChannels > info.inputChannels) ? info.inputChannels : info.outputChannels;
-
- // Probe the device sample rates.
- bool isInput = false;
- if ( info.outputChannels == 0 ) isInput = true;
-
- // Determine the supported sample rates.
- property.mSelector = kAudioDevicePropertyAvailableNominalSampleRates;
- if ( isInput == false ) property.mScope = kAudioDevicePropertyScopeOutput;
- result = AudioObjectGetPropertyDataSize( id, &property, 0, NULL, &dataSize );
- if ( result != kAudioHardwareNoError || dataSize == 0 ) {
- errorStream_ << "RtApiCore::getDeviceInfo: system error (" << getErrorCode( result ) << ") getting sample rate info.";
- errorText_ = errorStream_.str();
- error( RtAudioError::WARNING );
- return info;
- }
-
- UInt32 nRanges = dataSize / sizeof( AudioValueRange );
- AudioValueRange rangeList[ nRanges ];
- result = AudioObjectGetPropertyData( id, &property, 0, NULL, &dataSize, &rangeList );
- if ( result != kAudioHardwareNoError ) {
- errorStream_ << "RtApiCore::getDeviceInfo: system error (" << getErrorCode( result ) << ") getting sample rates.";
- errorText_ = errorStream_.str();
- error( RtAudioError::WARNING );
- return info;
- }
-
- // The sample rate reporting mechanism is a bit of a mystery. It
- // seems that it can either return individual rates or a range of
- // rates. I assume that if the min / max range values are the same,
- // then that represents a single supported rate and if the min / max
- // range values are different, the device supports an arbitrary
- // range of values (though there might be multiple ranges, so we'll
- // use the most conservative range).
- Float64 minimumRate = 1.0, maximumRate = 10000000000.0;
- bool haveValueRange = false;
- info.sampleRates.clear();
- for ( UInt32 i=0; i<nRanges; i++ ) {
- if ( rangeList[i].mMinimum == rangeList[i].mMaximum ) {
- unsigned int tmpSr = (unsigned int) rangeList[i].mMinimum;
- info.sampleRates.push_back( tmpSr );
-
- if ( !info.preferredSampleRate || ( tmpSr <= 48000 && tmpSr > info.preferredSampleRate ) )
- info.preferredSampleRate = tmpSr;
-
- } else {
- haveValueRange = true;
- if ( rangeList[i].mMinimum > minimumRate ) minimumRate = rangeList[i].mMinimum;
- if ( rangeList[i].mMaximum < maximumRate ) maximumRate = rangeList[i].mMaximum;
- }
- }
-
- if ( haveValueRange ) {
- for ( unsigned int k=0; k<MAX_SAMPLE_RATES; k++ ) {
- if ( SAMPLE_RATES[k] >= (unsigned int) minimumRate && SAMPLE_RATES[k] <= (unsigned int) maximumRate ) {
- info.sampleRates.push_back( SAMPLE_RATES[k] );
-
- if ( !info.preferredSampleRate || ( SAMPLE_RATES[k] <= 48000 && SAMPLE_RATES[k] > info.preferredSampleRate ) )
- info.preferredSampleRate = SAMPLE_RATES[k];
- }
- }
- }
-
- // Sort and remove any redundant values
- std::sort( info.sampleRates.begin(), info.sampleRates.end() );
- info.sampleRates.erase( unique( info.sampleRates.begin(), info.sampleRates.end() ), info.sampleRates.end() );
-
- if ( info.sampleRates.size() == 0 ) {
- errorStream_ << "RtApiCore::probeDeviceInfo: No supported sample rates found for device (" << device << ").";
- errorText_ = errorStream_.str();
- error( RtAudioError::WARNING );
- return info;
- }
-
- // CoreAudio always uses 32-bit floating point data for PCM streams.
- // Thus, any other "physical" formats supported by the device are of
- // no interest to the client.
- info.nativeFormats = RTAUDIO_FLOAT32;
-
- if ( info.outputChannels > 0 )
- if ( getDefaultOutputDevice() == device ) info.isDefaultOutput = true;
- if ( info.inputChannels > 0 )
- if ( getDefaultInputDevice() == device ) info.isDefaultInput = true;
-
- info.probed = true;
- return info;
-}
-
-static OSStatus callbackHandler( AudioDeviceID inDevice,
- const AudioTimeStamp* /*inNow*/,
- const AudioBufferList* inInputData,
- const AudioTimeStamp* /*inInputTime*/,
- AudioBufferList* outOutputData,
- const AudioTimeStamp* /*inOutputTime*/,
- void* infoPointer )
-{
- CallbackInfo *info = (CallbackInfo *) infoPointer;
-
- RtApiCore *object = (RtApiCore *) info->object;
- if ( object->callbackEvent( inDevice, inInputData, outOutputData ) == false )
- return kAudioHardwareUnspecifiedError;
- else
- return kAudioHardwareNoError;
-}
-
-static OSStatus xrunListener( AudioObjectID /*inDevice*/,
- UInt32 nAddresses,
- const AudioObjectPropertyAddress properties[],
- void* handlePointer )
-{
- CoreHandle *handle = (CoreHandle *) handlePointer;
- for ( UInt32 i=0; i<nAddresses; i++ ) {
- if ( properties[i].mSelector == kAudioDeviceProcessorOverload ) {
- if ( properties[i].mScope == kAudioDevicePropertyScopeInput )
- handle->xrun[1] = true;
- else
- handle->xrun[0] = true;
- }
- }
-
- return kAudioHardwareNoError;
-}
-
-static OSStatus rateListener( AudioObjectID inDevice,
- UInt32 /*nAddresses*/,
- const AudioObjectPropertyAddress /*properties*/[],
- void* ratePointer )
-{
- Float64 *rate = (Float64 *) ratePointer;
- UInt32 dataSize = sizeof( Float64 );
- AudioObjectPropertyAddress property = { kAudioDevicePropertyNominalSampleRate,
- kAudioObjectPropertyScopeGlobal,
- kAudioObjectPropertyElementMaster };
- AudioObjectGetPropertyData( inDevice, &property, 0, NULL, &dataSize, rate );
- return kAudioHardwareNoError;
-}
-
-bool RtApiCore :: probeDeviceOpen( unsigned int device, StreamMode mode, unsigned int channels,
- unsigned int firstChannel, unsigned int sampleRate,
- RtAudioFormat format, unsigned int *bufferSize,
- RtAudio::StreamOptions *options )
-{
- // Get device ID
- unsigned int nDevices = getDeviceCount();
- if ( nDevices == 0 ) {
- // This should not happen because a check is made before this function is called.
- errorText_ = "RtApiCore::probeDeviceOpen: no devices found!";
- return FAILURE;
- }
-
- if ( device >= nDevices ) {
- // This should not happen because a check is made before this function is called.
- errorText_ = "RtApiCore::probeDeviceOpen: device ID is invalid!";
- return FAILURE;
- }
-
- AudioDeviceID deviceList[ nDevices ];
- UInt32 dataSize = sizeof( AudioDeviceID ) * nDevices;
- AudioObjectPropertyAddress property = { kAudioHardwarePropertyDevices,
- kAudioObjectPropertyScopeGlobal,
- kAudioObjectPropertyElementMaster };
- OSStatus result = AudioObjectGetPropertyData( kAudioObjectSystemObject, &property,
- 0, NULL, &dataSize, (void *) &deviceList );
- if ( result != noErr ) {
- errorText_ = "RtApiCore::probeDeviceOpen: OS-X system error getting device IDs.";
- return FAILURE;
- }
-
- AudioDeviceID id = deviceList[ device ];
-
- // Setup for stream mode.
- bool isInput = false;
- if ( mode == INPUT ) {
- isInput = true;
- property.mScope = kAudioDevicePropertyScopeInput;
- }
- else
- property.mScope = kAudioDevicePropertyScopeOutput;
-
- // Get the stream "configuration".
- AudioBufferList *bufferList = nil;
- dataSize = 0;
- property.mSelector = kAudioDevicePropertyStreamConfiguration;
- result = AudioObjectGetPropertyDataSize( id, &property, 0, NULL, &dataSize );
- if ( result != noErr || dataSize == 0 ) {
- errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") getting stream configuration info for device (" << device << ").";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
-
- // Allocate the AudioBufferList.
- bufferList = (AudioBufferList *) malloc( dataSize );
- if ( bufferList == NULL ) {
- errorText_ = "RtApiCore::probeDeviceOpen: memory error allocating AudioBufferList.";
- return FAILURE;
- }
-
- result = AudioObjectGetPropertyData( id, &property, 0, NULL, &dataSize, bufferList );
- if (result != noErr || dataSize == 0) {
- free( bufferList );
- errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") getting stream configuration for device (" << device << ").";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
-
- // Search for one or more streams that contain the desired number of
- // channels. CoreAudio devices can have an arbitrary number of
- // streams and each stream can have an arbitrary number of channels.
- // For each stream, a single buffer of interleaved samples is
- // provided. RtAudio prefers the use of one stream of interleaved
- // data or multiple consecutive single-channel streams. However, we
- // now support multiple consecutive multi-channel streams of
- // interleaved data as well.
- UInt32 iStream, offsetCounter = firstChannel;
- UInt32 nStreams = bufferList->mNumberBuffers;
- bool monoMode = false;
- bool foundStream = false;
-
- // First check that the device supports the requested number of
- // channels.
- UInt32 deviceChannels = 0;
- for ( iStream=0; iStream<nStreams; iStream++ )
- deviceChannels += bufferList->mBuffers[iStream].mNumberChannels;
-
- if ( deviceChannels < ( channels + firstChannel ) ) {
- free( bufferList );
- errorStream_ << "RtApiCore::probeDeviceOpen: the device (" << device << ") does not support the requested channel count.";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
-
- // Look for a single stream meeting our needs.
- UInt32 firstStream, streamCount = 1, streamChannels = 0, channelOffset = 0;
- for ( iStream=0; iStream<nStreams; iStream++ ) {
- streamChannels = bufferList->mBuffers[iStream].mNumberChannels;
- if ( streamChannels >= channels + offsetCounter ) {
- firstStream = iStream;
- channelOffset = offsetCounter;
- foundStream = true;
- break;
- }
- if ( streamChannels > offsetCounter ) break;
- offsetCounter -= streamChannels;
- }
-
- // If we didn't find a single stream above, then we should be able
- // to meet the channel specification with multiple streams.
- if ( foundStream == false ) {
- monoMode = true;
- offsetCounter = firstChannel;
- for ( iStream=0; iStream<nStreams; iStream++ ) {
- streamChannels = bufferList->mBuffers[iStream].mNumberChannels;
- if ( streamChannels > offsetCounter ) break;
- offsetCounter -= streamChannels;
- }
-
- firstStream = iStream;
- channelOffset = offsetCounter;
- Int32 channelCounter = channels + offsetCounter - streamChannels;
-
- if ( streamChannels > 1 ) monoMode = false;
- while ( channelCounter > 0 ) {
- streamChannels = bufferList->mBuffers[++iStream].mNumberChannels;
- if ( streamChannels > 1 ) monoMode = false;
- channelCounter -= streamChannels;
- streamCount++;
- }
- }
-
- free( bufferList );
-
- // Determine the buffer size.
- AudioValueRange bufferRange;
- dataSize = sizeof( AudioValueRange );
- property.mSelector = kAudioDevicePropertyBufferFrameSizeRange;
- result = AudioObjectGetPropertyData( id, &property, 0, NULL, &dataSize, &bufferRange );
-
- if ( result != noErr ) {
- errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") getting buffer size range for device (" << device << ").";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
-
- if ( bufferRange.mMinimum > *bufferSize ) *bufferSize = (unsigned long) bufferRange.mMinimum;
- else if ( bufferRange.mMaximum < *bufferSize ) *bufferSize = (unsigned long) bufferRange.mMaximum;
- if ( options && options->flags & RTAUDIO_MINIMIZE_LATENCY ) *bufferSize = (unsigned long) bufferRange.mMinimum;
-
- // Set the buffer size. For multiple streams, I'm assuming we only
- // need to make this setting for the master channel.
- UInt32 theSize = (UInt32) *bufferSize;
- dataSize = sizeof( UInt32 );
- property.mSelector = kAudioDevicePropertyBufferFrameSize;
- result = AudioObjectSetPropertyData( id, &property, 0, NULL, dataSize, &theSize );
-
- if ( result != noErr ) {
- errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") setting the buffer size for device (" << device << ").";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
-
- // If attempting to setup a duplex stream, the bufferSize parameter
- // MUST be the same in both directions!
- *bufferSize = theSize;
- if ( stream_.mode == OUTPUT && mode == INPUT && *bufferSize != stream_.bufferSize ) {
- errorStream_ << "RtApiCore::probeDeviceOpen: system error setting buffer size for duplex stream on device (" << device << ").";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
-
- stream_.bufferSize = *bufferSize;
- stream_.nBuffers = 1;
-
- // Try to set "hog" mode ... it's not clear to me this is working.
- if ( options && options->flags & RTAUDIO_HOG_DEVICE ) {
- pid_t hog_pid;
- dataSize = sizeof( hog_pid );
- property.mSelector = kAudioDevicePropertyHogMode;
- result = AudioObjectGetPropertyData( id, &property, 0, NULL, &dataSize, &hog_pid );
- if ( result != noErr ) {
- errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") getting 'hog' state!";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
-
- if ( hog_pid != getpid() ) {
- hog_pid = getpid();
- result = AudioObjectSetPropertyData( id, &property, 0, NULL, dataSize, &hog_pid );
- if ( result != noErr ) {
- errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") setting 'hog' state!";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
- }
- }
-
- // Check and if necessary, change the sample rate for the device.
- Float64 nominalRate;
- dataSize = sizeof( Float64 );
- property.mSelector = kAudioDevicePropertyNominalSampleRate;
- result = AudioObjectGetPropertyData( id, &property, 0, NULL, &dataSize, &nominalRate );
- if ( result != noErr ) {
- errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") getting current sample rate.";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
-
- // Only change the sample rate if off by more than 1 Hz.
- if ( fabs( nominalRate - (double)sampleRate ) > 1.0 ) {
-
- // Set a property listener for the sample rate change
- Float64 reportedRate = 0.0;
- AudioObjectPropertyAddress tmp = { kAudioDevicePropertyNominalSampleRate, kAudioObjectPropertyScopeGlobal, kAudioObjectPropertyElementMaster };
- result = AudioObjectAddPropertyListener( id, &tmp, rateListener, (void *) &reportedRate );
- if ( result != noErr ) {
- errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") setting sample rate property listener for device (" << device << ").";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
-
- nominalRate = (Float64) sampleRate;
- result = AudioObjectSetPropertyData( id, &property, 0, NULL, dataSize, &nominalRate );
- if ( result != noErr ) {
- AudioObjectRemovePropertyListener( id, &tmp, rateListener, (void *) &reportedRate );
- errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") setting sample rate for device (" << device << ").";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
-
- // Now wait until the reported nominal rate is what we just set.
- UInt32 microCounter = 0;
- while ( reportedRate != nominalRate ) {
- microCounter += 5000;
- if ( microCounter > 5000000 ) break;
- usleep( 5000 );
- }
-
- // Remove the property listener.
- AudioObjectRemovePropertyListener( id, &tmp, rateListener, (void *) &reportedRate );
-
- if ( microCounter > 5000000 ) {
- errorStream_ << "RtApiCore::probeDeviceOpen: timeout waiting for sample rate update for device (" << device << ").";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
- }
-
- // Now set the stream format for all streams. Also, check the
- // physical format of the device and change that if necessary.
- AudioStreamBasicDescription description;
- dataSize = sizeof( AudioStreamBasicDescription );
- property.mSelector = kAudioStreamPropertyVirtualFormat;
- result = AudioObjectGetPropertyData( id, &property, 0, NULL, &dataSize, &description );
- if ( result != noErr ) {
- errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") getting stream format for device (" << device << ").";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
-
- // Set the sample rate and data format id. However, only make the
- // change if the sample rate is not within 1.0 of the desired
- // rate and the format is not linear pcm.
- bool updateFormat = false;
- if ( fabs( description.mSampleRate - (Float64)sampleRate ) > 1.0 ) {
- description.mSampleRate = (Float64) sampleRate;
- updateFormat = true;
- }
-
- if ( description.mFormatID != kAudioFormatLinearPCM ) {
- description.mFormatID = kAudioFormatLinearPCM;
- updateFormat = true;
- }
-
- if ( updateFormat ) {
- result = AudioObjectSetPropertyData( id, &property, 0, NULL, dataSize, &description );
- if ( result != noErr ) {
- errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") setting sample rate or data format for device (" << device << ").";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
- }
-
- // Now check the physical format.
- property.mSelector = kAudioStreamPropertyPhysicalFormat;
- result = AudioObjectGetPropertyData( id, &property, 0, NULL, &dataSize, &description );
- if ( result != noErr ) {
- errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") getting stream physical format for device (" << device << ").";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
-
- //std::cout << "Current physical stream format:" << std::endl;
- //std::cout << " mBitsPerChan = " << description.mBitsPerChannel << std::endl;
- //std::cout << " aligned high = " << (description.mFormatFlags & kAudioFormatFlagIsAlignedHigh) << ", isPacked = " << (description.mFormatFlags & kAudioFormatFlagIsPacked) << std::endl;
- //std::cout << " bytesPerFrame = " << description.mBytesPerFrame << std::endl;
- //std::cout << " sample rate = " << description.mSampleRate << std::endl;
-
- if ( description.mFormatID != kAudioFormatLinearPCM || description.mBitsPerChannel < 16 ) {
- description.mFormatID = kAudioFormatLinearPCM;
- //description.mSampleRate = (Float64) sampleRate;
- AudioStreamBasicDescription testDescription = description;
- UInt32 formatFlags;
-
- // We'll try higher bit rates first and then work our way down.
- std::vector< std::pair<UInt32, UInt32> > physicalFormats;
- formatFlags = (description.mFormatFlags | kLinearPCMFormatFlagIsFloat) & ~kLinearPCMFormatFlagIsSignedInteger;
- physicalFormats.push_back( std::pair<Float32, UInt32>( 32, formatFlags ) );
- formatFlags = (description.mFormatFlags | kLinearPCMFormatFlagIsSignedInteger | kAudioFormatFlagIsPacked) & ~kLinearPCMFormatFlagIsFloat;
- physicalFormats.push_back( std::pair<Float32, UInt32>( 32, formatFlags ) );
- physicalFormats.push_back( std::pair<Float32, UInt32>( 24, formatFlags ) ); // 24-bit packed
- formatFlags &= ~( kAudioFormatFlagIsPacked | kAudioFormatFlagIsAlignedHigh );
- physicalFormats.push_back( std::pair<Float32, UInt32>( 24.2, formatFlags ) ); // 24-bit in 4 bytes, aligned low
- formatFlags |= kAudioFormatFlagIsAlignedHigh;
- physicalFormats.push_back( std::pair<Float32, UInt32>( 24.4, formatFlags ) ); // 24-bit in 4 bytes, aligned high
- formatFlags = (description.mFormatFlags | kLinearPCMFormatFlagIsSignedInteger | kAudioFormatFlagIsPacked) & ~kLinearPCMFormatFlagIsFloat;
- physicalFormats.push_back( std::pair<Float32, UInt32>( 16, formatFlags ) );
- physicalFormats.push_back( std::pair<Float32, UInt32>( 8, formatFlags ) );
-
- bool setPhysicalFormat = false;
- for( unsigned int i=0; i<physicalFormats.size(); i++ ) {
- testDescription = description;
- testDescription.mBitsPerChannel = (UInt32) physicalFormats[i].first;
- testDescription.mFormatFlags = physicalFormats[i].second;
- if ( (24 == (UInt32)physicalFormats[i].first) && ~( physicalFormats[i].second & kAudioFormatFlagIsPacked ) )
- testDescription.mBytesPerFrame = 4 * testDescription.mChannelsPerFrame;
- else
- testDescription.mBytesPerFrame = testDescription.mBitsPerChannel/8 * testDescription.mChannelsPerFrame;
- testDescription.mBytesPerPacket = testDescription.mBytesPerFrame * testDescription.mFramesPerPacket;
- result = AudioObjectSetPropertyData( id, &property, 0, NULL, dataSize, &testDescription );
- if ( result == noErr ) {
- setPhysicalFormat = true;
- //std::cout << "Updated physical stream format:" << std::endl;
- //std::cout << " mBitsPerChan = " << testDescription.mBitsPerChannel << std::endl;
- //std::cout << " aligned high = " << (testDescription.mFormatFlags & kAudioFormatFlagIsAlignedHigh) << ", isPacked = " << (testDescription.mFormatFlags & kAudioFormatFlagIsPacked) << std::endl;
- //std::cout << " bytesPerFrame = " << testDescription.mBytesPerFrame << std::endl;
- //std::cout << " sample rate = " << testDescription.mSampleRate << std::endl;
- break;
- }
- }
-
- if ( !setPhysicalFormat ) {
- errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") setting physical data format for device (" << device << ").";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
- } // done setting virtual/physical formats.
-
- // Get the stream / device latency.
- UInt32 latency;
- dataSize = sizeof( UInt32 );
- property.mSelector = kAudioDevicePropertyLatency;
- if ( AudioObjectHasProperty( id, &property ) == true ) {
- result = AudioObjectGetPropertyData( id, &property, 0, NULL, &dataSize, &latency );
- if ( result == kAudioHardwareNoError ) stream_.latency[ mode ] = latency;
- else {
- errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") getting device latency for device (" << device << ").";
- errorText_ = errorStream_.str();
- error( RtAudioError::WARNING );
- }
- }
-
- // Byte-swapping: According to AudioHardware.h, the stream data will
- // always be presented in native-endian format, so we should never
- // need to byte swap.
- stream_.doByteSwap[mode] = false;
-
- // From the CoreAudio documentation, PCM data must be supplied as
- // 32-bit floats.
- stream_.userFormat = format;
- stream_.deviceFormat[mode] = RTAUDIO_FLOAT32;
-
- if ( streamCount == 1 )
- stream_.nDeviceChannels[mode] = description.mChannelsPerFrame;
- else // multiple streams
- stream_.nDeviceChannels[mode] = channels;
- stream_.nUserChannels[mode] = channels;
- stream_.channelOffset[mode] = channelOffset; // offset within a CoreAudio stream
- if ( options && options->flags & RTAUDIO_NONINTERLEAVED ) stream_.userInterleaved = false;
- else stream_.userInterleaved = true;
- stream_.deviceInterleaved[mode] = true;
- if ( monoMode == true ) stream_.deviceInterleaved[mode] = false;
-
- // Set flags for buffer conversion.
- stream_.doConvertBuffer[mode] = false;
- if ( stream_.userFormat != stream_.deviceFormat[mode] )
- stream_.doConvertBuffer[mode] = true;
- if ( stream_.nUserChannels[mode] < stream_.nDeviceChannels[mode] )
- stream_.doConvertBuffer[mode] = true;
- if ( streamCount == 1 ) {
- if ( stream_.nUserChannels[mode] > 1 &&
- stream_.userInterleaved != stream_.deviceInterleaved[mode] )
- stream_.doConvertBuffer[mode] = true;
- }
- else if ( monoMode && stream_.userInterleaved )
- stream_.doConvertBuffer[mode] = true;
-
- // Allocate our CoreHandle structure for the stream.
- CoreHandle *handle = 0;
- if ( stream_.apiHandle == 0 ) {
- try {
- handle = new CoreHandle;
- }
- catch ( std::bad_alloc& ) {
- errorText_ = "RtApiCore::probeDeviceOpen: error allocating CoreHandle memory.";
- goto error;
- }
-
- if ( pthread_cond_init( &handle->condition, NULL ) ) {
- errorText_ = "RtApiCore::probeDeviceOpen: error initializing pthread condition variable.";
- goto error;
- }
- stream_.apiHandle = (void *) handle;
- }
- else
- handle = (CoreHandle *) stream_.apiHandle;
- handle->iStream[mode] = firstStream;
- handle->nStreams[mode] = streamCount;
- handle->id[mode] = id;
-
- // Allocate necessary internal buffers.
- unsigned long bufferBytes;
- bufferBytes = stream_.nUserChannels[mode] * *bufferSize * formatBytes( stream_.userFormat );
- // stream_.userBuffer[mode] = (char *) calloc( bufferBytes, 1 );
- stream_.userBuffer[mode] = (char *) malloc( bufferBytes * sizeof(char) );
- memset( stream_.userBuffer[mode], 0, bufferBytes * sizeof(char) );
- if ( stream_.userBuffer[mode] == NULL ) {
- errorText_ = "RtApiCore::probeDeviceOpen: error allocating user buffer memory.";
- goto error;
- }
-
- // If possible, we will make use of the CoreAudio stream buffers as
- // "device buffers". However, we can't do this if using multiple
- // streams.
- if ( stream_.doConvertBuffer[mode] && handle->nStreams[mode] > 1 ) {
-
- bool makeBuffer = true;
- bufferBytes = stream_.nDeviceChannels[mode] * formatBytes( stream_.deviceFormat[mode] );
- if ( mode == INPUT ) {
- if ( stream_.mode == OUTPUT && stream_.deviceBuffer ) {
- unsigned long bytesOut = stream_.nDeviceChannels[0] * formatBytes( stream_.deviceFormat[0] );
- if ( bufferBytes <= bytesOut ) makeBuffer = false;
- }
- }
-
- if ( makeBuffer ) {
- bufferBytes *= *bufferSize;
- if ( stream_.deviceBuffer ) free( stream_.deviceBuffer );
- stream_.deviceBuffer = (char *) calloc( bufferBytes, 1 );
- if ( stream_.deviceBuffer == NULL ) {
- errorText_ = "RtApiCore::probeDeviceOpen: error allocating device buffer memory.";
- goto error;
- }
- }
- }
-
- stream_.sampleRate = sampleRate;
- stream_.device[mode] = device;
- stream_.state = STREAM_STOPPED;
- stream_.callbackInfo.object = (void *) this;
-
- // Setup the buffer conversion information structure.
- if ( stream_.doConvertBuffer[mode] ) {
- if ( streamCount > 1 ) setConvertInfo( mode, 0 );
- else setConvertInfo( mode, channelOffset );
- }
-
- if ( mode == INPUT && stream_.mode == OUTPUT && stream_.device[0] == device )
- // Only one callback procedure per device.
- stream_.mode = DUPLEX;
- else {
-#if defined( MAC_OS_X_VERSION_10_5 ) && ( MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_5 )
- result = AudioDeviceCreateIOProcID( id, callbackHandler, (void *) &stream_.callbackInfo, &handle->procId[mode] );
-#else
- // deprecated in favor of AudioDeviceCreateIOProcID()
- result = AudioDeviceAddIOProc( id, callbackHandler, (void *) &stream_.callbackInfo );
-#endif
- if ( result != noErr ) {
- errorStream_ << "RtApiCore::probeDeviceOpen: system error setting callback for device (" << device << ").";
- errorText_ = errorStream_.str();
- goto error;
- }
- if ( stream_.mode == OUTPUT && mode == INPUT )
- stream_.mode = DUPLEX;
- else
- stream_.mode = mode;
- }
-
- // Setup the device property listener for over/underload.
- property.mSelector = kAudioDeviceProcessorOverload;
- property.mScope = kAudioObjectPropertyScopeGlobal;
- result = AudioObjectAddPropertyListener( id, &property, xrunListener, (void *) handle );
-
- return SUCCESS;
-
- error:
- if ( handle ) {
- pthread_cond_destroy( &handle->condition );
- delete handle;
- stream_.apiHandle = 0;
- }
-
- for ( int i=0; i<2; i++ ) {
- if ( stream_.userBuffer[i] ) {
- free( stream_.userBuffer[i] );
- stream_.userBuffer[i] = 0;
- }
- }
-
- if ( stream_.deviceBuffer ) {
- free( stream_.deviceBuffer );
- stream_.deviceBuffer = 0;
- }
-
- stream_.state = STREAM_CLOSED;
- return FAILURE;
-}
-
-void RtApiCore :: closeStream( void ) // Tears down the open stream: removes listeners and IOProcs, frees buffers, destroys the CoreHandle.
-{
- if ( stream_.state == STREAM_CLOSED ) {
- errorText_ = "RtApiCore::closeStream(): no open stream to close!";
- error( RtAudioError::WARNING );
- return;
- }
- // Output side (index 0): remove the overload listener, stop the device if running, unregister the IOProc.
- CoreHandle *handle = (CoreHandle *) stream_.apiHandle;
- if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
- if (handle) {
- AudioObjectPropertyAddress property = { kAudioHardwarePropertyDevices,
- kAudioObjectPropertyScopeGlobal,
- kAudioObjectPropertyElementMaster };
- // The selector and scope are reassigned below, so only the element from the initializer is kept as-is.
- property.mSelector = kAudioDeviceProcessorOverload;
- property.mScope = kAudioObjectPropertyScopeGlobal;
- if (AudioObjectRemovePropertyListener( handle->id[0], &property, xrunListener, (void *) handle ) != noErr) {
- errorText_ = "RtApiCore::closeStream(): error removing property listener!";
- error( RtAudioError::WARNING );
- }
- }
- if ( stream_.state == STREAM_RUNNING ) // NOTE(review): handle is dereferenced from here on without the null check applied above.
- AudioDeviceStop( handle->id[0], callbackHandler );
-#if defined( MAC_OS_X_VERSION_10_5 ) && ( MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_5 )
- AudioDeviceDestroyIOProcID( handle->id[0], handle->procId[0] );
-#else
- // deprecated in favor of AudioDeviceDestroyIOProcID()
- AudioDeviceRemoveIOProc( handle->id[0], callbackHandler );
-#endif
- }
- // Input side (index 1): same teardown; skipped in DUPLEX mode when input and output use the same device.
- if ( stream_.mode == INPUT || ( stream_.mode == DUPLEX && stream_.device[0] != stream_.device[1] ) ) {
- if (handle) {
- AudioObjectPropertyAddress property = { kAudioHardwarePropertyDevices,
- kAudioObjectPropertyScopeGlobal,
- kAudioObjectPropertyElementMaster };
-
- property.mSelector = kAudioDeviceProcessorOverload;
- property.mScope = kAudioObjectPropertyScopeGlobal;
- if (AudioObjectRemovePropertyListener( handle->id[1], &property, xrunListener, (void *) handle ) != noErr) {
- errorText_ = "RtApiCore::closeStream(): error removing property listener!";
- error( RtAudioError::WARNING );
- }
- }
- if ( stream_.state == STREAM_RUNNING )
- AudioDeviceStop( handle->id[1], callbackHandler );
-#if defined( MAC_OS_X_VERSION_10_5 ) && ( MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_5 )
- AudioDeviceDestroyIOProcID( handle->id[1], handle->procId[1] );
-#else
- // deprecated in favor of AudioDeviceDestroyIOProcID()
- AudioDeviceRemoveIOProc( handle->id[1], callbackHandler );
-#endif
- }
- // Release both user buffers; pointers are zeroed after free.
- for ( int i=0; i<2; i++ ) {
- if ( stream_.userBuffer[i] ) {
- free( stream_.userBuffer[i] );
- stream_.userBuffer[i] = 0;
- }
- }
- // Release the single device buffer (one pointer, not per-mode).
- if ( stream_.deviceBuffer ) {
- free( stream_.deviceBuffer );
- stream_.deviceBuffer = 0;
- }
-
- // Destroy pthread condition variable and delete the handle. NOTE(review): no null check on handle here, unlike the guarded listener removal above.
- pthread_cond_destroy( &handle->condition );
- delete handle;
- stream_.apiHandle = 0;
- // Leave the stream in the uninitialized / closed state.
- stream_.mode = UNINITIALIZED;
- stream_.state = STREAM_CLOSED;
-}
-
-void RtApiCore :: startStream( void ) // Starts the device callback(s) for the open stream and marks it STREAM_RUNNING.
-{
- verifyStream();
- if ( stream_.state == STREAM_RUNNING ) {
- errorText_ = "RtApiCore::startStream(): the stream is already running!";
- error( RtAudioError::WARNING );
- return;
- }
- // result stays noErr unless an AudioDeviceStart call fails; it selects the exit path at the unlock label.
- OSStatus result = noErr;
- CoreHandle *handle = (CoreHandle *) stream_.apiHandle;
- if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
- // Start the output device (id[0]).
- result = AudioDeviceStart( handle->id[0], callbackHandler );
- if ( result != noErr ) {
- errorStream_ << "RtApiCore::startStream: system error (" << getErrorCode( result ) << ") starting callback procedure on device (" << stream_.device[0] << ").";
- errorText_ = errorStream_.str();
- goto unlock;
- }
- }
- // Start the input device (id[1]) unless DUPLEX mode uses one device for both directions.
- if ( stream_.mode == INPUT ||
- ( stream_.mode == DUPLEX && stream_.device[0] != stream_.device[1] ) ) {
- // NOTE(review): the message below omits getErrorCode( result ), unlike the output branch; on failure here an already-started output device is not stopped.
- result = AudioDeviceStart( handle->id[1], callbackHandler );
- if ( result != noErr ) {
- errorStream_ << "RtApiCore::startStream: system error starting input callback procedure on device (" << stream_.device[1] << ").";
- errorText_ = errorStream_.str();
- goto unlock;
- }
- }
- // Reset the drain bookkeeping before marking the stream as running.
- handle->drainCounter = 0;
- handle->internalDrain = false;
- stream_.state = STREAM_RUNNING;
- // Shared exit: success returns silently; failure calls error() with SYSTEM_ERROR.
- unlock:
- if ( result == noErr ) return;
- error( RtAudioError::SYSTEM_ERROR );
-}
-
-void RtApiCore :: stopStream( void ) // Stops the device callback(s); waits for an output drain first when none is in progress.
-{
- verifyStream();
- if ( stream_.state == STREAM_STOPPED ) {
- errorText_ = "RtApiCore::stopStream(): the stream is already stopped!";
- error( RtAudioError::WARNING );
- return;
- }
- // result stays noErr unless an AudioDeviceStop call fails; it selects the exit path at the unlock label.
- OSStatus result = noErr;
- CoreHandle *handle = (CoreHandle *) stream_.apiHandle;
- if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
- // If no drain has begun, request one and wait for callbackEvent() to signal the condition.
- if ( handle->drainCounter == 0 ) {
- handle->drainCounter = 2;
- pthread_cond_wait( &handle->condition, &stream_.mutex ); // block until signaled. NOTE(review): no lock of stream_.mutex is visible in this function before the wait -- confirm the locking contract.
- }
- // Stop the output device (id[0]).
- result = AudioDeviceStop( handle->id[0], callbackHandler );
- if ( result != noErr ) {
- errorStream_ << "RtApiCore::stopStream: system error (" << getErrorCode( result ) << ") stopping callback procedure on device (" << stream_.device[0] << ").";
- errorText_ = errorStream_.str();
- goto unlock;
- }
- }
- // Stop the input device (id[1]) unless DUPLEX mode uses one device for both directions.
- if ( stream_.mode == INPUT || ( stream_.mode == DUPLEX && stream_.device[0] != stream_.device[1] ) ) {
-
- result = AudioDeviceStop( handle->id[1], callbackHandler );
- if ( result != noErr ) {
- errorStream_ << "RtApiCore::stopStream: system error (" << getErrorCode( result ) << ") stopping input callback procedure on device (" << stream_.device[1] << ").";
- errorText_ = errorStream_.str();
- goto unlock;
- }
- }
- // Only reached when every stop call succeeded.
- stream_.state = STREAM_STOPPED;
- // Shared exit: success returns silently; failure calls error() with SYSTEM_ERROR.
- unlock:
- if ( result == noErr ) return;
- error( RtAudioError::SYSTEM_ERROR );
-}
-
-void RtApiCore :: abortStream( void ) // Stops the stream without waiting for a drain.
-{
- verifyStream();
- if ( stream_.state == STREAM_STOPPED ) {
- errorText_ = "RtApiCore::abortStream(): the stream is already stopped!";
- error( RtAudioError::WARNING );
- return;
- }
- // A non-zero drainCounter makes stopStream() skip its pthread_cond_wait.
- CoreHandle *handle = (CoreHandle *) stream_.apiHandle;
- handle->drainCounter = 2;
-
- stopStream();
-}
-
-// This function will be called by a spawned thread when the user
-// callback function signals that the stream should be stopped or
-// aborted. It is better to handle it this way because the
-// callbackEvent() function probably should return before the AudioDeviceStop()
-// function is called.
-static void *coreStopStream( void *ptr ) // Thread entry point; ptr is a CallbackInfo* whose object field is cast to the owning RtApiCore.
-{
- CallbackInfo *info = (CallbackInfo *) ptr;
- RtApiCore *object = (RtApiCore *) info->object;
- // Stop the stream from this helper thread, then end the thread.
- object->stopStream();
- pthread_exit( NULL );
-}
-
-bool RtApiCore :: callbackEvent( AudioDeviceID deviceId,
- const AudioBufferList *inBufferList,
- const AudioBufferList *outBufferList ) // Per-buffer handler: runs the user callback, fills outBufferList, reads inBufferList, and advances the drain state.
-{
- if ( stream_.state == STREAM_STOPPED || stream_.state == STREAM_STOPPING ) return SUCCESS;
- if ( stream_.state == STREAM_CLOSED ) {
- errorText_ = "RtApiCore::callbackEvent(): the stream is closed ... this shouldn't happen!";
- error( RtAudioError::WARNING );
- return FAILURE;
- }
-
- CallbackInfo *info = (CallbackInfo *) &stream_.callbackInfo;
- CoreHandle *handle = (CoreHandle *) stream_.apiHandle;
-
- // Check if we were draining the stream and signal is finished.
- if ( handle->drainCounter > 3 ) {
- ThreadHandle threadId;
- // An internal drain stops the stream from a new thread; an external one wakes the waiter in stopStream().
- stream_.state = STREAM_STOPPING;
- if ( handle->internalDrain == true )
- pthread_create( &threadId, NULL, coreStopStream, info );
- else // external call to stopStream()
- pthread_cond_signal( &handle->condition );
- return SUCCESS;
- }
-
- AudioDeviceID outputDevice = handle->id[0];
-
- // Invoke user callback to get fresh output data UNLESS we are
- // draining stream or duplex mode AND the input/output devices are
- // different AND this function is called for the input device.
- if ( handle->drainCounter == 0 && ( stream_.mode != DUPLEX || deviceId == outputDevice ) ) {
- RtAudioCallback callback = (RtAudioCallback) info->callback;
- double streamTime = getStreamTime();
- RtAudioStreamStatus status = 0;
- if ( stream_.mode != INPUT && handle->xrun[0] == true ) {
- status |= RTAUDIO_OUTPUT_UNDERFLOW;
- handle->xrun[0] = false;
- }
- if ( stream_.mode != OUTPUT && handle->xrun[1] == true ) {
- status |= RTAUDIO_INPUT_OVERFLOW;
- handle->xrun[1] = false;
- }
- // The callback's return value selects abort (2), drain (1), or continue (anything else).
- int cbReturnValue = callback( stream_.userBuffer[0], stream_.userBuffer[1],
- stream_.bufferSize, streamTime, status, info->userData );
- if ( cbReturnValue == 2 ) {
- stream_.state = STREAM_STOPPING;
- handle->drainCounter = 2;
- abortStream();
- return SUCCESS;
- }
- else if ( cbReturnValue == 1 ) {
- handle->drainCounter = 1;
- handle->internalDrain = true;
- }
- }
- // Output stage: runs in OUTPUT mode, or in DUPLEX mode when invoked for the output device.
- if ( stream_.mode == OUTPUT || ( stream_.mode == DUPLEX && deviceId == outputDevice ) ) {
-
- if ( handle->drainCounter > 1 ) { // write zeros to the output stream
-
- if ( handle->nStreams[0] == 1 ) {
- memset( outBufferList->mBuffers[handle->iStream[0]].mData,
- 0,
- outBufferList->mBuffers[handle->iStream[0]].mDataByteSize );
- }
- else { // fill multiple streams with zeros
- for ( unsigned int i=0; i<handle->nStreams[0]; i++ ) {
- memset( outBufferList->mBuffers[handle->iStream[0]+i].mData,
- 0,
- outBufferList->mBuffers[handle->iStream[0]+i].mDataByteSize );
- }
- }
- }
- else if ( handle->nStreams[0] == 1 ) {
- if ( stream_.doConvertBuffer[0] ) { // convert directly to CoreAudio stream buffer
- convertBuffer( (char *) outBufferList->mBuffers[handle->iStream[0]].mData,
- stream_.userBuffer[0], stream_.convertInfo[0] );
- }
- else { // copy from user buffer
- memcpy( outBufferList->mBuffers[handle->iStream[0]].mData,
- stream_.userBuffer[0],
- outBufferList->mBuffers[handle->iStream[0]].mDataByteSize );
- }
- }
- else { // fill multiple streams
- Float32 *inBuffer = (Float32 *) stream_.userBuffer[0];
- if ( stream_.doConvertBuffer[0] ) {
- convertBuffer( stream_.deviceBuffer, stream_.userBuffer[0], stream_.convertInfo[0] );
- inBuffer = (Float32 *) stream_.deviceBuffer;
- }
- // Source data is now in inBuffer: the user buffer, or the converted device buffer.
- if ( stream_.deviceInterleaved[0] == false ) { // mono mode
- UInt32 bufferBytes = outBufferList->mBuffers[handle->iStream[0]].mDataByteSize;
- for ( unsigned int i=0; i<stream_.nUserChannels[0]; i++ ) {
- memcpy( outBufferList->mBuffers[handle->iStream[0]+i].mData,
- (void *)&inBuffer[i*stream_.bufferSize], bufferBytes );
- }
- }
- else { // fill multiple multi-channel streams with interleaved data
- UInt32 streamChannels, channelsLeft, inJump, outJump, inOffset;
- Float32 *out, *in;
-
- bool inInterleaved = ( stream_.userInterleaved ) ? true : false;
- UInt32 inChannels = stream_.nUserChannels[0];
- if ( stream_.doConvertBuffer[0] ) {
- inInterleaved = true; // device buffer will always be interleaved for nStreams > 1 and not mono mode
- inChannels = stream_.nDeviceChannels[0];
- }
- // inOffset is the distance between consecutive channels of one frame in the source buffer.
- if ( inInterleaved ) inOffset = 1;
- else inOffset = stream_.bufferSize;
- // channelsLeft counts the source channels not yet written to any stream.
- channelsLeft = inChannels;
- for ( unsigned int i=0; i<handle->nStreams[0]; i++ ) {
- in = inBuffer;
- out = (Float32 *) outBufferList->mBuffers[handle->iStream[0]+i].mData;
- streamChannels = outBufferList->mBuffers[handle->iStream[0]+i].mNumberChannels;
-
- outJump = 0;
- // Account for possible channel offset in first stream
- if ( i == 0 && stream_.channelOffset[0] > 0 ) {
- streamChannels -= stream_.channelOffset[0];
- outJump = stream_.channelOffset[0];
- out += outJump;
- }
-
- // Account for possible unfilled channels at end of the last stream
- if ( streamChannels > channelsLeft ) {
- outJump = streamChannels - channelsLeft;
- streamChannels = channelsLeft;
- }
-
- // Determine input buffer offsets and skips
- if ( inInterleaved ) {
- inJump = inChannels;
- in += inChannels - channelsLeft;
- }
- else {
- inJump = 1;
- in += (inChannels - channelsLeft) * inOffset;
- }
- // NOTE(review): the frame loop index below shadows the outer stream index i.
- for ( unsigned int i=0; i<stream_.bufferSize; i++ ) {
- for ( unsigned int j=0; j<streamChannels; j++ ) {
- *out++ = in[j*inOffset];
- }
- out += outJump;
- in += inJump;
- }
- channelsLeft -= streamChannels;
- }
- }
- }
- }
-
- // Don't bother draining input: while a drain is active, advance the counter and skip the input stage.
- if ( handle->drainCounter ) {
- handle->drainCounter++;
- goto unlock;
- }
- // Input stage: runs in INPUT mode, or in DUPLEX mode when invoked for the input device.
- AudioDeviceID inputDevice;
- inputDevice = handle->id[1];
- if ( stream_.mode == INPUT || ( stream_.mode == DUPLEX && deviceId == inputDevice ) ) {
-
- if ( handle->nStreams[1] == 1 ) {
- if ( stream_.doConvertBuffer[1] ) { // convert directly from CoreAudio stream buffer
- convertBuffer( stream_.userBuffer[1],
- (char *) inBufferList->mBuffers[handle->iStream[1]].mData,
- stream_.convertInfo[1] );
- }
- else { // copy to user buffer
- memcpy( stream_.userBuffer[1],
- inBufferList->mBuffers[handle->iStream[1]].mData,
- inBufferList->mBuffers[handle->iStream[1]].mDataByteSize );
- }
- }
- else { // read from multiple streams
- Float32 *outBuffer = (Float32 *) stream_.userBuffer[1];
- if ( stream_.doConvertBuffer[1] ) outBuffer = (Float32 *) stream_.deviceBuffer;
- // Data is gathered into outBuffer: the user buffer, or the device buffer when a conversion follows.
- if ( stream_.deviceInterleaved[1] == false ) { // mono mode
- UInt32 bufferBytes = inBufferList->mBuffers[handle->iStream[1]].mDataByteSize;
- for ( unsigned int i=0; i<stream_.nUserChannels[1]; i++ ) {
- memcpy( (void *)&outBuffer[i*stream_.bufferSize],
- inBufferList->mBuffers[handle->iStream[1]+i].mData, bufferBytes );
- }
- }
- else { // read from multiple multi-channel streams
- UInt32 streamChannels, channelsLeft, inJump, outJump, outOffset;
- Float32 *out, *in;
-
- bool outInterleaved = ( stream_.userInterleaved ) ? true : false;
- UInt32 outChannels = stream_.nUserChannels[1];
- if ( stream_.doConvertBuffer[1] ) {
- outInterleaved = true; // device buffer will always be interleaved for nStreams > 1 and not mono mode
- outChannels = stream_.nDeviceChannels[1];
- }
- // outOffset is the distance between consecutive channels of one frame in the destination buffer.
- if ( outInterleaved ) outOffset = 1;
- else outOffset = stream_.bufferSize;
- // channelsLeft counts the destination channels not yet read from any stream.
- channelsLeft = outChannels;
- for ( unsigned int i=0; i<handle->nStreams[1]; i++ ) {
- out = outBuffer;
- in = (Float32 *) inBufferList->mBuffers[handle->iStream[1]+i].mData;
- streamChannels = inBufferList->mBuffers[handle->iStream[1]+i].mNumberChannels;
-
- inJump = 0;
- // Account for possible channel offset in first stream
- if ( i == 0 && stream_.channelOffset[1] > 0 ) {
- streamChannels -= stream_.channelOffset[1];
- inJump = stream_.channelOffset[1];
- in += inJump;
- }
-
- // Account for possible unread channels at end of the last stream
- if ( streamChannels > channelsLeft ) {
- inJump = streamChannels - channelsLeft;
- streamChannels = channelsLeft;
- }
-
- // Determine output buffer offsets and skips
- if ( outInterleaved ) {
- outJump = outChannels;
- out += outChannels - channelsLeft;
- }
- else {
- outJump = 1;
- out += (outChannels - channelsLeft) * outOffset;
- }
- // NOTE(review): the frame loop index below shadows the outer stream index i.
- for ( unsigned int i=0; i<stream_.bufferSize; i++ ) {
- for ( unsigned int j=0; j<streamChannels; j++ ) {
- out[j*outOffset] = *in++;
- }
- out += outJump;
- in += inJump;
- }
- channelsLeft -= streamChannels;
- }
- }
-
- if ( stream_.doConvertBuffer[1] ) { // convert from our internal "device" buffer
- convertBuffer( stream_.userBuffer[1],
- stream_.deviceBuffer,
- stream_.convertInfo[1] );
- }
- }
- }
- // Common exit for the drain and normal paths; the mutex unlock below is commented out.
- unlock:
- //MUTEX_UNLOCK( &stream_.mutex );
-
- RtApi::tickStreamTime();
- return SUCCESS;
-}
-
-const char* RtApiCore :: getErrorCode( OSStatus code ) // Returns the constant's name for a known CoreAudio status code, or a generic string otherwise.
-{
- switch( code ) {
- // Each case returns a string literal identical to the constant's identifier.
- case kAudioHardwareNotRunningError:
- return "kAudioHardwareNotRunningError";
-
- case kAudioHardwareUnspecifiedError:
- return "kAudioHardwareUnspecifiedError";
-
- case kAudioHardwareUnknownPropertyError:
- return "kAudioHardwareUnknownPropertyError";
-
- case kAudioHardwareBadPropertySizeError:
- return "kAudioHardwareBadPropertySizeError";
-
- case kAudioHardwareIllegalOperationError:
- return "kAudioHardwareIllegalOperationError";
-
- case kAudioHardwareBadObjectError:
- return "kAudioHardwareBadObjectError";
-
- case kAudioHardwareBadDeviceError:
- return "kAudioHardwareBadDeviceError";
-
- case kAudioHardwareBadStreamError:
- return "kAudioHardwareBadStreamError";
-
- case kAudioHardwareUnsupportedOperationError:
- return "kAudioHardwareUnsupportedOperationError";
-
- case kAudioDeviceUnsupportedFormatError:
- return "kAudioDeviceUnsupportedFormatError";
-
- case kAudioDevicePermissionsError:
- return "kAudioDevicePermissionsError";
- // Any code not listed above, including success values, falls through to the generic text.
- default:
- return "CoreAudio unknown error";
- }
-}
-
- //******************** End of __MACOSX_CORE__ *********************//
-#endif
-
-#if defined(__UNIX_JACK__)
-
-// JACK is a low-latency audio server, originally written for the
-// GNU/Linux operating system and now also ported to OS-X. It can
-// connect a number of different applications to an audio device, as
-// well as allowing them to share audio between themselves.
-//
-// When using JACK with RtAudio, "devices" refer to JACK clients that
-// have ports connected to the server. The JACK server is typically
-// started in a terminal as follows:
-//
-// .jackd -d alsa -d hw:0
-//
-// or through an interface program such as qjackctl. Many of the
-// parameters normally set for a stream are fixed by the JACK server
-// and can be specified when the JACK server is started. In
-// particular,
-//
-// .jackd -d alsa -d hw:0 -r 44100 -p 512 -n 4
-//
-// specifies a sample rate of 44100 Hz, a buffer size of 512 sample
-// frames, and number of buffers = 4. Once the server is running, it
-// is not possible to override these values. If the values are not
-// specified in the command-line, the JACK server uses default values.
-//
-// The JACK server does not have to be running when an instance of
-// RtApiJack is created, though the function getDeviceCount() will
-// report 0 devices found until JACK has been started. When no
-// devices are available (i.e., the JACK server is not running), a
-// stream cannot be opened.
-
-#include <jack/jack.h>
-#include <unistd.h>
-#include <cstdio>
-
-// A structure to hold various information related to the Jack API
-// implementation.
-struct JackHandle { // Per-stream Jack state; arrays of size 2 are indexed by stream mode (as in handle->ports[mode]).
- jack_client_t *client; // client connection; assigned in RtApiJack::probeDeviceOpen
- jack_port_t **ports[2]; // per-mode port arrays; a non-null entry marks that direction as in use (see jackXrun)
- std::string deviceName[2]; // per-mode device name, i.e. the port-name prefix before the colon
- bool xrun[2]; // per-mode flags set by jackXrun
- pthread_cond_t condition; // not set up by the constructor; initialized with pthread_cond_init in probeDeviceOpen
- int drainCounter; // Tracks callback counts when draining
- bool internalDrain; // Indicates if stop is initiated from callback or not.
- // The constructor zeroes every member except deviceName (default-constructed) and condition.
- JackHandle()
- :client(0), drainCounter(0), internalDrain(false) { ports[0] = 0; ports[1] = 0; xrun[0] = false; xrun[1] = false; }
-};
-
-static void jackSilentError( const char * ) {}; // No-op error handler; the RtApiJack constructor installs it unless __RTAUDIO_DEBUG__ is defined.
-
-RtApiJack :: RtApiJack() // Constructor: only adjusts Jack's error reporting; no client is opened here.
-{
- // Nothing to do here beyond the optional error-handler override below.
-#if !defined(__RTAUDIO_DEBUG__)
- // Turn off Jack's internal error reporting.
- jack_set_error_function( &jackSilentError );
-#endif
-}
-
-RtApiJack :: ~RtApiJack() // Destructor: closes the stream if one is still open.
-{
- if ( stream_.state != STREAM_CLOSED ) closeStream();
-}
-
-unsigned int RtApiJack :: getDeviceCount( void ) // Counts "devices" as distinct port-name prefixes (text up to the first colon); returns 0 if no client can be opened.
-{
- // See if we can become a jack client.
- jack_options_t options = (jack_options_t) ( JackNoStartServer ); //JackNullOption;
- jack_status_t *status = NULL; // a null status pointer is passed, so no status is read back here
- jack_client_t *client = jack_client_open( "RtApiJackCount", options, status );
- if ( client == 0 ) return 0;
- // Walk the NULL-terminated port list; nChannels is the index into that list.
- const char **ports;
- std::string port, previousPort;
- unsigned int nChannels = 0, nDevices = 0;
- ports = jack_get_ports( client, NULL, NULL, 0 );
- if ( ports ) {
- // Parse the port names up to the first colon (:).
- size_t iColon = 0;
- do { // NOTE(review): the first entry is read before any NULL-entry check -- presumably a non-null list is never empty; confirm.
- port = (char *) ports[ nChannels ];
- iColon = port.find(":");
- if ( iColon != std::string::npos ) {
- port = port.substr( 0, iColon + 1 ); // the colon is kept in the prefix here, unlike in getDeviceInfo
- if ( port != previousPort ) { // only consecutive entries are compared -- presumably ports of one client are adjacent; confirm
- nDevices++;
- previousPort = port;
- }
- }
- } while ( ports[++nChannels] );
- free( ports );
- }
- // The temporary client is closed before returning.
- jack_client_close( client );
- return nDevices;
-}
-
-RtAudio::DeviceInfo RtApiJack :: getDeviceInfo( unsigned int device ) // Fills a DeviceInfo for the device-th port-name prefix; info.probed stays false on every early return.
-{
- RtAudio::DeviceInfo info;
- info.probed = false;
- // Open a temporary client without starting a server.
- jack_options_t options = (jack_options_t) ( JackNoStartServer ); //JackNullOption
- jack_status_t *status = NULL;
- jack_client_t *client = jack_client_open( "RtApiJackInfo", options, status );
- if ( client == 0 ) {
- errorText_ = "RtApiJack::getDeviceInfo: Jack server not found or connection error!";
- error( RtAudioError::WARNING );
- return info;
- }
- // Walk the NULL-terminated port list and record the name of the device-th distinct prefix.
- const char **ports;
- std::string port, previousPort;
- unsigned int nPorts = 0, nDevices = 0;
- ports = jack_get_ports( client, NULL, NULL, 0 );
- if ( ports ) {
- // Parse the port names up to the first colon (:).
- size_t iColon = 0;
- do {
- port = (char *) ports[ nPorts ];
- iColon = port.find(":");
- if ( iColon != std::string::npos ) {
- port = port.substr( 0, iColon ); // prefix without the colon
- if ( port != previousPort ) {
- if ( nDevices == device ) info.name = port;
- nDevices++;
- previousPort = port;
- }
- }
- } while ( ports[++nPorts] );
- free( ports );
- }
- // Reject an index beyond the number of prefixes found.
- if ( device >= nDevices ) {
- jack_client_close( client );
- errorText_ = "RtApiJack::getDeviceInfo: device ID is invalid!";
- error( RtAudioError::INVALID_USE );
- return info;
- }
-
- // Get the current jack server sample rate.
- info.sampleRates.clear();
- // The server rate is the only rate listed and is also the preferred one.
- info.preferredSampleRate = jack_get_sample_rate( client );
- info.sampleRates.push_back( info.preferredSampleRate );
-
- // Count the available ports containing the client name as device
- // channels. Jack "input ports" equal RtAudio output channels.
- unsigned int nChannels = 0;
- ports = jack_get_ports( client, info.name.c_str(), NULL, JackPortIsInput );
- if ( ports ) {
- while ( ports[ nChannels ] ) nChannels++;
- free( ports );
- info.outputChannels = nChannels;
- }
-
- // Jack "output ports" equal RtAudio input channels.
- nChannels = 0;
- ports = jack_get_ports( client, info.name.c_str(), NULL, JackPortIsOutput );
- if ( ports ) {
- while ( ports[ nChannels ] ) nChannels++;
- free( ports );
- info.inputChannels = nChannels;
- }
- // Having no channels in either direction is reported as a warning and the probe is abandoned.
- if ( info.outputChannels == 0 && info.inputChannels == 0 ) {
- jack_client_close(client);
- errorText_ = "RtApiJack::getDeviceInfo: error determining Jack input/output channels!";
- error( RtAudioError::WARNING );
- return info;
- }
-
- // If device opens for both playback and capture, we determine the channels.
- if ( info.outputChannels > 0 && info.inputChannels > 0 ) // duplex count is the smaller of the two directions
- info.duplexChannels = (info.outputChannels > info.inputChannels) ? info.inputChannels : info.outputChannels;
-
- // Jack always uses 32-bit floats.
- info.nativeFormats = RTAUDIO_FLOAT32;
-
- // Jack doesn't provide default devices so we'll use the first available one.
- if ( device == 0 && info.outputChannels > 0 )
- info.isDefaultOutput = true;
- if ( device == 0 && info.inputChannels > 0 )
- info.isDefaultInput = true;
- // Success: close the temporary client and mark the info as probed.
- jack_client_close(client);
- info.probed = true;
- return info;
-}
-
-static int jackCallbackHandler( jack_nframes_t nframes, void *infoPointer ) // Forwards to RtApiJack::callbackEvent; returns 1 when that returns false, otherwise 0.
-{
- CallbackInfo *info = (CallbackInfo *) infoPointer;
- // infoPointer is treated as a CallbackInfo* whose object field is the RtApiJack instance.
- RtApiJack *object = (RtApiJack *) info->object;
- if ( object->callbackEvent( (unsigned long) nframes ) == false ) return 1;
-
- return 0;
-}
-
-// This function will be called by a spawned thread when the Jack
-// server signals that it is shutting down. It is necessary to handle
-// it this way because the jackShutdown() function must return before
-// the jack_deactivate() function (in closeStream()) will return.
-static void *jackCloseStream( void *ptr ) // Thread entry point; ptr is a CallbackInfo* whose object field is cast to the RtApiJack instance.
-{
- CallbackInfo *info = (CallbackInfo *) ptr;
- RtApiJack *object = (RtApiJack *) info->object;
- // Close the stream from this helper thread, then end the thread.
- object->closeStream();
-
- pthread_exit( NULL );
-}
-static void jackShutdown( void *infoPointer ) // If the stream is still running, spawns a thread that closes it and prints a notice to std::cerr.
-{
- CallbackInfo *info = (CallbackInfo *) infoPointer;
- RtApiJack *object = (RtApiJack *) info->object;
-
- // Check current stream state. If stopped, then we'll assume this
- // was called as a result of a call to RtApiJack::stopStream (the
- // deactivation of a client handle causes this function to be called).
- // If not, we'll assume the Jack server is shutting down or some
- // other problem occurred and we should close the stream.
- if ( object->isStreamRunning() == false ) return;
- // The close runs on a separate thread (jackCloseStream); threadId is not joined or detached in the visible code.
- ThreadHandle threadId;
- pthread_create( &threadId, NULL, jackCloseStream, info );
- std::cerr << "\nRtApiJack: the Jack server is shutting down this client ... stream stopped and closed!!\n" << std::endl;
-}
-
-static int jackXrun( void *infoPointer ) // Sets the xrun flag for each direction that has ports; always returns 0.
-{
- JackHandle *handle = (JackHandle *) infoPointer;
- // Unlike the other handlers here, infoPointer is cast to a JackHandle*, not a CallbackInfo*.
- if ( handle->ports[0] ) handle->xrun[0] = true;
- if ( handle->ports[1] ) handle->xrun[1] = true;
-
- return 0;
-}
-
-bool RtApiJack :: probeDeviceOpen( unsigned int device, StreamMode mode, unsigned int channels,
- unsigned int firstChannel, unsigned int sampleRate,
- RtAudioFormat format, unsigned int *bufferSize,
- RtAudio::StreamOptions *options )
-{
- JackHandle *handle = (JackHandle *) stream_.apiHandle;
-
- // Look for jack server and try to become a client (only do once per stream).
- jack_client_t *client = 0;
- if ( mode == OUTPUT || ( mode == INPUT && stream_.mode != OUTPUT ) ) {
- jack_options_t jackoptions = (jack_options_t) ( JackNoStartServer ); //JackNullOption;
- jack_status_t *status = NULL;
- if ( options && !options->streamName.empty() )
- client = jack_client_open( options->streamName.c_str(), jackoptions, status );
- else
- client = jack_client_open( "RtApiJack", jackoptions, status );
- if ( client == 0 ) {
- errorText_ = "RtApiJack::probeDeviceOpen: Jack server not found or connection error!";
- error( RtAudioError::WARNING );
- return FAILURE;
- }
- }
- else {
- // The handle must have been created on an earlier pass.
- client = handle->client;
- }
-
- const char **ports;
- std::string port, previousPort, deviceName;
- unsigned int nPorts = 0, nDevices = 0;
- ports = jack_get_ports( client, NULL, NULL, 0 );
- if ( ports ) {
- // Parse the port names up to the first colon (:).
- size_t iColon = 0;
- do {
- port = (char *) ports[ nPorts ];
- iColon = port.find(":");
- if ( iColon != std::string::npos ) {
- port = port.substr( 0, iColon );
- if ( port != previousPort ) {
- if ( nDevices == device ) deviceName = port;
- nDevices++;
- previousPort = port;
- }
- }
- } while ( ports[++nPorts] );
- free( ports );
- }
-
- if ( device >= nDevices ) {
- errorText_ = "RtApiJack::probeDeviceOpen: device ID is invalid!";
- return FAILURE;
- }
-
- // Count the available ports containing the client name as device
- // channels. Jack "input ports" equal RtAudio output channels.
- unsigned int nChannels = 0;
- unsigned long flag = JackPortIsInput;
- if ( mode == INPUT ) flag = JackPortIsOutput;
- ports = jack_get_ports( client, deviceName.c_str(), NULL, flag );
- if ( ports ) {
- while ( ports[ nChannels ] ) nChannels++;
- free( ports );
- }
-
- // Compare the jack ports for specified client to the requested number of channels.
- if ( nChannels < (channels + firstChannel) ) {
- errorStream_ << "RtApiJack::probeDeviceOpen: requested number of channels (" << channels << ") + offset (" << firstChannel << ") not found for specified device (" << device << ":" << deviceName << ").";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
-
- // Check the jack server sample rate.
- unsigned int jackRate = jack_get_sample_rate( client );
- if ( sampleRate != jackRate ) {
- jack_client_close( client );
- errorStream_ << "RtApiJack::probeDeviceOpen: the requested sample rate (" << sampleRate << ") is different than the JACK server rate (" << jackRate << ").";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
- stream_.sampleRate = jackRate;
-
- // Get the latency of the JACK port.
- ports = jack_get_ports( client, deviceName.c_str(), NULL, flag );
- if ( ports[ firstChannel ] ) {
- // Added by Ge Wang
- jack_latency_callback_mode_t cbmode = (mode == INPUT ? JackCaptureLatency : JackPlaybackLatency);
- // the range (usually the min and max are equal)
- jack_latency_range_t latrange; latrange.min = latrange.max = 0;
- // get the latency range
- jack_port_get_latency_range( jack_port_by_name( client, ports[firstChannel] ), cbmode, &latrange );
- // be optimistic, use the min!
- stream_.latency[mode] = latrange.min;
- //stream_.latency[mode] = jack_port_get_latency( jack_port_by_name( client, ports[ firstChannel ] ) );
- }
- free( ports );
-
- // The jack server always uses 32-bit floating-point data.
- stream_.deviceFormat[mode] = RTAUDIO_FLOAT32;
- stream_.userFormat = format;
-
- if ( options && options->flags & RTAUDIO_NONINTERLEAVED ) stream_.userInterleaved = false;
- else stream_.userInterleaved = true;
-
- // Jack always uses non-interleaved buffers.
- stream_.deviceInterleaved[mode] = false;
-
- // Jack always provides host byte-ordered data.
- stream_.doByteSwap[mode] = false;
-
- // Get the buffer size. The buffer size and number of buffers
- // (periods) is set when the jack server is started.
- stream_.bufferSize = (int) jack_get_buffer_size( client );
- *bufferSize = stream_.bufferSize;
-
- stream_.nDeviceChannels[mode] = channels;
- stream_.nUserChannels[mode] = channels;
-
- // Set flags for buffer conversion.
- stream_.doConvertBuffer[mode] = false;
- if ( stream_.userFormat != stream_.deviceFormat[mode] )
- stream_.doConvertBuffer[mode] = true;
- if ( stream_.userInterleaved != stream_.deviceInterleaved[mode] &&
- stream_.nUserChannels[mode] > 1 )
- stream_.doConvertBuffer[mode] = true;
-
- // Allocate our JackHandle structure for the stream.
- if ( handle == 0 ) {
- try {
- handle = new JackHandle;
- }
- catch ( std::bad_alloc& ) {
- errorText_ = "RtApiJack::probeDeviceOpen: error allocating JackHandle memory.";
- goto error;
- }
-
- if ( pthread_cond_init(&handle->condition, NULL) ) {
- errorText_ = "RtApiJack::probeDeviceOpen: error initializing pthread condition variable.";
- goto error;
- }
- stream_.apiHandle = (void *) handle;
- handle->client = client;
- }
- handle->deviceName[mode] = deviceName;
-
- // Allocate necessary internal buffers.
- unsigned long bufferBytes;
- bufferBytes = stream_.nUserChannels[mode] * *bufferSize * formatBytes( stream_.userFormat );
- stream_.userBuffer[mode] = (char *) calloc( bufferBytes, 1 );
- if ( stream_.userBuffer[mode] == NULL ) {
- errorText_ = "RtApiJack::probeDeviceOpen: error allocating user buffer memory.";
- goto error;
- }
-
- if ( stream_.doConvertBuffer[mode] ) {
-
- bool makeBuffer = true;
- if ( mode == OUTPUT )
- bufferBytes = stream_.nDeviceChannels[0] * formatBytes( stream_.deviceFormat[0] );
- else { // mode == INPUT
- bufferBytes = stream_.nDeviceChannels[1] * formatBytes( stream_.deviceFormat[1] );
- if ( stream_.mode == OUTPUT && stream_.deviceBuffer ) {
- unsigned long bytesOut = stream_.nDeviceChannels[0] * formatBytes(stream_.deviceFormat[0]);
- if ( bufferBytes < bytesOut ) makeBuffer = false;
- }
- }
-
- if ( makeBuffer ) {
- bufferBytes *= *bufferSize;
- if ( stream_.deviceBuffer ) free( stream_.deviceBuffer );
- stream_.deviceBuffer = (char *) calloc( bufferBytes, 1 );
- if ( stream_.deviceBuffer == NULL ) {
- errorText_ = "RtApiJack::probeDeviceOpen: error allocating device buffer memory.";
- goto error;
- }
- }
- }
-
- // Allocate memory for the Jack ports (channels) identifiers.
- handle->ports[mode] = (jack_port_t **) malloc ( sizeof (jack_port_t *) * channels );
- if ( handle->ports[mode] == NULL ) {
- errorText_ = "RtApiJack::probeDeviceOpen: error allocating port memory.";
- goto error;
- }
-
- stream_.device[mode] = device;
- stream_.channelOffset[mode] = firstChannel;
- stream_.state = STREAM_STOPPED;
- stream_.callbackInfo.object = (void *) this;
-
- if ( stream_.mode == OUTPUT && mode == INPUT )
- // We had already set up the stream for output.
- stream_.mode = DUPLEX;
- else {
- stream_.mode = mode;
- jack_set_process_callback( handle->client, jackCallbackHandler, (void *) &stream_.callbackInfo );
- jack_set_xrun_callback( handle->client, jackXrun, (void *) &handle );
- jack_on_shutdown( handle->client, jackShutdown, (void *) &stream_.callbackInfo );
- }
-
- // Register our ports.
- char label[64];
- if ( mode == OUTPUT ) {
- for ( unsigned int i=0; i<stream_.nUserChannels[0]; i++ ) {
- snprintf( label, 64, "outport %d", i );
- handle->ports[0][i] = jack_port_register( handle->client, (const char *)label,
- JACK_DEFAULT_AUDIO_TYPE, JackPortIsOutput, 0 );
- }
- }
- else {
- for ( unsigned int i=0; i<stream_.nUserChannels[1]; i++ ) {
- snprintf( label, 64, "inport %d", i );
- handle->ports[1][i] = jack_port_register( handle->client, (const char *)label,
- JACK_DEFAULT_AUDIO_TYPE, JackPortIsInput, 0 );
- }
- }
-
- // Setup the buffer conversion information structure. We don't use
- // buffers to do channel offsets, so we override that parameter
- // here.
- if ( stream_.doConvertBuffer[mode] ) setConvertInfo( mode, 0 );
-
- return SUCCESS;
-
- error:
- if ( handle ) {
- pthread_cond_destroy( &handle->condition );
- jack_client_close( handle->client );
-
- if ( handle->ports[0] ) free( handle->ports[0] );
- if ( handle->ports[1] ) free( handle->ports[1] );
-
- delete handle;
- stream_.apiHandle = 0;
- }
-
- for ( int i=0; i<2; i++ ) {
- if ( stream_.userBuffer[i] ) {
- free( stream_.userBuffer[i] );
- stream_.userBuffer[i] = 0;
- }
- }
-
- if ( stream_.deviceBuffer ) {
- free( stream_.deviceBuffer );
- stream_.deviceBuffer = 0;
- }
-
- return FAILURE;
-}
-
-void RtApiJack :: closeStream( void )
-{
- if ( stream_.state == STREAM_CLOSED ) {
- errorText_ = "RtApiJack::closeStream(): no open stream to close!";
- error( RtAudioError::WARNING );
- return;
- }
-
- JackHandle *handle = (JackHandle *) stream_.apiHandle;
- if ( handle ) {
-
- if ( stream_.state == STREAM_RUNNING )
- jack_deactivate( handle->client );
-
- jack_client_close( handle->client );
- }
-
- if ( handle ) {
- if ( handle->ports[0] ) free( handle->ports[0] );
- if ( handle->ports[1] ) free( handle->ports[1] );
- pthread_cond_destroy( &handle->condition );
- delete handle;
- stream_.apiHandle = 0;
- }
-
- for ( int i=0; i<2; i++ ) {
- if ( stream_.userBuffer[i] ) {
- free( stream_.userBuffer[i] );
- stream_.userBuffer[i] = 0;
- }
- }
-
- if ( stream_.deviceBuffer ) {
- free( stream_.deviceBuffer );
- stream_.deviceBuffer = 0;
- }
-
- stream_.mode = UNINITIALIZED;
- stream_.state = STREAM_CLOSED;
-}
-
-void RtApiJack :: startStream( void )
-{
- verifyStream();
- if ( stream_.state == STREAM_RUNNING ) {
- errorText_ = "RtApiJack::startStream(): the stream is already running!";
- error( RtAudioError::WARNING );
- return;
- }
-
- JackHandle *handle = (JackHandle *) stream_.apiHandle;
- int result = jack_activate( handle->client );
- if ( result ) {
- errorText_ = "RtApiJack::startStream(): unable to activate JACK client!";
- goto unlock;
- }
-
- const char **ports;
-
- // Get the list of available ports.
- if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
- result = 1;
- ports = jack_get_ports( handle->client, handle->deviceName[0].c_str(), NULL, JackPortIsInput);
- if ( ports == NULL) {
- errorText_ = "RtApiJack::startStream(): error determining available JACK input ports!";
- goto unlock;
- }
-
- // Now make the port connections. Since RtAudio wasn't designed to
- // allow the user to select particular channels of a device, we'll
- // just open the first "nChannels" ports with offset.
- for ( unsigned int i=0; i<stream_.nUserChannels[0]; i++ ) {
- result = 1;
- if ( ports[ stream_.channelOffset[0] + i ] )
- result = jack_connect( handle->client, jack_port_name( handle->ports[0][i] ), ports[ stream_.channelOffset[0] + i ] );
- if ( result ) {
- free( ports );
- errorText_ = "RtApiJack::startStream(): error connecting output ports!";
- goto unlock;
- }
- }
- free(ports);
- }
-
- if ( stream_.mode == INPUT || stream_.mode == DUPLEX ) {
- result = 1;
- ports = jack_get_ports( handle->client, handle->deviceName[1].c_str(), NULL, JackPortIsOutput );
- if ( ports == NULL) {
- errorText_ = "RtApiJack::startStream(): error determining available JACK output ports!";
- goto unlock;
- }
-
- // Now make the port connections. See note above.
- for ( unsigned int i=0; i<stream_.nUserChannels[1]; i++ ) {
- result = 1;
- if ( ports[ stream_.channelOffset[1] + i ] )
- result = jack_connect( handle->client, ports[ stream_.channelOffset[1] + i ], jack_port_name( handle->ports[1][i] ) );
- if ( result ) {
- free( ports );
- errorText_ = "RtApiJack::startStream(): error connecting input ports!";
- goto unlock;
- }
- }
- free(ports);
- }
-
- handle->drainCounter = 0;
- handle->internalDrain = false;
- stream_.state = STREAM_RUNNING;
-
- unlock:
- if ( result == 0 ) return;
- error( RtAudioError::SYSTEM_ERROR );
-}
-
-void RtApiJack :: stopStream( void )
-{
- verifyStream();
- if ( stream_.state == STREAM_STOPPED ) {
- errorText_ = "RtApiJack::stopStream(): the stream is already stopped!";
- error( RtAudioError::WARNING );
- return;
- }
-
- JackHandle *handle = (JackHandle *) stream_.apiHandle;
- if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
-
- if ( handle->drainCounter == 0 ) {
- handle->drainCounter = 2;
- pthread_cond_wait( &handle->condition, &stream_.mutex ); // block until signaled
- }
- }
-
- jack_deactivate( handle->client );
- stream_.state = STREAM_STOPPED;
-}
-
-void RtApiJack :: abortStream( void )
-{
- verifyStream();
- if ( stream_.state == STREAM_STOPPED ) {
- errorText_ = "RtApiJack::abortStream(): the stream is already stopped!";
- error( RtAudioError::WARNING );
- return;
- }
-
- JackHandle *handle = (JackHandle *) stream_.apiHandle;
- handle->drainCounter = 2;
-
- stopStream();
-}
-
-// This function will be called by a spawned thread when the user
-// callback function signals that the stream should be stopped or
-// aborted. It is necessary to handle it this way because the
-// callbackEvent() function must return before the jack_deactivate()
-// function will return.
-static void *jackStopStream( void *ptr )
-{
- CallbackInfo *info = (CallbackInfo *) ptr;
- RtApiJack *object = (RtApiJack *) info->object;
-
- object->stopStream();
- pthread_exit( NULL );
-}
-
-bool RtApiJack :: callbackEvent( unsigned long nframes )
-{
- if ( stream_.state == STREAM_STOPPED || stream_.state == STREAM_STOPPING ) return SUCCESS;
- if ( stream_.state == STREAM_CLOSED ) {
- errorText_ = "RtApiCore::callbackEvent(): the stream is closed ... this shouldn't happen!";
- error( RtAudioError::WARNING );
- return FAILURE;
- }
- if ( stream_.bufferSize != nframes ) {
- errorText_ = "RtApiCore::callbackEvent(): the JACK buffer size has changed ... cannot process!";
- error( RtAudioError::WARNING );
- return FAILURE;
- }
-
- CallbackInfo *info = (CallbackInfo *) &stream_.callbackInfo;
- JackHandle *handle = (JackHandle *) stream_.apiHandle;
-
- // Check if we were draining the stream and signal is finished.
- if ( handle->drainCounter > 3 ) {
- ThreadHandle threadId;
-
- stream_.state = STREAM_STOPPING;
- if ( handle->internalDrain == true )
- pthread_create( &threadId, NULL, jackStopStream, info );
- else
- pthread_cond_signal( &handle->condition );
- return SUCCESS;
- }
-
- // Invoke user callback first, to get fresh output data.
- if ( handle->drainCounter == 0 ) {
- RtAudioCallback callback = (RtAudioCallback) info->callback;
- double streamTime = getStreamTime();
- RtAudioStreamStatus status = 0;
- if ( stream_.mode != INPUT && handle->xrun[0] == true ) {
- status |= RTAUDIO_OUTPUT_UNDERFLOW;
- handle->xrun[0] = false;
- }
- if ( stream_.mode != OUTPUT && handle->xrun[1] == true ) {
- status |= RTAUDIO_INPUT_OVERFLOW;
- handle->xrun[1] = false;
- }
- int cbReturnValue = callback( stream_.userBuffer[0], stream_.userBuffer[1],
- stream_.bufferSize, streamTime, status, info->userData );
- if ( cbReturnValue == 2 ) {
- stream_.state = STREAM_STOPPING;
- handle->drainCounter = 2;
- ThreadHandle id;
- pthread_create( &id, NULL, jackStopStream, info );
- return SUCCESS;
- }
- else if ( cbReturnValue == 1 ) {
- handle->drainCounter = 1;
- handle->internalDrain = true;
- }
- }
-
- jack_default_audio_sample_t *jackbuffer;
- unsigned long bufferBytes = nframes * sizeof( jack_default_audio_sample_t );
- if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
-
- if ( handle->drainCounter > 1 ) { // write zeros to the output stream
-
- for ( unsigned int i=0; i<stream_.nDeviceChannels[0]; i++ ) {
- jackbuffer = (jack_default_audio_sample_t *) jack_port_get_buffer( handle->ports[0][i], (jack_nframes_t) nframes );
- memset( jackbuffer, 0, bufferBytes );
- }
-
- }
- else if ( stream_.doConvertBuffer[0] ) {
-
- convertBuffer( stream_.deviceBuffer, stream_.userBuffer[0], stream_.convertInfo[0] );
-
- for ( unsigned int i=0; i<stream_.nDeviceChannels[0]; i++ ) {
- jackbuffer = (jack_default_audio_sample_t *) jack_port_get_buffer( handle->ports[0][i], (jack_nframes_t) nframes );
- memcpy( jackbuffer, &stream_.deviceBuffer[i*bufferBytes], bufferBytes );
- }
- }
- else { // no buffer conversion
- for ( unsigned int i=0; i<stream_.nUserChannels[0]; i++ ) {
- jackbuffer = (jack_default_audio_sample_t *) jack_port_get_buffer( handle->ports[0][i], (jack_nframes_t) nframes );
- memcpy( jackbuffer, &stream_.userBuffer[0][i*bufferBytes], bufferBytes );
- }
- }
- }
-
- // Don't bother draining input
- if ( handle->drainCounter ) {
- handle->drainCounter++;
- goto unlock;
- }
-
- if ( stream_.mode == INPUT || stream_.mode == DUPLEX ) {
-
- if ( stream_.doConvertBuffer[1] ) {
- for ( unsigned int i=0; i<stream_.nDeviceChannels[1]; i++ ) {
- jackbuffer = (jack_default_audio_sample_t *) jack_port_get_buffer( handle->ports[1][i], (jack_nframes_t) nframes );
- memcpy( &stream_.deviceBuffer[i*bufferBytes], jackbuffer, bufferBytes );
- }
- convertBuffer( stream_.userBuffer[1], stream_.deviceBuffer, stream_.convertInfo[1] );
- }
- else { // no buffer conversion
- for ( unsigned int i=0; i<stream_.nUserChannels[1]; i++ ) {
- jackbuffer = (jack_default_audio_sample_t *) jack_port_get_buffer( handle->ports[1][i], (jack_nframes_t) nframes );
- memcpy( &stream_.userBuffer[1][i*bufferBytes], jackbuffer, bufferBytes );
- }
- }
- }
-
- unlock:
- RtApi::tickStreamTime();
- return SUCCESS;
-}
- //******************** End of __UNIX_JACK__ *********************//
-#endif
-
-#if defined(__WINDOWS_ASIO__) // ASIO API on Windows
-
-// The ASIO API is designed around a callback scheme, so this
-// implementation is similar to that used for OS-X CoreAudio and Linux
-// Jack. The primary constraint with ASIO is that it only allows
-// access to a single driver at a time. Thus, it is not possible to
-// have more than one simultaneous RtAudio stream.
-//
-// This implementation also requires a number of external ASIO files
-// and a few global variables. The ASIO callback scheme does not
-// allow for the passing of user data, so we must create a global
-// pointer to our callbackInfo structure.
-//
-// On unix systems, we make use of a pthread condition variable.
-// Since there is no equivalent in Windows, I hacked something based
-// on information found in
-// http://www.cs.wustl.edu/~schmidt/win32-cv-1.html.
-
-#include "asiosys.h"
-#include "asio.h"
-#include "iasiothiscallresolver.h"
-#include "asiodrivers.h"
-#include <cmath>
-
-static AsioDrivers drivers;
-static ASIOCallbacks asioCallbacks;
-static ASIODriverInfo driverInfo;
-static CallbackInfo *asioCallbackInfo;
-static bool asioXRun;
-
-struct AsioHandle {
- int drainCounter; // Tracks callback counts when draining
- bool internalDrain; // Indicates if stop is initiated from callback or not.
- ASIOBufferInfo *bufferInfos;
- HANDLE condition;
-
- AsioHandle()
- :drainCounter(0), internalDrain(false), bufferInfos(0) {}
-};
-
-// Function declarations (definitions at end of section)
-static const char* getAsioErrorString( ASIOError result );
-static void sampleRateChanged( ASIOSampleRate sRate );
-static long asioMessages( long selector, long value, void* message, double* opt );
-
-RtApiAsio :: RtApiAsio()
-{
- // ASIO cannot run on a multi-threaded appartment. You can call
- // CoInitialize beforehand, but it must be for appartment threading
- // (in which case, CoInitilialize will return S_FALSE here).
- coInitialized_ = false;
- HRESULT hr = CoInitialize( NULL );
- if ( FAILED(hr) ) {
- errorText_ = "RtApiAsio::ASIO requires a single-threaded appartment. Call CoInitializeEx(0,COINIT_APARTMENTTHREADED)";
- error( RtAudioError::WARNING );
- }
- coInitialized_ = true;
-
- drivers.removeCurrentDriver();
- driverInfo.asioVersion = 2;
-
- // See note in DirectSound implementation about GetDesktopWindow().
- driverInfo.sysRef = GetForegroundWindow();
-}
-
-RtApiAsio :: ~RtApiAsio()
-{
- if ( stream_.state != STREAM_CLOSED ) closeStream();
- if ( coInitialized_ ) CoUninitialize();
-}
-
-unsigned int RtApiAsio :: getDeviceCount( void )
-{
- return (unsigned int) drivers.asioGetNumDev();
-}
-
-RtAudio::DeviceInfo RtApiAsio :: getDeviceInfo( unsigned int device )
-{
- RtAudio::DeviceInfo info;
- info.probed = false;
-
- // Get device ID
- unsigned int nDevices = getDeviceCount();
- if ( nDevices == 0 ) {
- errorText_ = "RtApiAsio::getDeviceInfo: no devices found!";
- error( RtAudioError::INVALID_USE );
- return info;
- }
-
- if ( device >= nDevices ) {
- errorText_ = "RtApiAsio::getDeviceInfo: device ID is invalid!";
- error( RtAudioError::INVALID_USE );
- return info;
- }
-
- // If a stream is already open, we cannot probe other devices. Thus, use the saved results.
- if ( stream_.state != STREAM_CLOSED ) {
- if ( device >= devices_.size() ) {
- errorText_ = "RtApiAsio::getDeviceInfo: device ID was not present before stream was opened.";
- error( RtAudioError::WARNING );
- return info;
- }
- return devices_[ device ];
- }
-
- char driverName[32];
- ASIOError result = drivers.asioGetDriverName( (int) device, driverName, 32 );
- if ( result != ASE_OK ) {
- errorStream_ << "RtApiAsio::getDeviceInfo: unable to get driver name (" << getAsioErrorString( result ) << ").";
- errorText_ = errorStream_.str();
- error( RtAudioError::WARNING );
- return info;
- }
-
- info.name = driverName;
-
- if ( !drivers.loadDriver( driverName ) ) {
- errorStream_ << "RtApiAsio::getDeviceInfo: unable to load driver (" << driverName << ").";
- errorText_ = errorStream_.str();
- error( RtAudioError::WARNING );
- return info;
- }
-
- result = ASIOInit( &driverInfo );
- if ( result != ASE_OK ) {
- errorStream_ << "RtApiAsio::getDeviceInfo: error (" << getAsioErrorString( result ) << ") initializing driver (" << driverName << ").";
- errorText_ = errorStream_.str();
- error( RtAudioError::WARNING );
- return info;
- }
-
- // Determine the device channel information.
- long inputChannels, outputChannels;
- result = ASIOGetChannels( &inputChannels, &outputChannels );
- if ( result != ASE_OK ) {
- drivers.removeCurrentDriver();
- errorStream_ << "RtApiAsio::getDeviceInfo: error (" << getAsioErrorString( result ) << ") getting channel count (" << driverName << ").";
- errorText_ = errorStream_.str();
- error( RtAudioError::WARNING );
- return info;
- }
-
- info.outputChannels = outputChannels;
- info.inputChannels = inputChannels;
- if ( info.outputChannels > 0 && info.inputChannels > 0 )
- info.duplexChannels = (info.outputChannels > info.inputChannels) ? info.inputChannels : info.outputChannels;
-
- // Determine the supported sample rates.
- info.sampleRates.clear();
- for ( unsigned int i=0; i<MAX_SAMPLE_RATES; i++ ) {
- result = ASIOCanSampleRate( (ASIOSampleRate) SAMPLE_RATES[i] );
- if ( result == ASE_OK ) {
- info.sampleRates.push_back( SAMPLE_RATES[i] );
-
- if ( !info.preferredSampleRate || ( SAMPLE_RATES[i] <= 48000 && SAMPLE_RATES[i] > info.preferredSampleRate ) )
- info.preferredSampleRate = SAMPLE_RATES[i];
- }
- }
-
- // Determine supported data types ... just check first channel and assume rest are the same.
- ASIOChannelInfo channelInfo;
- channelInfo.channel = 0;
- channelInfo.isInput = true;
- if ( info.inputChannels <= 0 ) channelInfo.isInput = false;
- result = ASIOGetChannelInfo( &channelInfo );
- if ( result != ASE_OK ) {
- drivers.removeCurrentDriver();
- errorStream_ << "RtApiAsio::getDeviceInfo: error (" << getAsioErrorString( result ) << ") getting driver channel info (" << driverName << ").";
- errorText_ = errorStream_.str();
- error( RtAudioError::WARNING );
- return info;
- }
-
- info.nativeFormats = 0;
- if ( channelInfo.type == ASIOSTInt16MSB || channelInfo.type == ASIOSTInt16LSB )
- info.nativeFormats |= RTAUDIO_SINT16;
- else if ( channelInfo.type == ASIOSTInt32MSB || channelInfo.type == ASIOSTInt32LSB )
- info.nativeFormats |= RTAUDIO_SINT32;
- else if ( channelInfo.type == ASIOSTFloat32MSB || channelInfo.type == ASIOSTFloat32LSB )
- info.nativeFormats |= RTAUDIO_FLOAT32;
- else if ( channelInfo.type == ASIOSTFloat64MSB || channelInfo.type == ASIOSTFloat64LSB )
- info.nativeFormats |= RTAUDIO_FLOAT64;
- else if ( channelInfo.type == ASIOSTInt24MSB || channelInfo.type == ASIOSTInt24LSB )
- info.nativeFormats |= RTAUDIO_SINT24;
-
- if ( info.outputChannels > 0 )
- if ( getDefaultOutputDevice() == device ) info.isDefaultOutput = true;
- if ( info.inputChannels > 0 )
- if ( getDefaultInputDevice() == device ) info.isDefaultInput = true;
-
- info.probed = true;
- drivers.removeCurrentDriver();
- return info;
-}
-
-static void bufferSwitch( long index, ASIOBool /*processNow*/ )
-{
- RtApiAsio *object = (RtApiAsio *) asioCallbackInfo->object;
- object->callbackEvent( index );
-}
-
-void RtApiAsio :: saveDeviceInfo( void )
-{
- devices_.clear();
-
- unsigned int nDevices = getDeviceCount();
- devices_.resize( nDevices );
- for ( unsigned int i=0; i<nDevices; i++ )
- devices_[i] = getDeviceInfo( i );
-}
-
-bool RtApiAsio :: probeDeviceOpen( unsigned int device, StreamMode mode, unsigned int channels,
- unsigned int firstChannel, unsigned int sampleRate,
- RtAudioFormat format, unsigned int *bufferSize,
- RtAudio::StreamOptions *options )
-{////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
- bool isDuplexInput = mode == INPUT && stream_.mode == OUTPUT;
-
- // For ASIO, a duplex stream MUST use the same driver.
- if ( isDuplexInput && stream_.device[0] != device ) {
- errorText_ = "RtApiAsio::probeDeviceOpen: an ASIO duplex stream must use the same device for input and output!";
- return FAILURE;
- }
-
- char driverName[32];
- ASIOError result = drivers.asioGetDriverName( (int) device, driverName, 32 );
- if ( result != ASE_OK ) {
- errorStream_ << "RtApiAsio::probeDeviceOpen: unable to get driver name (" << getAsioErrorString( result ) << ").";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
-
- // Only load the driver once for duplex stream.
- if ( !isDuplexInput ) {
- // The getDeviceInfo() function will not work when a stream is open
- // because ASIO does not allow multiple devices to run at the same
- // time. Thus, we'll probe the system before opening a stream and
- // save the results for use by getDeviceInfo().
- this->saveDeviceInfo();
-
- if ( !drivers.loadDriver( driverName ) ) {
- errorStream_ << "RtApiAsio::probeDeviceOpen: unable to load driver (" << driverName << ").";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
-
- result = ASIOInit( &driverInfo );
- if ( result != ASE_OK ) {
- errorStream_ << "RtApiAsio::probeDeviceOpen: error (" << getAsioErrorString( result ) << ") initializing driver (" << driverName << ").";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
- }
-
- // keep them before any "goto error", they are used for error cleanup + goto device boundary checks
- bool buffersAllocated = false;
- AsioHandle *handle = (AsioHandle *) stream_.apiHandle;
- unsigned int nChannels;
-
-
- // Check the device channel count.
- long inputChannels, outputChannels;
- result = ASIOGetChannels( &inputChannels, &outputChannels );
- if ( result != ASE_OK ) {
- errorStream_ << "RtApiAsio::probeDeviceOpen: error (" << getAsioErrorString( result ) << ") getting channel count (" << driverName << ").";
- errorText_ = errorStream_.str();
- goto error;
- }
-
- if ( ( mode == OUTPUT && (channels+firstChannel) > (unsigned int) outputChannels) ||
- ( mode == INPUT && (channels+firstChannel) > (unsigned int) inputChannels) ) {
- errorStream_ << "RtApiAsio::probeDeviceOpen: driver (" << driverName << ") does not support requested channel count (" << channels << ") + offset (" << firstChannel << ").";
- errorText_ = errorStream_.str();
- goto error;
- }
- stream_.nDeviceChannels[mode] = channels;
- stream_.nUserChannels[mode] = channels;
- stream_.channelOffset[mode] = firstChannel;
-
- // Verify the sample rate is supported.
- result = ASIOCanSampleRate( (ASIOSampleRate) sampleRate );
- if ( result != ASE_OK ) {
- errorStream_ << "RtApiAsio::probeDeviceOpen: driver (" << driverName << ") does not support requested sample rate (" << sampleRate << ").";
- errorText_ = errorStream_.str();
- goto error;
- }
-
- // Get the current sample rate
- ASIOSampleRate currentRate;
- result = ASIOGetSampleRate( ¤tRate );
- if ( result != ASE_OK ) {
- errorStream_ << "RtApiAsio::probeDeviceOpen: driver (" << driverName << ") error getting sample rate.";
- errorText_ = errorStream_.str();
- goto error;
- }
-
- // Set the sample rate only if necessary
- if ( currentRate != sampleRate ) {
- result = ASIOSetSampleRate( (ASIOSampleRate) sampleRate );
- if ( result != ASE_OK ) {
- errorStream_ << "RtApiAsio::probeDeviceOpen: driver (" << driverName << ") error setting sample rate (" << sampleRate << ").";
- errorText_ = errorStream_.str();
- goto error;
- }
- }
-
- // Determine the driver data type.
- ASIOChannelInfo channelInfo;
- channelInfo.channel = 0;
- if ( mode == OUTPUT ) channelInfo.isInput = false;
- else channelInfo.isInput = true;
- result = ASIOGetChannelInfo( &channelInfo );
- if ( result != ASE_OK ) {
- errorStream_ << "RtApiAsio::probeDeviceOpen: driver (" << driverName << ") error (" << getAsioErrorString( result ) << ") getting data format.";
- errorText_ = errorStream_.str();
- goto error;
- }
-
- // Assuming WINDOWS host is always little-endian.
- stream_.doByteSwap[mode] = false;
- stream_.userFormat = format;
- stream_.deviceFormat[mode] = 0;
- if ( channelInfo.type == ASIOSTInt16MSB || channelInfo.type == ASIOSTInt16LSB ) {
- stream_.deviceFormat[mode] = RTAUDIO_SINT16;
- if ( channelInfo.type == ASIOSTInt16MSB ) stream_.doByteSwap[mode] = true;
- }
- else if ( channelInfo.type == ASIOSTInt32MSB || channelInfo.type == ASIOSTInt32LSB ) {
- stream_.deviceFormat[mode] = RTAUDIO_SINT32;
- if ( channelInfo.type == ASIOSTInt32MSB ) stream_.doByteSwap[mode] = true;
- }
- else if ( channelInfo.type == ASIOSTFloat32MSB || channelInfo.type == ASIOSTFloat32LSB ) {
- stream_.deviceFormat[mode] = RTAUDIO_FLOAT32;
- if ( channelInfo.type == ASIOSTFloat32MSB ) stream_.doByteSwap[mode] = true;
- }
- else if ( channelInfo.type == ASIOSTFloat64MSB || channelInfo.type == ASIOSTFloat64LSB ) {
- stream_.deviceFormat[mode] = RTAUDIO_FLOAT64;
- if ( channelInfo.type == ASIOSTFloat64MSB ) stream_.doByteSwap[mode] = true;
- }
- else if ( channelInfo.type == ASIOSTInt24MSB || channelInfo.type == ASIOSTInt24LSB ) {
- stream_.deviceFormat[mode] = RTAUDIO_SINT24;
- if ( channelInfo.type == ASIOSTInt24MSB ) stream_.doByteSwap[mode] = true;
- }
-
- if ( stream_.deviceFormat[mode] == 0 ) {
- errorStream_ << "RtApiAsio::probeDeviceOpen: driver (" << driverName << ") data format not supported by RtAudio.";
- errorText_ = errorStream_.str();
- goto error;
- }
-
- // Set the buffer size. For a duplex stream, this will end up
- // setting the buffer size based on the input constraints, which
- // should be ok.
- long minSize, maxSize, preferSize, granularity;
- result = ASIOGetBufferSize( &minSize, &maxSize, &preferSize, &granularity );
- if ( result != ASE_OK ) {
- errorStream_ << "RtApiAsio::probeDeviceOpen: driver (" << driverName << ") error (" << getAsioErrorString( result ) << ") getting buffer size.";
- errorText_ = errorStream_.str();
- goto error;
- }
-
- if ( isDuplexInput ) {
- // When this is the duplex input (output was opened before), then we have to use the same
- // buffersize as the output, because it might use the preferred buffer size, which most
- // likely wasn't passed as input to this. The buffer sizes have to be identically anyway,
- // So instead of throwing an error, make them equal. The caller uses the reference
- // to the "bufferSize" param as usual to set up processing buffers.
-
- *bufferSize = stream_.bufferSize;
-
- } else {
- if ( *bufferSize == 0 ) *bufferSize = preferSize;
- else if ( *bufferSize < (unsigned int) minSize ) *bufferSize = (unsigned int) minSize;
- else if ( *bufferSize > (unsigned int) maxSize ) *bufferSize = (unsigned int) maxSize;
- else if ( granularity == -1 ) {
- // Make sure bufferSize is a power of two.
- int log2_of_min_size = 0;
- int log2_of_max_size = 0;
-
- for ( unsigned int i = 0; i < sizeof(long) * 8; i++ ) {
- if ( minSize & ((long)1 << i) ) log2_of_min_size = i;
- if ( maxSize & ((long)1 << i) ) log2_of_max_size = i;
- }
-
- long min_delta = std::abs( (long)*bufferSize - ((long)1 << log2_of_min_size) );
- int min_delta_num = log2_of_min_size;
-
- for (int i = log2_of_min_size + 1; i <= log2_of_max_size; i++) {
- long current_delta = std::abs( (long)*bufferSize - ((long)1 << i) );
- if (current_delta < min_delta) {
- min_delta = current_delta;
- min_delta_num = i;
- }
- }
-
- *bufferSize = ( (unsigned int)1 << min_delta_num );
- if ( *bufferSize < (unsigned int) minSize ) *bufferSize = (unsigned int) minSize;
- else if ( *bufferSize > (unsigned int) maxSize ) *bufferSize = (unsigned int) maxSize;
- }
- else if ( granularity != 0 ) {
- // Set to an even multiple of granularity, rounding up.
- *bufferSize = (*bufferSize + granularity-1) / granularity * granularity;
- }
- }
-
- /*
- // we don't use it anymore, see above!
- // Just left it here for the case...
- if ( isDuplexInput && stream_.bufferSize != *bufferSize ) {
- errorText_ = "RtApiAsio::probeDeviceOpen: input/output buffersize discrepancy!";
- goto error;
- }
- */
-
- stream_.bufferSize = *bufferSize;
- stream_.nBuffers = 2;
-
- if ( options && options->flags & RTAUDIO_NONINTERLEAVED ) stream_.userInterleaved = false;
- else stream_.userInterleaved = true;
-
- // ASIO always uses non-interleaved buffers.
- stream_.deviceInterleaved[mode] = false;
-
- // Allocate, if necessary, our AsioHandle structure for the stream.
- if ( handle == 0 ) {
- try {
- handle = new AsioHandle;
- }
- catch ( std::bad_alloc& ) {
- errorText_ = "RtApiAsio::probeDeviceOpen: error allocating AsioHandle memory.";
- goto error;
- }
- handle->bufferInfos = 0;
-
- // Create a manual-reset event.
- handle->condition = CreateEvent( NULL, // no security
- TRUE, // manual-reset
- FALSE, // non-signaled initially
- NULL ); // unnamed
- stream_.apiHandle = (void *) handle;
- }
-
- // Create the ASIO internal buffers. Since RtAudio sets up input
- // and output separately, we'll have to dispose of previously
- // created output buffers for a duplex stream.
- if ( mode == INPUT && stream_.mode == OUTPUT ) {
- ASIODisposeBuffers();
- if ( handle->bufferInfos ) free( handle->bufferInfos );
- }
-
- // Allocate, initialize, and save the bufferInfos in our stream callbackInfo structure.
- unsigned int i;
- nChannels = stream_.nDeviceChannels[0] + stream_.nDeviceChannels[1];
- handle->bufferInfos = (ASIOBufferInfo *) malloc( nChannels * sizeof(ASIOBufferInfo) );
- if ( handle->bufferInfos == NULL ) {
- errorStream_ << "RtApiAsio::probeDeviceOpen: error allocating bufferInfo memory for driver (" << driverName << ").";
- errorText_ = errorStream_.str();
- goto error;
- }
-
- ASIOBufferInfo *infos;
- infos = handle->bufferInfos;
- for ( i=0; i<stream_.nDeviceChannels[0]; i++, infos++ ) {
- infos->isInput = ASIOFalse;
- infos->channelNum = i + stream_.channelOffset[0];
- infos->buffers[0] = infos->buffers[1] = 0;
- }
- for ( i=0; i<stream_.nDeviceChannels[1]; i++, infos++ ) {
- infos->isInput = ASIOTrue;
- infos->channelNum = i + stream_.channelOffset[1];
- infos->buffers[0] = infos->buffers[1] = 0;
- }
-
- // prepare for callbacks
- stream_.sampleRate = sampleRate;
- stream_.device[mode] = device;
- stream_.mode = isDuplexInput ? DUPLEX : mode;
-
- // store this class instance before registering callbacks, that are going to use it
- asioCallbackInfo = &stream_.callbackInfo;
- stream_.callbackInfo.object = (void *) this;
-
- // Set up the ASIO callback structure and create the ASIO data buffers.
- asioCallbacks.bufferSwitch = &bufferSwitch;
- asioCallbacks.sampleRateDidChange = &sampleRateChanged;
- asioCallbacks.asioMessage = &asioMessages;
- asioCallbacks.bufferSwitchTimeInfo = NULL;
- result = ASIOCreateBuffers( handle->bufferInfos, nChannels, stream_.bufferSize, &asioCallbacks );
- if ( result != ASE_OK ) {
- // Standard method failed. This can happen with strict/misbehaving drivers that return valid buffer size ranges
- // but only accept the preferred buffer size as parameter for ASIOCreateBuffers. eg. Creatives ASIO driver
- // in that case, let's be naïve and try that instead
- *bufferSize = preferSize;
- stream_.bufferSize = *bufferSize;
- result = ASIOCreateBuffers( handle->bufferInfos, nChannels, stream_.bufferSize, &asioCallbacks );
- }
-
- if ( result != ASE_OK ) {
- errorStream_ << "RtApiAsio::probeDeviceOpen: driver (" << driverName << ") error (" << getAsioErrorString( result ) << ") creating buffers.";
- errorText_ = errorStream_.str();
- goto error;
- }
- buffersAllocated = true;
- stream_.state = STREAM_STOPPED;
-
- // Set flags for buffer conversion.
- stream_.doConvertBuffer[mode] = false;
- if ( stream_.userFormat != stream_.deviceFormat[mode] )
- stream_.doConvertBuffer[mode] = true;
- if ( stream_.userInterleaved != stream_.deviceInterleaved[mode] &&
- stream_.nUserChannels[mode] > 1 )
- stream_.doConvertBuffer[mode] = true;
-
- // Allocate necessary internal buffers
- unsigned long bufferBytes;
- bufferBytes = stream_.nUserChannels[mode] * *bufferSize * formatBytes( stream_.userFormat );
- stream_.userBuffer[mode] = (char *) calloc( bufferBytes, 1 );
- if ( stream_.userBuffer[mode] == NULL ) {
- errorText_ = "RtApiAsio::probeDeviceOpen: error allocating user buffer memory.";
- goto error;
- }
-
- if ( stream_.doConvertBuffer[mode] ) {
-
- bool makeBuffer = true;
- bufferBytes = stream_.nDeviceChannels[mode] * formatBytes( stream_.deviceFormat[mode] );
- if ( isDuplexInput && stream_.deviceBuffer ) {
- unsigned long bytesOut = stream_.nDeviceChannels[0] * formatBytes( stream_.deviceFormat[0] );
- if ( bufferBytes <= bytesOut ) makeBuffer = false;
- }
-
- if ( makeBuffer ) {
- bufferBytes *= *bufferSize;
- if ( stream_.deviceBuffer ) free( stream_.deviceBuffer );
- stream_.deviceBuffer = (char *) calloc( bufferBytes, 1 );
- if ( stream_.deviceBuffer == NULL ) {
- errorText_ = "RtApiAsio::probeDeviceOpen: error allocating device buffer memory.";
- goto error;
- }
- }
- }
-
- // Determine device latencies
- long inputLatency, outputLatency;
- result = ASIOGetLatencies( &inputLatency, &outputLatency );
- if ( result != ASE_OK ) {
- errorStream_ << "RtApiAsio::probeDeviceOpen: driver (" << driverName << ") error (" << getAsioErrorString( result ) << ") getting latency.";
- errorText_ = errorStream_.str();
- error( RtAudioError::WARNING); // warn but don't fail
- }
- else {
- stream_.latency[0] = outputLatency;
- stream_.latency[1] = inputLatency;
- }
-
- // Setup the buffer conversion information structure. We don't use
- // buffers to do channel offsets, so we override that parameter
- // here.
- if ( stream_.doConvertBuffer[mode] ) setConvertInfo( mode, 0 );
-
- return SUCCESS;
-
- error:
- if ( !isDuplexInput ) {
- // the cleanup for error in the duplex input, is done by RtApi::openStream
- // So we clean up for single channel only
-
- if ( buffersAllocated )
- ASIODisposeBuffers();
-
- drivers.removeCurrentDriver();
-
- if ( handle ) {
- CloseHandle( handle->condition );
- if ( handle->bufferInfos )
- free( handle->bufferInfos );
-
- delete handle;
- stream_.apiHandle = 0;
- }
-
-
- if ( stream_.userBuffer[mode] ) {
- free( stream_.userBuffer[mode] );
- stream_.userBuffer[mode] = 0;
- }
-
- if ( stream_.deviceBuffer ) {
- free( stream_.deviceBuffer );
- stream_.deviceBuffer = 0;
- }
- }
-
- return FAILURE;
-}////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-void RtApiAsio :: closeStream()
-{
- if ( stream_.state == STREAM_CLOSED ) {
- errorText_ = "RtApiAsio::closeStream(): no open stream to close!";
- error( RtAudioError::WARNING );
- return;
- }
-
- if ( stream_.state == STREAM_RUNNING ) {
- stream_.state = STREAM_STOPPED;
- ASIOStop();
- }
- ASIODisposeBuffers();
- drivers.removeCurrentDriver();
-
- AsioHandle *handle = (AsioHandle *) stream_.apiHandle;
- if ( handle ) {
- CloseHandle( handle->condition );
- if ( handle->bufferInfos )
- free( handle->bufferInfos );
- delete handle;
- stream_.apiHandle = 0;
- }
-
- for ( int i=0; i<2; i++ ) {
- if ( stream_.userBuffer[i] ) {
- free( stream_.userBuffer[i] );
- stream_.userBuffer[i] = 0;
- }
- }
-
- if ( stream_.deviceBuffer ) {
- free( stream_.deviceBuffer );
- stream_.deviceBuffer = 0;
- }
-
- stream_.mode = UNINITIALIZED;
- stream_.state = STREAM_CLOSED;
-}
-
-bool stopThreadCalled = false;
-
-void RtApiAsio :: startStream()
-{
- verifyStream();
- if ( stream_.state == STREAM_RUNNING ) {
- errorText_ = "RtApiAsio::startStream(): the stream is already running!";
- error( RtAudioError::WARNING );
- return;
- }
-
- AsioHandle *handle = (AsioHandle *) stream_.apiHandle;
- ASIOError result = ASIOStart();
- if ( result != ASE_OK ) {
- errorStream_ << "RtApiAsio::startStream: error (" << getAsioErrorString( result ) << ") starting device.";
- errorText_ = errorStream_.str();
- goto unlock;
- }
-
- handle->drainCounter = 0;
- handle->internalDrain = false;
- ResetEvent( handle->condition );
- stream_.state = STREAM_RUNNING;
- asioXRun = false;
-
- unlock:
- stopThreadCalled = false;
-
- if ( result == ASE_OK ) return;
- error( RtAudioError::SYSTEM_ERROR );
-}
-
-void RtApiAsio :: stopStream()
-{
- verifyStream();
- if ( stream_.state == STREAM_STOPPED ) {
- errorText_ = "RtApiAsio::stopStream(): the stream is already stopped!";
- error( RtAudioError::WARNING );
- return;
- }
-
- AsioHandle *handle = (AsioHandle *) stream_.apiHandle;
- if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
- if ( handle->drainCounter == 0 ) {
- handle->drainCounter = 2;
- WaitForSingleObject( handle->condition, INFINITE ); // block until signaled
- }
- }
-
- stream_.state = STREAM_STOPPED;
-
- ASIOError result = ASIOStop();
- if ( result != ASE_OK ) {
- errorStream_ << "RtApiAsio::stopStream: error (" << getAsioErrorString( result ) << ") stopping device.";
- errorText_ = errorStream_.str();
- }
-
- if ( result == ASE_OK ) return;
- error( RtAudioError::SYSTEM_ERROR );
-}
-
-void RtApiAsio :: abortStream()
-{
- verifyStream();
- if ( stream_.state == STREAM_STOPPED ) {
- errorText_ = "RtApiAsio::abortStream(): the stream is already stopped!";
- error( RtAudioError::WARNING );
- return;
- }
-
- // The following lines were commented-out because some behavior was
- // noted where the device buffers need to be zeroed to avoid
- // continuing sound, even when the device buffers are completely
- // disposed. So now, calling abort is the same as calling stop.
- // AsioHandle *handle = (AsioHandle *) stream_.apiHandle;
- // handle->drainCounter = 2;
- stopStream();
-}
-
-// This function will be called by a spawned thread when the user
-// callback function signals that the stream should be stopped or
-// aborted. It is necessary to handle it this way because the
-// callbackEvent() function must return before the ASIOStop()
-// function will return.
-static unsigned __stdcall asioStopStream( void *ptr )
-{
- CallbackInfo *info = (CallbackInfo *) ptr;
- RtApiAsio *object = (RtApiAsio *) info->object;
-
- object->stopStream();
- _endthreadex( 0 );
- return 0;
-}
-
-bool RtApiAsio :: callbackEvent( long bufferIndex )
-{
- if ( stream_.state == STREAM_STOPPED || stream_.state == STREAM_STOPPING ) return SUCCESS;
- if ( stream_.state == STREAM_CLOSED ) {
- errorText_ = "RtApiAsio::callbackEvent(): the stream is closed ... this shouldn't happen!";
- error( RtAudioError::WARNING );
- return FAILURE;
- }
-
- CallbackInfo *info = (CallbackInfo *) &stream_.callbackInfo;
- AsioHandle *handle = (AsioHandle *) stream_.apiHandle;
-
- // Check if we were draining the stream and signal if finished.
- if ( handle->drainCounter > 3 ) {
-
- stream_.state = STREAM_STOPPING;
- if ( handle->internalDrain == false )
- SetEvent( handle->condition );
- else { // spawn a thread to stop the stream
- unsigned threadId;
- stream_.callbackInfo.thread = _beginthreadex( NULL, 0, &asioStopStream,
- &stream_.callbackInfo, 0, &threadId );
- }
- return SUCCESS;
- }
-
- // Invoke user callback to get fresh output data UNLESS we are
- // draining stream.
- if ( handle->drainCounter == 0 ) {
- RtAudioCallback callback = (RtAudioCallback) info->callback;
- double streamTime = getStreamTime();
- RtAudioStreamStatus status = 0;
- if ( stream_.mode != INPUT && asioXRun == true ) {
- status |= RTAUDIO_OUTPUT_UNDERFLOW;
- asioXRun = false;
- }
- if ( stream_.mode != OUTPUT && asioXRun == true ) {
- status |= RTAUDIO_INPUT_OVERFLOW;
- asioXRun = false;
- }
- int cbReturnValue = callback( stream_.userBuffer[0], stream_.userBuffer[1],
- stream_.bufferSize, streamTime, status, info->userData );
- if ( cbReturnValue == 2 ) {
- stream_.state = STREAM_STOPPING;
- handle->drainCounter = 2;
- unsigned threadId;
- stream_.callbackInfo.thread = _beginthreadex( NULL, 0, &asioStopStream,
- &stream_.callbackInfo, 0, &threadId );
- return SUCCESS;
- }
- else if ( cbReturnValue == 1 ) {
- handle->drainCounter = 1;
- handle->internalDrain = true;
- }
- }
-
- unsigned int nChannels, bufferBytes, i, j;
- nChannels = stream_.nDeviceChannels[0] + stream_.nDeviceChannels[1];
- if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
-
- bufferBytes = stream_.bufferSize * formatBytes( stream_.deviceFormat[0] );
-
- if ( handle->drainCounter > 1 ) { // write zeros to the output stream
-
- for ( i=0, j=0; i<nChannels; i++ ) {
- if ( handle->bufferInfos[i].isInput != ASIOTrue )
- memset( handle->bufferInfos[i].buffers[bufferIndex], 0, bufferBytes );
- }
-
- }
- else if ( stream_.doConvertBuffer[0] ) {
-
- convertBuffer( stream_.deviceBuffer, stream_.userBuffer[0], stream_.convertInfo[0] );
- if ( stream_.doByteSwap[0] )
- byteSwapBuffer( stream_.deviceBuffer,
- stream_.bufferSize * stream_.nDeviceChannels[0],
- stream_.deviceFormat[0] );
-
- for ( i=0, j=0; i<nChannels; i++ ) {
- if ( handle->bufferInfos[i].isInput != ASIOTrue )
- memcpy( handle->bufferInfos[i].buffers[bufferIndex],
- &stream_.deviceBuffer[j++*bufferBytes], bufferBytes );
- }
-
- }
- else {
-
- if ( stream_.doByteSwap[0] )
- byteSwapBuffer( stream_.userBuffer[0],
- stream_.bufferSize * stream_.nUserChannels[0],
- stream_.userFormat );
-
- for ( i=0, j=0; i<nChannels; i++ ) {
- if ( handle->bufferInfos[i].isInput != ASIOTrue )
- memcpy( handle->bufferInfos[i].buffers[bufferIndex],
- &stream_.userBuffer[0][bufferBytes*j++], bufferBytes );
- }
-
- }
- }
-
- // Don't bother draining input
- if ( handle->drainCounter ) {
- handle->drainCounter++;
- goto unlock;
- }
-
- if ( stream_.mode == INPUT || stream_.mode == DUPLEX ) {
-
- bufferBytes = stream_.bufferSize * formatBytes(stream_.deviceFormat[1]);
-
- if (stream_.doConvertBuffer[1]) {
-
- // Always interleave ASIO input data.
- for ( i=0, j=0; i<nChannels; i++ ) {
- if ( handle->bufferInfos[i].isInput == ASIOTrue )
- memcpy( &stream_.deviceBuffer[j++*bufferBytes],
- handle->bufferInfos[i].buffers[bufferIndex],
- bufferBytes );
- }
-
- if ( stream_.doByteSwap[1] )
- byteSwapBuffer( stream_.deviceBuffer,
- stream_.bufferSize * stream_.nDeviceChannels[1],
- stream_.deviceFormat[1] );
- convertBuffer( stream_.userBuffer[1], stream_.deviceBuffer, stream_.convertInfo[1] );
-
- }
- else {
- for ( i=0, j=0; i<nChannels; i++ ) {
- if ( handle->bufferInfos[i].isInput == ASIOTrue ) {
- memcpy( &stream_.userBuffer[1][bufferBytes*j++],
- handle->bufferInfos[i].buffers[bufferIndex],
- bufferBytes );
- }
- }
-
- if ( stream_.doByteSwap[1] )
- byteSwapBuffer( stream_.userBuffer[1],
- stream_.bufferSize * stream_.nUserChannels[1],
- stream_.userFormat );
- }
- }
-
- unlock:
- // The following call was suggested by Malte Clasen. While the API
- // documentation indicates it should not be required, some device
- // drivers apparently do not function correctly without it.
- ASIOOutputReady();
-
- RtApi::tickStreamTime();
- return SUCCESS;
-}
-
-static void sampleRateChanged( ASIOSampleRate sRate )
-{
- // The ASIO documentation says that this usually only happens during
- // external sync. Audio processing is not stopped by the driver,
- // actual sample rate might not have even changed, maybe only the
- // sample rate status of an AES/EBU or S/PDIF digital input at the
- // audio device.
-
- RtApi *object = (RtApi *) asioCallbackInfo->object;
- try {
- object->stopStream();
- }
- catch ( RtAudioError &exception ) {
- std::cerr << "\nRtApiAsio: sampleRateChanged() error (" << exception.getMessage() << ")!\n" << std::endl;
- return;
- }
-
- std::cerr << "\nRtApiAsio: driver reports sample rate changed to " << sRate << " ... stream stopped!!!\n" << std::endl;
-}
-
-static long asioMessages( long selector, long value, void* /*message*/, double* /*opt*/ )
-{
- long ret = 0;
-
- switch( selector ) {
- case kAsioSelectorSupported:
- if ( value == kAsioResetRequest
- || value == kAsioEngineVersion
- || value == kAsioResyncRequest
- || value == kAsioLatenciesChanged
- // The following three were added for ASIO 2.0, you don't
- // necessarily have to support them.
- || value == kAsioSupportsTimeInfo
- || value == kAsioSupportsTimeCode
- || value == kAsioSupportsInputMonitor)
- ret = 1L;
- break;
- case kAsioResetRequest:
- // Defer the task and perform the reset of the driver during the
- // next "safe" situation. You cannot reset the driver right now,
- // as this code is called from the driver. Reset the driver is
- // done by completely destruct is. I.e. ASIOStop(),
- // ASIODisposeBuffers(), Destruction Afterwards you initialize the
- // driver again.
- std::cerr << "\nRtApiAsio: driver reset requested!!!" << std::endl;
- ret = 1L;
- break;
- case kAsioResyncRequest:
- // This informs the application that the driver encountered some
- // non-fatal data loss. It is used for synchronization purposes
- // of different media. Added mainly to work around the Win16Mutex
- // problems in Windows 95/98 with the Windows Multimedia system,
- // which could lose data because the Mutex was held too long by
- // another thread. However a driver can issue it in other
- // situations, too.
- // std::cerr << "\nRtApiAsio: driver resync requested!!!" << std::endl;
- asioXRun = true;
- ret = 1L;
- break;
- case kAsioLatenciesChanged:
- // This will inform the host application that the drivers were
- // latencies changed. Beware, it this does not mean that the
- // buffer sizes have changed! You might need to update internal
- // delay data.
- std::cerr << "\nRtApiAsio: driver latency may have changed!!!" << std::endl;
- ret = 1L;
- break;
- case kAsioEngineVersion:
- // Return the supported ASIO version of the host application. If
- // a host application does not implement this selector, ASIO 1.0
- // is assumed by the driver.
- ret = 2L;
- break;
- case kAsioSupportsTimeInfo:
- // Informs the driver whether the
- // asioCallbacks.bufferSwitchTimeInfo() callback is supported.
- // For compatibility with ASIO 1.0 drivers the host application
- // should always support the "old" bufferSwitch method, too.
- ret = 0;
- break;
- case kAsioSupportsTimeCode:
- // Informs the driver whether application is interested in time
- // code info. If an application does not need to know about time
- // code, the driver has less work to do.
- ret = 0;
- break;
- }
- return ret;
-}
-
-static const char* getAsioErrorString( ASIOError result )
-{
- struct Messages
- {
- ASIOError value;
- const char*message;
- };
-
- static const Messages m[] =
- {
- { ASE_NotPresent, "Hardware input or output is not present or available." },
- { ASE_HWMalfunction, "Hardware is malfunctioning." },
- { ASE_InvalidParameter, "Invalid input parameter." },
- { ASE_InvalidMode, "Invalid mode." },
- { ASE_SPNotAdvancing, "Sample position not advancing." },
- { ASE_NoClock, "Sample clock or rate cannot be determined or is not present." },
- { ASE_NoMemory, "Not enough memory to complete the request." }
- };
-
- for ( unsigned int i = 0; i < sizeof(m)/sizeof(m[0]); ++i )
- if ( m[i].value == result ) return m[i].message;
-
- return "Unknown error.";
-}
-
-//******************** End of __WINDOWS_ASIO__ *********************//
-#endif
-
-
-#if defined(__WINDOWS_WASAPI__) // Windows WASAPI API
-
-// Authored by Marcus Tomlinson <themarcustomlinson@gmail.com>, April 2014
-// - Introduces support for the Windows WASAPI API
-// - Aims to deliver bit streams to and from hardware at the lowest possible latency, via the absolute minimum buffer sizes required
-// - Provides flexible stream configuration to an otherwise strict and inflexible WASAPI interface
-// - Includes automatic internal conversion of sample rate and buffer size between hardware and the user
-
-#ifndef INITGUID
- #define INITGUID
-#endif
-#include <audioclient.h>
-#include <avrt.h>
-#include <mmdeviceapi.h>
-#include <functiondiscoverykeys_devpkey.h>
-
-//=============================================================================
-
-#define SAFE_RELEASE( objectPtr )\
-if ( objectPtr )\
-{\
- objectPtr->Release();\
- objectPtr = NULL;\
-}
-
-typedef HANDLE ( __stdcall *TAvSetMmThreadCharacteristicsPtr )( LPCWSTR TaskName, LPDWORD TaskIndex );
-
-//-----------------------------------------------------------------------------
-
-// WASAPI dictates stream sample rate, format, channel count, and in some cases, buffer size.
-// Therefore we must perform all necessary conversions to user buffers in order to satisfy these
-// requirements. WasapiBuffer ring buffers are used between HwIn->UserIn and UserOut->HwOut to
-// provide intermediate storage for read / write synchronization.
-class WasapiBuffer
-{
-public:
- WasapiBuffer()
- : buffer_( NULL ),
- bufferSize_( 0 ),
- inIndex_( 0 ),
- outIndex_( 0 ) {}
-
- ~WasapiBuffer() {
- free( buffer_ );
- }
-
- // sets the length of the internal ring buffer
- void setBufferSize( unsigned int bufferSize, unsigned int formatBytes ) {
- free( buffer_ );
-
- buffer_ = ( char* ) calloc( bufferSize, formatBytes );
-
- bufferSize_ = bufferSize;
- inIndex_ = 0;
- outIndex_ = 0;
- }
-
- // attempt to push a buffer into the ring buffer at the current "in" index
- bool pushBuffer( char* buffer, unsigned int bufferSize, RtAudioFormat format )
- {
- if ( !buffer || // incoming buffer is NULL
- bufferSize == 0 || // incoming buffer has no data
- bufferSize > bufferSize_ ) // incoming buffer too large
- {
- return false;
- }
-
- unsigned int relOutIndex = outIndex_;
- unsigned int inIndexEnd = inIndex_ + bufferSize;
- if ( relOutIndex < inIndex_ && inIndexEnd >= bufferSize_ ) {
- relOutIndex += bufferSize_;
- }
-
- // "in" index can end on the "out" index but cannot begin at it
- if ( inIndex_ <= relOutIndex && inIndexEnd > relOutIndex ) {
- return false; // not enough space between "in" index and "out" index
- }
-
- // copy buffer from external to internal
- int fromZeroSize = inIndex_ + bufferSize - bufferSize_;
- fromZeroSize = fromZeroSize < 0 ? 0 : fromZeroSize;
- int fromInSize = bufferSize - fromZeroSize;
-
- switch( format )
- {
- case RTAUDIO_SINT8:
- memcpy( &( ( char* ) buffer_ )[inIndex_], buffer, fromInSize * sizeof( char ) );
- memcpy( buffer_, &( ( char* ) buffer )[fromInSize], fromZeroSize * sizeof( char ) );
- break;
- case RTAUDIO_SINT16:
- memcpy( &( ( short* ) buffer_ )[inIndex_], buffer, fromInSize * sizeof( short ) );
- memcpy( buffer_, &( ( short* ) buffer )[fromInSize], fromZeroSize * sizeof( short ) );
- break;
- case RTAUDIO_SINT24:
- memcpy( &( ( S24* ) buffer_ )[inIndex_], buffer, fromInSize * sizeof( S24 ) );
- memcpy( buffer_, &( ( S24* ) buffer )[fromInSize], fromZeroSize * sizeof( S24 ) );
- break;
- case RTAUDIO_SINT32:
- memcpy( &( ( int* ) buffer_ )[inIndex_], buffer, fromInSize * sizeof( int ) );
- memcpy( buffer_, &( ( int* ) buffer )[fromInSize], fromZeroSize * sizeof( int ) );
- break;
- case RTAUDIO_FLOAT32:
- memcpy( &( ( float* ) buffer_ )[inIndex_], buffer, fromInSize * sizeof( float ) );
- memcpy( buffer_, &( ( float* ) buffer )[fromInSize], fromZeroSize * sizeof( float ) );
- break;
- case RTAUDIO_FLOAT64:
- memcpy( &( ( double* ) buffer_ )[inIndex_], buffer, fromInSize * sizeof( double ) );
- memcpy( buffer_, &( ( double* ) buffer )[fromInSize], fromZeroSize * sizeof( double ) );
- break;
- }
-
- // update "in" index
- inIndex_ += bufferSize;
- inIndex_ %= bufferSize_;
-
- return true;
- }
-
- // attempt to pull a buffer from the ring buffer from the current "out" index
- bool pullBuffer( char* buffer, unsigned int bufferSize, RtAudioFormat format )
- {
- if ( !buffer || // incoming buffer is NULL
- bufferSize == 0 || // incoming buffer has no data
- bufferSize > bufferSize_ ) // incoming buffer too large
- {
- return false;
- }
-
- unsigned int relInIndex = inIndex_;
- unsigned int outIndexEnd = outIndex_ + bufferSize;
- if ( relInIndex < outIndex_ && outIndexEnd >= bufferSize_ ) {
- relInIndex += bufferSize_;
- }
-
- // "out" index can begin at and end on the "in" index
- if ( outIndex_ < relInIndex && outIndexEnd > relInIndex ) {
- return false; // not enough space between "out" index and "in" index
- }
-
- // copy buffer from internal to external
- int fromZeroSize = outIndex_ + bufferSize - bufferSize_;
- fromZeroSize = fromZeroSize < 0 ? 0 : fromZeroSize;
- int fromOutSize = bufferSize - fromZeroSize;
-
- switch( format )
- {
- case RTAUDIO_SINT8:
- memcpy( buffer, &( ( char* ) buffer_ )[outIndex_], fromOutSize * sizeof( char ) );
- memcpy( &( ( char* ) buffer )[fromOutSize], buffer_, fromZeroSize * sizeof( char ) );
- break;
- case RTAUDIO_SINT16:
- memcpy( buffer, &( ( short* ) buffer_ )[outIndex_], fromOutSize * sizeof( short ) );
- memcpy( &( ( short* ) buffer )[fromOutSize], buffer_, fromZeroSize * sizeof( short ) );
- break;
- case RTAUDIO_SINT24:
- memcpy( buffer, &( ( S24* ) buffer_ )[outIndex_], fromOutSize * sizeof( S24 ) );
- memcpy( &( ( S24* ) buffer )[fromOutSize], buffer_, fromZeroSize * sizeof( S24 ) );
- break;
- case RTAUDIO_SINT32:
- memcpy( buffer, &( ( int* ) buffer_ )[outIndex_], fromOutSize * sizeof( int ) );
- memcpy( &( ( int* ) buffer )[fromOutSize], buffer_, fromZeroSize * sizeof( int ) );
- break;
- case RTAUDIO_FLOAT32:
- memcpy( buffer, &( ( float* ) buffer_ )[outIndex_], fromOutSize * sizeof( float ) );
- memcpy( &( ( float* ) buffer )[fromOutSize], buffer_, fromZeroSize * sizeof( float ) );
- break;
- case RTAUDIO_FLOAT64:
- memcpy( buffer, &( ( double* ) buffer_ )[outIndex_], fromOutSize * sizeof( double ) );
- memcpy( &( ( double* ) buffer )[fromOutSize], buffer_, fromZeroSize * sizeof( double ) );
- break;
- }
-
- // update "out" index
- outIndex_ += bufferSize;
- outIndex_ %= bufferSize_;
-
- return true;
- }
-
-private:
- char* buffer_;
- unsigned int bufferSize_;
- unsigned int inIndex_;
- unsigned int outIndex_;
-};
-
-//-----------------------------------------------------------------------------
-
-// In order to satisfy WASAPI's buffer requirements, we need a means of converting sample rate
-// between HW and the user. The convertBufferWasapi function is used to perform this conversion
-// between HwIn->UserIn and UserOut->HwOut during the stream callback loop.
-// This sample rate converter favors speed over quality, and works best with conversions between
-// one rate and its multiple.
-void convertBufferWasapi( char* outBuffer,
- const char* inBuffer,
- const unsigned int& channelCount,
- const unsigned int& inSampleRate,
- const unsigned int& outSampleRate,
- const unsigned int& inSampleCount,
- unsigned int& outSampleCount,
- const RtAudioFormat& format )
-{
- // calculate the new outSampleCount and relative sampleStep
- float sampleRatio = ( float ) outSampleRate / inSampleRate;
- float sampleStep = 1.0f / sampleRatio;
- float inSampleFraction = 0.0f;
-
- outSampleCount = ( unsigned int ) roundf( inSampleCount * sampleRatio );
-
- // frame-by-frame, copy each relative input sample into it's corresponding output sample
- for ( unsigned int outSample = 0; outSample < outSampleCount; outSample++ )
- {
- unsigned int inSample = ( unsigned int ) inSampleFraction;
-
- switch ( format )
- {
- case RTAUDIO_SINT8:
- memcpy( &( ( char* ) outBuffer )[ outSample * channelCount ], &( ( char* ) inBuffer )[ inSample * channelCount ], channelCount * sizeof( char ) );
- break;
- case RTAUDIO_SINT16:
- memcpy( &( ( short* ) outBuffer )[ outSample * channelCount ], &( ( short* ) inBuffer )[ inSample * channelCount ], channelCount * sizeof( short ) );
- break;
- case RTAUDIO_SINT24:
- memcpy( &( ( S24* ) outBuffer )[ outSample * channelCount ], &( ( S24* ) inBuffer )[ inSample * channelCount ], channelCount * sizeof( S24 ) );
- break;
- case RTAUDIO_SINT32:
- memcpy( &( ( int* ) outBuffer )[ outSample * channelCount ], &( ( int* ) inBuffer )[ inSample * channelCount ], channelCount * sizeof( int ) );
- break;
- case RTAUDIO_FLOAT32:
- memcpy( &( ( float* ) outBuffer )[ outSample * channelCount ], &( ( float* ) inBuffer )[ inSample * channelCount ], channelCount * sizeof( float ) );
- break;
- case RTAUDIO_FLOAT64:
- memcpy( &( ( double* ) outBuffer )[ outSample * channelCount ], &( ( double* ) inBuffer )[ inSample * channelCount ], channelCount * sizeof( double ) );
- break;
- }
-
- // jump to next in sample
- inSampleFraction += sampleStep;
- }
-}
-
-//-----------------------------------------------------------------------------
-
-// A structure to hold various information related to the WASAPI implementation.
-struct WasapiHandle
-{
- IAudioClient* captureAudioClient;
- IAudioClient* renderAudioClient;
- IAudioCaptureClient* captureClient;
- IAudioRenderClient* renderClient;
- HANDLE captureEvent;
- HANDLE renderEvent;
-
- WasapiHandle()
- : captureAudioClient( NULL ),
- renderAudioClient( NULL ),
- captureClient( NULL ),
- renderClient( NULL ),
- captureEvent( NULL ),
- renderEvent( NULL ) {}
-};
-
-//=============================================================================
-
-RtApiWasapi::RtApiWasapi()
- : coInitialized_( false ), deviceEnumerator_( NULL )
-{
- // WASAPI can run either apartment or multi-threaded
- HRESULT hr = CoInitialize( NULL );
- if ( !FAILED( hr ) )
- coInitialized_ = true;
-
- // Instantiate device enumerator
- hr = CoCreateInstance( __uuidof( MMDeviceEnumerator ), NULL,
- CLSCTX_ALL, __uuidof( IMMDeviceEnumerator ),
- ( void** ) &deviceEnumerator_ );
-
- if ( FAILED( hr ) ) {
- errorText_ = "RtApiWasapi::RtApiWasapi: Unable to instantiate device enumerator";
- error( RtAudioError::DRIVER_ERROR );
- }
-}
-
-//-----------------------------------------------------------------------------
-
-RtApiWasapi::~RtApiWasapi()
-{
- if ( stream_.state != STREAM_CLOSED )
- closeStream();
-
- SAFE_RELEASE( deviceEnumerator_ );
-
- // If this object previously called CoInitialize()
- if ( coInitialized_ )
- CoUninitialize();
-}
-
-//=============================================================================
-
-unsigned int RtApiWasapi::getDeviceCount( void )
-{
- unsigned int captureDeviceCount = 0;
- unsigned int renderDeviceCount = 0;
-
- IMMDeviceCollection* captureDevices = NULL;
- IMMDeviceCollection* renderDevices = NULL;
-
- // Count capture devices
- errorText_.clear();
- HRESULT hr = deviceEnumerator_->EnumAudioEndpoints( eCapture, DEVICE_STATE_ACTIVE, &captureDevices );
- if ( FAILED( hr ) ) {
- errorText_ = "RtApiWasapi::getDeviceCount: Unable to retrieve capture device collection.";
- goto Exit;
- }
-
- hr = captureDevices->GetCount( &captureDeviceCount );
- if ( FAILED( hr ) ) {
- errorText_ = "RtApiWasapi::getDeviceCount: Unable to retrieve capture device count.";
- goto Exit;
- }
-
- // Count render devices
- hr = deviceEnumerator_->EnumAudioEndpoints( eRender, DEVICE_STATE_ACTIVE, &renderDevices );
- if ( FAILED( hr ) ) {
- errorText_ = "RtApiWasapi::getDeviceCount: Unable to retrieve render device collection.";
- goto Exit;
- }
-
- hr = renderDevices->GetCount( &renderDeviceCount );
- if ( FAILED( hr ) ) {
- errorText_ = "RtApiWasapi::getDeviceCount: Unable to retrieve render device count.";
- goto Exit;
- }
-
-Exit:
- // release all references
- SAFE_RELEASE( captureDevices );
- SAFE_RELEASE( renderDevices );
-
- if ( errorText_.empty() )
- return captureDeviceCount + renderDeviceCount;
-
- error( RtAudioError::DRIVER_ERROR );
- return 0;
-}
-
-//-----------------------------------------------------------------------------
-
-RtAudio::DeviceInfo RtApiWasapi::getDeviceInfo( unsigned int device )
-{
- RtAudio::DeviceInfo info;
- unsigned int captureDeviceCount = 0;
- unsigned int renderDeviceCount = 0;
- std::string defaultDeviceName;
- bool isCaptureDevice = false;
-
- PROPVARIANT deviceNameProp;
- PROPVARIANT defaultDeviceNameProp;
-
- IMMDeviceCollection* captureDevices = NULL;
- IMMDeviceCollection* renderDevices = NULL;
- IMMDevice* devicePtr = NULL;
- IMMDevice* defaultDevicePtr = NULL;
- IAudioClient* audioClient = NULL;
- IPropertyStore* devicePropStore = NULL;
- IPropertyStore* defaultDevicePropStore = NULL;
-
- WAVEFORMATEX* deviceFormat = NULL;
- WAVEFORMATEX* closestMatchFormat = NULL;
-
- // probed
- info.probed = false;
-
- // Count capture devices
- errorText_.clear();
- RtAudioError::Type errorType = RtAudioError::DRIVER_ERROR;
- HRESULT hr = deviceEnumerator_->EnumAudioEndpoints( eCapture, DEVICE_STATE_ACTIVE, &captureDevices );
- if ( FAILED( hr ) ) {
- errorText_ = "RtApiWasapi::getDeviceInfo: Unable to retrieve capture device collection.";
- goto Exit;
- }
-
- hr = captureDevices->GetCount( &captureDeviceCount );
- if ( FAILED( hr ) ) {
- errorText_ = "RtApiWasapi::getDeviceInfo: Unable to retrieve capture device count.";
- goto Exit;
- }
-
- // Count render devices
- hr = deviceEnumerator_->EnumAudioEndpoints( eRender, DEVICE_STATE_ACTIVE, &renderDevices );
- if ( FAILED( hr ) ) {
- errorText_ = "RtApiWasapi::getDeviceInfo: Unable to retrieve render device collection.";
- goto Exit;
- }
-
- hr = renderDevices->GetCount( &renderDeviceCount );
- if ( FAILED( hr ) ) {
- errorText_ = "RtApiWasapi::getDeviceInfo: Unable to retrieve render device count.";
- goto Exit;
- }
-
- // validate device index
- if ( device >= captureDeviceCount + renderDeviceCount ) {
- errorText_ = "RtApiWasapi::getDeviceInfo: Invalid device index.";
- errorType = RtAudioError::INVALID_USE;
- goto Exit;
- }
-
- // determine whether index falls within capture or render devices
- if ( device >= renderDeviceCount ) {
- hr = captureDevices->Item( device - renderDeviceCount, &devicePtr );
- if ( FAILED( hr ) ) {
- errorText_ = "RtApiWasapi::getDeviceInfo: Unable to retrieve capture device handle.";
- goto Exit;
- }
- isCaptureDevice = true;
- }
- else {
- hr = renderDevices->Item( device, &devicePtr );
- if ( FAILED( hr ) ) {
- errorText_ = "RtApiWasapi::getDeviceInfo: Unable to retrieve render device handle.";
- goto Exit;
- }
- isCaptureDevice = false;
- }
-
- // get default device name
- if ( isCaptureDevice ) {
- hr = deviceEnumerator_->GetDefaultAudioEndpoint( eCapture, eConsole, &defaultDevicePtr );
- if ( FAILED( hr ) ) {
- errorText_ = "RtApiWasapi::getDeviceInfo: Unable to retrieve default capture device handle.";
- goto Exit;
- }
- }
- else {
- hr = deviceEnumerator_->GetDefaultAudioEndpoint( eRender, eConsole, &defaultDevicePtr );
- if ( FAILED( hr ) ) {
- errorText_ = "RtApiWasapi::getDeviceInfo: Unable to retrieve default render device handle.";
- goto Exit;
- }
- }
-
- hr = defaultDevicePtr->OpenPropertyStore( STGM_READ, &defaultDevicePropStore );
- if ( FAILED( hr ) ) {
- errorText_ = "RtApiWasapi::getDeviceInfo: Unable to open default device property store.";
- goto Exit;
- }
- PropVariantInit( &defaultDeviceNameProp );
-
- hr = defaultDevicePropStore->GetValue( PKEY_Device_FriendlyName, &defaultDeviceNameProp );
- if ( FAILED( hr ) ) {
- errorText_ = "RtApiWasapi::getDeviceInfo: Unable to retrieve default device property: PKEY_Device_FriendlyName.";
- goto Exit;
- }
-
- defaultDeviceName = convertCharPointerToStdString(defaultDeviceNameProp.pwszVal);
-
- // name
- hr = devicePtr->OpenPropertyStore( STGM_READ, &devicePropStore );
- if ( FAILED( hr ) ) {
- errorText_ = "RtApiWasapi::getDeviceInfo: Unable to open device property store.";
- goto Exit;
- }
-
- PropVariantInit( &deviceNameProp );
-
- hr = devicePropStore->GetValue( PKEY_Device_FriendlyName, &deviceNameProp );
- if ( FAILED( hr ) ) {
- errorText_ = "RtApiWasapi::getDeviceInfo: Unable to retrieve device property: PKEY_Device_FriendlyName.";
- goto Exit;
- }
-
- info.name =convertCharPointerToStdString(deviceNameProp.pwszVal);
-
- // is default
- if ( isCaptureDevice ) {
- info.isDefaultInput = info.name == defaultDeviceName;
- info.isDefaultOutput = false;
- }
- else {
- info.isDefaultInput = false;
- info.isDefaultOutput = info.name == defaultDeviceName;
- }
-
- // channel count
- hr = devicePtr->Activate( __uuidof( IAudioClient ), CLSCTX_ALL, NULL, ( void** ) &audioClient );
- if ( FAILED( hr ) ) {
- errorText_ = "RtApiWasapi::getDeviceInfo: Unable to retrieve device audio client.";
- goto Exit;
- }
-
- hr = audioClient->GetMixFormat( &deviceFormat );
- if ( FAILED( hr ) ) {
- errorText_ = "RtApiWasapi::getDeviceInfo: Unable to retrieve device mix format.";
- goto Exit;
- }
-
- if ( isCaptureDevice ) {
- info.inputChannels = deviceFormat->nChannels;
- info.outputChannels = 0;
- info.duplexChannels = 0;
- }
- else {
- info.inputChannels = 0;
- info.outputChannels = deviceFormat->nChannels;
- info.duplexChannels = 0;
- }
-
- // sample rates
- info.sampleRates.clear();
-
- // allow support for all sample rates as we have a built-in sample rate converter
- for ( unsigned int i = 0; i < MAX_SAMPLE_RATES; i++ ) {
- info.sampleRates.push_back( SAMPLE_RATES[i] );
- }
- info.preferredSampleRate = deviceFormat->nSamplesPerSec;
-
- // native format
- info.nativeFormats = 0;
-
- if ( deviceFormat->wFormatTag == WAVE_FORMAT_IEEE_FLOAT ||
- ( deviceFormat->wFormatTag == WAVE_FORMAT_EXTENSIBLE &&
- ( ( WAVEFORMATEXTENSIBLE* ) deviceFormat )->SubFormat == KSDATAFORMAT_SUBTYPE_IEEE_FLOAT ) )
- {
- if ( deviceFormat->wBitsPerSample == 32 ) {
- info.nativeFormats |= RTAUDIO_FLOAT32;
- }
- else if ( deviceFormat->wBitsPerSample == 64 ) {
- info.nativeFormats |= RTAUDIO_FLOAT64;
- }
- }
- else if ( deviceFormat->wFormatTag == WAVE_FORMAT_PCM ||
- ( deviceFormat->wFormatTag == WAVE_FORMAT_EXTENSIBLE &&
- ( ( WAVEFORMATEXTENSIBLE* ) deviceFormat )->SubFormat == KSDATAFORMAT_SUBTYPE_PCM ) )
- {
- if ( deviceFormat->wBitsPerSample == 8 ) {
- info.nativeFormats |= RTAUDIO_SINT8;
- }
- else if ( deviceFormat->wBitsPerSample == 16 ) {
- info.nativeFormats |= RTAUDIO_SINT16;
- }
- else if ( deviceFormat->wBitsPerSample == 24 ) {
- info.nativeFormats |= RTAUDIO_SINT24;
- }
- else if ( deviceFormat->wBitsPerSample == 32 ) {
- info.nativeFormats |= RTAUDIO_SINT32;
- }
- }
-
- // probed
- info.probed = true;
-
-Exit:
- // release all references
- PropVariantClear( &deviceNameProp );
- PropVariantClear( &defaultDeviceNameProp );
-
- SAFE_RELEASE( captureDevices );
- SAFE_RELEASE( renderDevices );
- SAFE_RELEASE( devicePtr );
- SAFE_RELEASE( defaultDevicePtr );
- SAFE_RELEASE( audioClient );
- SAFE_RELEASE( devicePropStore );
- SAFE_RELEASE( defaultDevicePropStore );
-
- CoTaskMemFree( deviceFormat );
- CoTaskMemFree( closestMatchFormat );
-
- if ( !errorText_.empty() )
- error( errorType );
- return info;
-}
-
-//-----------------------------------------------------------------------------
-
-// Returns the index of the first device whose info reports isDefaultOutput,
-// or 0 when no device does.
-unsigned int RtApiWasapi::getDefaultOutputDevice( void )
-{
-  // The device count is queried again before every probe, so the scan ends
-  // as soon as the index runs past the current count.
-  unsigned int index = 0;
-  while ( index < getDeviceCount() ) {
-    if ( getDeviceInfo( index ).isDefaultOutput )
-      return index;
-    ++index;
-  }
-
-  // nothing was flagged as default: fall back to the first device
-  return 0;
-}
-
-//-----------------------------------------------------------------------------
-
-// Returns the index of the first device whose info reports isDefaultInput,
-// or 0 when no device does.
-unsigned int RtApiWasapi::getDefaultInputDevice( void )
-{
-  // The device count is queried again before every probe, so the scan ends
-  // as soon as the index runs past the current count.
-  unsigned int index = 0;
-  while ( index < getDeviceCount() ) {
-    if ( getDeviceInfo( index ).isDefaultInput )
-      return index;
-    ++index;
-  }
-
-  // nothing was flagged as default: fall back to the first device
-  return 0;
-}
-
-//-----------------------------------------------------------------------------
-
-// Tears down the open stream: stops it if necessary, releases the WASAPI
-// interfaces and event handles kept in the API handle, frees the user and
-// device buffers and marks the stream as closed. Emits a warning and returns
-// when no stream is open.
-void RtApiWasapi::closeStream( void )
-{
-  if ( stream_.state == STREAM_CLOSED ) {
-    errorText_ = "RtApiWasapi::closeStream: No open stream to close.";
-    error( RtAudioError::WARNING );
-    return;
-  }
-
-  // anything that is not already stopped has to be stopped first
-  if ( stream_.state != STREAM_STOPPED )
-    stopStream();
-
-  WasapiHandle* handle = ( WasapiHandle* ) stream_.apiHandle;
-
-  // release the audio clients, then the capture/render service interfaces
-  SAFE_RELEASE( handle->captureAudioClient );
-  SAFE_RELEASE( handle->renderAudioClient );
-
-  SAFE_RELEASE( handle->captureClient );
-  SAFE_RELEASE( handle->renderClient );
-
-  // close the buffer events, if they were ever created
-  if ( handle->captureEvent )
-    CloseHandle( handle->captureEvent );
-
-  if ( handle->renderEvent )
-    CloseHandle( handle->renderEvent );
-
-  // the handle object itself goes away with the stream
-  delete handle;
-  stream_.apiHandle = NULL;
-
-  // free both user buffers (one slot per stream direction)
-  for ( int slot = 0; slot < 2; ++slot ) {
-    if ( !stream_.userBuffer[slot] )
-      continue;
-
-    free( stream_.userBuffer[slot] );
-    stream_.userBuffer[slot] = 0;
-  }
-
-  // free the shared device buffer
-  if ( stream_.deviceBuffer ) {
-    free( stream_.deviceBuffer );
-    stream_.deviceBuffer = 0;
-  }
-
-  // update stream state
-  stream_.state = STREAM_CLOSED;
-}
-
-//-----------------------------------------------------------------------------
-
-// Starts the stream by spawning the WASAPI worker thread (runWasapiThread).
-// Emits a warning and returns when the stream is already running; reports
-// THREAD_ERROR when the worker thread cannot be created.
-void RtApiWasapi::startStream( void )
-{
-  verifyStream();
-
-  if ( stream_.state == STREAM_RUNNING ) {
-    errorText_ = "RtApiWasapi::startStream: The stream is already running.";
-    error( RtAudioError::WARNING );
-    return;
-  }
-
-  // Publish the running state before the worker can execute: the thread is
-  // created suspended and only resumed below.
-  stream_.state = STREAM_RUNNING;
-
-  // create WASAPI stream thread
-  stream_.callbackInfo.thread = ( ThreadHandle ) CreateThread( NULL, 0, runWasapiThread, this, CREATE_SUSPENDED, NULL );
-
-  if ( !stream_.callbackInfo.thread ) {
-    // No worker exists, so nothing would ever move the state to
-    // STREAM_STOPPED. Roll it back before reporting the error; otherwise a
-    // later stopStream()/closeStream() would wait forever for a thread that
-    // was never started.
-    stream_.state = STREAM_STOPPED;
-
-    errorText_ = "RtApiWasapi::startStream: Unable to instantiate callback thread.";
-    error( RtAudioError::THREAD_ERROR );
-    return;
-  }
-
-  // apply the priority chosen when the stream was opened, then let the thread run
-  SetThreadPriority( ( void* ) stream_.callbackInfo.thread, stream_.callbackInfo.priority );
-  ResumeThread( ( void* ) stream_.callbackInfo.thread );
-}
-
-//-----------------------------------------------------------------------------
-
-// Stops the stream: asks the worker thread to finish, waits for it, gives the
-// last buffer time to play, stops the capture/render audio clients and closes
-// the worker thread handle. Emits a warning and returns when the stream is
-// already stopped.
-void RtApiWasapi::stopStream( void )
-{
-  verifyStream();
-
-  if ( stream_.state == STREAM_STOPPED ) {
-    errorText_ = "RtApiWasapi::stopStream: The stream is already stopped.";
-    error( RtAudioError::WARNING );
-    return;
-  }
-
-  // STREAM_STOPPING is the signal the stream thread watches for
-  stream_.state = STREAM_STOPPING;
-
-  // poll until the stream thread reports that it has stopped
-  while ( stream_.state != STREAM_STOPPED )
-    Sleep( 1 );
-
-  // Wait for the last buffer to play before stopping.
-  Sleep( 1000 * stream_.bufferSize / stream_.sampleRate );
-
-  WasapiHandle* handle = ( WasapiHandle* ) stream_.apiHandle;
-
-  // stop capture client if applicable
-  if ( handle->captureAudioClient ) {
-    HRESULT captureResult = handle->captureAudioClient->Stop();
-    if ( FAILED( captureResult ) ) {
-      errorText_ = "RtApiWasapi::stopStream: Unable to stop capture stream.";
-      error( RtAudioError::DRIVER_ERROR );
-      return;
-    }
-  }
-
-  // stop render client if applicable
-  if ( handle->renderAudioClient ) {
-    HRESULT renderResult = handle->renderAudioClient->Stop();
-    if ( FAILED( renderResult ) ) {
-      errorText_ = "RtApiWasapi::stopStream: Unable to stop render stream.";
-      error( RtAudioError::DRIVER_ERROR );
-      return;
-    }
-  }
-
-  // close thread handle
-  if ( stream_.callbackInfo.thread ) {
-    if ( !CloseHandle( ( void* ) stream_.callbackInfo.thread ) ) {
-      errorText_ = "RtApiWasapi::stopStream: Unable to close callback thread.";
-      error( RtAudioError::THREAD_ERROR );
-      return;
-    }
-  }
-
-  stream_.callbackInfo.thread = (ThreadHandle) NULL;
-}
-
-//-----------------------------------------------------------------------------
-
-// Aborts the stream: same sequence as stopStream() but without the pause that
-// lets the last buffer play out. Emits a warning and returns when the stream
-// is already stopped.
-void RtApiWasapi::abortStream( void )
-{
-  verifyStream();
-
-  if ( stream_.state == STREAM_STOPPED ) {
-    errorText_ = "RtApiWasapi::abortStream: The stream is already stopped.";
-    error( RtAudioError::WARNING );
-    return;
-  }
-
-  // STREAM_STOPPING is the signal the stream thread watches for
-  stream_.state = STREAM_STOPPING;
-
-  // poll until the stream thread reports that it has stopped
-  while ( stream_.state != STREAM_STOPPED )
-    Sleep( 1 );
-
-  WasapiHandle* handle = ( WasapiHandle* ) stream_.apiHandle;
-
-  // stop capture client if applicable
-  if ( handle->captureAudioClient ) {
-    HRESULT captureResult = handle->captureAudioClient->Stop();
-    if ( FAILED( captureResult ) ) {
-      errorText_ = "RtApiWasapi::abortStream: Unable to stop capture stream.";
-      error( RtAudioError::DRIVER_ERROR );
-      return;
-    }
-  }
-
-  // stop render client if applicable
-  if ( handle->renderAudioClient ) {
-    HRESULT renderResult = handle->renderAudioClient->Stop();
-    if ( FAILED( renderResult ) ) {
-      errorText_ = "RtApiWasapi::abortStream: Unable to stop render stream.";
-      error( RtAudioError::DRIVER_ERROR );
-      return;
-    }
-  }
-
-  // close thread handle
-  if ( stream_.callbackInfo.thread ) {
-    if ( !CloseHandle( ( void* ) stream_.callbackInfo.thread ) ) {
-      errorText_ = "RtApiWasapi::abortStream: Unable to close callback thread.";
-      error( RtAudioError::THREAD_ERROR );
-      return;
-    }
-  }
-
-  stream_.callbackInfo.thread = (ThreadHandle) NULL;
-}
-
-//-----------------------------------------------------------------------------
-
-// Opens one direction (mode) of the stream on the given device.
-//
-// Device indices cover the render endpoints first and the capture endpoints
-// after them: indices below the render count select a render device, the
-// remaining ones a capture device.
-//
-//   device        combined device index (see above)
-//   mode          INPUT for capture devices, OUTPUT for render devices
-//   channels      number of user channels
-//   firstChannel  channel offset stored in the stream
-//   sampleRate    user sample rate
-//   format        user sample format
-//   bufferSize    in: requested buffer size in frames (read, not modified)
-//   options       optional stream flags (may be NULL)
-//
-// Returns SUCCESS or FAILURE. On FAILURE the stream is closed again and, when
-// an error text was set, error() is invoked with the matching error type.
-bool RtApiWasapi::probeDeviceOpen( unsigned int device, StreamMode mode, unsigned int channels,
-                                   unsigned int firstChannel, unsigned int sampleRate,
-                                   RtAudioFormat format, unsigned int* bufferSize,
-                                   RtAudio::StreamOptions* options )
-{
-  bool methodResult = FAILURE;
-  unsigned int captureDeviceCount = 0;
-  unsigned int renderDeviceCount = 0;
-
-  IMMDeviceCollection* captureDevices = NULL;
-  IMMDeviceCollection* renderDevices = NULL;
-  IMMDevice* devicePtr = NULL;
-  WAVEFORMATEX* deviceFormat = NULL;
-  unsigned int bufferBytes;
-  stream_.state = STREAM_STOPPED;
-
-  // create API Handle if not already created
-  if ( !stream_.apiHandle )
-    stream_.apiHandle = ( void* ) new WasapiHandle();
-
-  // Count capture devices
-  errorText_.clear();
-  RtAudioError::Type errorType = RtAudioError::DRIVER_ERROR;
-  HRESULT hr = deviceEnumerator_->EnumAudioEndpoints( eCapture, DEVICE_STATE_ACTIVE, &captureDevices );
-  if ( FAILED( hr ) ) {
-    errorText_ = "RtApiWasapi::probeDeviceOpen: Unable to retrieve capture device collection.";
-    goto Exit;
-  }
-
-  hr = captureDevices->GetCount( &captureDeviceCount );
-  if ( FAILED( hr ) ) {
-    errorText_ = "RtApiWasapi::probeDeviceOpen: Unable to retrieve capture device count.";
-    goto Exit;
-  }
-
-  // Count render devices
-  hr = deviceEnumerator_->EnumAudioEndpoints( eRender, DEVICE_STATE_ACTIVE, &renderDevices );
-  if ( FAILED( hr ) ) {
-    errorText_ = "RtApiWasapi::probeDeviceOpen: Unable to retrieve render device collection.";
-    goto Exit;
-  }
-
-  hr = renderDevices->GetCount( &renderDeviceCount );
-  if ( FAILED( hr ) ) {
-    errorText_ = "RtApiWasapi::probeDeviceOpen: Unable to retrieve render device count.";
-    goto Exit;
-  }
-
-  // validate device index
-  if ( device >= captureDeviceCount + renderDeviceCount ) {
-    errorType = RtAudioError::INVALID_USE;
-    errorText_ = "RtApiWasapi::probeDeviceOpen: Invalid device index.";
-    goto Exit;
-  }
-
-  // determine whether index falls within capture or render devices
-  if ( device >= renderDeviceCount ) {
-    if ( mode != INPUT ) {
-      errorType = RtAudioError::INVALID_USE;
-      errorText_ = "RtApiWasapi::probeDeviceOpen: Capture device selected as output device.";
-      goto Exit;
-    }
-
-    // retrieve captureAudioClient from devicePtr
-    IAudioClient*& captureAudioClient = ( ( WasapiHandle* ) stream_.apiHandle )->captureAudioClient;
-
-    hr = captureDevices->Item( device - renderDeviceCount, &devicePtr );
-    if ( FAILED( hr ) ) {
-      errorText_ = "RtApiWasapi::probeDeviceOpen: Unable to retrieve capture device handle.";
-      goto Exit;
-    }
-
-    hr = devicePtr->Activate( __uuidof( IAudioClient ), CLSCTX_ALL,
-                              NULL, ( void** ) &captureAudioClient );
-    if ( FAILED( hr ) ) {
-      errorText_ = "RtApiWasapi::probeDeviceOpen: Unable to retrieve device audio client.";
-      goto Exit;
-    }
-
-    hr = captureAudioClient->GetMixFormat( &deviceFormat );
-    if ( FAILED( hr ) ) {
-      errorText_ = "RtApiWasapi::probeDeviceOpen: Unable to retrieve device mix format.";
-      goto Exit;
-    }
-
-    stream_.nDeviceChannels[mode] = deviceFormat->nChannels;
-
-    // GetStreamLatency() writes a 64-bit REFERENCE_TIME. Receive it in a local
-    // of exactly that type and copy it over only on success, rather than
-    // handing the API a casted pointer into stream_.latency (which lets it
-    // write past the element if the element is narrower than 64 bits).
-    REFERENCE_TIME captureLatency = 0;
-    if ( !FAILED( captureAudioClient->GetStreamLatency( &captureLatency ) ) )
-      stream_.latency[mode] = captureLatency;
-  }
-  else {
-    if ( mode != OUTPUT ) {
-      errorType = RtAudioError::INVALID_USE;
-      errorText_ = "RtApiWasapi::probeDeviceOpen: Render device selected as input device.";
-      goto Exit;
-    }
-
-    // retrieve renderAudioClient from devicePtr
-    IAudioClient*& renderAudioClient = ( ( WasapiHandle* ) stream_.apiHandle )->renderAudioClient;
-
-    hr = renderDevices->Item( device, &devicePtr );
-    if ( FAILED( hr ) ) {
-      errorText_ = "RtApiWasapi::probeDeviceOpen: Unable to retrieve render device handle.";
-      goto Exit;
-    }
-
-    hr = devicePtr->Activate( __uuidof( IAudioClient ), CLSCTX_ALL,
-                              NULL, ( void** ) &renderAudioClient );
-    if ( FAILED( hr ) ) {
-      errorText_ = "RtApiWasapi::probeDeviceOpen: Unable to retrieve device audio client.";
-      goto Exit;
-    }
-
-    hr = renderAudioClient->GetMixFormat( &deviceFormat );
-    if ( FAILED( hr ) ) {
-      errorText_ = "RtApiWasapi::probeDeviceOpen: Unable to retrieve device mix format.";
-      goto Exit;
-    }
-
-    stream_.nDeviceChannels[mode] = deviceFormat->nChannels;
-
-    // same reasoning as for the capture client above
-    REFERENCE_TIME renderLatency = 0;
-    if ( !FAILED( renderAudioClient->GetStreamLatency( &renderLatency ) ) )
-      stream_.latency[mode] = renderLatency;
-  }
-
-  // fill stream data
-  // opening the opposite direction on an already opened stream makes it duplex
-  if ( ( stream_.mode == OUTPUT && mode == INPUT ) ||
-       ( stream_.mode == INPUT && mode == OUTPUT ) ) {
-    stream_.mode = DUPLEX;
-  }
-  else {
-    stream_.mode = mode;
-  }
-
-  stream_.device[mode] = device;
-  stream_.doByteSwap[mode] = false;
-  stream_.sampleRate = sampleRate;
-  stream_.bufferSize = *bufferSize;
-  stream_.nBuffers = 1;
-  stream_.nUserChannels[mode] = channels;
-  stream_.channelOffset[mode] = firstChannel;
-  stream_.userFormat = format;
-  stream_.deviceFormat[mode] = getDeviceInfo( device ).nativeFormats;
-
-  if ( options && options->flags & RTAUDIO_NONINTERLEAVED )
-    stream_.userInterleaved = false;
-  else
-    stream_.userInterleaved = true;
-  stream_.deviceInterleaved[mode] = true;
-
-  // Set flags for buffer conversion.
-  // NOTE(review): the second operand compares the two arrays themselves (their
-  // addresses), not the channel counts for this mode, so it is always true and
-  // conversion is always enabled. It is deliberately left unchanged here:
-  // before tightening it to a per-mode comparison, confirm that the render
-  // path in wasapiThread() copes with doConvertBuffer[OUTPUT] == false.
-  stream_.doConvertBuffer[mode] = false;
-  if ( stream_.userFormat != stream_.deviceFormat[mode] ||
-       stream_.nUserChannels != stream_.nDeviceChannels )
-    stream_.doConvertBuffer[mode] = true;
-  else if ( stream_.userInterleaved != stream_.deviceInterleaved[mode] &&
-            stream_.nUserChannels[mode] > 1 )
-    stream_.doConvertBuffer[mode] = true;
-
-  if ( stream_.doConvertBuffer[mode] )
-    setConvertInfo( mode, 0 );
-
-  // Allocate necessary internal buffers
-  bufferBytes = stream_.nUserChannels[mode] * stream_.bufferSize * formatBytes( stream_.userFormat );
-
-  stream_.userBuffer[mode] = ( char* ) calloc( bufferBytes, 1 );
-  if ( !stream_.userBuffer[mode] ) {
-    errorType = RtAudioError::MEMORY_ERROR;
-    errorText_ = "RtApiWasapi::probeDeviceOpen: Error allocating user buffer memory.";
-    goto Exit;
-  }
-
-  // priority handed to SetThreadPriority() when the stream thread is started
-  if ( options && options->flags & RTAUDIO_SCHEDULE_REALTIME )
-    stream_.callbackInfo.priority = 15;
-  else
-    stream_.callbackInfo.priority = 0;
-
-  ///! TODO: RTAUDIO_MINIMIZE_LATENCY // Provide stream buffers directly to callback
-  ///! TODO: RTAUDIO_HOG_DEVICE       // Exclusive mode
-
-  methodResult = SUCCESS;
-
-Exit:
-  //clean up
-  SAFE_RELEASE( captureDevices );
-  SAFE_RELEASE( renderDevices );
-  SAFE_RELEASE( devicePtr );
-  CoTaskMemFree( deviceFormat );
-
-  // if method failed, close the stream
-  if ( methodResult == FAILURE )
-    closeStream();
-
-  if ( !errorText_.empty() )
-    error( errorType );
-  return methodResult;
-}
-
-//=============================================================================
-
-// Thread entry point used by startStream(): runs the stream loop of the
-// RtApiWasapi instance passed as the thread argument. Always returns 0.
-DWORD WINAPI RtApiWasapi::runWasapiThread( void* wasapiPtr )
-{
-  RtApiWasapi* api = ( RtApiWasapi* ) wasapiPtr;
-  if ( api != NULL )
-    api->wasapiThread();
-
-  return 0;
-}
-
-// Thread entry point that calls stopStream() on the RtApiWasapi instance
-// passed as the thread argument. Always returns 0.
-DWORD WINAPI RtApiWasapi::stopWasapiThread( void* wasapiPtr )
-{
-  RtApiWasapi* api = ( RtApiWasapi* ) wasapiPtr;
-  if ( api != NULL )
-    api->stopStream();
-
-  return 0;
-}
-
-// Thread entry point that calls abortStream() on the RtApiWasapi instance
-// passed as the thread argument. Always returns 0.
-DWORD WINAPI RtApiWasapi::abortWasapiThread( void* wasapiPtr )
-{
-  RtApiWasapi* api = ( RtApiWasapi* ) wasapiPtr;
-  if ( api != NULL )
-    api->abortStream();
-
-  return 0;
-}
-
-//-----------------------------------------------------------------------------
-
-// Body of the stream thread started by startStream().
-//
-// Sets up (on first run) and starts the capture and/or render audio clients,
-// then loops until the stream state becomes STREAM_STOPPING. Each pass of the
-// loop:
-//   1. pulls one user-sized buffer out of captureBuffer and converts it,
-//   2. runs the user callback,
-//   3. converts the callback output and pushes it into renderBuffer,
-//   4. moves data from the capture endpoint into captureBuffer,
-//   5. moves data from renderBuffer to the render endpoint.
-// On exit the stream state is set to STREAM_STOPPED and, if an error text was
-// recorded, error() is invoked with the matching error type.
-void RtApiWasapi::wasapiThread()
-{
-  // as this is a new thread, we must CoInitialize it
-  CoInitialize( NULL );
-
-  HRESULT hr;
-
-  IAudioClient* captureAudioClient = ( ( WasapiHandle* ) stream_.apiHandle )->captureAudioClient;
-  IAudioClient* renderAudioClient = ( ( WasapiHandle* ) stream_.apiHandle )->renderAudioClient;
-  IAudioCaptureClient* captureClient = ( ( WasapiHandle* ) stream_.apiHandle )->captureClient;
-  IAudioRenderClient* renderClient = ( ( WasapiHandle* ) stream_.apiHandle )->renderClient;
-  HANDLE captureEvent = ( ( WasapiHandle* ) stream_.apiHandle )->captureEvent;
-  HANDLE renderEvent = ( ( WasapiHandle* ) stream_.apiHandle )->renderEvent;
-
-  WAVEFORMATEX* captureFormat = NULL;
-  WAVEFORMATEX* renderFormat = NULL;
-  float captureSrRatio = 0.0f;
-  float renderSrRatio = 0.0f;
-  WasapiBuffer captureBuffer;
-  WasapiBuffer renderBuffer;
-
-  // declare local stream variables
-  RtAudioCallback callback = ( RtAudioCallback ) stream_.callbackInfo.callback;
-  BYTE* streamBuffer = NULL;
-  unsigned long captureFlags = 0;
-  unsigned int bufferFrameCount = 0;
-  unsigned int numFramesPadding = 0;
-  unsigned int convBufferSize = 0;
-  bool callbackPushed = false;
-  bool callbackPulled = false;
-  bool callbackStopped = false;
-  int callbackResult = 0;
-
-  // convBuffer is used to store converted buffers between WASAPI and the user
-  char* convBuffer = NULL;
-  unsigned int convBuffSize = 0;
-  unsigned int deviceBuffSize = 0;
-
-  errorText_.clear();
-  RtAudioError::Type errorType = RtAudioError::DRIVER_ERROR;
-
-  // Attempt to assign "Pro Audio" characteristic to thread.
-  // This is best effort: the stream works without it, so every failure is
-  // silently ignored. The wide-character loader is called explicitly because
-  // casting a narrow literal to LPCTSTR hands garbage to LoadLibraryW when the
-  // file is built with UNICODE defined.
-  HMODULE AvrtDll = LoadLibraryW( L"AVRT.dll" );
-  if ( AvrtDll ) {
-    DWORD taskIndex = 0;
-    TAvSetMmThreadCharacteristicsPtr AvSetMmThreadCharacteristicsPtr = ( TAvSetMmThreadCharacteristicsPtr ) GetProcAddress( AvrtDll, "AvSetMmThreadCharacteristicsW" );
-    // GetProcAddress() returns NULL when the export is missing
-    if ( AvSetMmThreadCharacteristicsPtr )
-      AvSetMmThreadCharacteristicsPtr( L"Pro Audio", &taskIndex );
-    FreeLibrary( AvrtDll );
-  }
-
-  // start capture stream if applicable
-  if ( captureAudioClient ) {
-    hr = captureAudioClient->GetMixFormat( &captureFormat );
-    if ( FAILED( hr ) ) {
-      errorText_ = "RtApiWasapi::wasapiThread: Unable to retrieve device mix format.";
-      goto Exit;
-    }
-
-    captureSrRatio = ( ( float ) captureFormat->nSamplesPerSec / stream_.sampleRate );
-
-    // initialize capture stream according to desired buffer size
-    float desiredBufferSize = stream_.bufferSize * captureSrRatio;
-    REFERENCE_TIME desiredBufferPeriod = ( REFERENCE_TIME ) ( ( float ) desiredBufferSize * 10000000 / captureFormat->nSamplesPerSec );
-
-    // a missing capture client means the audio client was never initialized
-    if ( !captureClient ) {
-      hr = captureAudioClient->Initialize( AUDCLNT_SHAREMODE_SHARED,
-                                           AUDCLNT_STREAMFLAGS_EVENTCALLBACK,
-                                           desiredBufferPeriod,
-                                           desiredBufferPeriod,
-                                           captureFormat,
-                                           NULL );
-      if ( FAILED( hr ) ) {
-        errorText_ = "RtApiWasapi::wasapiThread: Unable to initialize capture audio client.";
-        goto Exit;
-      }
-
-      hr = captureAudioClient->GetService( __uuidof( IAudioCaptureClient ),
-                                           ( void** ) &captureClient );
-      if ( FAILED( hr ) ) {
-        errorText_ = "RtApiWasapi::wasapiThread: Unable to retrieve capture client handle.";
-        goto Exit;
-      }
-
-      // configure captureEvent to trigger on every available capture buffer
-      captureEvent = CreateEvent( NULL, FALSE, FALSE, NULL );
-      if ( !captureEvent ) {
-        errorType = RtAudioError::SYSTEM_ERROR;
-        errorText_ = "RtApiWasapi::wasapiThread: Unable to create capture event.";
-        goto Exit;
-      }
-
-      hr = captureAudioClient->SetEventHandle( captureEvent );
-      if ( FAILED( hr ) ) {
-        errorText_ = "RtApiWasapi::wasapiThread: Unable to set capture event handle.";
-        goto Exit;
-      }
-
-      // keep both in the handle so later runs reuse them and closeStream() can free them
-      ( ( WasapiHandle* ) stream_.apiHandle )->captureClient = captureClient;
-      ( ( WasapiHandle* ) stream_.apiHandle )->captureEvent = captureEvent;
-    }
-
-    unsigned int inBufferSize = 0;
-    hr = captureAudioClient->GetBufferSize( &inBufferSize );
-    if ( FAILED( hr ) ) {
-      errorText_ = "RtApiWasapi::wasapiThread: Unable to get capture buffer size.";
-      goto Exit;
-    }
-
-    // scale outBufferSize according to stream->user sample rate ratio
-    unsigned int outBufferSize = ( unsigned int ) ( stream_.bufferSize * captureSrRatio ) * stream_.nDeviceChannels[INPUT];
-    inBufferSize *= stream_.nDeviceChannels[INPUT];
-
-    // set captureBuffer size
-    captureBuffer.setBufferSize( inBufferSize + outBufferSize, formatBytes( stream_.deviceFormat[INPUT] ) );
-
-    // reset the capture stream
-    hr = captureAudioClient->Reset();
-    if ( FAILED( hr ) ) {
-      errorText_ = "RtApiWasapi::wasapiThread: Unable to reset capture stream.";
-      goto Exit;
-    }
-
-    // start the capture stream
-    hr = captureAudioClient->Start();
-    if ( FAILED( hr ) ) {
-      errorText_ = "RtApiWasapi::wasapiThread: Unable to start capture stream.";
-      goto Exit;
-    }
-  }
-
-  // start render stream if applicable
-  if ( renderAudioClient ) {
-    hr = renderAudioClient->GetMixFormat( &renderFormat );
-    if ( FAILED( hr ) ) {
-      errorText_ = "RtApiWasapi::wasapiThread: Unable to retrieve device mix format.";
-      goto Exit;
-    }
-
-    renderSrRatio = ( ( float ) renderFormat->nSamplesPerSec / stream_.sampleRate );
-
-    // initialize render stream according to desired buffer size
-    float desiredBufferSize = stream_.bufferSize * renderSrRatio;
-    REFERENCE_TIME desiredBufferPeriod = ( REFERENCE_TIME ) ( ( float ) desiredBufferSize * 10000000 / renderFormat->nSamplesPerSec );
-
-    // a missing render client means the audio client was never initialized
-    if ( !renderClient ) {
-      hr = renderAudioClient->Initialize( AUDCLNT_SHAREMODE_SHARED,
-                                          AUDCLNT_STREAMFLAGS_EVENTCALLBACK,
-                                          desiredBufferPeriod,
-                                          desiredBufferPeriod,
-                                          renderFormat,
-                                          NULL );
-      if ( FAILED( hr ) ) {
-        errorText_ = "RtApiWasapi::wasapiThread: Unable to initialize render audio client.";
-        goto Exit;
-      }
-
-      hr = renderAudioClient->GetService( __uuidof( IAudioRenderClient ),
-                                          ( void** ) &renderClient );
-      if ( FAILED( hr ) ) {
-        errorText_ = "RtApiWasapi::wasapiThread: Unable to retrieve render client handle.";
-        goto Exit;
-      }
-
-      // configure renderEvent to trigger on every available render buffer
-      renderEvent = CreateEvent( NULL, FALSE, FALSE, NULL );
-      if ( !renderEvent ) {
-        errorType = RtAudioError::SYSTEM_ERROR;
-        errorText_ = "RtApiWasapi::wasapiThread: Unable to create render event.";
-        goto Exit;
-      }
-
-      hr = renderAudioClient->SetEventHandle( renderEvent );
-      if ( FAILED( hr ) ) {
-        errorText_ = "RtApiWasapi::wasapiThread: Unable to set render event handle.";
-        goto Exit;
-      }
-
-      // keep both in the handle so later runs reuse them and closeStream() can free them
-      ( ( WasapiHandle* ) stream_.apiHandle )->renderClient = renderClient;
-      ( ( WasapiHandle* ) stream_.apiHandle )->renderEvent = renderEvent;
-    }
-
-    unsigned int outBufferSize = 0;
-    hr = renderAudioClient->GetBufferSize( &outBufferSize );
-    if ( FAILED( hr ) ) {
-      errorText_ = "RtApiWasapi::wasapiThread: Unable to get render buffer size.";
-      goto Exit;
-    }
-
-    // scale inBufferSize according to user->stream sample rate ratio
-    unsigned int inBufferSize = ( unsigned int ) ( stream_.bufferSize * renderSrRatio ) * stream_.nDeviceChannels[OUTPUT];
-    outBufferSize *= stream_.nDeviceChannels[OUTPUT];
-
-    // set renderBuffer size
-    renderBuffer.setBufferSize( inBufferSize + outBufferSize, formatBytes( stream_.deviceFormat[OUTPUT] ) );
-
-    // reset the render stream
-    hr = renderAudioClient->Reset();
-    if ( FAILED( hr ) ) {
-      errorText_ = "RtApiWasapi::wasapiThread: Unable to reset render stream.";
-      goto Exit;
-    }
-
-    // start the render stream
-    hr = renderAudioClient->Start();
-    if ( FAILED( hr ) ) {
-      errorText_ = "RtApiWasapi::wasapiThread: Unable to start render stream.";
-      goto Exit;
-    }
-  }
-
-  // Size the two scratch buffers in bytes: convBuffer holds one callback
-  // buffer at the device sample rate, deviceBuffer one at the user sample
-  // rate. In duplex mode each takes the larger of the two directions.
-  if ( stream_.mode == INPUT ) {
-    convBuffSize = ( size_t ) ( stream_.bufferSize * captureSrRatio ) * stream_.nDeviceChannels[INPUT] * formatBytes( stream_.deviceFormat[INPUT] );
-    deviceBuffSize = stream_.bufferSize * stream_.nDeviceChannels[INPUT] * formatBytes( stream_.deviceFormat[INPUT] );
-  }
-  else if ( stream_.mode == OUTPUT ) {
-    convBuffSize = ( size_t ) ( stream_.bufferSize * renderSrRatio ) * stream_.nDeviceChannels[OUTPUT] * formatBytes( stream_.deviceFormat[OUTPUT] );
-    deviceBuffSize = stream_.bufferSize * stream_.nDeviceChannels[OUTPUT] * formatBytes( stream_.deviceFormat[OUTPUT] );
-  }
-  else if ( stream_.mode == DUPLEX ) {
-    convBuffSize = std::max( ( size_t ) ( stream_.bufferSize * captureSrRatio ) * stream_.nDeviceChannels[INPUT] * formatBytes( stream_.deviceFormat[INPUT] ),
-                             ( size_t ) ( stream_.bufferSize * renderSrRatio ) * stream_.nDeviceChannels[OUTPUT] * formatBytes( stream_.deviceFormat[OUTPUT] ) );
-    deviceBuffSize = std::max( stream_.bufferSize * stream_.nDeviceChannels[INPUT] * formatBytes( stream_.deviceFormat[INPUT] ),
-                               stream_.bufferSize * stream_.nDeviceChannels[OUTPUT] * formatBytes( stream_.deviceFormat[OUTPUT] ) );
-  }
-
-  convBuffer = ( char* ) malloc( convBuffSize );
-  stream_.deviceBuffer = ( char* ) malloc( deviceBuffSize );
-  if ( !convBuffer || !stream_.deviceBuffer ) {
-    errorType = RtAudioError::MEMORY_ERROR;
-    errorText_ = "RtApiWasapi::wasapiThread: Error allocating device buffer memory.";
-    goto Exit;
-  }
-
-  // stream process loop
-  while ( stream_.state != STREAM_STOPPING ) {
-    if ( !callbackPulled ) {
-      // Callback Input
-      // ==============
-      // 1. Pull callback buffer from inputBuffer
-      // 2. If 1. was successful: Convert callback buffer to user sample rate and channel count
-      //                          Convert callback buffer to user format
-
-      if ( captureAudioClient ) {
-        // Pull callback buffer from inputBuffer
-        callbackPulled = captureBuffer.pullBuffer( convBuffer,
-                                                   ( unsigned int ) ( stream_.bufferSize * captureSrRatio ) * stream_.nDeviceChannels[INPUT],
-                                                   stream_.deviceFormat[INPUT] );
-
-        if ( callbackPulled ) {
-          // Convert callback buffer to user sample rate
-          convertBufferWasapi( stream_.deviceBuffer,
-                               convBuffer,
-                               stream_.nDeviceChannels[INPUT],
-                               captureFormat->nSamplesPerSec,
-                               stream_.sampleRate,
-                               ( unsigned int ) ( stream_.bufferSize * captureSrRatio ),
-                               convBufferSize,
-                               stream_.deviceFormat[INPUT] );
-
-          if ( stream_.doConvertBuffer[INPUT] ) {
-            // Convert callback buffer to user format
-            convertBuffer( stream_.userBuffer[INPUT],
-                           stream_.deviceBuffer,
-                           stream_.convertInfo[INPUT] );
-          }
-          else {
-            // no further conversion, simple copy deviceBuffer to userBuffer
-            memcpy( stream_.userBuffer[INPUT],
-                    stream_.deviceBuffer,
-                    stream_.bufferSize * stream_.nUserChannels[INPUT] * formatBytes( stream_.userFormat ) );
-          }
-        }
-      }
-      else {
-        // if there is no capture stream, set callbackPulled flag
-        callbackPulled = true;
-      }
-
-      // Execute Callback
-      // ================
-      // 1. Execute user callback method
-      // 2. Handle return value from callback
-
-      // if callback has not requested the stream to stop
-      if ( callbackPulled && !callbackStopped ) {
-        // Execute user callback method
-        callbackResult = callback( stream_.userBuffer[OUTPUT],
-                                   stream_.userBuffer[INPUT],
-                                   stream_.bufferSize,
-                                   getStreamTime(),
-                                   captureFlags & AUDCLNT_BUFFERFLAGS_DATA_DISCONTINUITY ? RTAUDIO_INPUT_OVERFLOW : 0,
-                                   stream_.callbackInfo.userData );
-
-        // Handle return value from callback
-        if ( callbackResult == 1 ) {
-          // instantiate a thread to stop this thread
-          // (stopStream() waits for this thread to finish, so it must run elsewhere)
-          HANDLE threadHandle = CreateThread( NULL, 0, stopWasapiThread, this, 0, NULL );
-          if ( !threadHandle ) {
-            errorType = RtAudioError::THREAD_ERROR;
-            errorText_ = "RtApiWasapi::wasapiThread: Unable to instantiate stream stop thread.";
-            goto Exit;
-          }
-          else if ( !CloseHandle( threadHandle ) ) {
-            errorType = RtAudioError::THREAD_ERROR;
-            errorText_ = "RtApiWasapi::wasapiThread: Unable to close stream stop thread handle.";
-            goto Exit;
-          }
-
-          callbackStopped = true;
-        }
-        else if ( callbackResult == 2 ) {
-          // instantiate a thread to abort this thread
-          HANDLE threadHandle = CreateThread( NULL, 0, abortWasapiThread, this, 0, NULL );
-          if ( !threadHandle ) {
-            errorType = RtAudioError::THREAD_ERROR;
-            errorText_ = "RtApiWasapi::wasapiThread: Unable to instantiate stream abort thread.";
-            goto Exit;
-          }
-          else if ( !CloseHandle( threadHandle ) ) {
-            errorType = RtAudioError::THREAD_ERROR;
-            errorText_ = "RtApiWasapi::wasapiThread: Unable to close stream abort thread handle.";
-            goto Exit;
-          }
-
-          callbackStopped = true;
-        }
-      }
-    }
-
-    // Callback Output
-    // ===============
-    // 1. Convert callback buffer to stream format
-    // 2. Convert callback buffer to stream sample rate and channel count
-    // 3. Push callback buffer into outputBuffer
-
-    if ( renderAudioClient && callbackPulled ) {
-      if ( stream_.doConvertBuffer[OUTPUT] ) {
-        // Convert callback buffer to stream format
-        convertBuffer( stream_.deviceBuffer,
-                       stream_.userBuffer[OUTPUT],
-                       stream_.convertInfo[OUTPUT] );
-      }
-      else {
-        // No format conversion: the resampling step below still reads from
-        // deviceBuffer, so the callback output has to be copied there first
-        // (mirrors the capture path). Assumes user and device layouts match
-        // whenever conversion is disabled - TODO confirm against the code
-        // that sets doConvertBuffer.
-        memcpy( stream_.deviceBuffer,
-                stream_.userBuffer[OUTPUT],
-                stream_.bufferSize * stream_.nUserChannels[OUTPUT] * formatBytes( stream_.userFormat ) );
-      }
-
-      // Convert callback buffer to stream sample rate
-      convertBufferWasapi( convBuffer,
-                           stream_.deviceBuffer,
-                           stream_.nDeviceChannels[OUTPUT],
-                           stream_.sampleRate,
-                           renderFormat->nSamplesPerSec,
-                           stream_.bufferSize,
-                           convBufferSize,
-                           stream_.deviceFormat[OUTPUT] );
-
-      // Push callback buffer into outputBuffer
-      callbackPushed = renderBuffer.pushBuffer( convBuffer,
-                                                convBufferSize * stream_.nDeviceChannels[OUTPUT],
-                                                stream_.deviceFormat[OUTPUT] );
-    }
-    else {
-      // if there is no render stream, set callbackPushed flag
-      callbackPushed = true;
-    }
-
-    // Stream Capture
-    // ==============
-    // 1. Get capture buffer from stream
-    // 2. Push capture buffer into inputBuffer
-    // 3. If 2. was successful: Release capture buffer
-
-    if ( captureAudioClient ) {
-      // if the callback input buffer was not pulled from captureBuffer, wait for next capture event
-      if ( !callbackPulled ) {
-        WaitForSingleObject( captureEvent, INFINITE );
-      }
-
-      // Get capture buffer from stream
-      hr = captureClient->GetBuffer( &streamBuffer,
-                                     &bufferFrameCount,
-                                     &captureFlags, NULL, NULL );
-      if ( FAILED( hr ) ) {
-        errorText_ = "RtApiWasapi::wasapiThread: Unable to retrieve capture buffer.";
-        goto Exit;
-      }
-
-      if ( bufferFrameCount != 0 ) {
-        // Push capture buffer into inputBuffer
-        if ( captureBuffer.pushBuffer( ( char* ) streamBuffer,
-                                       bufferFrameCount * stream_.nDeviceChannels[INPUT],
-                                       stream_.deviceFormat[INPUT] ) )
-        {
-          // Release capture buffer
-          hr = captureClient->ReleaseBuffer( bufferFrameCount );
-          if ( FAILED( hr ) ) {
-            errorText_ = "RtApiWasapi::wasapiThread: Unable to release capture buffer.";
-            goto Exit;
-          }
-        }
-        else
-        {
-          // Inform WASAPI that capture was unsuccessful
-          hr = captureClient->ReleaseBuffer( 0 );
-          if ( FAILED( hr ) ) {
-            errorText_ = "RtApiWasapi::wasapiThread: Unable to release capture buffer.";
-            goto Exit;
-          }
-        }
-      }
-      else
-      {
-        // Inform WASAPI that capture was unsuccessful
-        hr = captureClient->ReleaseBuffer( 0 );
-        if ( FAILED( hr ) ) {
-          errorText_ = "RtApiWasapi::wasapiThread: Unable to release capture buffer.";
-          goto Exit;
-        }
-      }
-    }
-
-    // Stream Render
-    // =============
-    // 1. Get render buffer from stream
-    // 2. Pull next buffer from outputBuffer
-    // 3. If 2. was successful: Fill render buffer with next buffer
-    //                          Release render buffer
-
-    if ( renderAudioClient ) {
-      // if the callback output buffer was not pushed to renderBuffer, wait for next render event
-      if ( callbackPulled && !callbackPushed ) {
-        WaitForSingleObject( renderEvent, INFINITE );
-      }
-
-      // Get render buffer from stream
-      hr = renderAudioClient->GetBufferSize( &bufferFrameCount );
-      if ( FAILED( hr ) ) {
-        errorText_ = "RtApiWasapi::wasapiThread: Unable to retrieve render buffer size.";
-        goto Exit;
-      }
-
-      hr = renderAudioClient->GetCurrentPadding( &numFramesPadding );
-      if ( FAILED( hr ) ) {
-        errorText_ = "RtApiWasapi::wasapiThread: Unable to retrieve render buffer padding.";
-        goto Exit;
-      }
-
-      // frames that can be written right now: total size minus what is still queued
-      bufferFrameCount -= numFramesPadding;
-
-      if ( bufferFrameCount != 0 ) {
-        hr = renderClient->GetBuffer( bufferFrameCount, &streamBuffer );
-        if ( FAILED( hr ) ) {
-          errorText_ = "RtApiWasapi::wasapiThread: Unable to retrieve render buffer.";
-          goto Exit;
-        }
-
-        // Pull next buffer from outputBuffer
-        // Fill render buffer with next buffer
-        if ( renderBuffer.pullBuffer( ( char* ) streamBuffer,
-                                      bufferFrameCount * stream_.nDeviceChannels[OUTPUT],
-                                      stream_.deviceFormat[OUTPUT] ) )
-        {
-          // Release render buffer
-          hr = renderClient->ReleaseBuffer( bufferFrameCount, 0 );
-          if ( FAILED( hr ) ) {
-            errorText_ = "RtApiWasapi::wasapiThread: Unable to release render buffer.";
-            goto Exit;
-          }
-        }
-        else
-        {
-          // Inform WASAPI that render was unsuccessful
-          hr = renderClient->ReleaseBuffer( 0, 0 );
-          if ( FAILED( hr ) ) {
-            errorText_ = "RtApiWasapi::wasapiThread: Unable to release render buffer.";
-            goto Exit;
-          }
-        }
-      }
-      else
-      {
-        // Inform WASAPI that render was unsuccessful
-        hr = renderClient->ReleaseBuffer( 0, 0 );
-        if ( FAILED( hr ) ) {
-          errorText_ = "RtApiWasapi::wasapiThread: Unable to release render buffer.";
-          goto Exit;
-        }
-      }
-    }
-
-    // if the callback buffer was pushed renderBuffer reset callbackPulled flag
-    if ( callbackPushed ) {
-      callbackPulled = false;
-    }
-
-    // tick stream time
-    RtApi::tickStreamTime();
-  }
-
-Exit:
-  // clean up
-  CoTaskMemFree( captureFormat );
-  CoTaskMemFree( renderFormat );
-
-  free ( convBuffer );
-
-  CoUninitialize();
-
-  // update stream state
-  // (stopStream()/abortStream() poll for this value)
-  stream_.state = STREAM_STOPPED;
-
-  if ( errorText_.empty() )
-    return;
-  else
-    error( errorType );
-}
-
-//******************** End of __WINDOWS_WASAPI__ *********************//
-#endif
-
-
-#if defined(__WINDOWS_DS__) // Windows DirectSound API
-
-// Modified by Robin Davies, October 2005
-// - Improvements to DirectX pointer chasing.
-// - Bug fix for non-power-of-two Asio granularity used by Edirol PCR-A30.
-// - Auto-call CoInitialize for DSOUND and ASIO platforms.
-// Various revisions for RtAudio 4.0 by Gary Scavone, April 2007
-// Changed device query structure for RtAudio 4.0.7, January 2010
-
-#include <dsound.h>
-#include <assert.h>
-#include <algorithm>
-
-#if defined(__MINGW32__)
- // missing from latest mingw winapi
-#define WAVE_FORMAT_96M08 0x00010000 /* 96 kHz, Mono, 8-bit */
-#define WAVE_FORMAT_96S08 0x00020000 /* 96 kHz, Stereo, 8-bit */
-#define WAVE_FORMAT_96M16 0x00040000 /* 96 kHz, Mono, 16-bit */
-#define WAVE_FORMAT_96S16 0x00080000 /* 96 kHz, Stereo, 16-bit */
-#endif
-
-#define MINIMUM_DEVICE_BUFFER_SIZE 32768
-
-#ifdef _MSC_VER // if Microsoft Visual C++
-#pragma comment( lib, "winmm.lib" ) // then, auto-link winmm.lib. Otherwise, it has to be added manually.
-#endif
-
-static inline DWORD dsPointerBetween( DWORD pointer, DWORD laterPointer, DWORD earlierPointer, DWORD bufferSize )
-{
- if ( pointer > bufferSize ) pointer -= bufferSize;
- if ( laterPointer < earlierPointer ) laterPointer += bufferSize;
- if ( pointer < earlierPointer ) pointer += bufferSize;
- return pointer >= earlierPointer && pointer < laterPointer;
-}
-
-// A structure to hold various information related to the DirectSound
-// API implementation.
-struct DsHandle {
- unsigned int drainCounter; // Tracks callback counts when draining
- bool internalDrain; // Indicates if stop is initiated from callback or not.
- void *id[2];
- void *buffer[2];
- bool xrun[2];
- UINT bufferPointer[2];
- DWORD dsBufferSize[2];
- DWORD dsPointerLeadTime[2]; // the number of bytes ahead of the safe pointer to lead by.
- HANDLE condition;
-
- DsHandle()
- :drainCounter(0), internalDrain(false) { id[0] = 0; id[1] = 0; buffer[0] = 0; buffer[1] = 0; xrun[0] = false; xrun[1] = false; bufferPointer[0] = 0; bufferPointer[1] = 0; }
-};
-
-// Declarations for utility functions, callbacks, and structures
-// specific to the DirectSound implementation.
-static BOOL CALLBACK deviceQueryCallback( LPGUID lpguid,
- LPCTSTR description,
- LPCTSTR module,
- LPVOID lpContext );
-
-static const char* getErrorString( int code );
-
-static unsigned __stdcall callbackHandler( void *ptr );
-
-struct DsDevice {
- LPGUID id[2];
- bool validId[2];
- bool found;
- std::string name;
-
- DsDevice()
- : found(false) { validId[0] = false; validId[1] = false; }
-};
-
-struct DsProbeData {
- bool isInput;
- std::vector<struct DsDevice>* dsDevices;
-};
-
-RtApiDs :: RtApiDs()
-{
- // Dsound will run both-threaded. If CoInitialize fails, then just
- // accept whatever the mainline chose for a threading model.
- coInitialized_ = false;
- HRESULT hr = CoInitialize( NULL );
- if ( !FAILED( hr ) ) coInitialized_ = true;
-}
-
-RtApiDs :: ~RtApiDs()
-{
- if ( coInitialized_ ) CoUninitialize(); // balanced call.
- if ( stream_.state != STREAM_CLOSED ) closeStream();
-}
-
-// The DirectSound default output is always the first device.
-unsigned int RtApiDs :: getDefaultOutputDevice( void )
-{
- return 0;
-}
-
-// The DirectSound default input is always the first input device,
-// which is the first capture device enumerated.
-unsigned int RtApiDs :: getDefaultInputDevice( void )
-{
- return 0;
-}
-
-unsigned int RtApiDs :: getDeviceCount( void )
-{
- // Set query flag for previously found devices to false, so that we
- // can check for any devices that have disappeared.
- for ( unsigned int i=0; i<dsDevices.size(); i++ )
- dsDevices[i].found = false;
-
- // Query DirectSound devices.
- struct DsProbeData probeInfo;
- probeInfo.isInput = false;
- probeInfo.dsDevices = &dsDevices;
- HRESULT result = DirectSoundEnumerate( (LPDSENUMCALLBACK) deviceQueryCallback, &probeInfo );
- if ( FAILED( result ) ) {
- errorStream_ << "RtApiDs::getDeviceCount: error (" << getErrorString( result ) << ") enumerating output devices!";
- errorText_ = errorStream_.str();
- error( RtAudioError::WARNING );
- }
-
- // Query DirectSoundCapture devices.
- probeInfo.isInput = true;
- result = DirectSoundCaptureEnumerate( (LPDSENUMCALLBACK) deviceQueryCallback, &probeInfo );
- if ( FAILED( result ) ) {
- errorStream_ << "RtApiDs::getDeviceCount: error (" << getErrorString( result ) << ") enumerating input devices!";
- errorText_ = errorStream_.str();
- error( RtAudioError::WARNING );
- }
-
- // Clean out any devices that may have disappeared (code update submitted by Eli Zehngut).
- for ( unsigned int i=0; i<dsDevices.size(); ) {
- if ( dsDevices[i].found == false ) dsDevices.erase( dsDevices.begin() + i );
- else i++;
- }
-
- return static_cast<unsigned int>(dsDevices.size());
-}
-
-RtAudio::DeviceInfo RtApiDs :: getDeviceInfo( unsigned int device )
-{
- RtAudio::DeviceInfo info;
- info.probed = false;
-
- if ( dsDevices.size() == 0 ) {
- // Force a query of all devices
- getDeviceCount();
- if ( dsDevices.size() == 0 ) {
- errorText_ = "RtApiDs::getDeviceInfo: no devices found!";
- error( RtAudioError::INVALID_USE );
- return info;
- }
- }
-
- if ( device >= dsDevices.size() ) {
- errorText_ = "RtApiDs::getDeviceInfo: device ID is invalid!";
- error( RtAudioError::INVALID_USE );
- return info;
- }
-
- HRESULT result;
- if ( dsDevices[ device ].validId[0] == false ) goto probeInput;
-
- LPDIRECTSOUND output;
- DSCAPS outCaps;
- result = DirectSoundCreate( dsDevices[ device ].id[0], &output, NULL );
- if ( FAILED( result ) ) {
- errorStream_ << "RtApiDs::getDeviceInfo: error (" << getErrorString( result ) << ") opening output device (" << dsDevices[ device ].name << ")!";
- errorText_ = errorStream_.str();
- error( RtAudioError::WARNING );
- goto probeInput;
- }
-
- outCaps.dwSize = sizeof( outCaps );
- result = output->GetCaps( &outCaps );
- if ( FAILED( result ) ) {
- output->Release();
- errorStream_ << "RtApiDs::getDeviceInfo: error (" << getErrorString( result ) << ") getting capabilities!";
- errorText_ = errorStream_.str();
- error( RtAudioError::WARNING );
- goto probeInput;
- }
-
- // Get output channel information.
- info.outputChannels = ( outCaps.dwFlags & DSCAPS_PRIMARYSTEREO ) ? 2 : 1;
-
- // Get sample rate information.
- info.sampleRates.clear();
- for ( unsigned int k=0; k<MAX_SAMPLE_RATES; k++ ) {
- if ( SAMPLE_RATES[k] >= (unsigned int) outCaps.dwMinSecondarySampleRate &&
- SAMPLE_RATES[k] <= (unsigned int) outCaps.dwMaxSecondarySampleRate ) {
- info.sampleRates.push_back( SAMPLE_RATES[k] );
-
- if ( !info.preferredSampleRate || ( SAMPLE_RATES[k] <= 48000 && SAMPLE_RATES[k] > info.preferredSampleRate ) )
- info.preferredSampleRate = SAMPLE_RATES[k];
- }
- }
-
- // Get format information.
- if ( outCaps.dwFlags & DSCAPS_PRIMARY16BIT ) info.nativeFormats |= RTAUDIO_SINT16;
- if ( outCaps.dwFlags & DSCAPS_PRIMARY8BIT ) info.nativeFormats |= RTAUDIO_SINT8;
-
- output->Release();
-
- if ( getDefaultOutputDevice() == device )
- info.isDefaultOutput = true;
-
- if ( dsDevices[ device ].validId[1] == false ) {
- info.name = dsDevices[ device ].name;
- info.probed = true;
- return info;
- }
-
- probeInput:
-
- LPDIRECTSOUNDCAPTURE input;
- result = DirectSoundCaptureCreate( dsDevices[ device ].id[1], &input, NULL );
- if ( FAILED( result ) ) {
- errorStream_ << "RtApiDs::getDeviceInfo: error (" << getErrorString( result ) << ") opening input device (" << dsDevices[ device ].name << ")!";
- errorText_ = errorStream_.str();
- error( RtAudioError::WARNING );
- return info;
- }
-
- DSCCAPS inCaps;
- inCaps.dwSize = sizeof( inCaps );
- result = input->GetCaps( &inCaps );
- if ( FAILED( result ) ) {
- input->Release();
- errorStream_ << "RtApiDs::getDeviceInfo: error (" << getErrorString( result ) << ") getting object capabilities (" << dsDevices[ device ].name << ")!";
- errorText_ = errorStream_.str();
- error( RtAudioError::WARNING );
- return info;
- }
-
- // Get input channel information.
- info.inputChannels = inCaps.dwChannels;
-
- // Get sample rate and format information.
- std::vector<unsigned int> rates;
- if ( inCaps.dwChannels >= 2 ) {
- if ( inCaps.dwFormats & WAVE_FORMAT_1S16 ) info.nativeFormats |= RTAUDIO_SINT16;
- if ( inCaps.dwFormats & WAVE_FORMAT_2S16 ) info.nativeFormats |= RTAUDIO_SINT16;
- if ( inCaps.dwFormats & WAVE_FORMAT_4S16 ) info.nativeFormats |= RTAUDIO_SINT16;
- if ( inCaps.dwFormats & WAVE_FORMAT_96S16 ) info.nativeFormats |= RTAUDIO_SINT16;
- if ( inCaps.dwFormats & WAVE_FORMAT_1S08 ) info.nativeFormats |= RTAUDIO_SINT8;
- if ( inCaps.dwFormats & WAVE_FORMAT_2S08 ) info.nativeFormats |= RTAUDIO_SINT8;
- if ( inCaps.dwFormats & WAVE_FORMAT_4S08 ) info.nativeFormats |= RTAUDIO_SINT8;
- if ( inCaps.dwFormats & WAVE_FORMAT_96S08 ) info.nativeFormats |= RTAUDIO_SINT8;
-
- if ( info.nativeFormats & RTAUDIO_SINT16 ) {
- if ( inCaps.dwFormats & WAVE_FORMAT_1S16 ) rates.push_back( 11025 );
- if ( inCaps.dwFormats & WAVE_FORMAT_2S16 ) rates.push_back( 22050 );
- if ( inCaps.dwFormats & WAVE_FORMAT_4S16 ) rates.push_back( 44100 );
- if ( inCaps.dwFormats & WAVE_FORMAT_96S16 ) rates.push_back( 96000 );
- }
- else if ( info.nativeFormats & RTAUDIO_SINT8 ) {
- if ( inCaps.dwFormats & WAVE_FORMAT_1S08 ) rates.push_back( 11025 );
- if ( inCaps.dwFormats & WAVE_FORMAT_2S08 ) rates.push_back( 22050 );
- if ( inCaps.dwFormats & WAVE_FORMAT_4S08 ) rates.push_back( 44100 );
- if ( inCaps.dwFormats & WAVE_FORMAT_96S08 ) rates.push_back( 96000 );
- }
- }
- else if ( inCaps.dwChannels == 1 ) {
- if ( inCaps.dwFormats & WAVE_FORMAT_1M16 ) info.nativeFormats |= RTAUDIO_SINT16;
- if ( inCaps.dwFormats & WAVE_FORMAT_2M16 ) info.nativeFormats |= RTAUDIO_SINT16;
- if ( inCaps.dwFormats & WAVE_FORMAT_4M16 ) info.nativeFormats |= RTAUDIO_SINT16;
- if ( inCaps.dwFormats & WAVE_FORMAT_96M16 ) info.nativeFormats |= RTAUDIO_SINT16;
- if ( inCaps.dwFormats & WAVE_FORMAT_1M08 ) info.nativeFormats |= RTAUDIO_SINT8;
- if ( inCaps.dwFormats & WAVE_FORMAT_2M08 ) info.nativeFormats |= RTAUDIO_SINT8;
- if ( inCaps.dwFormats & WAVE_FORMAT_4M08 ) info.nativeFormats |= RTAUDIO_SINT8;
- if ( inCaps.dwFormats & WAVE_FORMAT_96M08 ) info.nativeFormats |= RTAUDIO_SINT8;
-
- if ( info.nativeFormats & RTAUDIO_SINT16 ) {
- if ( inCaps.dwFormats & WAVE_FORMAT_1M16 ) rates.push_back( 11025 );
- if ( inCaps.dwFormats & WAVE_FORMAT_2M16 ) rates.push_back( 22050 );
- if ( inCaps.dwFormats & WAVE_FORMAT_4M16 ) rates.push_back( 44100 );
- if ( inCaps.dwFormats & WAVE_FORMAT_96M16 ) rates.push_back( 96000 );
- }
- else if ( info.nativeFormats & RTAUDIO_SINT8 ) {
- if ( inCaps.dwFormats & WAVE_FORMAT_1M08 ) rates.push_back( 11025 );
- if ( inCaps.dwFormats & WAVE_FORMAT_2M08 ) rates.push_back( 22050 );
- if ( inCaps.dwFormats & WAVE_FORMAT_4M08 ) rates.push_back( 44100 );
- if ( inCaps.dwFormats & WAVE_FORMAT_96M08 ) rates.push_back( 96000 );
- }
- }
- else info.inputChannels = 0; // technically, this would be an error
-
- input->Release();
-
- if ( info.inputChannels == 0 ) return info;
-
- // Copy the supported rates to the info structure but avoid duplication.
- bool found;
- for ( unsigned int i=0; i<rates.size(); i++ ) {
- found = false;
- for ( unsigned int j=0; j<info.sampleRates.size(); j++ ) {
- if ( rates[i] == info.sampleRates[j] ) {
- found = true;
- break;
- }
- }
- if ( found == false ) info.sampleRates.push_back( rates[i] );
- }
- std::sort( info.sampleRates.begin(), info.sampleRates.end() );
-
- // If device opens for both playback and capture, we determine the channels.
- if ( info.outputChannels > 0 && info.inputChannels > 0 )
- info.duplexChannels = (info.outputChannels > info.inputChannels) ? info.inputChannels : info.outputChannels;
-
- if ( device == 0 ) info.isDefaultInput = true;
-
- // Copy name and return.
- info.name = dsDevices[ device ].name;
- info.probed = true;
- return info;
-}
-
-bool RtApiDs :: probeDeviceOpen( unsigned int device, StreamMode mode, unsigned int channels,
- unsigned int firstChannel, unsigned int sampleRate,
- RtAudioFormat format, unsigned int *bufferSize,
- RtAudio::StreamOptions *options )
-{
- if ( channels + firstChannel > 2 ) {
- errorText_ = "RtApiDs::probeDeviceOpen: DirectSound does not support more than 2 channels per device.";
- return FAILURE;
- }
-
- size_t nDevices = dsDevices.size();
- if ( nDevices == 0 ) {
- // This should not happen because a check is made before this function is called.
- errorText_ = "RtApiDs::probeDeviceOpen: no devices found!";
- return FAILURE;
- }
-
- if ( device >= nDevices ) {
- // This should not happen because a check is made before this function is called.
- errorText_ = "RtApiDs::probeDeviceOpen: device ID is invalid!";
- return FAILURE;
- }
-
- if ( mode == OUTPUT ) {
- if ( dsDevices[ device ].validId[0] == false ) {
- errorStream_ << "RtApiDs::probeDeviceOpen: device (" << device << ") does not support output!";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
- }
- else { // mode == INPUT
- if ( dsDevices[ device ].validId[1] == false ) {
- errorStream_ << "RtApiDs::probeDeviceOpen: device (" << device << ") does not support input!";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
- }
-
- // According to a note in PortAudio, using GetDesktopWindow()
- // instead of GetForegroundWindow() is supposed to avoid problems
- // that occur when the application's window is not the foreground
- // window. Also, if the application window closes before the
- // DirectSound buffer, DirectSound can crash. In the past, I had
- // problems when using GetDesktopWindow() but it seems fine now
- // (January 2010). I'll leave it commented here.
- // HWND hWnd = GetForegroundWindow();
- HWND hWnd = GetDesktopWindow();
-
- // Check the numberOfBuffers parameter and limit the lowest value to
- // two. This is a judgement call and a value of two is probably too
- // low for capture, but it should work for playback.
- int nBuffers = 0;
- if ( options ) nBuffers = options->numberOfBuffers;
- if ( options && options->flags & RTAUDIO_MINIMIZE_LATENCY ) nBuffers = 2;
- if ( nBuffers < 2 ) nBuffers = 3;
-
- // Check the lower range of the user-specified buffer size and set
- // (arbitrarily) to a lower bound of 32.
- if ( *bufferSize < 32 ) *bufferSize = 32;
-
- // Create the wave format structure. The data format setting will
- // be determined later.
- WAVEFORMATEX waveFormat;
- ZeroMemory( &waveFormat, sizeof(WAVEFORMATEX) );
- waveFormat.wFormatTag = WAVE_FORMAT_PCM;
- waveFormat.nChannels = channels + firstChannel;
- waveFormat.nSamplesPerSec = (unsigned long) sampleRate;
-
- // Determine the device buffer size. By default, we'll use the value
- // defined above (32K), but we will grow it to make allowances for
- // very large software buffer sizes.
- DWORD dsBufferSize = MINIMUM_DEVICE_BUFFER_SIZE;
- DWORD dsPointerLeadTime = 0;
-
- void *ohandle = 0, *bhandle = 0;
- HRESULT result;
- if ( mode == OUTPUT ) {
-
- LPDIRECTSOUND output;
- result = DirectSoundCreate( dsDevices[ device ].id[0], &output, NULL );
- if ( FAILED( result ) ) {
- errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") opening output device (" << dsDevices[ device ].name << ")!";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
-
- DSCAPS outCaps;
- outCaps.dwSize = sizeof( outCaps );
- result = output->GetCaps( &outCaps );
- if ( FAILED( result ) ) {
- output->Release();
- errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") getting capabilities (" << dsDevices[ device ].name << ")!";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
-
- // Check channel information.
- if ( channels + firstChannel == 2 && !( outCaps.dwFlags & DSCAPS_PRIMARYSTEREO ) ) {
- errorStream_ << "RtApiDs::getDeviceInfo: the output device (" << dsDevices[ device ].name << ") does not support stereo playback.";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
-
- // Check format information. Use 16-bit format unless not
- // supported or user requests 8-bit.
- if ( outCaps.dwFlags & DSCAPS_PRIMARY16BIT &&
- !( format == RTAUDIO_SINT8 && outCaps.dwFlags & DSCAPS_PRIMARY8BIT ) ) {
- waveFormat.wBitsPerSample = 16;
- stream_.deviceFormat[mode] = RTAUDIO_SINT16;
- }
- else {
- waveFormat.wBitsPerSample = 8;
- stream_.deviceFormat[mode] = RTAUDIO_SINT8;
- }
- stream_.userFormat = format;
-
- // Update wave format structure and buffer information.
- waveFormat.nBlockAlign = waveFormat.nChannels * waveFormat.wBitsPerSample / 8;
- waveFormat.nAvgBytesPerSec = waveFormat.nSamplesPerSec * waveFormat.nBlockAlign;
- dsPointerLeadTime = nBuffers * (*bufferSize) * (waveFormat.wBitsPerSample / 8) * channels;
-
- // If the user wants an even bigger buffer, increase the device buffer size accordingly.
- while ( dsPointerLeadTime * 2U > dsBufferSize )
- dsBufferSize *= 2;
-
- // Set cooperative level to DSSCL_EXCLUSIVE ... sound stops when window focus changes.
- // result = output->SetCooperativeLevel( hWnd, DSSCL_EXCLUSIVE );
- // Set cooperative level to DSSCL_PRIORITY ... sound remains when window focus changes.
- result = output->SetCooperativeLevel( hWnd, DSSCL_PRIORITY );
- if ( FAILED( result ) ) {
- output->Release();
- errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") setting cooperative level (" << dsDevices[ device ].name << ")!";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
-
- // Even though we will write to the secondary buffer, we need to
- // access the primary buffer to set the correct output format
- // (since the default is 8-bit, 22 kHz!). Setup the DS primary
- // buffer description.
- DSBUFFERDESC bufferDescription;
- ZeroMemory( &bufferDescription, sizeof( DSBUFFERDESC ) );
- bufferDescription.dwSize = sizeof( DSBUFFERDESC );
- bufferDescription.dwFlags = DSBCAPS_PRIMARYBUFFER;
-
- // Obtain the primary buffer
- LPDIRECTSOUNDBUFFER buffer;
- result = output->CreateSoundBuffer( &bufferDescription, &buffer, NULL );
- if ( FAILED( result ) ) {
- output->Release();
- errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") accessing primary buffer (" << dsDevices[ device ].name << ")!";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
-
- // Set the primary DS buffer sound format.
- result = buffer->SetFormat( &waveFormat );
- if ( FAILED( result ) ) {
- output->Release();
- errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") setting primary buffer format (" << dsDevices[ device ].name << ")!";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
-
- // Setup the secondary DS buffer description.
- ZeroMemory( &bufferDescription, sizeof( DSBUFFERDESC ) );
- bufferDescription.dwSize = sizeof( DSBUFFERDESC );
- bufferDescription.dwFlags = ( DSBCAPS_STICKYFOCUS |
- DSBCAPS_GLOBALFOCUS |
- DSBCAPS_GETCURRENTPOSITION2 |
- DSBCAPS_LOCHARDWARE ); // Force hardware mixing
- bufferDescription.dwBufferBytes = dsBufferSize;
- bufferDescription.lpwfxFormat = &waveFormat;
-
- // Try to create the secondary DS buffer. If that doesn't work,
- // try to use software mixing. Otherwise, there's a problem.
- result = output->CreateSoundBuffer( &bufferDescription, &buffer, NULL );
- if ( FAILED( result ) ) {
- bufferDescription.dwFlags = ( DSBCAPS_STICKYFOCUS |
- DSBCAPS_GLOBALFOCUS |
- DSBCAPS_GETCURRENTPOSITION2 |
- DSBCAPS_LOCSOFTWARE ); // Force software mixing
- result = output->CreateSoundBuffer( &bufferDescription, &buffer, NULL );
- if ( FAILED( result ) ) {
- output->Release();
- errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") creating secondary buffer (" << dsDevices[ device ].name << ")!";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
- }
-
- // Get the buffer size ... might be different from what we specified.
- DSBCAPS dsbcaps;
- dsbcaps.dwSize = sizeof( DSBCAPS );
- result = buffer->GetCaps( &dsbcaps );
- if ( FAILED( result ) ) {
- output->Release();
- buffer->Release();
- errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") getting buffer settings (" << dsDevices[ device ].name << ")!";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
-
- dsBufferSize = dsbcaps.dwBufferBytes;
-
- // Lock the DS buffer
- LPVOID audioPtr;
- DWORD dataLen;
- result = buffer->Lock( 0, dsBufferSize, &audioPtr, &dataLen, NULL, NULL, 0 );
- if ( FAILED( result ) ) {
- output->Release();
- buffer->Release();
- errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") locking buffer (" << dsDevices[ device ].name << ")!";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
-
- // Zero the DS buffer
- ZeroMemory( audioPtr, dataLen );
-
- // Unlock the DS buffer
- result = buffer->Unlock( audioPtr, dataLen, NULL, 0 );
- if ( FAILED( result ) ) {
- output->Release();
- buffer->Release();
- errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") unlocking buffer (" << dsDevices[ device ].name << ")!";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
-
- ohandle = (void *) output;
- bhandle = (void *) buffer;
- }
-
- if ( mode == INPUT ) {
-
- LPDIRECTSOUNDCAPTURE input;
- result = DirectSoundCaptureCreate( dsDevices[ device ].id[1], &input, NULL );
- if ( FAILED( result ) ) {
- errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") opening input device (" << dsDevices[ device ].name << ")!";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
-
- DSCCAPS inCaps;
- inCaps.dwSize = sizeof( inCaps );
- result = input->GetCaps( &inCaps );
- if ( FAILED( result ) ) {
- input->Release();
- errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") getting input capabilities (" << dsDevices[ device ].name << ")!";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
-
- // Check channel information.
- if ( inCaps.dwChannels < channels + firstChannel ) {
- errorText_ = "RtApiDs::getDeviceInfo: the input device does not support requested input channels.";
- return FAILURE;
- }
-
- // Check format information. Use 16-bit format unless user
- // requests 8-bit.
- DWORD deviceFormats;
- if ( channels + firstChannel == 2 ) {
- deviceFormats = WAVE_FORMAT_1S08 | WAVE_FORMAT_2S08 | WAVE_FORMAT_4S08 | WAVE_FORMAT_96S08;
- if ( format == RTAUDIO_SINT8 && inCaps.dwFormats & deviceFormats ) {
- waveFormat.wBitsPerSample = 8;
- stream_.deviceFormat[mode] = RTAUDIO_SINT8;
- }
- else { // assume 16-bit is supported
- waveFormat.wBitsPerSample = 16;
- stream_.deviceFormat[mode] = RTAUDIO_SINT16;
- }
- }
- else { // channel == 1
- deviceFormats = WAVE_FORMAT_1M08 | WAVE_FORMAT_2M08 | WAVE_FORMAT_4M08 | WAVE_FORMAT_96M08;
- if ( format == RTAUDIO_SINT8 && inCaps.dwFormats & deviceFormats ) {
- waveFormat.wBitsPerSample = 8;
- stream_.deviceFormat[mode] = RTAUDIO_SINT8;
- }
- else { // assume 16-bit is supported
- waveFormat.wBitsPerSample = 16;
- stream_.deviceFormat[mode] = RTAUDIO_SINT16;
- }
- }
- stream_.userFormat = format;
-
- // Update wave format structure and buffer information.
- waveFormat.nBlockAlign = waveFormat.nChannels * waveFormat.wBitsPerSample / 8;
- waveFormat.nAvgBytesPerSec = waveFormat.nSamplesPerSec * waveFormat.nBlockAlign;
- dsPointerLeadTime = nBuffers * (*bufferSize) * (waveFormat.wBitsPerSample / 8) * channels;
-
- // If the user wants an even bigger buffer, increase the device buffer size accordingly.
- while ( dsPointerLeadTime * 2U > dsBufferSize )
- dsBufferSize *= 2;
-
- // Setup the secondary DS buffer description.
- DSCBUFFERDESC bufferDescription;
- ZeroMemory( &bufferDescription, sizeof( DSCBUFFERDESC ) );
- bufferDescription.dwSize = sizeof( DSCBUFFERDESC );
- bufferDescription.dwFlags = 0;
- bufferDescription.dwReserved = 0;
- bufferDescription.dwBufferBytes = dsBufferSize;
- bufferDescription.lpwfxFormat = &waveFormat;
-
- // Create the capture buffer.
- LPDIRECTSOUNDCAPTUREBUFFER buffer;
- result = input->CreateCaptureBuffer( &bufferDescription, &buffer, NULL );
- if ( FAILED( result ) ) {
- input->Release();
- errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") creating input buffer (" << dsDevices[ device ].name << ")!";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
-
- // Get the buffer size ... might be different from what we specified.
- DSCBCAPS dscbcaps;
- dscbcaps.dwSize = sizeof( DSCBCAPS );
- result = buffer->GetCaps( &dscbcaps );
- if ( FAILED( result ) ) {
- input->Release();
- buffer->Release();
- errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") getting buffer settings (" << dsDevices[ device ].name << ")!";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
-
- dsBufferSize = dscbcaps.dwBufferBytes;
-
- // NOTE: We could have a problem here if this is a duplex stream
- // and the play and capture hardware buffer sizes are different
- // (I'm actually not sure if that is a problem or not).
- // Currently, we are not verifying that.
-
- // Lock the capture buffer
- LPVOID audioPtr;
- DWORD dataLen;
- result = buffer->Lock( 0, dsBufferSize, &audioPtr, &dataLen, NULL, NULL, 0 );
- if ( FAILED( result ) ) {
- input->Release();
- buffer->Release();
- errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") locking input buffer (" << dsDevices[ device ].name << ")!";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
-
- // Zero the buffer
- ZeroMemory( audioPtr, dataLen );
-
- // Unlock the buffer
- result = buffer->Unlock( audioPtr, dataLen, NULL, 0 );
- if ( FAILED( result ) ) {
- input->Release();
- buffer->Release();
- errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") unlocking input buffer (" << dsDevices[ device ].name << ")!";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
-
- ohandle = (void *) input;
- bhandle = (void *) buffer;
- }
-
- // Set various stream parameters
- DsHandle *handle = 0;
- stream_.nDeviceChannels[mode] = channels + firstChannel;
- stream_.nUserChannels[mode] = channels;
- stream_.bufferSize = *bufferSize;
- stream_.channelOffset[mode] = firstChannel;
- stream_.deviceInterleaved[mode] = true;
- if ( options && options->flags & RTAUDIO_NONINTERLEAVED ) stream_.userInterleaved = false;
- else stream_.userInterleaved = true;
-
- // Set flag for buffer conversion
- stream_.doConvertBuffer[mode] = false;
- if (stream_.nUserChannels[mode] != stream_.nDeviceChannels[mode])
- stream_.doConvertBuffer[mode] = true;
- if (stream_.userFormat != stream_.deviceFormat[mode])
- stream_.doConvertBuffer[mode] = true;
- if ( stream_.userInterleaved != stream_.deviceInterleaved[mode] &&
- stream_.nUserChannels[mode] > 1 )
- stream_.doConvertBuffer[mode] = true;
-
- // Allocate necessary internal buffers
- long bufferBytes = stream_.nUserChannels[mode] * *bufferSize * formatBytes( stream_.userFormat );
- stream_.userBuffer[mode] = (char *) calloc( bufferBytes, 1 );
- if ( stream_.userBuffer[mode] == NULL ) {
- errorText_ = "RtApiDs::probeDeviceOpen: error allocating user buffer memory.";
- goto error;
- }
-
- if ( stream_.doConvertBuffer[mode] ) {
-
- bool makeBuffer = true;
- bufferBytes = stream_.nDeviceChannels[mode] * formatBytes( stream_.deviceFormat[mode] );
- if ( mode == INPUT ) {
- if ( stream_.mode == OUTPUT && stream_.deviceBuffer ) {
- unsigned long bytesOut = stream_.nDeviceChannels[0] * formatBytes( stream_.deviceFormat[0] );
- if ( bufferBytes <= (long) bytesOut ) makeBuffer = false;
- }
- }
-
- if ( makeBuffer ) {
- bufferBytes *= *bufferSize;
- if ( stream_.deviceBuffer ) free( stream_.deviceBuffer );
- stream_.deviceBuffer = (char *) calloc( bufferBytes, 1 );
- if ( stream_.deviceBuffer == NULL ) {
- errorText_ = "RtApiDs::probeDeviceOpen: error allocating device buffer memory.";
- goto error;
- }
- }
- }
-
- // Allocate our DsHandle structures for the stream.
- if ( stream_.apiHandle == 0 ) {
- try {
- handle = new DsHandle;
- }
- catch ( std::bad_alloc& ) {
- errorText_ = "RtApiDs::probeDeviceOpen: error allocating AsioHandle memory.";
- goto error;
- }
-
- // Create a manual-reset event.
- handle->condition = CreateEvent( NULL, // no security
- TRUE, // manual-reset
- FALSE, // non-signaled initially
- NULL ); // unnamed
- stream_.apiHandle = (void *) handle;
- }
- else
- handle = (DsHandle *) stream_.apiHandle;
- handle->id[mode] = ohandle;
- handle->buffer[mode] = bhandle;
- handle->dsBufferSize[mode] = dsBufferSize;
- handle->dsPointerLeadTime[mode] = dsPointerLeadTime;
-
- stream_.device[mode] = device;
- stream_.state = STREAM_STOPPED;
- if ( stream_.mode == OUTPUT && mode == INPUT )
- // We had already set up an output stream.
- stream_.mode = DUPLEX;
- else
- stream_.mode = mode;
- stream_.nBuffers = nBuffers;
- stream_.sampleRate = sampleRate;
-
- // Setup the buffer conversion information structure.
- if ( stream_.doConvertBuffer[mode] ) setConvertInfo( mode, firstChannel );
-
- // Setup the callback thread.
- if ( stream_.callbackInfo.isRunning == false ) {
- unsigned threadId;
- stream_.callbackInfo.isRunning = true;
- stream_.callbackInfo.object = (void *) this;
- stream_.callbackInfo.thread = _beginthreadex( NULL, 0, &callbackHandler,
- &stream_.callbackInfo, 0, &threadId );
- if ( stream_.callbackInfo.thread == 0 ) {
- errorText_ = "RtApiDs::probeDeviceOpen: error creating callback thread!";
- goto error;
- }
-
- // Boost DS thread priority
- SetThreadPriority( (HANDLE) stream_.callbackInfo.thread, THREAD_PRIORITY_HIGHEST );
- }
- return SUCCESS;
-
- error:
- if ( handle ) {
- if ( handle->buffer[0] ) { // the object pointer can be NULL and valid
- LPDIRECTSOUND object = (LPDIRECTSOUND) handle->id[0];
- LPDIRECTSOUNDBUFFER buffer = (LPDIRECTSOUNDBUFFER) handle->buffer[0];
- if ( buffer ) buffer->Release();
- object->Release();
- }
- if ( handle->buffer[1] ) {
- LPDIRECTSOUNDCAPTURE object = (LPDIRECTSOUNDCAPTURE) handle->id[1];
- LPDIRECTSOUNDCAPTUREBUFFER buffer = (LPDIRECTSOUNDCAPTUREBUFFER) handle->buffer[1];
- if ( buffer ) buffer->Release();
- object->Release();
- }
- CloseHandle( handle->condition );
- delete handle;
- stream_.apiHandle = 0;
- }
-
- for ( int i=0; i<2; i++ ) {
- if ( stream_.userBuffer[i] ) {
- free( stream_.userBuffer[i] );
- stream_.userBuffer[i] = 0;
- }
- }
-
- if ( stream_.deviceBuffer ) {
- free( stream_.deviceBuffer );
- stream_.deviceBuffer = 0;
- }
-
- stream_.state = STREAM_CLOSED;
- return FAILURE;
-}
-
-// Closes the open stream: clears the callback thread's run flag and waits
-// for the thread to exit, stops and releases the playback and capture
-// buffers together with their device objects, frees the user and device
-// buffers, and marks the stream as closed.  If no stream is open, only a
-// warning is issued.
-void RtApiDs :: closeStream()
-{
-  if ( stream_.state == STREAM_CLOSED ) {
-    errorText_ = "RtApiDs::closeStream(): no open stream to close!";
-    error( RtAudioError::WARNING );
-    return;
-  }
-
-  // Stop the callback thread.
-  stream_.callbackInfo.isRunning = false;
-  WaitForSingleObject( (HANDLE) stream_.callbackInfo.thread, INFINITE );
-  CloseHandle( (HANDLE) stream_.callbackInfo.thread );
-
-  DsHandle *handle = (DsHandle *) stream_.apiHandle;
-  if ( handle ) {
-    if ( handle->buffer[0] ) { // the object pointer can be NULL and valid
-      LPDIRECTSOUND object = (LPDIRECTSOUND) handle->id[0];
-      LPDIRECTSOUNDBUFFER buffer = (LPDIRECTSOUNDBUFFER) handle->buffer[0];
-      if ( buffer ) {
-        buffer->Stop();
-        buffer->Release();
-      }
-      // Guard the release: per the note above the object pointer may be NULL.
-      if ( object ) object->Release();
-    }
-    if ( handle->buffer[1] ) {
-      LPDIRECTSOUNDCAPTURE object = (LPDIRECTSOUNDCAPTURE) handle->id[1];
-      LPDIRECTSOUNDCAPTUREBUFFER buffer = (LPDIRECTSOUNDCAPTUREBUFFER) handle->buffer[1];
-      if ( buffer ) {
-        buffer->Stop();
-        buffer->Release();
-      }
-      if ( object ) object->Release();
-    }
-    CloseHandle( handle->condition );
-    delete handle;
-    stream_.apiHandle = 0;
-  }
-
-  // Release the user-side buffers for both directions.
-  for ( int i=0; i<2; i++ ) {
-    if ( stream_.userBuffer[i] ) {
-      free( stream_.userBuffer[i] );
-      stream_.userBuffer[i] = 0;
-    }
-  }
-
-  if ( stream_.deviceBuffer ) {
-    free( stream_.deviceBuffer );
-    stream_.deviceBuffer = 0;
-  }
-
-  stream_.mode = UNINITIALIZED;
-  stream_.state = STREAM_CLOSED;
-}
-
-// Starts the DirectSound buffers of the open stream.  The playback buffer is
-// started in looping mode for OUTPUT/DUPLEX streams and the capture buffer
-// for INPUT/DUPLEX streams; on success the drain bookkeeping is reset and
-// the stream state becomes STREAM_RUNNING.  A stream that is already running
-// only produces a warning.  If a buffer fails to start, a SYSTEM_ERROR is
-// reported and the state is left unchanged.
-void RtApiDs :: startStream()
-{
-  verifyStream();
-  if ( stream_.state == STREAM_RUNNING ) {
-    errorText_ = "RtApiDs::startStream(): the stream is already running!";
-    error( RtAudioError::WARNING );
-    return;
-  }
-
-  DsHandle *handle = (DsHandle *) stream_.apiHandle;
-
-  // Increase scheduler frequency on lesser windows (a side-effect of
-  // increasing timer accuracy).  On greater windows (Win2K or later),
-  // this is already in effect.
-  timeBeginPeriod( 1 );
-
-  buffersRolling = false;
-  duplexPrerollBytes = 0;
-
-  if ( stream_.mode == DUPLEX ) {
-    // 0.5 seconds of silence in DUPLEX mode while the devices spin up and synchronize.
-    duplexPrerollBytes = (int) ( 0.5 * stream_.sampleRate * formatBytes( stream_.deviceFormat[1] ) * stream_.nDeviceChannels[1] );
-  }
-
-  HRESULT result = 0;
-  if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
-
-    LPDIRECTSOUNDBUFFER buffer = (LPDIRECTSOUNDBUFFER) handle->buffer[0];
-    result = buffer->Play( 0, 0, DSBPLAY_LOOPING );
-    if ( FAILED( result ) ) {
-      errorStream_ << "RtApiDs::startStream: error (" << getErrorString( result ) << ") starting output buffer!";
-      errorText_ = errorStream_.str();
-      goto unlock;
-    }
-  }
-
-  if ( stream_.mode == INPUT || stream_.mode == DUPLEX ) {
-
-    LPDIRECTSOUNDCAPTUREBUFFER buffer = (LPDIRECTSOUNDCAPTUREBUFFER) handle->buffer[1];
-    result = buffer->Start( DSCBSTART_LOOPING );
-    if ( FAILED( result ) ) {
-      errorStream_ << "RtApiDs::startStream: error (" << getErrorString( result ) << ") starting input buffer!";
-      errorText_ = errorStream_.str();
-      goto unlock;
-    }
-  }
-
-  handle->drainCounter = 0;
-  handle->internalDrain = false;
-  ResetEvent( handle->condition );
-  stream_.state = STREAM_RUNNING;
-
- unlock:
-  if ( FAILED( result ) ) {
-    // The state was not advanced to STREAM_RUNNING on this path, so balance
-    // the timeBeginPeriod() call made above here, before the error is
-    // reported, rather than leaving the timer resolution raised.
-    timeEndPeriod( 1 );
-    error( RtAudioError::SYSTEM_ERROR );
-  }
-}
-
-// Stops the running stream.  For OUTPUT/DUPLEX streams this first waits for
-// the output to drain (unless a drain is already in progress), then stops
-// the playback buffer, zeroes its contents and rewinds the write position.
-// For INPUT/DUPLEX streams the capture buffer gets the same treatment.  A
-// failure of any DirectSound call is reported as a SYSTEM_ERROR after the
-// stream mutex has been released; an already stopped stream only produces a
-// warning.
-void RtApiDs :: stopStream()
-{
-  verifyStream();
-  if ( stream_.state == STREAM_STOPPED ) {
-    errorText_ = "RtApiDs::stopStream(): the stream is already stopped!";
-    error( RtAudioError::WARNING );
-    return;
-  }
-
-  DsHandle *handle = (DsHandle *) stream_.apiHandle;
-  HRESULT hr = 0;
-  LPVOID region;
-  DWORD regionBytes;
-
-  if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
-    // Request a drain and wait for the callback thread to signal completion,
-    // unless a drain has already been started.
-    if ( handle->drainCounter == 0 ) {
-      handle->drainCounter = 2;
-      WaitForSingleObject( handle->condition, INFINITE );
-    }
-
-    stream_.state = STREAM_STOPPED;
-
-    MUTEX_LOCK( &stream_.mutex );
-
-    LPDIRECTSOUNDBUFFER playBuffer = (LPDIRECTSOUNDBUFFER) handle->buffer[0];
-
-    // Halt playback.
-    hr = playBuffer->Stop();
-    if ( FAILED( hr ) ) {
-      errorStream_ << "RtApiDs::stopStream: error (" << getErrorString( hr ) << ") stopping output buffer!";
-      errorText_ = errorStream_.str();
-      goto finish;
-    }
-
-    // Wipe the whole buffer so a later restart does not replay stale audio.
-    hr = playBuffer->Lock( 0, handle->dsBufferSize[0], &region, &regionBytes, NULL, NULL, 0 );
-    if ( FAILED( hr ) ) {
-      errorStream_ << "RtApiDs::stopStream: error (" << getErrorString( hr ) << ") locking output buffer!";
-      errorText_ = errorStream_.str();
-      goto finish;
-    }
-
-    ZeroMemory( region, regionBytes );
-
-    hr = playBuffer->Unlock( region, regionBytes, NULL, 0 );
-    if ( FAILED( hr ) ) {
-      errorStream_ << "RtApiDs::stopStream: error (" << getErrorString( hr ) << ") unlocking output buffer!";
-      errorText_ = errorStream_.str();
-      goto finish;
-    }
-
-    // A restart begins writing at the start of the buffer.
-    handle->bufferPointer[0] = 0;
-  }
-
-  if ( stream_.mode == INPUT || stream_.mode == DUPLEX ) {
-    LPDIRECTSOUNDCAPTUREBUFFER captureBuffer = (LPDIRECTSOUNDCAPTUREBUFFER) handle->buffer[1];
-    region = NULL;
-    regionBytes = 0;
-
-    stream_.state = STREAM_STOPPED;
-
-    // In DUPLEX mode the mutex is already held from the output section.
-    if ( stream_.mode != DUPLEX )
-      MUTEX_LOCK( &stream_.mutex );
-
-    // Halt capture.
-    hr = captureBuffer->Stop();
-    if ( FAILED( hr ) ) {
-      errorStream_ << "RtApiDs::stopStream: error (" << getErrorString( hr ) << ") stopping input buffer!";
-      errorText_ = errorStream_.str();
-      goto finish;
-    }
-
-    // Wipe the whole buffer so a later restart does not return stale audio.
-    hr = captureBuffer->Lock( 0, handle->dsBufferSize[1], &region, &regionBytes, NULL, NULL, 0 );
-    if ( FAILED( hr ) ) {
-      errorStream_ << "RtApiDs::stopStream: error (" << getErrorString( hr ) << ") locking input buffer!";
-      errorText_ = errorStream_.str();
-      goto finish;
-    }
-
-    ZeroMemory( region, regionBytes );
-
-    hr = captureBuffer->Unlock( region, regionBytes, NULL, 0 );
-    if ( FAILED( hr ) ) {
-      errorStream_ << "RtApiDs::stopStream: error (" << getErrorString( hr ) << ") unlocking input buffer!";
-      errorText_ = errorStream_.str();
-      goto finish;
-    }
-
-    // A restart begins reading at the start of the buffer.
-    handle->bufferPointer[1] = 0;
-  }
-
- finish:
-  timeEndPeriod( 1 ); // revert to normal scheduler frequency on lesser windows.
-  MUTEX_UNLOCK( &stream_.mutex );
-
-  if ( FAILED( hr ) ) error( RtAudioError::SYSTEM_ERROR );
-}
-
-// Stops the stream without waiting for the output to drain: the drain
-// counter is preset to 2 so that stopStream() skips its wait.  An already
-// stopped stream only produces a warning.
-void RtApiDs :: abortStream()
-{
-  verifyStream();
-  if ( stream_.state != STREAM_STOPPED ) {
-    ( (DsHandle *) stream_.apiHandle )->drainCounter = 2;
-    stopStream();
-    return;
-  }
-
-  errorText_ = "RtApiDs::abortStream(): the stream is already stopped!";
-  error( RtAudioError::WARNING );
-}
-
-// Performs one pass of the DirectSound stream loop; callbackHandler() calls
-// this repeatedly on the stream thread.  Each pass:
-//   - returns early (after a short sleep) while the stream is stopped or
-//     stopping, and finishes a pending drain once enough passes have gone by;
-//   - invokes the user callback for fresh data unless the stream is draining;
-//   - for OUTPUT/DUPLEX, waits until the device has room and copies one
-//     buffer of (converted) output into the playback buffer;
-//   - for INPUT/DUPLEX, copies one buffer from the capture buffer (zeros
-//     during the DUPLEX pre-roll) and converts it for the user;
-//   - advances the stream time.
-// The stream mutex is held from the point it is locked until the function
-// returns; every error path releases it before reporting the error.
-void RtApiDs :: callbackEvent()
-{
-  if ( stream_.state == STREAM_STOPPED || stream_.state == STREAM_STOPPING ) {
-    Sleep( 50 ); // sleep 50 milliseconds
-    return;
-  }
-
-  if ( stream_.state == STREAM_CLOSED ) {
-    errorText_ = "RtApiDs::callbackEvent(): the stream is closed ... this shouldn't happen!";
-    error( RtAudioError::WARNING );
-    return;
-  }
-
-  CallbackInfo *info = (CallbackInfo *) &stream_.callbackInfo;
-  DsHandle *handle = (DsHandle *) stream_.apiHandle;
-
-  // Check if we were draining the stream and signal is finished.
-  if ( handle->drainCounter > stream_.nBuffers + 2 ) {
-
-    stream_.state = STREAM_STOPPING;
-    if ( handle->internalDrain == false )
-      SetEvent( handle->condition );
-    else
-      stopStream();
-    return;
-  }
-
-  // Invoke user callback to get fresh output data UNLESS we are
-  // draining stream.
-  if ( handle->drainCounter == 0 ) {
-    RtAudioCallback callback = (RtAudioCallback) info->callback;
-    double streamTime = getStreamTime();
-    RtAudioStreamStatus status = 0;
-    if ( stream_.mode != INPUT && handle->xrun[0] == true ) {
-      status |= RTAUDIO_OUTPUT_UNDERFLOW;
-      handle->xrun[0] = false;
-    }
-    if ( stream_.mode != OUTPUT && handle->xrun[1] == true ) {
-      status |= RTAUDIO_INPUT_OVERFLOW;
-      handle->xrun[1] = false;
-    }
-    int cbReturnValue = callback( stream_.userBuffer[0], stream_.userBuffer[1],
-                                  stream_.bufferSize, streamTime, status, info->userData );
-    if ( cbReturnValue == 2 ) {
-      // The callback asked for an immediate stop.
-      stream_.state = STREAM_STOPPING;
-      handle->drainCounter = 2;
-      abortStream();
-      return;
-    }
-    else if ( cbReturnValue == 1 ) {
-      // The callback asked for a stop after the output has drained.
-      handle->drainCounter = 1;
-      handle->internalDrain = true;
-    }
-  }
-
-  HRESULT result;
-  DWORD currentWritePointer, safeWritePointer;
-  DWORD currentReadPointer, safeReadPointer;
-  UINT nextWritePointer;
-
-  LPVOID buffer1 = NULL;
-  LPVOID buffer2 = NULL;
-  DWORD bufferSize1 = 0;
-  DWORD bufferSize2 = 0;
-
-  char *buffer;
-  long bufferBytes;
-
-  MUTEX_LOCK( &stream_.mutex );
-  if ( stream_.state == STREAM_STOPPED ) {
-    MUTEX_UNLOCK( &stream_.mutex );
-    return;
-  }
-
-  if ( buffersRolling == false ) {
-    if ( stream_.mode == DUPLEX ) {
-      //assert( handle->dsBufferSize[0] == handle->dsBufferSize[1] );
-
-      // It takes a while for the devices to get rolling. As a result,
-      // there's no guarantee that the capture and write device pointers
-      // will move in lockstep.  Wait here for both devices to start
-      // rolling, and then set our buffer pointers accordingly.
-      // e.g. Crystal Drivers: the capture buffer starts up 5700 to 9600
-      // bytes later than the write buffer.
-
-      // Stub: a serious risk of having a pre-emptive scheduling round
-      // take place between the two GetCurrentPosition calls... but I'm
-      // really not sure how to solve the problem.  Temporarily boost to
-      // Realtime priority, maybe; but I'm not sure what priority the
-      // DirectSound service threads run at. We *should* be roughly
-      // within a ms or so of correct.
-
-      LPDIRECTSOUNDBUFFER dsWriteBuffer = (LPDIRECTSOUNDBUFFER) handle->buffer[0];
-      LPDIRECTSOUNDCAPTUREBUFFER dsCaptureBuffer = (LPDIRECTSOUNDCAPTUREBUFFER) handle->buffer[1];
-
-      DWORD startSafeWritePointer, startSafeReadPointer;
-
-      result = dsWriteBuffer->GetCurrentPosition( NULL, &startSafeWritePointer );
-      if ( FAILED( result ) ) {
-        errorStream_ << "RtApiDs::callbackEvent: error (" << getErrorString( result ) << ") getting current write position!";
-        errorText_ = errorStream_.str();
-        MUTEX_UNLOCK( &stream_.mutex );
-        error( RtAudioError::SYSTEM_ERROR );
-        return;
-      }
-      result = dsCaptureBuffer->GetCurrentPosition( NULL, &startSafeReadPointer );
-      if ( FAILED( result ) ) {
-        errorStream_ << "RtApiDs::callbackEvent: error (" << getErrorString( result ) << ") getting current read position!";
-        errorText_ = errorStream_.str();
-        MUTEX_UNLOCK( &stream_.mutex );
-        error( RtAudioError::SYSTEM_ERROR );
-        return;
-      }
-      // Poll until both positions have moved away from their start values.
-      while ( true ) {
-        result = dsWriteBuffer->GetCurrentPosition( NULL, &safeWritePointer );
-        if ( FAILED( result ) ) {
-          errorStream_ << "RtApiDs::callbackEvent: error (" << getErrorString( result ) << ") getting current write position!";
-          errorText_ = errorStream_.str();
-          MUTEX_UNLOCK( &stream_.mutex );
-          error( RtAudioError::SYSTEM_ERROR );
-          return;
-        }
-        result = dsCaptureBuffer->GetCurrentPosition( NULL, &safeReadPointer );
-        if ( FAILED( result ) ) {
-          errorStream_ << "RtApiDs::callbackEvent: error (" << getErrorString( result ) << ") getting current read position!";
-          errorText_ = errorStream_.str();
-          MUTEX_UNLOCK( &stream_.mutex );
-          error( RtAudioError::SYSTEM_ERROR );
-          return;
-        }
-        if ( safeWritePointer != startSafeWritePointer && safeReadPointer != startSafeReadPointer ) break;
-        Sleep( 1 );
-      }
-
-      //assert( handle->dsBufferSize[0] == handle->dsBufferSize[1] );
-
-      handle->bufferPointer[0] = safeWritePointer + handle->dsPointerLeadTime[0];
-      if ( handle->bufferPointer[0] >= handle->dsBufferSize[0] ) handle->bufferPointer[0] -= handle->dsBufferSize[0];
-      handle->bufferPointer[1] = safeReadPointer;
-    }
-    else if ( stream_.mode == OUTPUT ) {
-
-      // Set the proper nextWritePosition after initial startup.
-      LPDIRECTSOUNDBUFFER dsWriteBuffer = (LPDIRECTSOUNDBUFFER) handle->buffer[0];
-      result = dsWriteBuffer->GetCurrentPosition( &currentWritePointer, &safeWritePointer );
-      if ( FAILED( result ) ) {
-        errorStream_ << "RtApiDs::callbackEvent: error (" << getErrorString( result ) << ") getting current write position!";
-        errorText_ = errorStream_.str();
-        MUTEX_UNLOCK( &stream_.mutex );
-        error( RtAudioError::SYSTEM_ERROR );
-        return;
-      }
-      handle->bufferPointer[0] = safeWritePointer + handle->dsPointerLeadTime[0];
-      if ( handle->bufferPointer[0] >= handle->dsBufferSize[0] ) handle->bufferPointer[0] -= handle->dsBufferSize[0];
-    }
-
-    buffersRolling = true;
-  }
-
-  if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
-
-    LPDIRECTSOUNDBUFFER dsBuffer = (LPDIRECTSOUNDBUFFER) handle->buffer[0];
-
-    if ( handle->drainCounter > 1 ) { // write zeros to the output stream
-      bufferBytes = stream_.bufferSize * stream_.nUserChannels[0];
-      bufferBytes *= formatBytes( stream_.userFormat );
-      memset( stream_.userBuffer[0], 0, bufferBytes );
-    }
-
-    // Setup parameters and do buffer conversion if necessary.
-    if ( stream_.doConvertBuffer[0] ) {
-      buffer = stream_.deviceBuffer;
-      convertBuffer( buffer, stream_.userBuffer[0], stream_.convertInfo[0] );
-      bufferBytes = stream_.bufferSize * stream_.nDeviceChannels[0];
-      bufferBytes *= formatBytes( stream_.deviceFormat[0] );
-    }
-    else {
-      buffer = stream_.userBuffer[0];
-      bufferBytes = stream_.bufferSize * stream_.nUserChannels[0];
-      bufferBytes *= formatBytes( stream_.userFormat );
-    }
-
-    // No byte swapping necessary in DirectSound implementation.
-
-    // Ahhh ... windoze.  16-bit data is signed but 8-bit data is
-    // unsigned.  So, we need to convert our signed 8-bit data here to
-    // unsigned.
-    if ( stream_.deviceFormat[0] == RTAUDIO_SINT8 )
-      for ( int i=0; i<bufferBytes; i++ ) buffer[i] = (unsigned char) ( buffer[i] + 128 );
-
-    DWORD dsBufferSize = handle->dsBufferSize[0];
-    nextWritePointer = handle->bufferPointer[0];
-
-    DWORD endWrite, leadPointer;
-    while ( true ) {
-      // Find out where the read and "safe write" pointers are.
-      result = dsBuffer->GetCurrentPosition( &currentWritePointer, &safeWritePointer );
-      if ( FAILED( result ) ) {
-        errorStream_ << "RtApiDs::callbackEvent: error (" << getErrorString( result ) << ") getting current write position!";
-        errorText_ = errorStream_.str();
-        // The stream mutex is held here; release it before reporting, as
-        // every other error path in this function does.
-        MUTEX_UNLOCK( &stream_.mutex );
-        error( RtAudioError::SYSTEM_ERROR );
-        return;
-      }
-
-      // We will copy our output buffer into the region between
-      // safeWritePointer and leadPointer.  If leadPointer is not
-      // beyond the next endWrite position, wait until it is.
-      leadPointer = safeWritePointer + handle->dsPointerLeadTime[0];
-      //std::cout << "safeWritePointer = " << safeWritePointer << ", leadPointer = " << leadPointer << ", nextWritePointer = " << nextWritePointer << std::endl;
-      if ( leadPointer > dsBufferSize ) leadPointer -= dsBufferSize;
-      if ( leadPointer < nextWritePointer ) leadPointer += dsBufferSize; // unwrap offset
-      endWrite = nextWritePointer + bufferBytes;
-
-      // Check whether the entire write region is behind the play pointer.
-      if ( leadPointer >= endWrite ) break;
-
-      // If we are here, then we must wait until the leadPointer advances
-      // beyond the end of our next write region. We use the
-      // Sleep() function to suspend operation until that happens.
-      double millis = ( endWrite - leadPointer ) * 1000.0;
-      millis /= ( formatBytes( stream_.deviceFormat[0]) * stream_.nDeviceChannels[0] * stream_.sampleRate);
-      if ( millis < 1.0 ) millis = 1.0;
-      Sleep( (DWORD) millis );
-    }
-
-    if ( dsPointerBetween( nextWritePointer, safeWritePointer, currentWritePointer, dsBufferSize )
-         || dsPointerBetween( endWrite, safeWritePointer, currentWritePointer, dsBufferSize ) ) {
-      // We've strayed into the forbidden zone ... resync the read pointer.
-      handle->xrun[0] = true;
-      nextWritePointer = safeWritePointer + handle->dsPointerLeadTime[0] - bufferBytes;
-      if ( nextWritePointer >= dsBufferSize ) nextWritePointer -= dsBufferSize;
-      handle->bufferPointer[0] = nextWritePointer;
-      endWrite = nextWritePointer + bufferBytes;
-    }
-
-    // Lock free space in the buffer
-    result = dsBuffer->Lock( nextWritePointer, bufferBytes, &buffer1,
-                             &bufferSize1, &buffer2, &bufferSize2, 0 );
-    if ( FAILED( result ) ) {
-      errorStream_ << "RtApiDs::callbackEvent: error (" << getErrorString( result ) << ") locking buffer during playback!";
-      errorText_ = errorStream_.str();
-      MUTEX_UNLOCK( &stream_.mutex );
-      error( RtAudioError::SYSTEM_ERROR );
-      return;
-    }
-
-    // Copy our buffer into the DS buffer
-    CopyMemory( buffer1, buffer, bufferSize1 );
-    if ( buffer2 != NULL ) CopyMemory( buffer2, buffer+bufferSize1, bufferSize2 );
-
-    // Update our buffer offset and unlock sound buffer.  Capture the return
-    // value so that the check below tests this call, not the earlier Lock().
-    result = dsBuffer->Unlock( buffer1, bufferSize1, buffer2, bufferSize2 );
-    if ( FAILED( result ) ) {
-      errorStream_ << "RtApiDs::callbackEvent: error (" << getErrorString( result ) << ") unlocking buffer during playback!";
-      errorText_ = errorStream_.str();
-      MUTEX_UNLOCK( &stream_.mutex );
-      error( RtAudioError::SYSTEM_ERROR );
-      return;
-    }
-    nextWritePointer = ( nextWritePointer + bufferSize1 + bufferSize2 ) % dsBufferSize;
-    handle->bufferPointer[0] = nextWritePointer;
-  }
-
-  // Don't bother draining input
-  if ( handle->drainCounter ) {
-    handle->drainCounter++;
-    goto unlock;
-  }
-
-  if ( stream_.mode == INPUT || stream_.mode == DUPLEX ) {
-
-    // Setup parameters.
-    if ( stream_.doConvertBuffer[1] ) {
-      buffer = stream_.deviceBuffer;
-      bufferBytes = stream_.bufferSize * stream_.nDeviceChannels[1];
-      bufferBytes *= formatBytes( stream_.deviceFormat[1] );
-    }
-    else {
-      buffer = stream_.userBuffer[1];
-      bufferBytes = stream_.bufferSize * stream_.nUserChannels[1];
-      bufferBytes *= formatBytes( stream_.userFormat );
-    }
-
-    LPDIRECTSOUNDCAPTUREBUFFER dsBuffer = (LPDIRECTSOUNDCAPTUREBUFFER) handle->buffer[1];
-    long nextReadPointer = handle->bufferPointer[1];
-    DWORD dsBufferSize = handle->dsBufferSize[1];
-
-    // Find out where the write and "safe read" pointers are.
-    result = dsBuffer->GetCurrentPosition( &currentReadPointer, &safeReadPointer );
-    if ( FAILED( result ) ) {
-      errorStream_ << "RtApiDs::callbackEvent: error (" << getErrorString( result ) << ") getting current read position!";
-      errorText_ = errorStream_.str();
-      MUTEX_UNLOCK( &stream_.mutex );
-      error( RtAudioError::SYSTEM_ERROR );
-      return;
-    }
-
-    if ( safeReadPointer < (DWORD)nextReadPointer ) safeReadPointer += dsBufferSize; // unwrap offset
-    DWORD endRead = nextReadPointer + bufferBytes;
-
-    // Handling depends on whether we are INPUT or DUPLEX.
-    // If we're in INPUT mode then waiting is a good thing. If we're in DUPLEX mode,
-    // then a wait here will drag the write pointers into the forbidden zone.
-    //
-    // In DUPLEX mode, rather than wait, we will back off the read pointer until
-    // it's in a safe position. This causes dropouts, but it seems to be the only
-    // practical way to sync up the read and write pointers reliably, given the
-    // very complex relationship between phase and increment of the read and write
-    // pointers.
-    //
-    // In order to minimize audible dropouts in DUPLEX mode, we will
-    // provide a pre-roll period of 0.5 seconds in which we return
-    // zeros from the read buffer while the pointers sync up.
-
-    if ( stream_.mode == DUPLEX ) {
-      if ( safeReadPointer < endRead ) {
-        if ( duplexPrerollBytes <= 0 ) {
-          // Pre-roll time over. Be more aggressive.
-          int adjustment = endRead-safeReadPointer;
-
-          handle->xrun[1] = true;
-          // Two cases:
-          //   - large adjustments: we've probably run out of CPU cycles, so just resync exactly,
-          //     and perform fine adjustments later.
-          //   - small adjustments: back off by twice as much.
-          if ( adjustment >= 2*bufferBytes )
-            nextReadPointer = safeReadPointer-2*bufferBytes;
-          else
-            nextReadPointer = safeReadPointer-bufferBytes-adjustment;
-
-          if ( nextReadPointer < 0 ) nextReadPointer += dsBufferSize;
-
-        }
-        else {
-          // In pre-roll time. Just do it.
-          nextReadPointer = safeReadPointer - bufferBytes;
-          while ( nextReadPointer < 0 ) nextReadPointer += dsBufferSize;
-        }
-        endRead = nextReadPointer + bufferBytes;
-      }
-    }
-    else { // mode == INPUT
-      while ( safeReadPointer < endRead && stream_.callbackInfo.isRunning ) {
-        // See comments for playback.
-        double millis = (endRead - safeReadPointer) * 1000.0;
-        millis /= ( formatBytes(stream_.deviceFormat[1]) * stream_.nDeviceChannels[1] * stream_.sampleRate);
-        if ( millis < 1.0 ) millis = 1.0;
-        Sleep( (DWORD) millis );
-
-        // Wake up and find out where we are now.
-        result = dsBuffer->GetCurrentPosition( &currentReadPointer, &safeReadPointer );
-        if ( FAILED( result ) ) {
-          errorStream_ << "RtApiDs::callbackEvent: error (" << getErrorString( result ) << ") getting current read position!";
-          errorText_ = errorStream_.str();
-          MUTEX_UNLOCK( &stream_.mutex );
-          error( RtAudioError::SYSTEM_ERROR );
-          return;
-        }
-
-        if ( safeReadPointer < (DWORD)nextReadPointer ) safeReadPointer += dsBufferSize; // unwrap offset
-      }
-    }
-
-    // Lock free space in the buffer
-    result = dsBuffer->Lock( nextReadPointer, bufferBytes, &buffer1,
-                             &bufferSize1, &buffer2, &bufferSize2, 0 );
-    if ( FAILED( result ) ) {
-      errorStream_ << "RtApiDs::callbackEvent: error (" << getErrorString( result ) << ") locking capture buffer!";
-      errorText_ = errorStream_.str();
-      MUTEX_UNLOCK( &stream_.mutex );
-      error( RtAudioError::SYSTEM_ERROR );
-      return;
-    }
-
-    if ( duplexPrerollBytes <= 0 ) {
-      // Copy the captured data out of the DS buffer
-      CopyMemory( buffer, buffer1, bufferSize1 );
-      if ( buffer2 != NULL ) CopyMemory( buffer+bufferSize1, buffer2, bufferSize2 );
-    }
-    else {
-      // Still in the pre-roll period: hand back silence instead.
-      memset( buffer, 0, bufferSize1 );
-      if ( buffer2 != NULL ) memset( buffer + bufferSize1, 0, bufferSize2 );
-      duplexPrerollBytes -= bufferSize1 + bufferSize2;
-    }
-
-    // Update our buffer offset and unlock sound buffer.  Capture the return
-    // value so that the check below tests this call, not the earlier Lock().
-    nextReadPointer = ( nextReadPointer + bufferSize1 + bufferSize2 ) % dsBufferSize;
-    result = dsBuffer->Unlock( buffer1, bufferSize1, buffer2, bufferSize2 );
-    if ( FAILED( result ) ) {
-      errorStream_ << "RtApiDs::callbackEvent: error (" << getErrorString( result ) << ") unlocking capture buffer!";
-      errorText_ = errorStream_.str();
-      MUTEX_UNLOCK( &stream_.mutex );
-      error( RtAudioError::SYSTEM_ERROR );
-      return;
-    }
-    handle->bufferPointer[1] = nextReadPointer;
-
-    // No byte swapping necessary in DirectSound implementation.
-
-    // If necessary, convert 8-bit data from unsigned to signed.
-    if ( stream_.deviceFormat[1] == RTAUDIO_SINT8 )
-      for ( int j=0; j<bufferBytes; j++ ) buffer[j] = (signed char) ( buffer[j] - 128 );
-
-    // Do buffer conversion if necessary.
-    if ( stream_.doConvertBuffer[1] )
-      convertBuffer( stream_.userBuffer[1], stream_.deviceBuffer, stream_.convertInfo[1] );
-  }
-
- unlock:
-  MUTEX_UNLOCK( &stream_.mutex );
-  RtApi::tickStreamTime();
-}
-
-// Definitions for utility functions and callbacks
-// specific to the DirectSound implementation.
-
-// Thread entry point for the DirectSound stream thread.  ptr is the stream's
-// CallbackInfo; the owning RtApiDs object is serviced until the isRunning
-// flag in that structure is cleared.
-static unsigned __stdcall callbackHandler( void *ptr )
-{
-  CallbackInfo *info = (CallbackInfo *) ptr;
-  RtApiDs *api = (RtApiDs *) info->object;
-
-  for ( ;; ) {
-    if ( !info->isRunning ) break;
-    api->callbackEvent();
-  }
-
-  _endthreadex( 0 );
-  return 0;
-}
-
-// Device enumeration callback.  lpContext points to a DsProbeData whose
-// isInput flag selects capture or playback probing and whose dsDevices list
-// collects the results.  A device counts as valid when it can be created and
-// its capabilities report at least one channel and format (capture) or
-// primary mono/stereo support (playback).  Valid devices are merged into the
-// list by name, storing the GUID in slot 1 (capture) or slot 0 (playback).
-// Always returns TRUE.
-static BOOL CALLBACK deviceQueryCallback( LPGUID lpguid,
-                                          LPCTSTR description,
-                                          LPCTSTR /*module*/,
-                                          LPVOID lpContext )
-{
-  DsProbeData *probe = (DsProbeData *) lpContext;
-  std::vector<struct DsDevice> &known = *probe->dsDevices;
-  const bool wantInput = probe->isInput;
-  const int slot = wantInput ? 1 : 0;
-
-  bool usable = false;
-  if ( wantInput ) {
-    LPDIRECTSOUNDCAPTURE capture;
-    if ( DirectSoundCaptureCreate( lpguid, &capture, NULL ) != DS_OK ) return TRUE;
-
-    DSCCAPS captureCaps;
-    captureCaps.dwSize = sizeof(captureCaps);
-    if ( capture->GetCaps( &captureCaps ) == DS_OK )
-      usable = ( captureCaps.dwChannels > 0 && captureCaps.dwFormats > 0 );
-    capture->Release();
-  }
-  else {
-    LPDIRECTSOUND playback;
-    if ( DirectSoundCreate( lpguid, &playback, NULL ) != DS_OK ) return TRUE;
-
-    DSCAPS playbackCaps;
-    playbackCaps.dwSize = sizeof(playbackCaps);
-    if ( playback->GetCaps( &playbackCaps ) == DS_OK )
-      usable = ( ( playbackCaps.dwFlags & DSCAPS_PRIMARYMONO ) || ( playbackCaps.dwFlags & DSCAPS_PRIMARYSTEREO ) );
-    playback->Release();
-  }
-
-  // A NULL GUID identifies the default device, which gets a fixed name.
-  std::string name = convertCharPointerToStdString( description );
-  //if ( name == "Primary Sound Driver" || name == "Primary Sound Capture Driver" )
-  if ( lpguid == NULL )
-    name = "Default Device";
-
-  if ( !usable ) return TRUE;
-
-  // Update the entry of a device already known under this name ...
-  for ( unsigned int k=0; k<known.size(); k++ ) {
-    if ( !( known[k].name == name ) ) continue;
-    known[k].found = true;
-    known[k].id[slot] = lpguid;
-    known[k].validId[slot] = true;
-    return TRUE;
-  }
-
-  // ... or append a new entry for it.
-  DsDevice fresh;
-  fresh.name = name;
-  fresh.found = true;
-  fresh.id[slot] = lpguid;
-  fresh.validId[slot] = true;
-  known.push_back( fresh );
-
-  return TRUE;
-}
-
-// Returns a short description of a DirectSound result code, or
-// "DirectSound unknown error" for a code that is not in the table.
-static const char* getErrorString( int code )
-{
-  static const struct { int value; const char *text; } known[] = {
-    { (int) DSERR_ALLOCATED,          "Already allocated" },
-    { (int) DSERR_CONTROLUNAVAIL,     "Control unavailable" },
-    { (int) DSERR_INVALIDPARAM,       "Invalid parameter" },
-    { (int) DSERR_INVALIDCALL,        "Invalid call" },
-    { (int) DSERR_GENERIC,            "Generic error" },
-    { (int) DSERR_PRIOLEVELNEEDED,    "Priority level needed" },
-    { (int) DSERR_OUTOFMEMORY,        "Out of memory" },
-    { (int) DSERR_BADFORMAT,          "The sample rate or the channel format is not supported" },
-    { (int) DSERR_UNSUPPORTED,        "Not supported" },
-    { (int) DSERR_NODRIVER,           "No driver" },
-    { (int) DSERR_ALREADYINITIALIZED, "Already initialized" },
-    { (int) DSERR_NOAGGREGATION,      "No aggregation" },
-    { (int) DSERR_BUFFERLOST,         "Buffer lost" },
-    { (int) DSERR_OTHERAPPHASPRIO,    "Another application already has priority" },
-    { (int) DSERR_UNINITIALIZED,      "Uninitialized" }
-  };
-
-  for ( unsigned int k=0; k<sizeof(known)/sizeof(known[0]); k++ ) {
-    if ( known[k].value == code ) return known[k].text;
-  }
-
-  return "DirectSound unknown error";
-}
-//******************** End of __WINDOWS_DS__ *********************//
-#endif
-
-
-#if defined(__LINUX_ALSA__)
-
-#include <alsa/asoundlib.h>
-#include <unistd.h>
-
-// A structure to hold various information related to the ALSA API
-// implementation.
-struct AlsaHandle {
-  snd_pcm_t *handles[2];        // PCM handles, null until assigned (presumably [0] playback, [1] capture -- confirm against probeDeviceOpen)
-  bool synchronized;
-  bool xrun[2];
-  pthread_cond_t runnable_cv;   // NOTE: not initialized by the constructor
-  bool runnable;
-
-  // Start with null PCM handles and all flags cleared, so that no member
-  // other than runnable_cv holds an indeterminate value.
-  AlsaHandle()
-    :synchronized(false), runnable(false) {
-    handles[0] = 0; handles[1] = 0;
-    xrun[0] = false; xrun[1] = false;
-  }
-};
-
-static void *alsaCallbackHandler( void * ptr );
-
-RtApiAlsa :: RtApiAlsa()
-{
-  // No ALSA-specific setup is performed at construction time.
-}
-
-RtApiAlsa :: ~RtApiAlsa()
-{
-  // Close a stream that is still open when the object goes away.
-  if ( stream_.state == STREAM_CLOSED ) return;
-  closeStream();
-}
-
-// Returns the number of ALSA PCM devices: every PCM device reported by the
-// control interface of each sound card ("hw:<card>"), plus one for the
-// "default" device when its control interface can be opened.  A card whose
-// control interface cannot be opened, or whose device enumeration fails,
-// produces a warning and is skipped.
-unsigned int RtApiAlsa :: getDeviceCount( void )
-{
-  unsigned nDevices = 0;
-  int result, subdevice, card;
-  char name[64];
-  snd_ctl_t *handle = 0;
-
-  // Count cards and devices
-  card = -1;
-  snd_card_next( &card );
-  while ( card >= 0 ) {
-    sprintf( name, "hw:%d", card );
-    result = snd_ctl_open( &handle, name, 0 );
-    if ( result < 0 ) {
-      // Nothing was opened: make sure the cleanup below does not close a
-      // stale or uninitialized handle.
-      handle = 0;
-      errorStream_ << "RtApiAlsa::getDeviceCount: control open, card = " << card << ", " << snd_strerror( result ) << ".";
-      errorText_ = errorStream_.str();
-      error( RtAudioError::WARNING );
-      goto nextcard;
-    }
-    subdevice = -1;
-    while( 1 ) {
-      result = snd_ctl_pcm_next_device( handle, &subdevice );
-      if ( result < 0 ) {
-        errorStream_ << "RtApiAlsa::getDeviceCount: control next device, card = " << card << ", " << snd_strerror( result ) << ".";
-        errorText_ = errorStream_.str();
-        error( RtAudioError::WARNING );
-        break;
-      }
-      if ( subdevice < 0 )
-        break;
-      nDevices++;
-    }
-  nextcard:
-    if ( handle ) {
-      snd_ctl_close( handle );
-      handle = 0;
-    }
-    snd_card_next( &card );
-  }
-
-  result = snd_ctl_open( &handle, "default", 0 );
-  if (result == 0) {
-    nDevices++;
-    snd_ctl_close( handle );
-  }
-
-  return nDevices;
-}
-
-RtAudio::DeviceInfo RtApiAlsa :: getDeviceInfo( unsigned int device )
-{
- RtAudio::DeviceInfo info;
- info.probed = false;
-
- unsigned nDevices = 0;
- int result, subdevice, card;
- char name[64];
- snd_ctl_t *chandle;
-
- // Count cards and devices
- card = -1;
- subdevice = -1;
- snd_card_next( &card );
- while ( card >= 0 ) {
- sprintf( name, "hw:%d", card );
- result = snd_ctl_open( &chandle, name, SND_CTL_NONBLOCK );
- if ( result < 0 ) {
- errorStream_ << "RtApiAlsa::getDeviceInfo: control open, card = " << card << ", " << snd_strerror( result ) << ".";
- errorText_ = errorStream_.str();
- error( RtAudioError::WARNING );
- goto nextcard;
- }
- subdevice = -1;
- while( 1 ) {
- result = snd_ctl_pcm_next_device( chandle, &subdevice );
- if ( result < 0 ) {
- errorStream_ << "RtApiAlsa::getDeviceInfo: control next device, card = " << card << ", " << snd_strerror( result ) << ".";
- errorText_ = errorStream_.str();
- error( RtAudioError::WARNING );
- break;
- }
- if ( subdevice < 0 ) break;
- if ( nDevices == device ) {
- sprintf( name, "hw:%d,%d", card, subdevice );
- goto foundDevice;
- }
- nDevices++;
- }
- nextcard:
- snd_ctl_close( chandle );
- snd_card_next( &card );
- }
-
- result = snd_ctl_open( &chandle, "default", SND_CTL_NONBLOCK );
- if ( result == 0 ) {
- if ( nDevices == device ) {
- strcpy( name, "default" );
- goto foundDevice;
- }
- nDevices++;
- }
-
- if ( nDevices == 0 ) {
- errorText_ = "RtApiAlsa::getDeviceInfo: no devices found!";
- error( RtAudioError::INVALID_USE );
- return info;
- }
-
- if ( device >= nDevices ) {
- errorText_ = "RtApiAlsa::getDeviceInfo: device ID is invalid!";
- error( RtAudioError::INVALID_USE );
- return info;
- }
-
- foundDevice:
-
- // If a stream is already open, we cannot probe the stream devices.
- // Thus, use the saved results.
- if ( stream_.state != STREAM_CLOSED &&
- ( stream_.device[0] == device || stream_.device[1] == device ) ) {
- snd_ctl_close( chandle );
- if ( device >= devices_.size() ) {
- errorText_ = "RtApiAlsa::getDeviceInfo: device ID was not present before stream was opened.";
- error( RtAudioError::WARNING );
- return info;
- }
- return devices_[ device ];
- }
-
- int openMode = SND_PCM_ASYNC;
- snd_pcm_stream_t stream;
- snd_pcm_info_t *pcminfo;
- snd_pcm_info_alloca( &pcminfo );
- snd_pcm_t *phandle;
- snd_pcm_hw_params_t *params;
- snd_pcm_hw_params_alloca( ¶ms );
-
- // First try for playback unless default device (which has subdev -1)
- stream = SND_PCM_STREAM_PLAYBACK;
- snd_pcm_info_set_stream( pcminfo, stream );
- if ( subdevice != -1 ) {
- snd_pcm_info_set_device( pcminfo, subdevice );
- snd_pcm_info_set_subdevice( pcminfo, 0 );
-
- result = snd_ctl_pcm_info( chandle, pcminfo );
- if ( result < 0 ) {
- // Device probably doesn't support playback.
- goto captureProbe;
- }
- }
-
- result = snd_pcm_open( &phandle, name, stream, openMode | SND_PCM_NONBLOCK );
- if ( result < 0 ) {
- errorStream_ << "RtApiAlsa::getDeviceInfo: snd_pcm_open error for device (" << name << "), " << snd_strerror( result ) << ".";
- errorText_ = errorStream_.str();
- error( RtAudioError::WARNING );
- goto captureProbe;
- }
-
- // The device is open ... fill the parameter structure.
- result = snd_pcm_hw_params_any( phandle, params );
- if ( result < 0 ) {
- snd_pcm_close( phandle );
- errorStream_ << "RtApiAlsa::getDeviceInfo: snd_pcm_hw_params error for device (" << name << "), " << snd_strerror( result ) << ".";
- errorText_ = errorStream_.str();
- error( RtAudioError::WARNING );
- goto captureProbe;
- }
-
- // Get output channel information.
- unsigned int value;
- result = snd_pcm_hw_params_get_channels_max( params, &value );
- if ( result < 0 ) {
- snd_pcm_close( phandle );
- errorStream_ << "RtApiAlsa::getDeviceInfo: error getting device (" << name << ") output channels, " << snd_strerror( result ) << ".";
- errorText_ = errorStream_.str();
- error( RtAudioError::WARNING );
- goto captureProbe;
- }
- info.outputChannels = value;
- snd_pcm_close( phandle );
-
- captureProbe:
- stream = SND_PCM_STREAM_CAPTURE;
- snd_pcm_info_set_stream( pcminfo, stream );
-
- // Now try for capture unless default device (with subdev = -1)
- if ( subdevice != -1 ) {
- result = snd_ctl_pcm_info( chandle, pcminfo );
- snd_ctl_close( chandle );
- if ( result < 0 ) {
- // Device probably doesn't support capture.
- if ( info.outputChannels == 0 ) return info;
- goto probeParameters;
- }
- }
- else
- snd_ctl_close( chandle );
-
- result = snd_pcm_open( &phandle, name, stream, openMode | SND_PCM_NONBLOCK);
- if ( result < 0 ) {
- errorStream_ << "RtApiAlsa::getDeviceInfo: snd_pcm_open error for device (" << name << "), " << snd_strerror( result ) << ".";
- errorText_ = errorStream_.str();
- error( RtAudioError::WARNING );
- if ( info.outputChannels == 0 ) return info;
- goto probeParameters;
- }
-
- // The device is open ... fill the parameter structure.
- result = snd_pcm_hw_params_any( phandle, params );
- if ( result < 0 ) {
- snd_pcm_close( phandle );
- errorStream_ << "RtApiAlsa::getDeviceInfo: snd_pcm_hw_params error for device (" << name << "), " << snd_strerror( result ) << ".";
- errorText_ = errorStream_.str();
- error( RtAudioError::WARNING );
- if ( info.outputChannels == 0 ) return info;
- goto probeParameters;
- }
-
- result = snd_pcm_hw_params_get_channels_max( params, &value );
- if ( result < 0 ) {
- snd_pcm_close( phandle );
- errorStream_ << "RtApiAlsa::getDeviceInfo: error getting device (" << name << ") input channels, " << snd_strerror( result ) << ".";
- errorText_ = errorStream_.str();
- error( RtAudioError::WARNING );
- if ( info.outputChannels == 0 ) return info;
- goto probeParameters;
- }
- info.inputChannels = value;
- snd_pcm_close( phandle );
-
- // If device opens for both playback and capture, we determine the channels.
- if ( info.outputChannels > 0 && info.inputChannels > 0 )
- info.duplexChannels = (info.outputChannels > info.inputChannels) ? info.inputChannels : info.outputChannels;
-
- // ALSA doesn't provide default devices so we'll use the first available one.
- if ( device == 0 && info.outputChannels > 0 )
- info.isDefaultOutput = true;
- if ( device == 0 && info.inputChannels > 0 )
- info.isDefaultInput = true;
-
- probeParameters:
- // At this point, we just need to figure out the supported data
- // formats and sample rates. We'll proceed by opening the device in
- // the direction with the maximum number of channels, or playback if
- // they are equal. This might limit our sample rate options, but so
- // be it.
-
- if ( info.outputChannels >= info.inputChannels )
- stream = SND_PCM_STREAM_PLAYBACK;
- else
- stream = SND_PCM_STREAM_CAPTURE;
- snd_pcm_info_set_stream( pcminfo, stream );
-
- result = snd_pcm_open( &phandle, name, stream, openMode | SND_PCM_NONBLOCK);
- if ( result < 0 ) {
- errorStream_ << "RtApiAlsa::getDeviceInfo: snd_pcm_open error for device (" << name << "), " << snd_strerror( result ) << ".";
- errorText_ = errorStream_.str();
- error( RtAudioError::WARNING );
- return info;
- }
-
- // The device is open ... fill the parameter structure.
- result = snd_pcm_hw_params_any( phandle, params );
- if ( result < 0 ) {
- snd_pcm_close( phandle );
- errorStream_ << "RtApiAlsa::getDeviceInfo: snd_pcm_hw_params error for device (" << name << "), " << snd_strerror( result ) << ".";
- errorText_ = errorStream_.str();
- error( RtAudioError::WARNING );
- return info;
- }
-
- // Test our discrete set of sample rate values.
- info.sampleRates.clear();
- for ( unsigned int i=0; i<MAX_SAMPLE_RATES; i++ ) {
- if ( snd_pcm_hw_params_test_rate( phandle, params, SAMPLE_RATES[i], 0 ) == 0 ) {
- info.sampleRates.push_back( SAMPLE_RATES[i] );
-
- if ( !info.preferredSampleRate || ( SAMPLE_RATES[i] <= 48000 && SAMPLE_RATES[i] > info.preferredSampleRate ) )
- info.preferredSampleRate = SAMPLE_RATES[i];
- }
- }
- if ( info.sampleRates.size() == 0 ) {
- snd_pcm_close( phandle );
- errorStream_ << "RtApiAlsa::getDeviceInfo: no supported sample rates found for device (" << name << ").";
- errorText_ = errorStream_.str();
- error( RtAudioError::WARNING );
- return info;
- }
-
- // Probe the supported data formats ... we don't care about endian-ness just yet
- snd_pcm_format_t format;
- info.nativeFormats = 0;
- format = SND_PCM_FORMAT_S8;
- if ( snd_pcm_hw_params_test_format( phandle, params, format ) == 0 )
- info.nativeFormats |= RTAUDIO_SINT8;
- format = SND_PCM_FORMAT_S16;
- if ( snd_pcm_hw_params_test_format( phandle, params, format ) == 0 )
- info.nativeFormats |= RTAUDIO_SINT16;
- format = SND_PCM_FORMAT_S24;
- if ( snd_pcm_hw_params_test_format( phandle, params, format ) == 0 )
- info.nativeFormats |= RTAUDIO_SINT24;
- format = SND_PCM_FORMAT_S32;
- if ( snd_pcm_hw_params_test_format( phandle, params, format ) == 0 )
- info.nativeFormats |= RTAUDIO_SINT32;
- format = SND_PCM_FORMAT_FLOAT;
- if ( snd_pcm_hw_params_test_format( phandle, params, format ) == 0 )
- info.nativeFormats |= RTAUDIO_FLOAT32;
- format = SND_PCM_FORMAT_FLOAT64;
- if ( snd_pcm_hw_params_test_format( phandle, params, format ) == 0 )
- info.nativeFormats |= RTAUDIO_FLOAT64;
-
- // Check that we have at least one supported format
- if ( info.nativeFormats == 0 ) {
- snd_pcm_close( phandle );
- errorStream_ << "RtApiAlsa::getDeviceInfo: pcm device (" << name << ") data format not supported by RtAudio.";
- errorText_ = errorStream_.str();
- error( RtAudioError::WARNING );
- return info;
- }
-
- // Get the device name
- char *cardname;
- result = snd_card_get_name( card, &cardname );
- if ( result >= 0 ) {
- sprintf( name, "hw:%s,%d", cardname, subdevice );
- free( cardname );
- }
- info.name = name;
-
- // That's all ... close the device and return
- snd_pcm_close( phandle );
- info.probed = true;
- return info;
-}
-
-void RtApiAlsa :: saveDeviceInfo( void ) // Rebuild the devices_ cache: one getDeviceInfo() result per device index.
-{
- devices_.clear(); // drop any previously cached entries
-
- unsigned int nDevices = getDeviceCount();
- devices_.resize( nDevices );
- for ( unsigned int i=0; i<nDevices; i++ )
- devices_[i] = getDeviceInfo( i ); // probe each device and store the result at its index
-}
-
-bool RtApiAlsa :: probeDeviceOpen( unsigned int device, StreamMode mode, unsigned int channels, // Open and configure one ALSA PCM device for the given mode; returns SUCCESS, or FAILURE with errorText_ set.
- unsigned int firstChannel, unsigned int sampleRate, // channels + firstChannel must not exceed the device channel maximum (checked below).
- RtAudioFormat format, unsigned int *bufferSize, // *bufferSize is in/out: requested period size in frames, overwritten with the size ALSA accepted.
- RtAudio::StreamOptions *options ) // options may be null; flags, numberOfBuffers and priority are read only when it is non-null.
-
-{
-#if defined(__RTAUDIO_DEBUG__)
- snd_output_t *out;
- snd_output_stdio_attach(&out, stderr, 0);
-#endif
-
- // I'm not using the "plug" interface ... too much inconsistent behavior.
-
- unsigned nDevices = 0; // running count of PCM subdevices seen so far, across all cards
- int result, subdevice, card;
- char name[64]; // ALSA device name string built below
- snd_ctl_t *chandle;
-
- if ( options && options->flags & RTAUDIO_ALSA_USE_DEFAULT )
- snprintf(name, sizeof(name), "%s", "default"); // caller asked for the ALSA default device; skip enumeration
- else {
- // Count cards and devices
- card = -1;
- snd_card_next( &card );
- while ( card >= 0 ) {
- sprintf( name, "hw:%d", card );
- result = snd_ctl_open( &chandle, name, SND_CTL_NONBLOCK );
- if ( result < 0 ) {
- errorStream_ << "RtApiAlsa::probeDeviceOpen: control open, card = " << card << ", " << snd_strerror( result ) << ".";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
- subdevice = -1;
- while( 1 ) {
- result = snd_ctl_pcm_next_device( chandle, &subdevice );
- if ( result < 0 ) break;
- if ( subdevice < 0 ) break;
- if ( nDevices == device ) { // reached the requested device index
- sprintf( name, "hw:%d,%d", card, subdevice );
- snd_ctl_close( chandle );
- goto foundDevice;
- }
- nDevices++;
- }
- snd_ctl_close( chandle );
- snd_card_next( &card );
- }
-
- result = snd_ctl_open( &chandle, "default", SND_CTL_NONBLOCK ); // NOTE(review): no snd_ctl_close() for this handle is visible below; possible leak, confirm.
- if ( result == 0 ) { // the default device counts as one extra index after all hw devices
- if ( nDevices == device ) {
- strcpy( name, "default" );
- goto foundDevice;
- }
- nDevices++;
- }
-
- if ( nDevices == 0 ) {
- // This should not happen because a check is made before this function is called.
- errorText_ = "RtApiAlsa::probeDeviceOpen: no devices found!";
- return FAILURE;
- }
-
- if ( device >= nDevices ) {
- // This should not happen because a check is made before this function is called.
- errorText_ = "RtApiAlsa::probeDeviceOpen: device ID is invalid!";
- return FAILURE;
- }
- }
-
- foundDevice:
-
- // The getDeviceInfo() function will not work for a device that is
- // already open. Thus, we'll probe the system before opening a
- // stream and save the results for use by getDeviceInfo().
- if ( mode == OUTPUT || ( mode == INPUT && stream_.mode != OUTPUT ) ) // only do once
- this->saveDeviceInfo();
-
- snd_pcm_stream_t stream;
- if ( mode == OUTPUT )
- stream = SND_PCM_STREAM_PLAYBACK;
- else
- stream = SND_PCM_STREAM_CAPTURE;
-
- snd_pcm_t *phandle;
- int openMode = SND_PCM_ASYNC;
- result = snd_pcm_open( &phandle, name, stream, openMode );
- if ( result < 0 ) {
- if ( mode == OUTPUT )
- errorStream_ << "RtApiAlsa::probeDeviceOpen: pcm device (" << name << ") won't open for output.";
- else
- errorStream_ << "RtApiAlsa::probeDeviceOpen: pcm device (" << name << ") won't open for input.";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
-
- // Fill the parameter structure.
- snd_pcm_hw_params_t *hw_params;
- snd_pcm_hw_params_alloca( &hw_params );
- result = snd_pcm_hw_params_any( phandle, hw_params );
- if ( result < 0 ) {
- snd_pcm_close( phandle );
- errorStream_ << "RtApiAlsa::probeDeviceOpen: error getting pcm device (" << name << ") parameters, " << snd_strerror( result ) << ".";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
-
-#if defined(__RTAUDIO_DEBUG__)
- fprintf( stderr, "\nRtApiAlsa: dump hardware params just after device open:\n\n" );
- snd_pcm_hw_params_dump( hw_params, out );
-#endif
-
- // Set access ... check user preference. The preferred layout is tried first and the other layout is the fallback.
- if ( options && options->flags & RTAUDIO_NONINTERLEAVED ) {
- stream_.userInterleaved = false;
- result = snd_pcm_hw_params_set_access( phandle, hw_params, SND_PCM_ACCESS_RW_NONINTERLEAVED );
- if ( result < 0 ) {
- result = snd_pcm_hw_params_set_access( phandle, hw_params, SND_PCM_ACCESS_RW_INTERLEAVED );
- stream_.deviceInterleaved[mode] = true;
- }
- else
- stream_.deviceInterleaved[mode] = false;
- }
- else {
- stream_.userInterleaved = true;
- result = snd_pcm_hw_params_set_access( phandle, hw_params, SND_PCM_ACCESS_RW_INTERLEAVED );
- if ( result < 0 ) {
- result = snd_pcm_hw_params_set_access( phandle, hw_params, SND_PCM_ACCESS_RW_NONINTERLEAVED );
- stream_.deviceInterleaved[mode] = false;
- }
- else
- stream_.deviceInterleaved[mode] = true;
- }
-
- if ( result < 0 ) { // both access layouts were rejected
- snd_pcm_close( phandle );
- errorStream_ << "RtApiAlsa::probeDeviceOpen: error setting pcm device (" << name << ") access, " << snd_strerror( result ) << ".";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
-
- // Determine how to set the device format.
- stream_.userFormat = format;
- snd_pcm_format_t deviceFormat = SND_PCM_FORMAT_UNKNOWN;
-
- if ( format == RTAUDIO_SINT8 )
- deviceFormat = SND_PCM_FORMAT_S8;
- else if ( format == RTAUDIO_SINT16 )
- deviceFormat = SND_PCM_FORMAT_S16;
- else if ( format == RTAUDIO_SINT24 )
- deviceFormat = SND_PCM_FORMAT_S24;
- else if ( format == RTAUDIO_SINT32 )
- deviceFormat = SND_PCM_FORMAT_S32;
- else if ( format == RTAUDIO_FLOAT32 )
- deviceFormat = SND_PCM_FORMAT_FLOAT;
- else if ( format == RTAUDIO_FLOAT64 )
- deviceFormat = SND_PCM_FORMAT_FLOAT64;
-
- if ( snd_pcm_hw_params_test_format(phandle, hw_params, deviceFormat) == 0) { // requested format is accepted as-is
- stream_.deviceFormat[mode] = format;
- goto setFormat;
- }
-
- // The user requested format is not natively supported by the device. Fall back to the first format that tests OK, trying FLOAT64, FLOAT, S32, S24, S16, S8 in that order.
- deviceFormat = SND_PCM_FORMAT_FLOAT64;
- if ( snd_pcm_hw_params_test_format( phandle, hw_params, deviceFormat ) == 0 ) {
- stream_.deviceFormat[mode] = RTAUDIO_FLOAT64;
- goto setFormat;
- }
-
- deviceFormat = SND_PCM_FORMAT_FLOAT;
- if ( snd_pcm_hw_params_test_format(phandle, hw_params, deviceFormat ) == 0 ) {
- stream_.deviceFormat[mode] = RTAUDIO_FLOAT32;
- goto setFormat;
- }
-
- deviceFormat = SND_PCM_FORMAT_S32;
- if ( snd_pcm_hw_params_test_format(phandle, hw_params, deviceFormat ) == 0 ) {
- stream_.deviceFormat[mode] = RTAUDIO_SINT32;
- goto setFormat;
- }
-
- deviceFormat = SND_PCM_FORMAT_S24;
- if ( snd_pcm_hw_params_test_format(phandle, hw_params, deviceFormat ) == 0 ) {
- stream_.deviceFormat[mode] = RTAUDIO_SINT24;
- goto setFormat;
- }
-
- deviceFormat = SND_PCM_FORMAT_S16;
- if ( snd_pcm_hw_params_test_format(phandle, hw_params, deviceFormat ) == 0 ) {
- stream_.deviceFormat[mode] = RTAUDIO_SINT16;
- goto setFormat;
- }
-
- deviceFormat = SND_PCM_FORMAT_S8;
- if ( snd_pcm_hw_params_test_format(phandle, hw_params, deviceFormat ) == 0 ) {
- stream_.deviceFormat[mode] = RTAUDIO_SINT8;
- goto setFormat;
- }
-
- // If we get here, no supported format was found.
- snd_pcm_close( phandle );
- errorStream_ << "RtApiAlsa::probeDeviceOpen: pcm device " << device << " data format not supported by RtAudio.";
- errorText_ = errorStream_.str();
- return FAILURE;
-
- setFormat:
- result = snd_pcm_hw_params_set_format( phandle, hw_params, deviceFormat );
- if ( result < 0 ) {
- snd_pcm_close( phandle );
- errorStream_ << "RtApiAlsa::probeDeviceOpen: error setting pcm device (" << name << ") data format, " << snd_strerror( result ) << ".";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
-
- // Determine whether byte-swapping is necessary.
- stream_.doByteSwap[mode] = false;
- if ( deviceFormat != SND_PCM_FORMAT_S8 ) { // the S8 format is excluded from the endianness query
- result = snd_pcm_format_cpu_endian( deviceFormat );
- if ( result == 0 ) // a zero result is treated here as: format is not in CPU byte order, so swap
- stream_.doByteSwap[mode] = true;
- else if (result < 0) {
- snd_pcm_close( phandle );
- errorStream_ << "RtApiAlsa::probeDeviceOpen: error getting pcm device (" << name << ") endian-ness, " << snd_strerror( result ) << ".";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
- }
-
- // Set the sample rate.
- result = snd_pcm_hw_params_set_rate_near( phandle, hw_params, (unsigned int*) &sampleRate, 0 ); // sampleRate is passed by pointer and its value is stored in stream_.sampleRate further below
- if ( result < 0 ) {
- snd_pcm_close( phandle );
- errorStream_ << "RtApiAlsa::probeDeviceOpen: error setting sample rate on device (" << name << "), " << snd_strerror( result ) << ".";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
-
- // Determine the number of channels for this device. We support a possible
- // minimum device channel number greater than the value requested by the user.
- stream_.nUserChannels[mode] = channels;
- unsigned int value;
- result = snd_pcm_hw_params_get_channels_max( hw_params, &value );
- unsigned int deviceChannels = value;
- if ( result < 0 || deviceChannels < channels + firstChannel ) { // NOTE(review): when only the channel count check fails, result is >= 0 yet snd_strerror( result ) is still appended to the message.
- snd_pcm_close( phandle );
- errorStream_ << "RtApiAlsa::probeDeviceOpen: requested channel parameters not supported by device (" << name << "), " << snd_strerror( result ) << ".";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
-
- result = snd_pcm_hw_params_get_channels_min( hw_params, &value );
- if ( result < 0 ) {
- snd_pcm_close( phandle );
- errorStream_ << "RtApiAlsa::probeDeviceOpen: error getting minimum channels for device (" << name << "), " << snd_strerror( result ) << ".";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
- deviceChannels = value;
- if ( deviceChannels < channels + firstChannel ) deviceChannels = channels + firstChannel; // open max( device minimum, channels + firstChannel ) channels
- stream_.nDeviceChannels[mode] = deviceChannels;
-
- // Set the device channels.
- result = snd_pcm_hw_params_set_channels( phandle, hw_params, deviceChannels );
- if ( result < 0 ) {
- snd_pcm_close( phandle );
- errorStream_ << "RtApiAlsa::probeDeviceOpen: error setting channels for device (" << name << "), " << snd_strerror( result ) << ".";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
-
- // Set the buffer (or period) size.
- int dir = 0;
- snd_pcm_uframes_t periodSize = *bufferSize; // requested size; the value left in periodSize after the call is written back to *bufferSize
- result = snd_pcm_hw_params_set_period_size_near( phandle, hw_params, &periodSize, &dir );
- if ( result < 0 ) {
- snd_pcm_close( phandle );
- errorStream_ << "RtApiAlsa::probeDeviceOpen: error setting period size for device (" << name << "), " << snd_strerror( result ) << ".";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
- *bufferSize = periodSize;
-
- // Set the buffer number, which in ALSA is referred to as the "period".
- unsigned int periods = 0;
- if ( options && options->flags & RTAUDIO_MINIMIZE_LATENCY ) periods = 2;
- if ( options && options->numberOfBuffers > 0 ) periods = options->numberOfBuffers; // an explicit buffer count overrides the latency flag
- if ( periods < 2 ) periods = 4; // a fairly safe default value
- result = snd_pcm_hw_params_set_periods_near( phandle, hw_params, &periods, &dir );
- if ( result < 0 ) {
- snd_pcm_close( phandle );
- errorStream_ << "RtApiAlsa::probeDeviceOpen: error setting periods for device (" << name << "), " << snd_strerror( result ) << ".";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
-
- // If attempting to setup a duplex stream, the bufferSize parameter
- // MUST be the same in both directions!
- if ( stream_.mode == OUTPUT && mode == INPUT && *bufferSize != stream_.bufferSize ) {
- snd_pcm_close( phandle );
- errorStream_ << "RtApiAlsa::probeDeviceOpen: system error setting buffer size for duplex stream on device (" << name << ").";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
-
- stream_.bufferSize = *bufferSize;
-
- // Install the hardware configuration
- result = snd_pcm_hw_params( phandle, hw_params );
- if ( result < 0 ) {
- snd_pcm_close( phandle );
- errorStream_ << "RtApiAlsa::probeDeviceOpen: error installing hardware configuration on device (" << name << "), " << snd_strerror( result ) << ".";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
-
-#if defined(__RTAUDIO_DEBUG__)
- fprintf(stderr, "\nRtApiAlsa: dump hardware params after installation:\n\n");
- snd_pcm_hw_params_dump( hw_params, out );
-#endif
-
- // Set the software configuration to fill buffers with zeros and prevent device stopping on xruns.
- snd_pcm_sw_params_t *sw_params = NULL;
- snd_pcm_sw_params_alloca( &sw_params );
- snd_pcm_sw_params_current( phandle, sw_params ); // NOTE(review): return values of this call and the sw_params setters below are not checked; only snd_pcm_sw_params() is.
- snd_pcm_sw_params_set_start_threshold( phandle, sw_params, *bufferSize );
- snd_pcm_sw_params_set_stop_threshold( phandle, sw_params, ULONG_MAX );
- snd_pcm_sw_params_set_silence_threshold( phandle, sw_params, 0 );
-
- // The following two settings were suggested by Theo Veenker
- //snd_pcm_sw_params_set_avail_min( phandle, sw_params, *bufferSize );
- //snd_pcm_sw_params_set_xfer_align( phandle, sw_params, 1 );
-
- // here are two options for a fix: the commented-out call passes ULONG_MAX as the silence size, the active code passes the boundary value read from sw_params
- //snd_pcm_sw_params_set_silence_size( phandle, sw_params, ULONG_MAX );
- snd_pcm_uframes_t val;
- snd_pcm_sw_params_get_boundary( sw_params, &val );
- snd_pcm_sw_params_set_silence_size( phandle, sw_params, val );
-
- result = snd_pcm_sw_params( phandle, sw_params );
- if ( result < 0 ) {
- snd_pcm_close( phandle );
- errorStream_ << "RtApiAlsa::probeDeviceOpen: error installing software configuration on device (" << name << "), " << snd_strerror( result ) << ".";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
-
-#if defined(__RTAUDIO_DEBUG__)
- fprintf(stderr, "\nRtApiAlsa: dump software params after installation:\n\n");
- snd_pcm_sw_params_dump( sw_params, out );
-#endif
-
- // Set flags for buffer conversion: needed when format, channel count, or (for more than one channel) interleaving differs between user and device.
- stream_.doConvertBuffer[mode] = false;
- if ( stream_.userFormat != stream_.deviceFormat[mode] )
- stream_.doConvertBuffer[mode] = true;
- if ( stream_.nUserChannels[mode] < stream_.nDeviceChannels[mode] )
- stream_.doConvertBuffer[mode] = true;
- if ( stream_.userInterleaved != stream_.deviceInterleaved[mode] &&
- stream_.nUserChannels[mode] > 1 )
- stream_.doConvertBuffer[mode] = true;
-
- // Allocate the ApiHandle if necessary and then save.
- AlsaHandle *apiInfo = 0;
- if ( stream_.apiHandle == 0 ) {
- try {
- apiInfo = (AlsaHandle *) new AlsaHandle;
- }
- catch ( std::bad_alloc& ) {
- errorText_ = "RtApiAlsa::probeDeviceOpen: error allocating AlsaHandle memory.";
- goto error;
- }
-
- if ( pthread_cond_init( &apiInfo->runnable_cv, NULL ) ) { // NOTE(review): on failure the error path below still calls pthread_cond_destroy() on this variable; confirm that is acceptable.
- errorText_ = "RtApiAlsa::probeDeviceOpen: error initializing pthread condition variable.";
- goto error;
- }
-
- stream_.apiHandle = (void *) apiInfo;
- apiInfo->handles[0] = 0;
- apiInfo->handles[1] = 0;
- }
- else {
- apiInfo = (AlsaHandle *) stream_.apiHandle; // a handle already exists (this function stored it on an earlier call); reuse it
- }
- apiInfo->handles[mode] = phandle;
- phandle = 0; // handle is now held by apiInfo; zeroing avoids a second snd_pcm_close() in the error path
-
- // Allocate necessary internal buffers.
- unsigned long bufferBytes;
- bufferBytes = stream_.nUserChannels[mode] * *bufferSize * formatBytes( stream_.userFormat );
- stream_.userBuffer[mode] = (char *) calloc( bufferBytes, 1 );
- if ( stream_.userBuffer[mode] == NULL ) {
- errorText_ = "RtApiAlsa::probeDeviceOpen: error allocating user buffer memory.";
- goto error;
- }
-
- if ( stream_.doConvertBuffer[mode] ) {
-
- bool makeBuffer = true;
- bufferBytes = stream_.nDeviceChannels[mode] * formatBytes( stream_.deviceFormat[mode] ); // bytes per frame on the device side
- if ( mode == INPUT ) {
- if ( stream_.mode == OUTPUT && stream_.deviceBuffer ) {
- unsigned long bytesOut = stream_.nDeviceChannels[0] * formatBytes( stream_.deviceFormat[0] );
- if ( bufferBytes <= bytesOut ) makeBuffer = false; // existing output device buffer is large enough to share
- }
- }
-
- if ( makeBuffer ) {
- bufferBytes *= *bufferSize;
- if ( stream_.deviceBuffer ) free( stream_.deviceBuffer );
- stream_.deviceBuffer = (char *) calloc( bufferBytes, 1 );
- if ( stream_.deviceBuffer == NULL ) {
- errorText_ = "RtApiAlsa::probeDeviceOpen: error allocating device buffer memory.";
- goto error;
- }
- }
- }
-
- stream_.sampleRate = sampleRate;
- stream_.nBuffers = periods;
- stream_.device[mode] = device;
- stream_.state = STREAM_STOPPED;
-
- // Setup the buffer conversion information structure.
- if ( stream_.doConvertBuffer[mode] ) setConvertInfo( mode, firstChannel );
-
- // Setup thread if necessary.
- if ( stream_.mode == OUTPUT && mode == INPUT ) {
- // We had already set up an output stream.
- stream_.mode = DUPLEX;
- // Link the streams if possible.
- apiInfo->synchronized = false;
- if ( snd_pcm_link( apiInfo->handles[0], apiInfo->handles[1] ) == 0 )
- apiInfo->synchronized = true;
- else { // link failure is reported as a warning only; the duplex stream is still opened
- errorText_ = "RtApiAlsa::probeDeviceOpen: unable to synchronize input and output devices.";
- error( RtAudioError::WARNING );
- }
- }
- else {
- stream_.mode = mode;
-
- // Setup callback thread.
- stream_.callbackInfo.object = (void *) this;
-
- // Set the thread attributes for joinable and realtime scheduling
- // priority (optional). The higher priority will only take effect
- // if the program is run as root or suid. Note, under Linux
- // processes with CAP_SYS_NICE privilege, a user can change
- // scheduling policy and priority (thus need not be root). See
- // POSIX "capabilities".
- pthread_attr_t attr;
- pthread_attr_init( &attr );
- pthread_attr_setdetachstate( &attr, PTHREAD_CREATE_JOINABLE );
-
-#ifdef SCHED_RR // Undefined with some OSes (eg: NetBSD 1.6.x with GNU Pthread)
- if ( options && options->flags & RTAUDIO_SCHEDULE_REALTIME ) {
- // We previously attempted to increase the audio callback priority
- // to SCHED_RR here via the attributes. However, while no errors
- // were reported in doing so, it did not work. So, now this is
- // done in the alsaCallbackHandler function.
- stream_.callbackInfo.doRealtime = true;
- int priority = options->priority;
- int min = sched_get_priority_min( SCHED_RR );
- int max = sched_get_priority_max( SCHED_RR );
- if ( priority < min ) priority = min; // clamp the requested priority into the SCHED_RR range
- else if ( priority > max ) priority = max;
- stream_.callbackInfo.priority = priority;
- }
-#endif
-
- stream_.callbackInfo.isRunning = true;
- result = pthread_create( &stream_.callbackInfo.thread, &attr, alsaCallbackHandler, &stream_.callbackInfo );
- pthread_attr_destroy( &attr );
- if ( result ) {
- stream_.callbackInfo.isRunning = false;
- errorText_ = "RtApiAlsa::error creating callback thread!";
- goto error;
- }
- }
-
- return SUCCESS;
-
- error: // common cleanup: frees apiInfo, closes PCM handles, frees user and device buffers for both directions, marks the stream closed
- if ( apiInfo ) {
- pthread_cond_destroy( &apiInfo->runnable_cv );
- if ( apiInfo->handles[0] ) snd_pcm_close( apiInfo->handles[0] );
- if ( apiInfo->handles[1] ) snd_pcm_close( apiInfo->handles[1] );
- delete apiInfo;
- stream_.apiHandle = 0;
- }
-
- if ( phandle) snd_pcm_close( phandle ); // only non-null if the handle was not yet stored in apiInfo
-
- for ( int i=0; i<2; i++ ) {
- if ( stream_.userBuffer[i] ) {
- free( stream_.userBuffer[i] );
- stream_.userBuffer[i] = 0;
- }
- }
-
- if ( stream_.deviceBuffer ) {
- free( stream_.deviceBuffer );
- stream_.deviceBuffer = 0;
- }
-
- stream_.state = STREAM_CLOSED;
- return FAILURE;
-}
-
-void RtApiAlsa :: closeStream() // Stop the callback thread, close both PCM handles, free stream buffers and mark the stream closed.
-{
- if ( stream_.state == STREAM_CLOSED ) {
- errorText_ = "RtApiAlsa::closeStream(): no open stream to close!";
- error( RtAudioError::WARNING );
- return;
- }
-
- AlsaHandle *apiInfo = (AlsaHandle *) stream_.apiHandle;
- stream_.callbackInfo.isRunning = false;
- MUTEX_LOCK( &stream_.mutex );
- if ( stream_.state == STREAM_STOPPED ) { // NOTE(review): apiInfo is dereferenced here but is only null-checked further below; confirm it cannot be null.
- apiInfo->runnable = true;
- pthread_cond_signal( &apiInfo->runnable_cv ); // signal runnable_cv (callbackEvent waits on it while stopped) before joining the thread
- }
- MUTEX_UNLOCK( &stream_.mutex );
- pthread_join( stream_.callbackInfo.thread, NULL );
-
- if ( stream_.state == STREAM_RUNNING ) { // still running: drop pending frames on the active direction(s)
- stream_.state = STREAM_STOPPED;
- if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX )
- snd_pcm_drop( apiInfo->handles[0] );
- if ( stream_.mode == INPUT || stream_.mode == DUPLEX )
- snd_pcm_drop( apiInfo->handles[1] );
- }
-
- if ( apiInfo ) {
- pthread_cond_destroy( &apiInfo->runnable_cv );
- if ( apiInfo->handles[0] ) snd_pcm_close( apiInfo->handles[0] );
- if ( apiInfo->handles[1] ) snd_pcm_close( apiInfo->handles[1] );
- delete apiInfo;
- stream_.apiHandle = 0;
- }
-
- for ( int i=0; i<2; i++ ) {
- if ( stream_.userBuffer[i] ) {
- free( stream_.userBuffer[i] );
- stream_.userBuffer[i] = 0;
- }
- }
-
- if ( stream_.deviceBuffer ) {
- free( stream_.deviceBuffer );
- stream_.deviceBuffer = 0;
- }
-
- stream_.mode = UNINITIALIZED;
- stream_.state = STREAM_CLOSED;
-}
-
-void RtApiAlsa :: startStream() // Prepare the PCM device(s), mark the stream running and signal runnable_cv; raises SYSTEM_ERROR via error() on failure.
-{
- // This method calls snd_pcm_prepare if the device isn't already in that state.
-
- verifyStream();
- if ( stream_.state == STREAM_RUNNING ) {
- errorText_ = "RtApiAlsa::startStream(): the stream is already running!";
- error( RtAudioError::WARNING );
- return;
- }
-
- MUTEX_LOCK( &stream_.mutex );
-
- int result = 0;
- snd_pcm_state_t state;
- AlsaHandle *apiInfo = (AlsaHandle *) stream_.apiHandle;
- snd_pcm_t **handle = (snd_pcm_t **) apiInfo->handles; // handle[0] is the output PCM, handle[1] the input PCM
- if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
- state = snd_pcm_state( handle[0] );
- if ( state != SND_PCM_STATE_PREPARED ) {
- result = snd_pcm_prepare( handle[0] );
- if ( result < 0 ) {
- errorStream_ << "RtApiAlsa::startStream: error preparing output pcm device, " << snd_strerror( result ) << ".";
- errorText_ = errorStream_.str();
- goto unlock;
- }
- }
- }
-
- if ( ( stream_.mode == INPUT || stream_.mode == DUPLEX ) && !apiInfo->synchronized ) { // input is handled separately only when not linked to the output
- result = snd_pcm_drop(handle[1]); // fix to remove stale data received since device has been open. NOTE(review): if the state is already PREPARED a negative drop result reaches the final check with no new errorText_; confirm intended.
- state = snd_pcm_state( handle[1] );
- if ( state != SND_PCM_STATE_PREPARED ) {
- result = snd_pcm_prepare( handle[1] );
- if ( result < 0 ) {
- errorStream_ << "RtApiAlsa::startStream: error preparing input pcm device, " << snd_strerror( result ) << ".";
- errorText_ = errorStream_.str();
- goto unlock;
- }
- }
- }
-
- stream_.state = STREAM_RUNNING;
-
- unlock: // reached on success and on failure; NOTE(review): runnable is set true even when preparation failed, confirm intended.
- apiInfo->runnable = true;
- pthread_cond_signal( &apiInfo->runnable_cv );
- MUTEX_UNLOCK( &stream_.mutex );
-
- if ( result >= 0 ) return;
- error( RtAudioError::SYSTEM_ERROR );
-}
-
-void RtApiAlsa :: stopStream() // Stop the stream: drain (or drop, when linked) the output, drop the input; raises SYSTEM_ERROR via error() on failure.
-{
- verifyStream();
- if ( stream_.state == STREAM_STOPPED ) {
- errorText_ = "RtApiAlsa::stopStream(): the stream is already stopped!";
- error( RtAudioError::WARNING );
- return;
- }
-
- stream_.state = STREAM_STOPPED; // state is changed before the mutex is taken
- MUTEX_LOCK( &stream_.mutex );
-
- int result = 0;
- AlsaHandle *apiInfo = (AlsaHandle *) stream_.apiHandle;
- snd_pcm_t **handle = (snd_pcm_t **) apiInfo->handles; // handle[0] is the output PCM, handle[1] the input PCM
- if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
- if ( apiInfo->synchronized )
- result = snd_pcm_drop( handle[0] ); // linked streams: drop instead of drain
- else
- result = snd_pcm_drain( handle[0] );
- if ( result < 0 ) { // NOTE(review): the message says draining even when the drop branch was taken.
- errorStream_ << "RtApiAlsa::stopStream: error draining output pcm device, " << snd_strerror( result ) << ".";
- errorText_ = errorStream_.str();
- goto unlock;
- }
- }
-
- if ( ( stream_.mode == INPUT || stream_.mode == DUPLEX ) && !apiInfo->synchronized ) { // skipped when input is linked to the output
- result = snd_pcm_drop( handle[1] );
- if ( result < 0 ) {
- errorStream_ << "RtApiAlsa::stopStream: error stopping input pcm device, " << snd_strerror( result ) << ".";
- errorText_ = errorStream_.str();
- goto unlock;
- }
- }
-
- unlock:
- apiInfo->runnable = false; // fixes high CPU usage when stopped
- MUTEX_UNLOCK( &stream_.mutex );
-
- if ( result >= 0 ) return;
- error( RtAudioError::SYSTEM_ERROR );
-}
-
-void RtApiAlsa :: abortStream() // Stop the stream immediately: the output is always dropped, never drained; raises SYSTEM_ERROR via error() on failure.
-{
- verifyStream();
- if ( stream_.state == STREAM_STOPPED ) {
- errorText_ = "RtApiAlsa::abortStream(): the stream is already stopped!";
- error( RtAudioError::WARNING );
- return;
- }
-
- stream_.state = STREAM_STOPPED; // state is changed before the mutex is taken
- MUTEX_LOCK( &stream_.mutex );
-
- int result = 0;
- AlsaHandle *apiInfo = (AlsaHandle *) stream_.apiHandle;
- snd_pcm_t **handle = (snd_pcm_t **) apiInfo->handles; // handle[0] is the output PCM, handle[1] the input PCM
- if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
- result = snd_pcm_drop( handle[0] );
- if ( result < 0 ) {
- errorStream_ << "RtApiAlsa::abortStream: error aborting output pcm device, " << snd_strerror( result ) << ".";
- errorText_ = errorStream_.str();
- goto unlock;
- }
- }
-
- if ( ( stream_.mode == INPUT || stream_.mode == DUPLEX ) && !apiInfo->synchronized ) { // skipped when input is linked to the output
- result = snd_pcm_drop( handle[1] );
- if ( result < 0 ) {
- errorStream_ << "RtApiAlsa::abortStream: error aborting input pcm device, " << snd_strerror( result ) << ".";
- errorText_ = errorStream_.str();
- goto unlock;
- }
- }
-
- unlock:
- apiInfo->runnable = false; // fixes high CPU usage when stopped
- MUTEX_UNLOCK( &stream_.mutex );
-
- if ( result >= 0 ) return;
- error( RtAudioError::SYSTEM_ERROR );
-}
-
-void RtApiAlsa :: callbackEvent()
-{
- AlsaHandle *apiInfo = (AlsaHandle *) stream_.apiHandle;
- if ( stream_.state == STREAM_STOPPED ) {
- MUTEX_LOCK( &stream_.mutex );
- while ( !apiInfo->runnable )
- pthread_cond_wait( &apiInfo->runnable_cv, &stream_.mutex );
-
- if ( stream_.state != STREAM_RUNNING ) {
- MUTEX_UNLOCK( &stream_.mutex );
- return;
- }
- MUTEX_UNLOCK( &stream_.mutex );
- }
-
- if ( stream_.state == STREAM_CLOSED ) {
- errorText_ = "RtApiAlsa::callbackEvent(): the stream is closed ... this shouldn't happen!";
- error( RtAudioError::WARNING );
- return;
- }
-
- int doStopStream = 0;
- RtAudioCallback callback = (RtAudioCallback) stream_.callbackInfo.callback;
- double streamTime = getStreamTime();
- RtAudioStreamStatus status = 0;
- if ( stream_.mode != INPUT && apiInfo->xrun[0] == true ) {
- status |= RTAUDIO_OUTPUT_UNDERFLOW;
- apiInfo->xrun[0] = false;
- }
- if ( stream_.mode != OUTPUT && apiInfo->xrun[1] == true ) {
- status |= RTAUDIO_INPUT_OVERFLOW;
- apiInfo->xrun[1] = false;
- }
- doStopStream = callback( stream_.userBuffer[0], stream_.userBuffer[1],
- stream_.bufferSize, streamTime, status, stream_.callbackInfo.userData );
-
- if ( doStopStream == 2 ) {
- abortStream();
- return;
- }
-
- MUTEX_LOCK( &stream_.mutex );
-
- // The state might change while waiting on a mutex.
- if ( stream_.state == STREAM_STOPPED ) goto unlock;
-
- int result;
- char *buffer;
- int channels;
- snd_pcm_t **handle;
- snd_pcm_sframes_t frames;
- RtAudioFormat format;
- handle = (snd_pcm_t **) apiInfo->handles;
-
- if ( stream_.mode == INPUT || stream_.mode == DUPLEX ) {
-
- // Setup parameters.
- if ( stream_.doConvertBuffer[1] ) {
- buffer = stream_.deviceBuffer;
- channels = stream_.nDeviceChannels[1];
- format = stream_.deviceFormat[1];
- }
- else {
- buffer = stream_.userBuffer[1];
- channels = stream_.nUserChannels[1];
- format = stream_.userFormat;
- }
-
- // Read samples from device in interleaved/non-interleaved format.
- if ( stream_.deviceInterleaved[1] )
- result = snd_pcm_readi( handle[1], buffer, stream_.bufferSize );
- else {
- void *bufs[channels];
- size_t offset = stream_.bufferSize * formatBytes( format );
- for ( int i=0; i<channels; i++ )
- bufs[i] = (void *) (buffer + (i * offset));
- result = snd_pcm_readn( handle[1], bufs, stream_.bufferSize );
- }
-
- if ( result < (int) stream_.bufferSize ) {
- // Either an error or overrun occurred.
- if ( result == -EPIPE ) {
- snd_pcm_state_t state = snd_pcm_state( handle[1] );
- if ( state == SND_PCM_STATE_XRUN ) {
- apiInfo->xrun[1] = true;
- result = snd_pcm_prepare( handle[1] );
- if ( result < 0 ) {
- errorStream_ << "RtApiAlsa::callbackEvent: error preparing device after overrun, " << snd_strerror( result ) << ".";
- errorText_ = errorStream_.str();
- }
- }
- else {
- errorStream_ << "RtApiAlsa::callbackEvent: error, current state is " << snd_pcm_state_name( state ) << ", " << snd_strerror( result ) << ".";
- errorText_ = errorStream_.str();
- }
- }
- else {
- errorStream_ << "RtApiAlsa::callbackEvent: audio read error, " << snd_strerror( result ) << ".";
- errorText_ = errorStream_.str();
- }
- error( RtAudioError::WARNING );
- goto tryOutput;
- }
-
- // Do byte swapping if necessary.
- if ( stream_.doByteSwap[1] )
- byteSwapBuffer( buffer, stream_.bufferSize * channels, format );
-
- // Do buffer conversion if necessary.
- if ( stream_.doConvertBuffer[1] )
- convertBuffer( stream_.userBuffer[1], stream_.deviceBuffer, stream_.convertInfo[1] );
-
- // Check stream latency
- result = snd_pcm_delay( handle[1], &frames );
- if ( result == 0 && frames > 0 ) stream_.latency[1] = frames;
- }
-
- tryOutput:
-
- if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
-
- // Setup parameters and do buffer conversion if necessary.
- if ( stream_.doConvertBuffer[0] ) {
- buffer = stream_.deviceBuffer;
- convertBuffer( buffer, stream_.userBuffer[0], stream_.convertInfo[0] );
- channels = stream_.nDeviceChannels[0];
- format = stream_.deviceFormat[0];
- }
- else {
- buffer = stream_.userBuffer[0];
- channels = stream_.nUserChannels[0];
- format = stream_.userFormat;
- }
-
- // Do byte swapping if necessary.
- if ( stream_.doByteSwap[0] )
- byteSwapBuffer(buffer, stream_.bufferSize * channels, format);
-
- // Write samples to device in interleaved/non-interleaved format.
- if ( stream_.deviceInterleaved[0] )
- result = snd_pcm_writei( handle[0], buffer, stream_.bufferSize );
- else {
- void *bufs[channels];
- size_t offset = stream_.bufferSize * formatBytes( format );
- for ( int i=0; i<channels; i++ )
- bufs[i] = (void *) (buffer + (i * offset));
- result = snd_pcm_writen( handle[0], bufs, stream_.bufferSize );
- }
-
- if ( result < (int) stream_.bufferSize ) {
- // Either an error or underrun occured.
- if ( result == -EPIPE ) {
- snd_pcm_state_t state = snd_pcm_state( handle[0] );
- if ( state == SND_PCM_STATE_XRUN ) {
- apiInfo->xrun[0] = true;
- result = snd_pcm_prepare( handle[0] );
- if ( result < 0 ) {
- errorStream_ << "RtApiAlsa::callbackEvent: error preparing device after underrun, " << snd_strerror( result ) << ".";
- errorText_ = errorStream_.str();
- }
- else
- errorText_ = "RtApiAlsa::callbackEvent: audio write error, underrun.";
- }
- else {
- errorStream_ << "RtApiAlsa::callbackEvent: error, current state is " << snd_pcm_state_name( state ) << ", " << snd_strerror( result ) << ".";
- errorText_ = errorStream_.str();
- }
- }
- else {
- errorStream_ << "RtApiAlsa::callbackEvent: audio write error, " << snd_strerror( result ) << ".";
- errorText_ = errorStream_.str();
- }
- error( RtAudioError::WARNING );
- goto unlock;
- }
-
- // Check stream latency
- result = snd_pcm_delay( handle[0], &frames );
- if ( result == 0 && frames > 0 ) stream_.latency[0] = frames;
- }
-
- unlock:
- MUTEX_UNLOCK( &stream_.mutex );
-
- RtApi::tickStreamTime();
- if ( doStopStream == 1 ) this->stopStream();
-}
-
-// Entry point of the ALSA callback thread.
-//
-// 'ptr' is the stream's CallbackInfo.  The thread optionally switches
-// itself to SCHED_RR scheduling and then calls
-// RtApiAlsa::callbackEvent() repeatedly until info->isRunning is
-// cleared.  Always terminates through pthread_exit( NULL ).
-static void *alsaCallbackHandler( void *ptr )
-{
-  CallbackInfo *info = (CallbackInfo *) ptr;
-  RtApiAlsa *object = (RtApiAlsa *) info->object;
-  bool *isRunning = &info->isRunning;
-
-#ifdef SCHED_RR // Undefined with some OSes (eg: NetBSD 1.6.x with GNU Pthread)
-  // Test the flag's value, not its address: the address of a member is
-  // never null, so '&info->doRealtime' would request realtime
-  // scheduling for every stream regardless of what the caller asked for.
-  if ( info->doRealtime ) {
-    pthread_t tID = pthread_self();        // ID of this thread
-    sched_param prio = { info->priority }; // scheduling priority of thread
-    pthread_setschedparam( tID, SCHED_RR, &prio );
-  }
-#endif
-
-  while ( *isRunning == true ) {
-    // Cancellation point, so a pthread_cancel() request can take effect
-    // between callback iterations.
-    pthread_testcancel();
-    object->callbackEvent();
-  }
-
-  pthread_exit( NULL );
-}
-
-//******************** End of __LINUX_ALSA__ *********************//
-#endif
-
-#if defined(__LINUX_PULSE__)
-
-// Code written by Peter Meerwald, pmeerw@pmeerw.net
-// and Tristan Matthews.
-
-#include <pulse/error.h>
-#include <pulse/simple.h>
-#include <cstdio>
-
-// Sample rates offered by the PulseAudio backend.  The final 0 is a
-// sentinel: code walking this table stops at the first zero entry.
-static const unsigned int SUPPORTED_SAMPLERATES[] = {
-  8000,
-  16000,
-  22050,
-  32000,
-  44100,
-  48000,
-  96000,
-  0
-};
-
-// One row of the RtAudio <-> PulseAudio sample format table: pairs an
-// RtAudio format flag with the PulseAudio sample format used for it.
-struct rtaudio_pa_format_mapping_t {
-  RtAudioFormat rtaudio_format;  // RtAudio side of the pair
-  pa_sample_format_t pa_format;  // PulseAudio side of the pair
-};
-
-// Formats that can be handed to PulseAudio without conversion.  The
-// last row ( 0, PA_SAMPLE_INVALID ) terminates the table.
-static const rtaudio_pa_format_mapping_t supported_sampleformats[] = {
-  { RTAUDIO_SINT16,  PA_SAMPLE_S16LE },
-  { RTAUDIO_SINT32,  PA_SAMPLE_S32LE },
-  { RTAUDIO_FLOAT32, PA_SAMPLE_FLOAT32LE },
-  { 0,               PA_SAMPLE_INVALID }
-};
-
-// Per-stream state of the PulseAudio backend, stored in
-// stream_.apiHandle.
-struct PulseAudioHandle {
-  pa_simple *s_play;           // playback connection, 0 when not opened
-  pa_simple *s_rec;            // record connection, 0 when not opened
-  pthread_t thread;            // callback thread
-  pthread_cond_t runnable_cv;  // signalled when 'runnable' is set
-  bool runnable;               // the callback thread waits while this is false
-
-  // The connections start out null and the thread not runnable; 'thread'
-  // and 'runnable_cv' are left for the code that opens the stream to set up.
-  PulseAudioHandle()
-    : s_play( 0 ),
-      s_rec( 0 ),
-      runnable( false )
-  {
-  }
-};
-
-// Destructor: closes the stream if it has not been closed already.
-RtApiPulse::~RtApiPulse()
-{
-  if ( stream_.state == STREAM_CLOSED ) return;
-
-  closeStream();
-}
-
-// The PulseAudio backend always reports exactly one device.
-unsigned int RtApiPulse::getDeviceCount( void )
-{
-  const unsigned int deviceCount = 1;
-  return deviceCount;
-}
-
-// Describe the single PulseAudio device.  The device index is ignored
-// and nothing is queried at run time: the result is a fixed stereo
-// in/out description marked as both default input and default output.
-RtAudio::DeviceInfo RtApiPulse::getDeviceInfo( unsigned int /*device*/ )
-{
-  RtAudio::DeviceInfo deviceInfo;
-
-  deviceInfo.name = "PulseAudio";
-  deviceInfo.probed = true;
-
-  deviceInfo.isDefaultInput = true;
-  deviceInfo.isDefaultOutput = true;
-
-  deviceInfo.inputChannels = 2;
-  deviceInfo.outputChannels = 2;
-  deviceInfo.duplexChannels = 2;
-
-  // Copy every table entry up to (not including) the 0 sentinel.
-  for ( unsigned int i = 0; SUPPORTED_SAMPLERATES[i] != 0; i++ )
-    deviceInfo.sampleRates.push_back( SUPPORTED_SAMPLERATES[i] );
-
-  deviceInfo.preferredSampleRate = 48000;
-  deviceInfo.nativeFormats = RTAUDIO_SINT16 | RTAUDIO_SINT32 | RTAUDIO_FLOAT32;
-
-  return deviceInfo;
-}
-
-// Entry point of the PulseAudio callback thread.  'user' is the
-// stream's CallbackInfo; RtApiPulse::callbackEvent() is invoked in a
-// loop for as long as the isRunning flag stays set.
-static void *pulseaudio_callback( void * user )
-{
-  CallbackInfo *info = static_cast<CallbackInfo *>( user );
-  RtApiPulse *api = static_cast<RtApiPulse *>( info->object );
-  volatile bool *keepRunning = &info->isRunning;
-
-  for ( ;; ) {
-    if ( !*keepRunning ) break;
-
-    pthread_testcancel();
-    api->callbackEvent();
-  }
-
-  pthread_exit( NULL );
-}
-
-// Shut the stream down and release everything it owns: the callback
-// thread, both PulseAudio connections, the API handle, the user buffers
-// and the device (conversion) buffer.  Leaves the stream in
-// STREAM_CLOSED / UNINITIALIZED.
-void RtApiPulse::closeStream( void )
-{
-  PulseAudioHandle *pah = static_cast<PulseAudioHandle *>( stream_.apiHandle );
-
-  // Ask the callback thread's loop to finish.
-  stream_.callbackInfo.isRunning = false;
-  if ( pah ) {
-    MUTEX_LOCK( &stream_.mutex );
-    if ( stream_.state == STREAM_STOPPED ) {
-      // A stopped stream may have its thread blocked on runnable_cv;
-      // wake it so that it can notice isRunning and exit.
-      pah->runnable = true;
-      pthread_cond_signal( &pah->runnable_cv );
-    }
-    MUTEX_UNLOCK( &stream_.mutex );
-
-    // Nothing below may run while the thread can still touch the handle.
-    pthread_join( pah->thread, 0 );
-    if ( pah->s_play ) {
-      pa_simple_flush( pah->s_play, NULL );
-      pa_simple_free( pah->s_play );
-    }
-    if ( pah->s_rec )
-      pa_simple_free( pah->s_rec );
-
-    pthread_cond_destroy( &pah->runnable_cv );
-    delete pah;
-    stream_.apiHandle = 0;
-  }
-
-  for ( int i=0; i<2; i++ ) {
-    if ( stream_.userBuffer[i] ) {
-      free( stream_.userBuffer[i] );
-      stream_.userBuffer[i] = 0;
-    }
-  }
-
-  // probeDeviceOpen() allocates this buffer with calloc() whenever
-  // format/channel conversion is needed; release it here too so that it
-  // is not leaked every time a converting stream is closed.
-  if ( stream_.deviceBuffer ) {
-    free( stream_.deviceBuffer );
-    stream_.deviceBuffer = 0;
-  }
-
-  stream_.state = STREAM_CLOSED;
-  stream_.mode = UNINITIALIZED;
-}
-
-// One iteration of the PulseAudio callback thread.
-//
-// While the stream is stopped the call blocks on the handle's condition
-// variable.  Otherwise it invokes the user callback on the user buffers
-// and then, holding the stream mutex, writes the output buffer to the
-// playback connection and/or reads the input buffer from the record
-// connection, converting through stream_.deviceBuffer where required.
-// A callback return value of 2 aborts the stream immediately; a value
-// of 1 stops it after this iteration's I/O.
-void RtApiPulse::callbackEvent( void )
-{
- PulseAudioHandle *pah = static_cast<PulseAudioHandle *>( stream_.apiHandle );
-
- // Stopped: sleep until some other thread sets 'runnable' and signals.
- if ( stream_.state == STREAM_STOPPED ) {
- MUTEX_LOCK( &stream_.mutex );
- while ( !pah->runnable )
- pthread_cond_wait( &pah->runnable_cv, &stream_.mutex );
-
- // Woken for a reason other than a start: nothing to do this round.
- if ( stream_.state != STREAM_RUNNING ) {
- MUTEX_UNLOCK( &stream_.mutex );
- return;
- }
- MUTEX_UNLOCK( &stream_.mutex );
- }
-
- if ( stream_.state == STREAM_CLOSED ) {
- errorText_ = "RtApiPulse::callbackEvent(): the stream is closed ... "
- "this shouldn't happen!";
- error( RtAudioError::WARNING );
- return;
- }
-
- // Run the user callback (outside the stream mutex).
- RtAudioCallback callback = (RtAudioCallback) stream_.callbackInfo.callback;
- double streamTime = getStreamTime();
- RtAudioStreamStatus status = 0;
- int doStopStream = callback( stream_.userBuffer[OUTPUT], stream_.userBuffer[INPUT],
- stream_.bufferSize, streamTime, status,
- stream_.callbackInfo.userData );
-
- // 2 = abort: skip this iteration's I/O entirely.
- if ( doStopStream == 2 ) {
- abortStream();
- return;
- }
-
- MUTEX_LOCK( &stream_.mutex );
- // Buffers handed to PulseAudio: the device buffer when conversion is
- // needed, otherwise the user buffer itself.
- void *pulse_in = stream_.doConvertBuffer[INPUT] ? stream_.deviceBuffer : stream_.userBuffer[INPUT];
- void *pulse_out = stream_.doConvertBuffer[OUTPUT] ? stream_.deviceBuffer : stream_.userBuffer[OUTPUT];
-
- // The state is re-checked now that the mutex is held.
- if ( stream_.state != STREAM_RUNNING )
- goto unlock;
-
- int pa_error;
- size_t bytes;
- if (stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
- // Playback: convert user -> device format first if required, then
- // write one buffer's worth of bytes.
- if ( stream_.doConvertBuffer[OUTPUT] ) {
- convertBuffer( stream_.deviceBuffer,
- stream_.userBuffer[OUTPUT],
- stream_.convertInfo[OUTPUT] );
- bytes = stream_.nDeviceChannels[OUTPUT] * stream_.bufferSize *
- formatBytes( stream_.deviceFormat[OUTPUT] );
- } else
- bytes = stream_.nUserChannels[OUTPUT] * stream_.bufferSize *
- formatBytes( stream_.userFormat );
-
- if ( pa_simple_write( pah->s_play, pulse_out, bytes, &pa_error ) < 0 ) {
- errorStream_ << "RtApiPulse::callbackEvent: audio write error, " <<
- pa_strerror( pa_error ) << ".";
- errorText_ = errorStream_.str();
- error( RtAudioError::WARNING );
- }
- }
-
- if ( stream_.mode == INPUT || stream_.mode == DUPLEX) {
- // Capture: read one buffer's worth of bytes, then convert device ->
- // user format if required.
- if ( stream_.doConvertBuffer[INPUT] )
- bytes = stream_.nDeviceChannels[INPUT] * stream_.bufferSize *
- formatBytes( stream_.deviceFormat[INPUT] );
- else
- bytes = stream_.nUserChannels[INPUT] * stream_.bufferSize *
- formatBytes( stream_.userFormat );
-
- if ( pa_simple_read( pah->s_rec, pulse_in, bytes, &pa_error ) < 0 ) {
- errorStream_ << "RtApiPulse::callbackEvent: audio read error, " <<
- pa_strerror( pa_error ) << ".";
- errorText_ = errorStream_.str();
- error( RtAudioError::WARNING );
- }
- // NOTE(review): the conversion below also runs after a failed read.
- if ( stream_.doConvertBuffer[INPUT] ) {
- convertBuffer( stream_.userBuffer[INPUT],
- stream_.deviceBuffer,
- stream_.convertInfo[INPUT] );
- }
- }
-
- unlock:
- MUTEX_UNLOCK( &stream_.mutex );
- RtApi::tickStreamTime();
-
- // 1 = stop: requested by the callback, honoured after the I/O above.
- if ( doStopStream == 1 )
- stopStream();
-}
-
-// Start an opened stream: mark it running and wake the callback thread
-// waiting on the handle's condition variable.  Starting a closed stream
-// is reported as INVALID_USE; starting a running one as a WARNING.
-void RtApiPulse::startStream( void )
-{
-  switch ( stream_.state ) {
-  case STREAM_CLOSED:
-    errorText_ = "RtApiPulse::startStream(): the stream is not open!";
-    error( RtAudioError::INVALID_USE );
-    return;
-  case STREAM_RUNNING:
-    errorText_ = "RtApiPulse::startStream(): the stream is already running!";
-    error( RtAudioError::WARNING );
-    return;
-  default:
-    break;
-  }
-
-  PulseAudioHandle *handle = static_cast<PulseAudioHandle *>( stream_.apiHandle );
-
-  // State change and wake-up happen together under the stream mutex.
-  MUTEX_LOCK( &stream_.mutex );
-  stream_.state = STREAM_RUNNING;
-  handle->runnable = true;
-  pthread_cond_signal( &handle->runnable_cv );
-  MUTEX_UNLOCK( &stream_.mutex );
-}
-
-// Stop a running stream, letting queued playback data drain first.
-//
-// Stopping a closed stream is reported as INVALID_USE and stopping an
-// already stopped one as a WARNING.  A failing drain is reported as
-// SYSTEM_ERROR; the stream is left in STREAM_STOPPED in that case too.
-void RtApiPulse::stopStream( void )
-{
-  PulseAudioHandle *pah = static_cast<PulseAudioHandle *>( stream_.apiHandle );
-
-  if ( stream_.state == STREAM_CLOSED ) {
-    errorText_ = "RtApiPulse::stopStream(): the stream is not open!";
-    error( RtAudioError::INVALID_USE );
-    return;
-  }
-  if ( stream_.state == STREAM_STOPPED ) {
-    errorText_ = "RtApiPulse::stopStream(): the stream is already stopped!";
-    error( RtAudioError::WARNING );
-    return;
-  }
-
-  // Published before taking the mutex so that a callback iteration that
-  // currently holds it sees the new state as soon as it re-checks.
-  stream_.state = STREAM_STOPPED;
-  MUTEX_LOCK( &stream_.mutex );
-
-  if ( pah ) {
-    // Clear the wake-up flag.  callbackEvent() only blocks on runnable_cv
-    // while 'runnable' is false; if it stayed true after the first start,
-    // the callback thread would spin at full speed through the
-    // "stopped" branch instead of sleeping.
-    pah->runnable = false;
-
-    if ( pah->s_play ) {
-      int pa_error;
-      if ( pa_simple_drain( pah->s_play, &pa_error ) < 0 ) {
-        errorStream_ << "RtApiPulse::stopStream: error draining output device, " <<
-          pa_strerror( pa_error ) << ".";
-        errorText_ = errorStream_.str();
-        MUTEX_UNLOCK( &stream_.mutex );
-        error( RtAudioError::SYSTEM_ERROR );
-        return;
-      }
-    }
-  }
-
-  stream_.state = STREAM_STOPPED;
-  MUTEX_UNLOCK( &stream_.mutex );
-}
-
-// Stop a running stream immediately, discarding queued playback data.
-//
-// Aborting a closed stream is reported as INVALID_USE and aborting an
-// already stopped one as a WARNING.  A failing flush is reported as
-// SYSTEM_ERROR; the stream is left in STREAM_STOPPED in that case too.
-void RtApiPulse::abortStream( void )
-{
-  PulseAudioHandle *pah = static_cast<PulseAudioHandle*>( stream_.apiHandle );
-
-  if ( stream_.state == STREAM_CLOSED ) {
-    errorText_ = "RtApiPulse::abortStream(): the stream is not open!";
-    error( RtAudioError::INVALID_USE );
-    return;
-  }
-  if ( stream_.state == STREAM_STOPPED ) {
-    errorText_ = "RtApiPulse::abortStream(): the stream is already stopped!";
-    error( RtAudioError::WARNING );
-    return;
-  }
-
-  // Published before taking the mutex so that a callback iteration that
-  // currently holds it sees the new state as soon as it re-checks.
-  stream_.state = STREAM_STOPPED;
-  MUTEX_LOCK( &stream_.mutex );
-
-  if ( pah ) {
-    // Clear the wake-up flag.  callbackEvent() only blocks on runnable_cv
-    // while 'runnable' is false; if it stayed true after the first start,
-    // the callback thread would spin at full speed through the
-    // "stopped" branch instead of sleeping.
-    pah->runnable = false;
-
-    if ( pah->s_play ) {
-      int pa_error;
-      if ( pa_simple_flush( pah->s_play, &pa_error ) < 0 ) {
-        errorStream_ << "RtApiPulse::abortStream: error flushing output device, " <<
-          pa_strerror( pa_error ) << ".";
-        errorText_ = errorStream_.str();
-        MUTEX_UNLOCK( &stream_.mutex );
-        error( RtAudioError::SYSTEM_ERROR );
-        return;
-      }
-    }
-  }
-
-  stream_.state = STREAM_STOPPED;
-  MUTEX_UNLOCK( &stream_.mutex );
-}
-
-// Open one direction (INPUT or OUTPUT) of the PulseAudio stream.
-//
-// device       must be 0 (the only device this backend exposes)
-// mode         INPUT or OUTPUT; a second call with the other mode turns
-//              the stream into DUPLEX
-// channels     1 or 2
-// firstChannel must be 0
-// sampleRate   must be one of SUPPORTED_SAMPLERATES
-// format       user sample format; formats missing from
-//              supported_sampleformats are converted to/from FLOAT32
-// bufferSize   frames per buffer, used as given
-// options      optional; the RTAUDIO_NONINTERLEAVED flag and streamName
-//              are honoured
-//
-// Returns true on success.  On failure returns false/FAILURE, usually
-// with errorText_ set, after freeing the user and device buffers.
-bool RtApiPulse::probeDeviceOpen( unsigned int device, StreamMode mode,
-                                  unsigned int channels, unsigned int firstChannel,
-                                  unsigned int sampleRate, RtAudioFormat format,
-                                  unsigned int *bufferSize, RtAudio::StreamOptions *options )
-{
-  PulseAudioHandle *pah = 0;
-  unsigned long bufferBytes = 0;
-  pa_sample_spec ss;
-
-  if ( device != 0 ) return false;
-  if ( mode != INPUT && mode != OUTPUT ) return false;
-  if ( channels != 1 && channels != 2 ) {
-    errorText_ = "RtApiPulse::probeDeviceOpen: unsupported number of channels.";
-    return false;
-  }
-  ss.channels = channels;
-
-  if ( firstChannel != 0 ) return false;
-
-  bool sr_found = false;
-  for ( const unsigned int *sr = SUPPORTED_SAMPLERATES; *sr; ++sr ) {
-    if ( sampleRate == *sr ) {
-      sr_found = true;
-      stream_.sampleRate = sampleRate;
-      ss.rate = sampleRate;
-      break;
-    }
-  }
-  if ( !sr_found ) {
-    errorText_ = "RtApiPulse::probeDeviceOpen: unsupported sample rate.";
-    return false;
-  }
-
-  bool sf_found = false;
-  for ( const rtaudio_pa_format_mapping_t *sf = supported_sampleformats;
-        sf->rtaudio_format && sf->pa_format != PA_SAMPLE_INVALID; ++sf ) {
-    if ( format == sf->rtaudio_format ) {
-      sf_found = true;
-      stream_.userFormat = sf->rtaudio_format;
-      stream_.deviceFormat[mode] = stream_.userFormat;
-      ss.format = sf->pa_format;
-      break;
-    }
-  }
-  if ( !sf_found ) { // Use internal data format conversion.
-    stream_.userFormat = format;
-    stream_.deviceFormat[mode] = RTAUDIO_FLOAT32;
-    ss.format = PA_SAMPLE_FLOAT32LE;
-  }
-
-  // Set other stream parameters.
-  if ( options && options->flags & RTAUDIO_NONINTERLEAVED ) stream_.userInterleaved = false;
-  else stream_.userInterleaved = true;
-  stream_.deviceInterleaved[mode] = true;
-  stream_.nBuffers = 1;
-  stream_.doByteSwap[mode] = false;
-  stream_.nUserChannels[mode] = channels;
-  stream_.nDeviceChannels[mode] = channels + firstChannel;
-  stream_.channelOffset[mode] = 0;
-  std::string streamName = "RtAudio";
-
-  // Set flags for buffer conversion.
-  stream_.doConvertBuffer[mode] = false;
-  if ( stream_.userFormat != stream_.deviceFormat[mode] )
-    stream_.doConvertBuffer[mode] = true;
-  if ( stream_.nUserChannels[mode] < stream_.nDeviceChannels[mode] )
-    stream_.doConvertBuffer[mode] = true;
-
-  // Allocate necessary internal buffers.
-  bufferBytes = stream_.nUserChannels[mode] * *bufferSize * formatBytes( stream_.userFormat );
-  stream_.userBuffer[mode] = (char *) calloc( bufferBytes, 1 );
-  if ( stream_.userBuffer[mode] == NULL ) {
-    errorText_ = "RtApiPulse::probeDeviceOpen: error allocating user buffer memory.";
-    goto error;
-  }
-  stream_.bufferSize = *bufferSize;
-
-  if ( stream_.doConvertBuffer[mode] ) {
-
-    // The device buffer is shared by both directions: when opening the
-    // input side after the output side, reuse the existing buffer if it
-    // is already large enough per frame.
-    bool makeBuffer = true;
-    bufferBytes = stream_.nDeviceChannels[mode] * formatBytes( stream_.deviceFormat[mode] );
-    if ( mode == INPUT ) {
-      if ( stream_.mode == OUTPUT && stream_.deviceBuffer ) {
-        unsigned long bytesOut = stream_.nDeviceChannels[0] * formatBytes( stream_.deviceFormat[0] );
-        if ( bufferBytes <= bytesOut ) makeBuffer = false;
-      }
-    }
-
-    if ( makeBuffer ) {
-      bufferBytes *= *bufferSize;
-      if ( stream_.deviceBuffer ) free( stream_.deviceBuffer );
-      stream_.deviceBuffer = (char *) calloc( bufferBytes, 1 );
-      if ( stream_.deviceBuffer == NULL ) {
-        errorText_ = "RtApiPulse::probeDeviceOpen: error allocating device buffer memory.";
-        goto error;
-      }
-    }
-  }
-
-  stream_.device[mode] = device;
-
-  // Setup the buffer conversion information structure.
-  if ( stream_.doConvertBuffer[mode] ) setConvertInfo( mode, firstChannel );
-
-  if ( !stream_.apiHandle ) {
-    // Plain 'new' reports failure by throwing, never by returning null.
-    try {
-      pah = new PulseAudioHandle;
-    }
-    catch ( std::bad_alloc& ) {
-      pah = 0;
-      errorText_ = "RtApiPulse::probeDeviceOpen: error allocating memory for handle.";
-      goto error;
-    }
-
-    // Publish the handle only once its condition variable is usable, so
-    // that a half-constructed handle is never left in stream_.apiHandle.
-    if ( pthread_cond_init( &pah->runnable_cv, NULL ) != 0 ) {
-      delete pah;
-      pah = 0;
-      errorText_ = "RtApiPulse::probeDeviceOpen: error creating condition variable.";
-      goto error;
-    }
-    stream_.apiHandle = pah;
-  }
-  pah = static_cast<PulseAudioHandle *>( stream_.apiHandle );
-
-  int error;
-  if ( options && !options->streamName.empty() ) streamName = options->streamName;
-  switch ( mode ) {
-  case INPUT:
-    pa_buffer_attr buffer_attr;
-    buffer_attr.fragsize = bufferBytes;
-    buffer_attr.maxlength = -1;
-
-    pah->s_rec = pa_simple_new( NULL, streamName.c_str(), PA_STREAM_RECORD, NULL, "Record", &ss, NULL, &buffer_attr, &error );
-    if ( !pah->s_rec ) {
-      errorText_ = "RtApiPulse::probeDeviceOpen: error connecting input to PulseAudio server.";
-      goto error;
-    }
-    break;
-  case OUTPUT:
-    // Use the caller's stream name for playback as well, matching the
-    // record connection above (it still defaults to "RtAudio").
-    pah->s_play = pa_simple_new( NULL, streamName.c_str(), PA_STREAM_PLAYBACK, NULL, "Playback", &ss, NULL, NULL, &error );
-    if ( !pah->s_play ) {
-      errorText_ = "RtApiPulse::probeDeviceOpen: error connecting output to PulseAudio server.";
-      goto error;
-    }
-    break;
-  default:
-    goto error;
-  }
-
-  if ( stream_.mode == UNINITIALIZED )
-    stream_.mode = mode;
-  else if ( stream_.mode == mode )
-    goto error;
-  else
-    stream_.mode = DUPLEX;
-
-  // The callback thread is created once, by whichever direction is
-  // opened first.
-  if ( !stream_.callbackInfo.isRunning ) {
-    stream_.callbackInfo.object = this;
-    stream_.callbackInfo.isRunning = true;
-    if ( pthread_create( &pah->thread, NULL, pulseaudio_callback, (void *)&stream_.callbackInfo) != 0 ) {
-      errorText_ = "RtApiPulse::probeDeviceOpen: error creating thread.";
-      goto error;
-    }
-  }
-
-  stream_.state = STREAM_STOPPED;
-  return true;
-
- error:
-  // NOTE(review): the handle is released only while
-  // callbackInfo.isRunning is set, and any pa_simple connection it holds
-  // is not freed here -- confirm who owns the handle on the other
-  // failure paths.
-  if ( pah && stream_.callbackInfo.isRunning ) {
-    pthread_cond_destroy( &pah->runnable_cv );
-    delete pah;
-    stream_.apiHandle = 0;
-  }
-
-  for ( int i=0; i<2; i++ ) {
-    if ( stream_.userBuffer[i] ) {
-      free( stream_.userBuffer[i] );
-      stream_.userBuffer[i] = 0;
-    }
-  }
-
-  if ( stream_.deviceBuffer ) {
-    free( stream_.deviceBuffer );
-    stream_.deviceBuffer = 0;
-  }
-
-  return FAILURE;
-}
-
-//******************** End of __LINUX_PULSE__ *********************//
-#endif
-
-#if defined(__LINUX_OSS__)
-
-#include <unistd.h>
-#include <sys/ioctl.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <sys/soundcard.h>
-#include <errno.h>
-#include <math.h>
-
-static void *ossCallbackHandler(void * ptr);
-
-// Bookkeeping for one OSS stream, kept in stream_.apiHandle.  The
-// two-element arrays are indexed by stream direction.
-struct OssHandle {
-  int id[2];                // device ids
-  bool xrun[2];
-  bool triggered;
-  pthread_cond_t runnable;
-
-  // Everything except the condition variable starts out zero/false.
-  OssHandle()
-    : triggered( false )
-  {
-    for ( int i = 0; i < 2; i++ ) {
-      id[i] = 0;
-      xrun[i] = false;
-    }
-  }
-};
-
-// Default constructor: the OSS backend needs no construction-time setup.
-RtApiOss :: RtApiOss()
-{
-}
-
-// Destructor: closes the stream if it has not been closed already.
-RtApiOss :: ~RtApiOss()
-{
-  if ( stream_.state == STREAM_CLOSED ) return;
-
-  closeStream();
-}
-
-// Number of audio devices reported by the OSS mixer (sysinfo.numaudios).
-// Returns 0, after issuing a warning, when /dev/mixer cannot be opened
-// or the SNDCTL_SYSINFO query fails.
-unsigned int RtApiOss :: getDeviceCount( void )
-{
-  const int mixer = open( "/dev/mixer", O_RDWR, 0 );
-  if ( mixer == -1 ) {
-    errorText_ = "RtApiOss::getDeviceCount: error opening '/dev/mixer'.";
-    error( RtAudioError::WARNING );
-    return 0;
-  }
-
-  oss_sysinfo sysinfo;
-  const int status = ioctl( mixer, SNDCTL_SYSINFO, &sysinfo );
-
-  // The descriptor is only needed for the query itself.
-  close( mixer );
-
-  if ( status == -1 ) {
-    errorText_ = "RtApiOss::getDeviceCount: error getting sysinfo, OSS version >= 4.0 is required.";
-    error( RtAudioError::WARNING );
-    return 0;
-  }
-
-  return sysinfo.numaudios;
-}
-
-// Probe OSS device number 'device' and describe it.
-//
-// Fills in channel counts, native sample formats (taken from the
-// device's input format mask), supported sample rates and the preferred
-// sample rate.  info.probed is set to true, and info.name filled in,
-// only when at least one supported sample rate was found; on every
-// failure an error/warning is raised and the partially filled info is
-// returned with probed == false.
-RtAudio::DeviceInfo RtApiOss :: getDeviceInfo( unsigned int device )
-{
-  RtAudio::DeviceInfo info;
-  info.probed = false;
-
-  int mixerfd = open( "/dev/mixer", O_RDWR, 0 );
-  if ( mixerfd == -1 ) {
-    errorText_ = "RtApiOss::getDeviceInfo: error opening '/dev/mixer'.";
-    error( RtAudioError::WARNING );
-    return info;
-  }
-
-  oss_sysinfo sysinfo;
-  int result = ioctl( mixerfd, SNDCTL_SYSINFO, &sysinfo );
-  if ( result == -1 ) {
-    close( mixerfd );
-    errorText_ = "RtApiOss::getDeviceInfo: error getting sysinfo, OSS version >= 4.0 is required.";
-    error( RtAudioError::WARNING );
-    return info;
-  }
-
-  unsigned nDevices = sysinfo.numaudios;
-  if ( nDevices == 0 ) {
-    close( mixerfd );
-    errorText_ = "RtApiOss::getDeviceInfo: no devices found!";
-    error( RtAudioError::INVALID_USE );
-    return info;
-  }
-
-  if ( device >= nDevices ) {
-    close( mixerfd );
-    errorText_ = "RtApiOss::getDeviceInfo: device ID is invalid!";
-    error( RtAudioError::INVALID_USE );
-    return info;
-  }
-
-  oss_audioinfo ainfo;
-  ainfo.dev = device;
-  result = ioctl( mixerfd, SNDCTL_AUDIOINFO, &ainfo );
-  close( mixerfd );
-  if ( result == -1 ) {
-    // 'ainfo' was never filled in when the ioctl fails, so its name field
-    // is uninitialized memory; identify the device by its index instead.
-    errorStream_ << "RtApiOss::getDeviceInfo: error getting device (" << device << ") info.";
-    errorText_ = errorStream_.str();
-    error( RtAudioError::WARNING );
-    return info;
-  }
-
-  // Probe channels
-  if ( ainfo.caps & PCM_CAP_OUTPUT ) info.outputChannels = ainfo.max_channels;
-  if ( ainfo.caps & PCM_CAP_INPUT ) info.inputChannels = ainfo.max_channels;
-  if ( ainfo.caps & PCM_CAP_DUPLEX ) {
-    // Duplex channel count is the smaller of the two directions.
-    if ( info.outputChannels > 0 && info.inputChannels > 0 )
-      info.duplexChannels = (info.outputChannels > info.inputChannels) ? info.inputChannels : info.outputChannels;
-  }
-
-  // Probe data formats ... do for input
-  unsigned long mask = ainfo.iformats;
-  if ( mask & AFMT_S16_LE || mask & AFMT_S16_BE )
-    info.nativeFormats |= RTAUDIO_SINT16;
-  if ( mask & AFMT_S8 )
-    info.nativeFormats |= RTAUDIO_SINT8;
-  if ( mask & AFMT_S32_LE || mask & AFMT_S32_BE )
-    info.nativeFormats |= RTAUDIO_SINT32;
-  if ( mask & AFMT_FLOAT )
-    info.nativeFormats |= RTAUDIO_FLOAT32;
-  if ( mask & AFMT_S24_LE || mask & AFMT_S24_BE )
-    info.nativeFormats |= RTAUDIO_SINT24;
-
-  // Check that we have at least one supported format
-  if ( info.nativeFormats == 0 ) {
-    errorStream_ << "RtApiOss::getDeviceInfo: device (" << ainfo.name << ") data format not supported by RtAudio.";
-    errorText_ = errorStream_.str();
-    error( RtAudioError::WARNING );
-    return info;
-  }
-
-  // Probe the supported sample rates.  The preferred rate is the highest
-  // supported rate that does not exceed 48000 (or the first supported
-  // rate when none qualifies).
-  info.sampleRates.clear();
-  if ( ainfo.nrates ) {
-    // The device lists discrete rates: keep those RtAudio knows about.
-    for ( unsigned int i=0; i<ainfo.nrates; i++ ) {
-      for ( unsigned int k=0; k<MAX_SAMPLE_RATES; k++ ) {
-        if ( ainfo.rates[i] == SAMPLE_RATES[k] ) {
-          info.sampleRates.push_back( SAMPLE_RATES[k] );
-
-          if ( !info.preferredSampleRate || ( SAMPLE_RATES[k] <= 48000 && SAMPLE_RATES[k] > info.preferredSampleRate ) )
-            info.preferredSampleRate = SAMPLE_RATES[k];
-
-          break;
-        }
-      }
-    }
-  }
-  else {
-    // Check min and max rate values;
-    for ( unsigned int k=0; k<MAX_SAMPLE_RATES; k++ ) {
-      if ( ainfo.min_rate <= (int) SAMPLE_RATES[k] && ainfo.max_rate >= (int) SAMPLE_RATES[k] ) {
-        info.sampleRates.push_back( SAMPLE_RATES[k] );
-
-        if ( !info.preferredSampleRate || ( SAMPLE_RATES[k] <= 48000 && SAMPLE_RATES[k] > info.preferredSampleRate ) )
-          info.preferredSampleRate = SAMPLE_RATES[k];
-      }
-    }
-  }
-
-  if ( info.sampleRates.size() == 0 ) {
-    errorStream_ << "RtApiOss::getDeviceInfo: no supported sample rates found for device (" << ainfo.name << ").";
-    errorText_ = errorStream_.str();
-    error( RtAudioError::WARNING );
-  }
-  else {
-    info.probed = true;
-    info.name = ainfo.name;
-  }
-
-  return info;
-}
-
-
-bool RtApiOss :: probeDeviceOpen( unsigned int device, StreamMode mode, unsigned int channels,
- unsigned int firstChannel, unsigned int sampleRate,
- RtAudioFormat format, unsigned int *bufferSize,
- RtAudio::StreamOptions *options )
-{
- int mixerfd = open( "/dev/mixer", O_RDWR, 0 );
- if ( mixerfd == -1 ) {
- errorText_ = "RtApiOss::probeDeviceOpen: error opening '/dev/mixer'.";
- return FAILURE;
- }
-
- oss_sysinfo sysinfo;
- int result = ioctl( mixerfd, SNDCTL_SYSINFO, &sysinfo );
- if ( result == -1 ) {
- close( mixerfd );
- errorText_ = "RtApiOss::probeDeviceOpen: error getting sysinfo, OSS version >= 4.0 is required.";
- return FAILURE;
- }
-
- unsigned nDevices = sysinfo.numaudios;
- if ( nDevices == 0 ) {
- // This should not happen because a check is made before this function is called.
- close( mixerfd );
- errorText_ = "RtApiOss::probeDeviceOpen: no devices found!";
- return FAILURE;
- }
-
- if ( device >= nDevices ) {
- // This should not happen because a check is made before this function is called.
- close( mixerfd );
- errorText_ = "RtApiOss::probeDeviceOpen: device ID is invalid!";
- return FAILURE;
- }
-
- oss_audioinfo ainfo;
- ainfo.dev = device;
- result = ioctl( mixerfd, SNDCTL_AUDIOINFO, &ainfo );
- close( mixerfd );
- if ( result == -1 ) {
- errorStream_ << "RtApiOss::getDeviceInfo: error getting device (" << ainfo.name << ") info.";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
-
- // Check if device supports input or output
- if ( ( mode == OUTPUT && !( ainfo.caps & PCM_CAP_OUTPUT ) ) ||
- ( mode == INPUT && !( ainfo.caps & PCM_CAP_INPUT ) ) ) {
- if ( mode == OUTPUT )
- errorStream_ << "RtApiOss::probeDeviceOpen: device (" << ainfo.name << ") does not support output.";
- else
- errorStream_ << "RtApiOss::probeDeviceOpen: device (" << ainfo.name << ") does not support input.";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
-
- int flags = 0;
- OssHandle *handle = (OssHandle *) stream_.apiHandle;
- if ( mode == OUTPUT )
- flags |= O_WRONLY;
- else { // mode == INPUT
- if (stream_.mode == OUTPUT && stream_.device[0] == device) {
- // We just set the same device for playback ... close and reopen for duplex (OSS only).
- close( handle->id[0] );
- handle->id[0] = 0;
- if ( !( ainfo.caps & PCM_CAP_DUPLEX ) ) {
- errorStream_ << "RtApiOss::probeDeviceOpen: device (" << ainfo.name << ") does not support duplex mode.";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
- // Check that the number previously set channels is the same.
- if ( stream_.nUserChannels[0] != channels ) {
- errorStream_ << "RtApiOss::probeDeviceOpen: input/output channels must be equal for OSS duplex device (" << ainfo.name << ").";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
- flags |= O_RDWR;
- }
- else
- flags |= O_RDONLY;
- }
-
- // Set exclusive access if specified.
- if ( options && options->flags & RTAUDIO_HOG_DEVICE ) flags |= O_EXCL;
-
- // Try to open the device.
- int fd;
- fd = open( ainfo.devnode, flags, 0 );
- if ( fd == -1 ) {
- if ( errno == EBUSY )
- errorStream_ << "RtApiOss::probeDeviceOpen: device (" << ainfo.name << ") is busy.";
- else
- errorStream_ << "RtApiOss::probeDeviceOpen: error opening device (" << ainfo.name << ").";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
-
- // For duplex operation, specifically set this mode (this doesn't seem to work).
- /*
- if ( flags | O_RDWR ) {
- result = ioctl( fd, SNDCTL_DSP_SETDUPLEX, NULL );
- if ( result == -1) {
- errorStream_ << "RtApiOss::probeDeviceOpen: error setting duplex mode for device (" << ainfo.name << ").";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
- }
- */
-
- // Check the device channel support.
- stream_.nUserChannels[mode] = channels;
- if ( ainfo.max_channels < (int)(channels + firstChannel) ) {
- close( fd );
- errorStream_ << "RtApiOss::probeDeviceOpen: the device (" << ainfo.name << ") does not support requested channel parameters.";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
-
- // Set the number of channels.
- int deviceChannels = channels + firstChannel;
- result = ioctl( fd, SNDCTL_DSP_CHANNELS, &deviceChannels );
- if ( result == -1 || deviceChannels < (int)(channels + firstChannel) ) {
- close( fd );
- errorStream_ << "RtApiOss::probeDeviceOpen: error setting channel parameters on device (" << ainfo.name << ").";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
- stream_.nDeviceChannels[mode] = deviceChannels;
-
- // Get the data format mask
- int mask;
- result = ioctl( fd, SNDCTL_DSP_GETFMTS, &mask );
- if ( result == -1 ) {
- close( fd );
- errorStream_ << "RtApiOss::probeDeviceOpen: error getting device (" << ainfo.name << ") data formats.";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
-
- // Determine how to set the device format.
- stream_.userFormat = format;
- int deviceFormat = -1;
- stream_.doByteSwap[mode] = false;
- if ( format == RTAUDIO_SINT8 ) {
- if ( mask & AFMT_S8 ) {
- deviceFormat = AFMT_S8;
- stream_.deviceFormat[mode] = RTAUDIO_SINT8;
- }
- }
- else if ( format == RTAUDIO_SINT16 ) {
- if ( mask & AFMT_S16_NE ) {
- deviceFormat = AFMT_S16_NE;
- stream_.deviceFormat[mode] = RTAUDIO_SINT16;
- }
- else if ( mask & AFMT_S16_OE ) {
- deviceFormat = AFMT_S16_OE;
- stream_.deviceFormat[mode] = RTAUDIO_SINT16;
- stream_.doByteSwap[mode] = true;
- }
- }
- else if ( format == RTAUDIO_SINT24 ) {
- if ( mask & AFMT_S24_NE ) {
- deviceFormat = AFMT_S24_NE;
- stream_.deviceFormat[mode] = RTAUDIO_SINT24;
- }
- else if ( mask & AFMT_S24_OE ) {
- deviceFormat = AFMT_S24_OE;
- stream_.deviceFormat[mode] = RTAUDIO_SINT24;
- stream_.doByteSwap[mode] = true;
- }
- }
- else if ( format == RTAUDIO_SINT32 ) {
- if ( mask & AFMT_S32_NE ) {
- deviceFormat = AFMT_S32_NE;
- stream_.deviceFormat[mode] = RTAUDIO_SINT32;
- }
- else if ( mask & AFMT_S32_OE ) {
- deviceFormat = AFMT_S32_OE;
- stream_.deviceFormat[mode] = RTAUDIO_SINT32;
- stream_.doByteSwap[mode] = true;
- }
- }
-
- if ( deviceFormat == -1 ) {
- // The user requested format is not natively supported by the device.
- if ( mask & AFMT_S16_NE ) {
- deviceFormat = AFMT_S16_NE;
- stream_.deviceFormat[mode] = RTAUDIO_SINT16;
- }
- else if ( mask & AFMT_S32_NE ) {
- deviceFormat = AFMT_S32_NE;
- stream_.deviceFormat[mode] = RTAUDIO_SINT32;
- }
- else if ( mask & AFMT_S24_NE ) {
- deviceFormat = AFMT_S24_NE;
- stream_.deviceFormat[mode] = RTAUDIO_SINT24;
- }
- else if ( mask & AFMT_S16_OE ) {
- deviceFormat = AFMT_S16_OE;
- stream_.deviceFormat[mode] = RTAUDIO_SINT16;
- stream_.doByteSwap[mode] = true;
- }
- else if ( mask & AFMT_S32_OE ) {
- deviceFormat = AFMT_S32_OE;
- stream_.deviceFormat[mode] = RTAUDIO_SINT32;
- stream_.doByteSwap[mode] = true;
- }
- else if ( mask & AFMT_S24_OE ) {
- deviceFormat = AFMT_S24_OE;
- stream_.deviceFormat[mode] = RTAUDIO_SINT24;
- stream_.doByteSwap[mode] = true;
- }
- else if ( mask & AFMT_S8) {
- deviceFormat = AFMT_S8;
- stream_.deviceFormat[mode] = RTAUDIO_SINT8;
- }
- }
-
- if ( stream_.deviceFormat[mode] == 0 ) {
- // This really shouldn't happen ...
- close( fd );
- errorStream_ << "RtApiOss::probeDeviceOpen: device (" << ainfo.name << ") data format not supported by RtAudio.";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
-
- // Set the data format.
- int temp = deviceFormat;
- result = ioctl( fd, SNDCTL_DSP_SETFMT, &deviceFormat );
- if ( result == -1 || deviceFormat != temp ) {
- close( fd );
- errorStream_ << "RtApiOss::probeDeviceOpen: error setting data format on device (" << ainfo.name << ").";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
-
- // Attempt to set the buffer size. According to OSS, the minimum
- // number of buffers is two. The supposed minimum buffer size is 16
- // bytes, so that will be our lower bound. The argument to this
- // call is in the form 0xMMMMSSSS (hex), where the buffer size (in
- // bytes) is given as 2^SSSS and the number of buffers as 2^MMMM.
- // We'll check the actual value used near the end of the setup
- // procedure.
- int ossBufferBytes = *bufferSize * formatBytes( stream_.deviceFormat[mode] ) * deviceChannels;
- if ( ossBufferBytes < 16 ) ossBufferBytes = 16;
- int buffers = 0;
- if ( options ) buffers = options->numberOfBuffers;
- if ( options && options->flags & RTAUDIO_MINIMIZE_LATENCY ) buffers = 2;
- if ( buffers < 2 ) buffers = 3;
- temp = ((int) buffers << 16) + (int)( log10( (double)ossBufferBytes ) / log10( 2.0 ) );
- result = ioctl( fd, SNDCTL_DSP_SETFRAGMENT, &temp );
- if ( result == -1 ) {
- close( fd );
- errorStream_ << "RtApiOss::probeDeviceOpen: error setting buffer size on device (" << ainfo.name << ").";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
- stream_.nBuffers = buffers;
-
- // Save buffer size (in sample frames).
- *bufferSize = ossBufferBytes / ( formatBytes(stream_.deviceFormat[mode]) * deviceChannels );
- stream_.bufferSize = *bufferSize;
-
- // Set the sample rate.
- int srate = sampleRate;
- result = ioctl( fd, SNDCTL_DSP_SPEED, &srate );
- if ( result == -1 ) {
- close( fd );
- errorStream_ << "RtApiOss::probeDeviceOpen: error setting sample rate (" << sampleRate << ") on device (" << ainfo.name << ").";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
-
- // Verify the sample rate setup worked.
- if ( abs( srate - sampleRate ) > 100 ) {
- close( fd );
- errorStream_ << "RtApiOss::probeDeviceOpen: device (" << ainfo.name << ") does not support sample rate (" << sampleRate << ").";
- errorText_ = errorStream_.str();
- return FAILURE;
- }
- stream_.sampleRate = sampleRate;
-
- if ( mode == INPUT && stream_.mode == OUTPUT && stream_.device[0] == device) {
- // We're doing duplex setup here.
- stream_.deviceFormat[0] = stream_.deviceFormat[1];
- stream_.nDeviceChannels[0] = deviceChannels;
- }
-
- // Set interleaving parameters.
- stream_.userInterleaved = true;
- stream_.deviceInterleaved[mode] = true;
- if ( options && options->flags & RTAUDIO_NONINTERLEAVED )
- stream_.userInterleaved = false;
-
- // Set flags for buffer conversion
- stream_.doConvertBuffer[mode] = false;
- if ( stream_.userFormat != stream_.deviceFormat[mode] )
- stream_.doConvertBuffer[mode] = true;
- if ( stream_.nUserChannels[mode] < stream_.nDeviceChannels[mode] )
- stream_.doConvertBuffer[mode] = true;
- if ( stream_.userInterleaved != stream_.deviceInterleaved[mode] &&
- stream_.nUserChannels[mode] > 1 )
- stream_.doConvertBuffer[mode] = true;
-
- // Allocate the stream handles if necessary and then save.
- if ( stream_.apiHandle == 0 ) {
- try {
- handle = new OssHandle;
- }
- catch ( std::bad_alloc& ) {
- errorText_ = "RtApiOss::probeDeviceOpen: error allocating OssHandle memory.";
- goto error;
- }
-
- if ( pthread_cond_init( &handle->runnable, NULL ) ) {
- errorText_ = "RtApiOss::probeDeviceOpen: error initializing pthread condition variable.";
- goto error;
- }
-
- stream_.apiHandle = (void *) handle;
- }
- else {
- handle = (OssHandle *) stream_.apiHandle;
- }
- handle->id[mode] = fd;
-
- // Allocate necessary internal buffers.
- unsigned long bufferBytes;
- bufferBytes = stream_.nUserChannels[mode] * *bufferSize * formatBytes( stream_.userFormat );
- stream_.userBuffer[mode] = (char *) calloc( bufferBytes, 1 );
- if ( stream_.userBuffer[mode] == NULL ) {
- errorText_ = "RtApiOss::probeDeviceOpen: error allocating user buffer memory.";
- goto error;
- }
-
- if ( stream_.doConvertBuffer[mode] ) {
-
- bool makeBuffer = true;
- bufferBytes = stream_.nDeviceChannels[mode] * formatBytes( stream_.deviceFormat[mode] );
- if ( mode == INPUT ) {
- if ( stream_.mode == OUTPUT && stream_.deviceBuffer ) {
- unsigned long bytesOut = stream_.nDeviceChannels[0] * formatBytes( stream_.deviceFormat[0] );
- if ( bufferBytes <= bytesOut ) makeBuffer = false;
- }
- }
-
- if ( makeBuffer ) {
- bufferBytes *= *bufferSize;
- if ( stream_.deviceBuffer ) free( stream_.deviceBuffer );
- stream_.deviceBuffer = (char *) calloc( bufferBytes, 1 );
- if ( stream_.deviceBuffer == NULL ) {
- errorText_ = "RtApiOss::probeDeviceOpen: error allocating device buffer memory.";
- goto error;
- }
- }
- }
-
- stream_.device[mode] = device;
- stream_.state = STREAM_STOPPED;
-
- // Setup the buffer conversion information structure.
- if ( stream_.doConvertBuffer[mode] ) setConvertInfo( mode, firstChannel );
-
- // Setup thread if necessary.
- if ( stream_.mode == OUTPUT && mode == INPUT ) {
- // We had already set up an output stream.
- stream_.mode = DUPLEX;
- if ( stream_.device[0] == device ) handle->id[0] = fd;
- }
- else {
- stream_.mode = mode;
-
- // Setup callback thread.
- stream_.callbackInfo.object = (void *) this;
-
- // Set the thread attributes for joinable and realtime scheduling
- // priority. The higher priority will only take affect if the
- // program is run as root or suid.
- pthread_attr_t attr;
- pthread_attr_init( &attr );
- pthread_attr_setdetachstate( &attr, PTHREAD_CREATE_JOINABLE );
-#ifdef SCHED_RR // Undefined with some OSes (eg: NetBSD 1.6.x with GNU Pthread)
- if ( options && options->flags & RTAUDIO_SCHEDULE_REALTIME ) {
- struct sched_param param;
- int priority = options->priority;
- int min = sched_get_priority_min( SCHED_RR );
- int max = sched_get_priority_max( SCHED_RR );
- if ( priority < min ) priority = min;
- else if ( priority > max ) priority = max;
- param.sched_priority = priority;
- pthread_attr_setschedparam( &attr, &param );
- pthread_attr_setschedpolicy( &attr, SCHED_RR );
- }
- else
- pthread_attr_setschedpolicy( &attr, SCHED_OTHER );
-#else
- pthread_attr_setschedpolicy( &attr, SCHED_OTHER );
-#endif
-
- stream_.callbackInfo.isRunning = true;
- result = pthread_create( &stream_.callbackInfo.thread, &attr, ossCallbackHandler, &stream_.callbackInfo );
- pthread_attr_destroy( &attr );
- if ( result ) {
- stream_.callbackInfo.isRunning = false;
- errorText_ = "RtApiOss::error creating callback thread!";
- goto error;
- }
- }
-
- return SUCCESS;
-
- error:
- if ( handle ) {
- pthread_cond_destroy( &handle->runnable );
- if ( handle->id[0] ) close( handle->id[0] );
- if ( handle->id[1] ) close( handle->id[1] );
- delete handle;
- stream_.apiHandle = 0;
- }
-
- for ( int i=0; i<2; i++ ) {
- if ( stream_.userBuffer[i] ) {
- free( stream_.userBuffer[i] );
- stream_.userBuffer[i] = 0;
- }
- }
-
- if ( stream_.deviceBuffer ) {
- free( stream_.deviceBuffer );
- stream_.deviceBuffer = 0;
- }
-
- return FAILURE;
-}
-
-void RtApiOss :: closeStream()
-{
- if ( stream_.state == STREAM_CLOSED ) {
- errorText_ = "RtApiOss::closeStream(): no open stream to close!";
- error( RtAudioError::WARNING );
- return;
- }
-
- OssHandle *handle = (OssHandle *) stream_.apiHandle;
- stream_.callbackInfo.isRunning = false;
- MUTEX_LOCK( &stream_.mutex );
- if ( stream_.state == STREAM_STOPPED )
- pthread_cond_signal( &handle->runnable );
- MUTEX_UNLOCK( &stream_.mutex );
- pthread_join( stream_.callbackInfo.thread, NULL );
-
- if ( stream_.state == STREAM_RUNNING ) {
- if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX )
- ioctl( handle->id[0], SNDCTL_DSP_HALT, 0 );
- else
- ioctl( handle->id[1], SNDCTL_DSP_HALT, 0 );
- stream_.state = STREAM_STOPPED;
- }
-
- if ( handle ) {
- pthread_cond_destroy( &handle->runnable );
- if ( handle->id[0] ) close( handle->id[0] );
- if ( handle->id[1] ) close( handle->id[1] );
- delete handle;
- stream_.apiHandle = 0;
- }
-
- for ( int i=0; i<2; i++ ) {
- if ( stream_.userBuffer[i] ) {
- free( stream_.userBuffer[i] );
- stream_.userBuffer[i] = 0;
- }
- }
-
- if ( stream_.deviceBuffer ) {
- free( stream_.deviceBuffer );
- stream_.deviceBuffer = 0;
- }
-
- stream_.mode = UNINITIALIZED;
- stream_.state = STREAM_CLOSED;
-}
-
-void RtApiOss :: startStream()
-{
- verifyStream();
- if ( stream_.state == STREAM_RUNNING ) {
- errorText_ = "RtApiOss::startStream(): the stream is already running!";
- error( RtAudioError::WARNING );
- return;
- }
-
- MUTEX_LOCK( &stream_.mutex );
-
- stream_.state = STREAM_RUNNING;
-
- // No need to do anything else here ... OSS automatically starts
- // when fed samples.
-
- MUTEX_UNLOCK( &stream_.mutex );
-
- OssHandle *handle = (OssHandle *) stream_.apiHandle;
- pthread_cond_signal( &handle->runnable );
-}
-
-void RtApiOss :: stopStream()
-{
- verifyStream();
- if ( stream_.state == STREAM_STOPPED ) {
- errorText_ = "RtApiOss::stopStream(): the stream is already stopped!";
- error( RtAudioError::WARNING );
- return;
- }
-
- MUTEX_LOCK( &stream_.mutex );
-
- // The state might change while waiting on a mutex.
- if ( stream_.state == STREAM_STOPPED ) {
- MUTEX_UNLOCK( &stream_.mutex );
- return;
- }
-
- int result = 0;
- OssHandle *handle = (OssHandle *) stream_.apiHandle;
- if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
-
- // Flush the output with zeros a few times.
- char *buffer;
- int samples;
- RtAudioFormat format;
-
- if ( stream_.doConvertBuffer[0] ) {
- buffer = stream_.deviceBuffer;
- samples = stream_.bufferSize * stream_.nDeviceChannels[0];
- format = stream_.deviceFormat[0];
- }
- else {
- buffer = stream_.userBuffer[0];
- samples = stream_.bufferSize * stream_.nUserChannels[0];
- format = stream_.userFormat;
- }
-
- memset( buffer, 0, samples * formatBytes(format) );
- for ( unsigned int i=0; i<stream_.nBuffers+1; i++ ) {
- result = write( handle->id[0], buffer, samples * formatBytes(format) );
- if ( result == -1 ) {
- errorText_ = "RtApiOss::stopStream: audio write error.";
- error( RtAudioError::WARNING );
- }
- }
-
- result = ioctl( handle->id[0], SNDCTL_DSP_HALT, 0 );
- if ( result == -1 ) {
- errorStream_ << "RtApiOss::stopStream: system error stopping callback procedure on device (" << stream_.device[0] << ").";
- errorText_ = errorStream_.str();
- goto unlock;
- }
- handle->triggered = false;
- }
-
- if ( stream_.mode == INPUT || ( stream_.mode == DUPLEX && handle->id[0] != handle->id[1] ) ) {
- result = ioctl( handle->id[1], SNDCTL_DSP_HALT, 0 );
- if ( result == -1 ) {
- errorStream_ << "RtApiOss::stopStream: system error stopping input callback procedure on device (" << stream_.device[0] << ").";
- errorText_ = errorStream_.str();
- goto unlock;
- }
- }
-
- unlock:
- stream_.state = STREAM_STOPPED;
- MUTEX_UNLOCK( &stream_.mutex );
-
- if ( result != -1 ) return;
- error( RtAudioError::SYSTEM_ERROR );
-}
-
-void RtApiOss :: abortStream()
-{
- verifyStream();
- if ( stream_.state == STREAM_STOPPED ) {
- errorText_ = "RtApiOss::abortStream(): the stream is already stopped!";
- error( RtAudioError::WARNING );
- return;
- }
-
- MUTEX_LOCK( &stream_.mutex );
-
- // The state might change while waiting on a mutex.
- if ( stream_.state == STREAM_STOPPED ) {
- MUTEX_UNLOCK( &stream_.mutex );
- return;
- }
-
- int result = 0;
- OssHandle *handle = (OssHandle *) stream_.apiHandle;
- if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
- result = ioctl( handle->id[0], SNDCTL_DSP_HALT, 0 );
- if ( result == -1 ) {
- errorStream_ << "RtApiOss::abortStream: system error stopping callback procedure on device (" << stream_.device[0] << ").";
- errorText_ = errorStream_.str();
- goto unlock;
- }
- handle->triggered = false;
- }
-
- if ( stream_.mode == INPUT || ( stream_.mode == DUPLEX && handle->id[0] != handle->id[1] ) ) {
- result = ioctl( handle->id[1], SNDCTL_DSP_HALT, 0 );
- if ( result == -1 ) {
- errorStream_ << "RtApiOss::abortStream: system error stopping input callback procedure on device (" << stream_.device[0] << ").";
- errorText_ = errorStream_.str();
- goto unlock;
- }
- }
-
- unlock:
- stream_.state = STREAM_STOPPED;
- MUTEX_UNLOCK( &stream_.mutex );
-
- if ( result != -1 ) return;
- error( RtAudioError::SYSTEM_ERROR );
-}
-
-void RtApiOss :: callbackEvent()
-{
- OssHandle *handle = (OssHandle *) stream_.apiHandle;
- if ( stream_.state == STREAM_STOPPED ) {
- MUTEX_LOCK( &stream_.mutex );
- pthread_cond_wait( &handle->runnable, &stream_.mutex );
- if ( stream_.state != STREAM_RUNNING ) {
- MUTEX_UNLOCK( &stream_.mutex );
- return;
- }
- MUTEX_UNLOCK( &stream_.mutex );
- }
-
- if ( stream_.state == STREAM_CLOSED ) {
- errorText_ = "RtApiOss::callbackEvent(): the stream is closed ... this shouldn't happen!";
- error( RtAudioError::WARNING );
- return;
- }
-
- // Invoke user callback to get fresh output data.
- int doStopStream = 0;
- RtAudioCallback callback = (RtAudioCallback) stream_.callbackInfo.callback;
- double streamTime = getStreamTime();
- RtAudioStreamStatus status = 0;
- if ( stream_.mode != INPUT && handle->xrun[0] == true ) {
- status |= RTAUDIO_OUTPUT_UNDERFLOW;
- handle->xrun[0] = false;
- }
- if ( stream_.mode != OUTPUT && handle->xrun[1] == true ) {
- status |= RTAUDIO_INPUT_OVERFLOW;
- handle->xrun[1] = false;
- }
- doStopStream = callback( stream_.userBuffer[0], stream_.userBuffer[1],
- stream_.bufferSize, streamTime, status, stream_.callbackInfo.userData );
- if ( doStopStream == 2 ) {
- this->abortStream();
- return;
- }
-
- MUTEX_LOCK( &stream_.mutex );
-
- // The state might change while waiting on a mutex.
- if ( stream_.state == STREAM_STOPPED ) goto unlock;
-
- int result;
- char *buffer;
- int samples;
- RtAudioFormat format;
-
- if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) {
-
- // Setup parameters and do buffer conversion if necessary.
- if ( stream_.doConvertBuffer[0] ) {
- buffer = stream_.deviceBuffer;
- convertBuffer( buffer, stream_.userBuffer[0], stream_.convertInfo[0] );
- samples = stream_.bufferSize * stream_.nDeviceChannels[0];
- format = stream_.deviceFormat[0];
- }
- else {
- buffer = stream_.userBuffer[0];
- samples = stream_.bufferSize * stream_.nUserChannels[0];
- format = stream_.userFormat;
- }
-
- // Do byte swapping if necessary.
- if ( stream_.doByteSwap[0] )
- byteSwapBuffer( buffer, samples, format );
-
- if ( stream_.mode == DUPLEX && handle->triggered == false ) {
- int trig = 0;
- ioctl( handle->id[0], SNDCTL_DSP_SETTRIGGER, &trig );
- result = write( handle->id[0], buffer, samples * formatBytes(format) );
- trig = PCM_ENABLE_INPUT|PCM_ENABLE_OUTPUT;
- ioctl( handle->id[0], SNDCTL_DSP_SETTRIGGER, &trig );
- handle->triggered = true;
- }
- else
- // Write samples to device.
- result = write( handle->id[0], buffer, samples * formatBytes(format) );
-
- if ( result == -1 ) {
- // We'll assume this is an underrun, though there isn't a
- // specific means for determining that.
- handle->xrun[0] = true;
- errorText_ = "RtApiOss::callbackEvent: audio write error.";
- error( RtAudioError::WARNING );
- // Continue on to input section.
- }
- }
-
- if ( stream_.mode == INPUT || stream_.mode == DUPLEX ) {
-
- // Setup parameters.
- if ( stream_.doConvertBuffer[1] ) {
- buffer = stream_.deviceBuffer;
- samples = stream_.bufferSize * stream_.nDeviceChannels[1];
- format = stream_.deviceFormat[1];
- }
- else {
- buffer = stream_.userBuffer[1];
- samples = stream_.bufferSize * stream_.nUserChannels[1];
- format = stream_.userFormat;
- }
-
- // Read samples from device.
- result = read( handle->id[1], buffer, samples * formatBytes(format) );
-
- if ( result == -1 ) {
- // We'll assume this is an overrun, though there isn't a
- // specific means for determining that.
- handle->xrun[1] = true;
- errorText_ = "RtApiOss::callbackEvent: audio read error.";
- error( RtAudioError::WARNING );
- goto unlock;
- }
-
- // Do byte swapping if necessary.
- if ( stream_.doByteSwap[1] )
- byteSwapBuffer( buffer, samples, format );
-
- // Do buffer conversion if necessary.
- if ( stream_.doConvertBuffer[1] )
- convertBuffer( stream_.userBuffer[1], stream_.deviceBuffer, stream_.convertInfo[1] );
- }
-
- unlock:
- MUTEX_UNLOCK( &stream_.mutex );
-
- RtApi::tickStreamTime();
- if ( doStopStream == 1 ) this->stopStream();
-}
-
-static void *ossCallbackHandler( void *ptr )
-{
- CallbackInfo *info = (CallbackInfo *) ptr;
- RtApiOss *object = (RtApiOss *) info->object;
- bool *isRunning = &info->isRunning;
-
- while ( *isRunning == true ) {
- pthread_testcancel();
- object->callbackEvent();
- }
-
- pthread_exit( NULL );
-}
-
-//******************** End of __LINUX_OSS__ *********************//
-#endif
-
-
-// *************************************************** //
-//
-// Protected common (OS-independent) RtAudio methods.
-//
-// *************************************************** //
-
-// This method can be modified to control the behavior of error
-// message printing.
-void RtApi :: error( RtAudioError::Type type )
-{
- errorStream_.str(""); // clear the ostringstream
-
- RtAudioErrorCallback errorCallback = (RtAudioErrorCallback) stream_.callbackInfo.errorCallback;
- if ( errorCallback ) {
- // abortStream() can generate new error messages. Ignore them. Just keep original one.
-
- if ( firstErrorOccurred_ )
- return;
-
- firstErrorOccurred_ = true;
- const std::string errorMessage = errorText_;
-
- if ( type != RtAudioError::WARNING && stream_.state != STREAM_STOPPED) {
- stream_.callbackInfo.isRunning = false; // exit from the thread
- abortStream();
- }
-
- errorCallback( type, errorMessage );
- firstErrorOccurred_ = false;
- return;
- }
-
- if ( type == RtAudioError::WARNING && showWarnings_ == true )
- std::cerr << '\n' << errorText_ << "\n\n";
- else if ( type != RtAudioError::WARNING )
- throw( RtAudioError( errorText_, type ) );
-}
-
-void RtApi :: verifyStream()
-{
- if ( stream_.state == STREAM_CLOSED ) {
- errorText_ = "RtApi:: a stream is not open!";
- error( RtAudioError::INVALID_USE );
- }
-}
-
-void RtApi :: clearStreamInfo()
-{
- stream_.mode = UNINITIALIZED;
- stream_.state = STREAM_CLOSED;
- stream_.sampleRate = 0;
- stream_.bufferSize = 0;
- stream_.nBuffers = 0;
- stream_.userFormat = 0;
- stream_.userInterleaved = true;
- stream_.streamTime = 0.0;
- stream_.apiHandle = 0;
- stream_.deviceBuffer = 0;
- stream_.callbackInfo.callback = 0;
- stream_.callbackInfo.userData = 0;
- stream_.callbackInfo.isRunning = false;
- stream_.callbackInfo.errorCallback = 0;
- for ( int i=0; i<2; i++ ) {
- stream_.device[i] = 11111;
- stream_.doConvertBuffer[i] = false;
- stream_.deviceInterleaved[i] = true;
- stream_.doByteSwap[i] = false;
- stream_.nUserChannels[i] = 0;
- stream_.nDeviceChannels[i] = 0;
- stream_.channelOffset[i] = 0;
- stream_.deviceFormat[i] = 0;
- stream_.latency[i] = 0;
- stream_.userBuffer[i] = 0;
- stream_.convertInfo[i].channels = 0;
- stream_.convertInfo[i].inJump = 0;
- stream_.convertInfo[i].outJump = 0;
- stream_.convertInfo[i].inFormat = 0;
- stream_.convertInfo[i].outFormat = 0;
- stream_.convertInfo[i].inOffset.clear();
- stream_.convertInfo[i].outOffset.clear();
- }
-}
-
-unsigned int RtApi :: formatBytes( RtAudioFormat format )
-{
- if ( format == RTAUDIO_SINT16 )
- return 2;
- else if ( format == RTAUDIO_SINT32 || format == RTAUDIO_FLOAT32 )
- return 4;
- else if ( format == RTAUDIO_FLOAT64 )
- return 8;
- else if ( format == RTAUDIO_SINT24 )
- return 3;
- else if ( format == RTAUDIO_SINT8 )
- return 1;
-
- errorText_ = "RtApi::formatBytes: undefined format.";
- error( RtAudioError::WARNING );
-
- return 0;
-}
-
-void RtApi :: setConvertInfo( StreamMode mode, unsigned int firstChannel )
-{
- if ( mode == INPUT ) { // convert device to user buffer
- stream_.convertInfo[mode].inJump = stream_.nDeviceChannels[1];
- stream_.convertInfo[mode].outJump = stream_.nUserChannels[1];
- stream_.convertInfo[mode].inFormat = stream_.deviceFormat[1];
- stream_.convertInfo[mode].outFormat = stream_.userFormat;
- }
- else { // convert user to device buffer
- stream_.convertInfo[mode].inJump = stream_.nUserChannels[0];
- stream_.convertInfo[mode].outJump = stream_.nDeviceChannels[0];
- stream_.convertInfo[mode].inFormat = stream_.userFormat;
- stream_.convertInfo[mode].outFormat = stream_.deviceFormat[0];
- }
-
- if ( stream_.convertInfo[mode].inJump < stream_.convertInfo[mode].outJump )
- stream_.convertInfo[mode].channels = stream_.convertInfo[mode].inJump;
- else
- stream_.convertInfo[mode].channels = stream_.convertInfo[mode].outJump;
-
- // Set up the interleave/deinterleave offsets.
- if ( stream_.deviceInterleaved[mode] != stream_.userInterleaved ) {
- if ( ( mode == OUTPUT && stream_.deviceInterleaved[mode] ) ||
- ( mode == INPUT && stream_.userInterleaved ) ) {
- for ( int k=0; k<stream_.convertInfo[mode].channels; k++ ) {
- stream_.convertInfo[mode].inOffset.push_back( k * stream_.bufferSize );
- stream_.convertInfo[mode].outOffset.push_back( k );
- stream_.convertInfo[mode].inJump = 1;
- }
- }
- else {
- for ( int k=0; k<stream_.convertInfo[mode].channels; k++ ) {
- stream_.convertInfo[mode].inOffset.push_back( k );
- stream_.convertInfo[mode].outOffset.push_back( k * stream_.bufferSize );
- stream_.convertInfo[mode].outJump = 1;
- }
- }
- }
- else { // no (de)interleaving
- if ( stream_.userInterleaved ) {
- for ( int k=0; k<stream_.convertInfo[mode].channels; k++ ) {
- stream_.convertInfo[mode].inOffset.push_back( k );
- stream_.convertInfo[mode].outOffset.push_back( k );
- }
- }
- else {
- for ( int k=0; k<stream_.convertInfo[mode].channels; k++ ) {
- stream_.convertInfo[mode].inOffset.push_back( k * stream_.bufferSize );
- stream_.convertInfo[mode].outOffset.push_back( k * stream_.bufferSize );
- stream_.convertInfo[mode].inJump = 1;
- stream_.convertInfo[mode].outJump = 1;
- }
- }
- }
-
- // Add channel offset.
- if ( firstChannel > 0 ) {
- if ( stream_.deviceInterleaved[mode] ) {
- if ( mode == OUTPUT ) {
- for ( int k=0; k<stream_.convertInfo[mode].channels; k++ )
- stream_.convertInfo[mode].outOffset[k] += firstChannel;
- }
- else {
- for ( int k=0; k<stream_.convertInfo[mode].channels; k++ )
- stream_.convertInfo[mode].inOffset[k] += firstChannel;
- }
- }
- else {
- if ( mode == OUTPUT ) {
- for ( int k=0; k<stream_.convertInfo[mode].channels; k++ )
- stream_.convertInfo[mode].outOffset[k] += ( firstChannel * stream_.bufferSize );
- }
- else {
- for ( int k=0; k<stream_.convertInfo[mode].channels; k++ )
- stream_.convertInfo[mode].inOffset[k] += ( firstChannel * stream_.bufferSize );
- }
- }
- }
-}
-
-void RtApi :: convertBuffer( char *outBuffer, char *inBuffer, ConvertInfo &info )
-{
- // This function does format conversion, input/output channel compensation, and
- // data interleaving/deinterleaving. 24-bit integers are assumed to occupy
- // the lower three bytes of a 32-bit integer.
-
- // Clear our device buffer when in/out duplex device channels are different
- if ( outBuffer == stream_.deviceBuffer && stream_.mode == DUPLEX &&
- ( stream_.nDeviceChannels[0] < stream_.nDeviceChannels[1] ) )
- memset( outBuffer, 0, stream_.bufferSize * info.outJump * formatBytes( info.outFormat ) );
-
- int j;
- if (info.outFormat == RTAUDIO_FLOAT64) {
- Float64 scale;
- Float64 *out = (Float64 *)outBuffer;
-
- if (info.inFormat == RTAUDIO_SINT8) {
- signed char *in = (signed char *)inBuffer;
- scale = 1.0 / 127.5;
- for (unsigned int i=0; i<stream_.bufferSize; i++) {
- for (j=0; j<info.channels; j++) {
- out[info.outOffset[j]] = (Float64) in[info.inOffset[j]];
- out[info.outOffset[j]] += 0.5;
- out[info.outOffset[j]] *= scale;
- }
- in += info.inJump;
- out += info.outJump;
- }
- }
- else if (info.inFormat == RTAUDIO_SINT16) {
- Int16 *in = (Int16 *)inBuffer;
- scale = 1.0 / 32767.5;
- for (unsigned int i=0; i<stream_.bufferSize; i++) {
- for (j=0; j<info.channels; j++) {
- out[info.outOffset[j]] = (Float64) in[info.inOffset[j]];
- out[info.outOffset[j]] += 0.5;
- out[info.outOffset[j]] *= scale;
- }
- in += info.inJump;
- out += info.outJump;
- }
- }
- else if (info.inFormat == RTAUDIO_SINT24) {
- Int24 *in = (Int24 *)inBuffer;
- scale = 1.0 / 8388607.5;
- for (unsigned int i=0; i<stream_.bufferSize; i++) {
- for (j=0; j<info.channels; j++) {
- out[info.outOffset[j]] = (Float64) (in[info.inOffset[j]].asInt());
- out[info.outOffset[j]] += 0.5;
- out[info.outOffset[j]] *= scale;
- }
- in += info.inJump;
- out += info.outJump;
- }
- }
- else if (info.inFormat == RTAUDIO_SINT32) {
- Int32 *in = (Int32 *)inBuffer;
- scale = 1.0 / 2147483647.5;
- for (unsigned int i=0; i<stream_.bufferSize; i++) {
- for (j=0; j<info.channels; j++) {
- out[info.outOffset[j]] = (Float64) in[info.inOffset[j]];
- out[info.outOffset[j]] += 0.5;
- out[info.outOffset[j]] *= scale;
- }
- in += info.inJump;
- out += info.outJump;
- }
- }
- else if (info.inFormat == RTAUDIO_FLOAT32) {
- Float32 *in = (Float32 *)inBuffer;
- for (unsigned int i=0; i<stream_.bufferSize; i++) {
- for (j=0; j<info.channels; j++) {
- out[info.outOffset[j]] = (Float64) in[info.inOffset[j]];
- }
- in += info.inJump;
- out += info.outJump;
- }
- }
- else if (info.inFormat == RTAUDIO_FLOAT64) {
- // Channel compensation and/or (de)interleaving only.
- Float64 *in = (Float64 *)inBuffer;
- for (unsigned int i=0; i<stream_.bufferSize; i++) {
- for (j=0; j<info.channels; j++) {
- out[info.outOffset[j]] = in[info.inOffset[j]];
- }
- in += info.inJump;
- out += info.outJump;
- }
- }
- }
- else if (info.outFormat == RTAUDIO_FLOAT32) {
- Float32 scale;
- Float32 *out = (Float32 *)outBuffer;
-
- if (info.inFormat == RTAUDIO_SINT8) {
- signed char *in = (signed char *)inBuffer;
- scale = (Float32) ( 1.0 / 127.5 );
- for (unsigned int i=0; i<stream_.bufferSize; i++) {
- for (j=0; j<info.channels; j++) {
- out[info.outOffset[j]] = (Float32) in[info.inOffset[j]];
- out[info.outOffset[j]] += 0.5;
- out[info.outOffset[j]] *= scale;
- }
- in += info.inJump;
- out += info.outJump;
- }
- }
- else if (info.inFormat == RTAUDIO_SINT16) {
- Int16 *in = (Int16 *)inBuffer;
- scale = (Float32) ( 1.0 / 32767.5 );
- for (unsigned int i=0; i<stream_.bufferSize; i++) {
- for (j=0; j<info.channels; j++) {
- out[info.outOffset[j]] = (Float32) in[info.inOffset[j]];
- out[info.outOffset[j]] += 0.5;
- out[info.outOffset[j]] *= scale;
- }
- in += info.inJump;
- out += info.outJump;
- }
- }
- else if (info.inFormat == RTAUDIO_SINT24) {
- Int24 *in = (Int24 *)inBuffer;
- scale = (Float32) ( 1.0 / 8388607.5 );
- for (unsigned int i=0; i<stream_.bufferSize; i++) {
- for (j=0; j<info.channels; j++) {
- out[info.outOffset[j]] = (Float32) (in[info.inOffset[j]].asInt());
- out[info.outOffset[j]] += 0.5;
- out[info.outOffset[j]] *= scale;
- }
- in += info.inJump;
- out += info.outJump;
- }
- }
- else if (info.inFormat == RTAUDIO_SINT32) {
- Int32 *in = (Int32 *)inBuffer;
- scale = (Float32) ( 1.0 / 2147483647.5 );
- for (unsigned int i=0; i<stream_.bufferSize; i++) {
- for (j=0; j<info.channels; j++) {
- out[info.outOffset[j]] = (Float32) in[info.inOffset[j]];
- out[info.outOffset[j]] += 0.5;
- out[info.outOffset[j]] *= scale;
- }
- in += info.inJump;
- out += info.outJump;
- }
- }
- else if (info.inFormat == RTAUDIO_FLOAT32) {
- // Channel compensation and/or (de)interleaving only.
- Float32 *in = (Float32 *)inBuffer;
- for (unsigned int i=0; i<stream_.bufferSize; i++) {
- for (j=0; j<info.channels; j++) {
- out[info.outOffset[j]] = in[info.inOffset[j]];
- }
- in += info.inJump;
- out += info.outJump;
- }
- }
- else if (info.inFormat == RTAUDIO_FLOAT64) {
- Float64 *in = (Float64 *)inBuffer;
- for (unsigned int i=0; i<stream_.bufferSize; i++) {
- for (j=0; j<info.channels; j++) {
- out[info.outOffset[j]] = (Float32) in[info.inOffset[j]];
- }
- in += info.inJump;
- out += info.outJump;
- }
- }
- }
- else if (info.outFormat == RTAUDIO_SINT32) {
- Int32 *out = (Int32 *)outBuffer;
- if (info.inFormat == RTAUDIO_SINT8) {
- signed char *in = (signed char *)inBuffer;
- for (unsigned int i=0; i<stream_.bufferSize; i++) {
- for (j=0; j<info.channels; j++) {
- out[info.outOffset[j]] = (Int32) in[info.inOffset[j]];
- out[info.outOffset[j]] <<= 24;
- }
- in += info.inJump;
- out += info.outJump;
- }
- }
- else if (info.inFormat == RTAUDIO_SINT16) {
- Int16 *in = (Int16 *)inBuffer;
- for (unsigned int i=0; i<stream_.bufferSize; i++) {
- for (j=0; j<info.channels; j++) {
- out[info.outOffset[j]] = (Int32) in[info.inOffset[j]];
- out[info.outOffset[j]] <<= 16;
- }
- in += info.inJump;
- out += info.outJump;
- }
- }
- else if (info.inFormat == RTAUDIO_SINT24) {
- Int24 *in = (Int24 *)inBuffer;
- for (unsigned int i=0; i<stream_.bufferSize; i++) {
- for (j=0; j<info.channels; j++) {
- out[info.outOffset[j]] = (Int32) in[info.inOffset[j]].asInt();
- out[info.outOffset[j]] <<= 8;
- }
- in += info.inJump;
- out += info.outJump;
- }
- }
- else if (info.inFormat == RTAUDIO_SINT32) {
- // Channel compensation and/or (de)interleaving only.
- Int32 *in = (Int32 *)inBuffer;
- for (unsigned int i=0; i<stream_.bufferSize; i++) {
- for (j=0; j<info.channels; j++) {
- out[info.outOffset[j]] = in[info.inOffset[j]];
- }
- in += info.inJump;
- out += info.outJump;
- }
- }
- else if (info.inFormat == RTAUDIO_FLOAT32) {
- Float32 *in = (Float32 *)inBuffer;
- for (unsigned int i=0; i<stream_.bufferSize; i++) {
- for (j=0; j<info.channels; j++) {
- out[info.outOffset[j]] = (Int32) (in[info.inOffset[j]] * 2147483647.5 - 0.5);
- }
- in += info.inJump;
- out += info.outJump;
- }
- }
- else if (info.inFormat == RTAUDIO_FLOAT64) {
- Float64 *in = (Float64 *)inBuffer;
- for (unsigned int i=0; i<stream_.bufferSize; i++) {
- for (j=0; j<info.channels; j++) {
- out[info.outOffset[j]] = (Int32) (in[info.inOffset[j]] * 2147483647.5 - 0.5);
- }
- in += info.inJump;
- out += info.outJump;
- }
- }
- }
- else if (info.outFormat == RTAUDIO_SINT24) {
- Int24 *out = (Int24 *)outBuffer;
- if (info.inFormat == RTAUDIO_SINT8) {
- signed char *in = (signed char *)inBuffer;
- for (unsigned int i=0; i<stream_.bufferSize; i++) {
- for (j=0; j<info.channels; j++) {
- out[info.outOffset[j]] = (Int32) (in[info.inOffset[j]] << 16);
- //out[info.outOffset[j]] <<= 16;
- }
- in += info.inJump;
- out += info.outJump;
- }
- }
- else if (info.inFormat == RTAUDIO_SINT16) {
- Int16 *in = (Int16 *)inBuffer;
- for (unsigned int i=0; i<stream_.bufferSize; i++) {
- for (j=0; j<info.channels; j++) {
- out[info.outOffset[j]] = (Int32) (in[info.inOffset[j]] << 8);
- //out[info.outOffset[j]] <<= 8;
- }
- in += info.inJump;
- out += info.outJump;
- }
- }
- else if (info.inFormat == RTAUDIO_SINT24) {
- // Channel compensation and/or (de)interleaving only.
- Int24 *in = (Int24 *)inBuffer;
- for (unsigned int i=0; i<stream_.bufferSize; i++) {
- for (j=0; j<info.channels; j++) {
- out[info.outOffset[j]] = in[info.inOffset[j]];
- }
- in += info.inJump;
- out += info.outJump;
- }
- }
- else if (info.inFormat == RTAUDIO_SINT32) {
- Int32 *in = (Int32 *)inBuffer;
- for (unsigned int i=0; i<stream_.bufferSize; i++) {
- for (j=0; j<info.channels; j++) {
- out[info.outOffset[j]] = (Int32) (in[info.inOffset[j]] >> 8);
- //out[info.outOffset[j]] >>= 8;
- }
- in += info.inJump;
- out += info.outJump;
- }
- }
- else if (info.inFormat == RTAUDIO_FLOAT32) {
- Float32 *in = (Float32 *)inBuffer;
- for (unsigned int i=0; i<stream_.bufferSize; i++) {
- for (j=0; j<info.channels; j++) {
- out[info.outOffset[j]] = (Int32) (in[info.inOffset[j]] * 8388607.5 - 0.5);
- }
- in += info.inJump;
- out += info.outJump;
- }
- }
- else if (info.inFormat == RTAUDIO_FLOAT64) {
- Float64 *in = (Float64 *)inBuffer;
- for (unsigned int i=0; i<stream_.bufferSize; i++) {
- for (j=0; j<info.channels; j++) {
- out[info.outOffset[j]] = (Int32) (in[info.inOffset[j]] * 8388607.5 - 0.5);
- }
- in += info.inJump;
- out += info.outJump;
- }
- }
- }
- else if (info.outFormat == RTAUDIO_SINT16) {
- Int16 *out = (Int16 *)outBuffer;
- if (info.inFormat == RTAUDIO_SINT8) {
- signed char *in = (signed char *)inBuffer;
- for (unsigned int i=0; i<stream_.bufferSize; i++) {
- for (j=0; j<info.channels; j++) {
- out[info.outOffset[j]] = (Int16) in[info.inOffset[j]];
- out[info.outOffset[j]] <<= 8;
- }
- in += info.inJump;
- out += info.outJump;
- }
- }
- else if (info.inFormat == RTAUDIO_SINT16) {
- // Channel compensation and/or (de)interleaving only.
- Int16 *in = (Int16 *)inBuffer;
- for (unsigned int i=0; i<stream_.bufferSize; i++) {
- for (j=0; j<info.channels; j++) {
- out[info.outOffset[j]] = in[info.inOffset[j]];
- }
- in += info.inJump;
- out += info.outJump;
- }
- }
- else if (info.inFormat == RTAUDIO_SINT24) {
- Int24 *in = (Int24 *)inBuffer;
- for (unsigned int i=0; i<stream_.bufferSize; i++) {
- for (j=0; j<info.channels; j++) {
- out[info.outOffset[j]] = (Int16) (in[info.inOffset[j]].asInt() >> 8);
- }
- in += info.inJump;
- out += info.outJump;
- }
- }
- else if (info.inFormat == RTAUDIO_SINT32) {
- Int32 *in = (Int32 *)inBuffer;
- for (unsigned int i=0; i<stream_.bufferSize; i++) {
- for (j=0; j<info.channels; j++) {
- out[info.outOffset[j]] = (Int16) ((in[info.inOffset[j]] >> 16) & 0x0000ffff);
- }
- in += info.inJump;
- out += info.outJump;
- }
- }
- else if (info.inFormat == RTAUDIO_FLOAT32) {
- Float32 *in = (Float32 *)inBuffer;
- for (unsigned int i=0; i<stream_.bufferSize; i++) {
- for (j=0; j<info.channels; j++) {
- out[info.outOffset[j]] = (Int16) (in[info.inOffset[j]] * 32767.5 - 0.5);
- }
- in += info.inJump;
- out += info.outJump;
- }
- }
- else if (info.inFormat == RTAUDIO_FLOAT64) {
- Float64 *in = (Float64 *)inBuffer;
- for (unsigned int i=0; i<stream_.bufferSize; i++) {
- for (j=0; j<info.channels; j++) {
- out[info.outOffset[j]] = (Int16) (in[info.inOffset[j]] * 32767.5 - 0.5);
- }
- in += info.inJump;
- out += info.outJump;
- }
- }
- }
- else if (info.outFormat == RTAUDIO_SINT8) {
- signed char *out = (signed char *)outBuffer;
- if (info.inFormat == RTAUDIO_SINT8) {
- // Channel compensation and/or (de)interleaving only.
- signed char *in = (signed char *)inBuffer;
- for (unsigned int i=0; i<stream_.bufferSize; i++) {
- for (j=0; j<info.channels; j++) {
- out[info.outOffset[j]] = in[info.inOffset[j]];
- }
- in += info.inJump;
- out += info.outJump;
- }
- }
- if (info.inFormat == RTAUDIO_SINT16) {
- Int16 *in = (Int16 *)inBuffer;
- for (unsigned int i=0; i<stream_.bufferSize; i++) {
- for (j=0; j<info.channels; j++) {
- out[info.outOffset[j]] = (signed char) ((in[info.inOffset[j]] >> 8) & 0x00ff);
- }
- in += info.inJump;
- out += info.outJump;
- }
- }
- else if (info.inFormat == RTAUDIO_SINT24) {
- Int24 *in = (Int24 *)inBuffer;
- for (unsigned int i=0; i<stream_.bufferSize; i++) {
- for (j=0; j<info.channels; j++) {
- out[info.outOffset[j]] = (signed char) (in[info.inOffset[j]].asInt() >> 16);
- }
- in += info.inJump;
- out += info.outJump;
- }
- }
- else if (info.inFormat == RTAUDIO_SINT32) {
- Int32 *in = (Int32 *)inBuffer;
- for (unsigned int i=0; i<stream_.bufferSize; i++) {
- for (j=0; j<info.channels; j++) {
- out[info.outOffset[j]] = (signed char) ((in[info.inOffset[j]] >> 24) & 0x000000ff);
- }
- in += info.inJump;
- out += info.outJump;
- }
- }
- else if (info.inFormat == RTAUDIO_FLOAT32) {
- Float32 *in = (Float32 *)inBuffer;
- for (unsigned int i=0; i<stream_.bufferSize; i++) {
- for (j=0; j<info.channels; j++) {
- out[info.outOffset[j]] = (signed char) (in[info.inOffset[j]] * 127.5 - 0.5);
- }
- in += info.inJump;
- out += info.outJump;
- }
- }
- else if (info.inFormat == RTAUDIO_FLOAT64) {
- Float64 *in = (Float64 *)inBuffer;
- for (unsigned int i=0; i<stream_.bufferSize; i++) {
- for (j=0; j<info.channels; j++) {
- out[info.outOffset[j]] = (signed char) (in[info.inOffset[j]] * 127.5 - 0.5);
- }
- in += info.inJump;
- out += info.outJump;
- }
- }
- }
-}
-
-//static inline uint16_t bswap_16(uint16_t x) { return (x>>8) | (x<<8); }
-//static inline uint32_t bswap_32(uint32_t x) { return (bswap_16(x&0xffff)<<16) | (bswap_16(x>>16)); }
-//static inline uint64_t bswap_64(uint64_t x) { return (((unsigned long long)bswap_32(x&0xffffffffull))<<32) | (bswap_32(x>>32)); }
-
-void RtApi :: byteSwapBuffer( char *buffer, unsigned int samples, RtAudioFormat format )
-{
- register char val;
- register char *ptr;
-
- ptr = buffer;
- if ( format == RTAUDIO_SINT16 ) {
- for ( unsigned int i=0; i<samples; i++ ) {
- // Swap 1st and 2nd bytes.
- val = *(ptr);
- *(ptr) = *(ptr+1);
- *(ptr+1) = val;
-
- // Increment 2 bytes.
- ptr += 2;
- }
- }
- else if ( format == RTAUDIO_SINT32 ||
- format == RTAUDIO_FLOAT32 ) {
- for ( unsigned int i=0; i<samples; i++ ) {
- // Swap 1st and 4th bytes.
- val = *(ptr);
- *(ptr) = *(ptr+3);
- *(ptr+3) = val;
-
- // Swap 2nd and 3rd bytes.
- ptr += 1;
- val = *(ptr);
- *(ptr) = *(ptr+1);
- *(ptr+1) = val;
-
- // Increment 3 more bytes.
- ptr += 3;
- }
- }
- else if ( format == RTAUDIO_SINT24 ) {
- for ( unsigned int i=0; i<samples; i++ ) {
- // Swap 1st and 3rd bytes.
- val = *(ptr);
- *(ptr) = *(ptr+2);
- *(ptr+2) = val;
-
- // Increment 2 more bytes.
- ptr += 2;
- }
- }
- else if ( format == RTAUDIO_FLOAT64 ) {
- for ( unsigned int i=0; i<samples; i++ ) {
- // Swap 1st and 8th bytes
- val = *(ptr);
- *(ptr) = *(ptr+7);
- *(ptr+7) = val;
-
- // Swap 2nd and 7th bytes
- ptr += 1;
- val = *(ptr);
- *(ptr) = *(ptr+5);
- *(ptr+5) = val;
-
- // Swap 3rd and 6th bytes
- ptr += 1;
- val = *(ptr);
- *(ptr) = *(ptr+3);
- *(ptr+3) = val;
-
- // Swap 4th and 5th bytes
- ptr += 1;
- val = *(ptr);
- *(ptr) = *(ptr+1);
- *(ptr+1) = val;
-
- // Increment 5 more bytes.
- ptr += 5;
- }
- }
-}
-
- // Indentation settings for Vim and Emacs
- //
- // Local Variables:
- // c-basic-offset: 2
- // indent-tabs-mode: nil
- // End:
- //
- // vim: et sts=2 sw=2
-
-#endif
+#ifdef RTAUDIO_ENABLED +/************************************************************************/ +/*! \class RtAudio + \brief Realtime audio i/o C++ classes. + + RtAudio provides a common API (Application Programming Interface) + for realtime audio input/output across Linux (native ALSA, Jack, + and OSS), Macintosh OS X (CoreAudio and Jack), and Windows + (DirectSound, ASIO and WASAPI) operating systems. + + RtAudio WWW site: http://www.music.mcgill.ca/~gary/rtaudio/ + + RtAudio: realtime audio i/o C++ classes + Copyright (c) 2001-2014 Gary P. Scavone + + Permission is hereby granted, free of charge, to any person + obtaining a copy of this software and associated documentation files + (the "Software"), to deal in the Software without restriction, + including without limitation the rights to use, copy, modify, merge, + publish, distribute, sublicense, and/or sell copies of the Software, + and to permit persons to whom the Software is furnished to do so, + subject to the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + Any person wishing to distribute modifications to the Software is + asked to send the modifications to the original developer so that + they can be incorporated into the canonical version. This is, + however, not a binding provision of this license. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR + ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ +/************************************************************************/ + +// RtAudio: Version 4.1.1 + +#include "RtAudio.h" +#include <iostream> +#include <cstdlib> +#include <cstring> +#include <climits> +#include <algorithm> + +// Static variable definitions. +const unsigned int RtApi::MAX_SAMPLE_RATES = 14; +const unsigned int RtApi::SAMPLE_RATES[] = { + 4000, 5512, 8000, 9600, 11025, 16000, 22050, + 32000, 44100, 48000, 88200, 96000, 176400, 192000 +}; + +#if defined(__WINDOWS_DS__) || defined(__WINDOWS_ASIO__) || defined(__WINDOWS_WASAPI__) +#ifdef WINRT_ENABLED + #define MUTEX_INITIALIZE(A) InitializeCriticalSectionEx(A, 0, 0) +#else + #define MUTEX_INITIALIZE(A) InitializeCriticalSection(A) +#endif + #define MUTEX_DESTROY(A) DeleteCriticalSection(A) + #define MUTEX_LOCK(A) EnterCriticalSection(A) + #define MUTEX_UNLOCK(A) LeaveCriticalSection(A) + + #include "tchar.h" + + static std::string convertCharPointerToStdString(const char *text) + { + return std::string(text); + } + + static std::string convertCharPointerToStdString(const wchar_t *text) + { + int length = WideCharToMultiByte(CP_UTF8, 0, text, -1, NULL, 0, NULL, NULL); + std::string s( length-1, '\0' ); + WideCharToMultiByte(CP_UTF8, 0, text, -1, &s[0], length, NULL, NULL); + return s; + } + +#elif defined(__LINUX_ALSA__) || defined(__LINUX_PULSE__) || defined(__UNIX_JACK__) || defined(__LINUX_OSS__) || defined(__MACOSX_CORE__) + // pthread API + #define MUTEX_INITIALIZE(A) pthread_mutex_init(A, NULL) + #define MUTEX_DESTROY(A) pthread_mutex_destroy(A) + #define MUTEX_LOCK(A) pthread_mutex_lock(A) + #define MUTEX_UNLOCK(A) pthread_mutex_unlock(A) +#else + #define MUTEX_INITIALIZE(A) abs(*A) // dummy definitions + #define MUTEX_DESTROY(A) abs(*A) // dummy definitions +#endif + +// *************************************************** // +// +// RtAudio definitions. 
+// +// *************************************************** // + +std::string RtAudio :: getVersion( void ) throw() +{ + return RTAUDIO_VERSION; +} + +void RtAudio :: getCompiledApi( std::vector<RtAudio::Api> &apis ) throw() +{ + apis.clear(); + + // The order here will control the order of RtAudio's API search in + // the constructor. +#if defined(__UNIX_JACK__) + apis.push_back( UNIX_JACK ); +#endif +#if defined(__LINUX_ALSA__) + apis.push_back( LINUX_ALSA ); +#endif +#if defined(__LINUX_PULSE__) + apis.push_back( LINUX_PULSE ); +#endif +#if defined(__LINUX_OSS__) + apis.push_back( LINUX_OSS ); +#endif +#if defined(__WINDOWS_ASIO__) + apis.push_back( WINDOWS_ASIO ); +#endif +#if defined(__WINDOWS_WASAPI__) + apis.push_back( WINDOWS_WASAPI ); +#endif +#if defined(__WINDOWS_DS__) + apis.push_back( WINDOWS_DS ); +#endif +#if defined(__MACOSX_CORE__) + apis.push_back( MACOSX_CORE ); +#endif +#if defined(__RTAUDIO_DUMMY__) + apis.push_back( RTAUDIO_DUMMY ); +#endif +} + +void RtAudio :: openRtApi( RtAudio::Api api ) +{ + if ( rtapi_ ) + delete rtapi_; + rtapi_ = 0; + +#if defined(__UNIX_JACK__) + if ( api == UNIX_JACK ) + rtapi_ = new RtApiJack(); +#endif +#if defined(__LINUX_ALSA__) + if ( api == LINUX_ALSA ) + rtapi_ = new RtApiAlsa(); +#endif +#if defined(__LINUX_PULSE__) + if ( api == LINUX_PULSE ) + rtapi_ = new RtApiPulse(); +#endif +#if defined(__LINUX_OSS__) + if ( api == LINUX_OSS ) + rtapi_ = new RtApiOss(); +#endif +#if defined(__WINDOWS_ASIO__) + if ( api == WINDOWS_ASIO ) + rtapi_ = new RtApiAsio(); +#endif +#if defined(__WINDOWS_WASAPI__) + if ( api == WINDOWS_WASAPI ) + rtapi_ = new RtApiWasapi(); +#endif +#if defined(__WINDOWS_DS__) + if ( api == WINDOWS_DS ) + rtapi_ = new RtApiDs(); +#endif +#if defined(__MACOSX_CORE__) + if ( api == MACOSX_CORE ) + rtapi_ = new RtApiCore(); +#endif +#if defined(__RTAUDIO_DUMMY__) + if ( api == RTAUDIO_DUMMY ) + rtapi_ = new RtApiDummy(); +#endif +} + +RtAudio :: RtAudio( RtAudio::Api api ) +{ + rtapi_ = 0; + + if ( 
api != UNSPECIFIED ) { + // Attempt to open the specified API. + openRtApi( api ); + if ( rtapi_ ) return; + + // No compiled support for specified API value. Issue a debug + // warning and continue as if no API was specified. + std::cerr << "\nRtAudio: no compiled support for specified API argument!\n" << std::endl; + } + + // Iterate through the compiled APIs and return as soon as we find + // one with at least one device or we reach the end of the list. + std::vector< RtAudio::Api > apis; + getCompiledApi( apis ); + for ( unsigned int i=0; i<apis.size(); i++ ) { + openRtApi( apis[i] ); + if ( rtapi_ && rtapi_->getDeviceCount() ) break; + } + + if ( rtapi_ ) return; + + // It should not be possible to get here because the preprocessor + // definition __RTAUDIO_DUMMY__ is automatically defined if no + // API-specific definitions are passed to the compiler. But just in + // case something weird happens, we'll thow an error. + std::string errorText = "\nRtAudio: no compiled API support found ... critical error!!\n\n"; + throw( RtAudioError( errorText, RtAudioError::UNSPECIFIED ) ); +} + +RtAudio :: ~RtAudio() throw() +{ + if ( rtapi_ ) + delete rtapi_; +} + +void RtAudio :: openStream( RtAudio::StreamParameters *outputParameters, + RtAudio::StreamParameters *inputParameters, + RtAudioFormat format, unsigned int sampleRate, + unsigned int *bufferFrames, + RtAudioCallback callback, void *userData, + RtAudio::StreamOptions *options, + RtAudioErrorCallback errorCallback ) +{ + return rtapi_->openStream( outputParameters, inputParameters, format, + sampleRate, bufferFrames, callback, + userData, options, errorCallback ); +} + +// *************************************************** // +// +// Public RtApi definitions (see end of file for +// private or protected utility functions). 
+// +// *************************************************** // + +RtApi :: RtApi() +{ + stream_.state = STREAM_CLOSED; + stream_.mode = UNINITIALIZED; + stream_.apiHandle = 0; + stream_.userBuffer[0] = 0; + stream_.userBuffer[1] = 0; + MUTEX_INITIALIZE( &stream_.mutex ); + showWarnings_ = true; + firstErrorOccurred_ = false; +} + +RtApi :: ~RtApi() +{ + MUTEX_DESTROY( &stream_.mutex ); +} + +void RtApi :: openStream( RtAudio::StreamParameters *oParams, + RtAudio::StreamParameters *iParams, + RtAudioFormat format, unsigned int sampleRate, + unsigned int *bufferFrames, + RtAudioCallback callback, void *userData, + RtAudio::StreamOptions *options, + RtAudioErrorCallback errorCallback ) +{ + if ( stream_.state != STREAM_CLOSED ) { + errorText_ = "RtApi::openStream: a stream is already open!"; + error( RtAudioError::INVALID_USE ); + return; + } + + // Clear stream information potentially left from a previously open stream. + clearStreamInfo(); + + if ( oParams && oParams->nChannels < 1 ) { + errorText_ = "RtApi::openStream: a non-NULL output StreamParameters structure cannot have an nChannels value less than one."; + error( RtAudioError::INVALID_USE ); + return; + } + + if ( iParams && iParams->nChannels < 1 ) { + errorText_ = "RtApi::openStream: a non-NULL input StreamParameters structure cannot have an nChannels value less than one."; + error( RtAudioError::INVALID_USE ); + return; + } + + if ( oParams == NULL && iParams == NULL ) { + errorText_ = "RtApi::openStream: input and output StreamParameters structures are both NULL!"; + error( RtAudioError::INVALID_USE ); + return; + } + + if ( formatBytes(format) == 0 ) { + errorText_ = "RtApi::openStream: 'format' parameter value is undefined."; + error( RtAudioError::INVALID_USE ); + return; + } + + unsigned int nDevices = getDeviceCount(); + unsigned int oChannels = 0; + if ( oParams ) { + oChannels = oParams->nChannels; + if ( oParams->deviceId >= nDevices ) { + errorText_ = "RtApi::openStream: output device parameter 
value is invalid."; + error( RtAudioError::INVALID_USE ); + return; + } + } + + unsigned int iChannels = 0; + if ( iParams ) { + iChannels = iParams->nChannels; + if ( iParams->deviceId >= nDevices ) { + errorText_ = "RtApi::openStream: input device parameter value is invalid."; + error( RtAudioError::INVALID_USE ); + return; + } + } + + bool result; + + if ( oChannels > 0 ) { + + result = probeDeviceOpen( oParams->deviceId, OUTPUT, oChannels, oParams->firstChannel, + sampleRate, format, bufferFrames, options ); + if ( result == false ) { + error( RtAudioError::SYSTEM_ERROR ); + return; + } + } + + if ( iChannels > 0 ) { + + result = probeDeviceOpen( iParams->deviceId, INPUT, iChannels, iParams->firstChannel, + sampleRate, format, bufferFrames, options ); + if ( result == false ) { + if ( oChannels > 0 ) closeStream(); + error( RtAudioError::SYSTEM_ERROR ); + return; + } + } + + stream_.callbackInfo.callback = (void *) callback; + stream_.callbackInfo.userData = userData; + stream_.callbackInfo.errorCallback = (void *) errorCallback; + + if ( options ) options->numberOfBuffers = stream_.nBuffers; + stream_.state = STREAM_STOPPED; +} + +unsigned int RtApi :: getDefaultInputDevice( void ) +{ + // Should be implemented in subclasses if possible. + return 0; +} + +unsigned int RtApi :: getDefaultOutputDevice( void ) +{ + // Should be implemented in subclasses if possible. + return 0; +} + +void RtApi :: closeStream( void ) +{ + // MUST be implemented in subclasses! + return; +} + +bool RtApi :: probeDeviceOpen( unsigned int /*device*/, StreamMode /*mode*/, unsigned int /*channels*/, + unsigned int /*firstChannel*/, unsigned int /*sampleRate*/, + RtAudioFormat /*format*/, unsigned int * /*bufferSize*/, + RtAudio::StreamOptions * /*options*/ ) +{ + // MUST be implemented in subclasses! 
+ return FAILURE; +} + +void RtApi :: tickStreamTime( void ) +{ + // Subclasses that do not provide their own implementation of + // getStreamTime should call this function once per buffer I/O to + // provide basic stream time support. + + stream_.streamTime += ( stream_.bufferSize * 1.0 / stream_.sampleRate ); + +#if defined( HAVE_GETTIMEOFDAY ) + gettimeofday( &stream_.lastTickTimestamp, NULL ); +#endif +} + +long RtApi :: getStreamLatency( void ) +{ + verifyStream(); + + long totalLatency = 0; + if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) + totalLatency = stream_.latency[0]; + if ( stream_.mode == INPUT || stream_.mode == DUPLEX ) + totalLatency += stream_.latency[1]; + + return totalLatency; +} + +double RtApi :: getStreamTime( void ) +{ + verifyStream(); + +#if defined( HAVE_GETTIMEOFDAY ) + // Return a very accurate estimate of the stream time by + // adding in the elapsed time since the last tick. + struct timeval then; + struct timeval now; + + if ( stream_.state != STREAM_RUNNING || stream_.streamTime == 0.0 ) + return stream_.streamTime; + + gettimeofday( &now, NULL ); + then = stream_.lastTickTimestamp; + return stream_.streamTime + + ((now.tv_sec + 0.000001 * now.tv_usec) - + (then.tv_sec + 0.000001 * then.tv_usec)); +#else + return stream_.streamTime; +#endif +} + +void RtApi :: setStreamTime( double time ) +{ + verifyStream(); + + if ( time >= 0.0 ) + stream_.streamTime = time; +} + +unsigned int RtApi :: getStreamSampleRate( void ) +{ + verifyStream(); + + return stream_.sampleRate; +} + + +// *************************************************** // +// +// OS/API-specific methods. +// +// *************************************************** // + +#if defined(__MACOSX_CORE__) + +// The OS X CoreAudio API is designed to use a separate callback +// procedure for each of its audio devices. 
A single RtAudio duplex +// stream using two different devices is supported here, though it +// cannot be guaranteed to always behave correctly because we cannot +// synchronize these two callbacks. +// +// A property listener is installed for over/underrun information. +// However, no functionality is currently provided to allow property +// listeners to trigger user handlers because it is unclear what could +// be done if a critical stream parameter (buffer size, sample rate, +// device disconnect) notification arrived. The listeners entail +// quite a bit of extra code and most likely, a user program wouldn't +// be prepared for the result anyway. However, we do provide a flag +// to the client callback function to inform of an over/underrun. + +// A structure to hold various information related to the CoreAudio API +// implementation. +struct CoreHandle { + AudioDeviceID id[2]; // device ids +#if defined( MAC_OS_X_VERSION_10_5 ) && ( MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_5 ) + AudioDeviceIOProcID procId[2]; +#endif + UInt32 iStream[2]; // device stream index (or first if using multiple) + UInt32 nStreams[2]; // number of streams to use + bool xrun[2]; + char *deviceBuffer; + pthread_cond_t condition; + int drainCounter; // Tracks callback counts when draining + bool internalDrain; // Indicates if stop is initiated from callback or not. + + CoreHandle() + :deviceBuffer(0), drainCounter(0), internalDrain(false) { nStreams[0] = 1; nStreams[1] = 1; id[0] = 0; id[1] = 0; xrun[0] = false; xrun[1] = false; } +}; + +RtApiCore:: RtApiCore() +{ +#if defined( AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER ) + // This is a largely undocumented but absolutely necessary + // requirement starting with OS-X 10.6. If not called, queries and + // updates to various audio device properties are not handled + // correctly. 
+ CFRunLoopRef theRunLoop = NULL; + AudioObjectPropertyAddress property = { kAudioHardwarePropertyRunLoop, + kAudioObjectPropertyScopeGlobal, + kAudioObjectPropertyElementMaster }; + OSStatus result = AudioObjectSetPropertyData( kAudioObjectSystemObject, &property, 0, NULL, sizeof(CFRunLoopRef), &theRunLoop); + if ( result != noErr ) { + errorText_ = "RtApiCore::RtApiCore: error setting run loop property!"; + error( RtAudioError::WARNING ); + } +#endif +} + +RtApiCore :: ~RtApiCore() +{ + // The subclass destructor gets called before the base class + // destructor, so close an existing stream before deallocating + // apiDeviceId memory. + if ( stream_.state != STREAM_CLOSED ) closeStream(); +} + +unsigned int RtApiCore :: getDeviceCount( void ) +{ + // Find out how many audio devices there are, if any. + UInt32 dataSize; + AudioObjectPropertyAddress propertyAddress = { kAudioHardwarePropertyDevices, kAudioObjectPropertyScopeGlobal, kAudioObjectPropertyElementMaster }; + OSStatus result = AudioObjectGetPropertyDataSize( kAudioObjectSystemObject, &propertyAddress, 0, NULL, &dataSize ); + if ( result != noErr ) { + errorText_ = "RtApiCore::getDeviceCount: OS-X error getting device info!"; + error( RtAudioError::WARNING ); + return 0; + } + + return dataSize / sizeof( AudioDeviceID ); +} + +unsigned int RtApiCore :: getDefaultInputDevice( void ) +{ + unsigned int nDevices = getDeviceCount(); + if ( nDevices <= 1 ) return 0; + + AudioDeviceID id; + UInt32 dataSize = sizeof( AudioDeviceID ); + AudioObjectPropertyAddress property = { kAudioHardwarePropertyDefaultInputDevice, kAudioObjectPropertyScopeGlobal, kAudioObjectPropertyElementMaster }; + OSStatus result = AudioObjectGetPropertyData( kAudioObjectSystemObject, &property, 0, NULL, &dataSize, &id ); + if ( result != noErr ) { + errorText_ = "RtApiCore::getDefaultInputDevice: OS-X system error getting device."; + error( RtAudioError::WARNING ); + return 0; + } + + dataSize *= nDevices; + AudioDeviceID deviceList[ 
nDevices ]; + property.mSelector = kAudioHardwarePropertyDevices; + result = AudioObjectGetPropertyData( kAudioObjectSystemObject, &property, 0, NULL, &dataSize, (void *) &deviceList ); + if ( result != noErr ) { + errorText_ = "RtApiCore::getDefaultInputDevice: OS-X system error getting device IDs."; + error( RtAudioError::WARNING ); + return 0; + } + + for ( unsigned int i=0; i<nDevices; i++ ) + if ( id == deviceList[i] ) return i; + + errorText_ = "RtApiCore::getDefaultInputDevice: No default device found!"; + error( RtAudioError::WARNING ); + return 0; +} + +unsigned int RtApiCore :: getDefaultOutputDevice( void ) +{ + unsigned int nDevices = getDeviceCount(); + if ( nDevices <= 1 ) return 0; + + AudioDeviceID id; + UInt32 dataSize = sizeof( AudioDeviceID ); + AudioObjectPropertyAddress property = { kAudioHardwarePropertyDefaultOutputDevice, kAudioObjectPropertyScopeGlobal, kAudioObjectPropertyElementMaster }; + OSStatus result = AudioObjectGetPropertyData( kAudioObjectSystemObject, &property, 0, NULL, &dataSize, &id ); + if ( result != noErr ) { + errorText_ = "RtApiCore::getDefaultOutputDevice: OS-X system error getting device."; + error( RtAudioError::WARNING ); + return 0; + } + + dataSize = sizeof( AudioDeviceID ) * nDevices; + AudioDeviceID deviceList[ nDevices ]; + property.mSelector = kAudioHardwarePropertyDevices; + result = AudioObjectGetPropertyData( kAudioObjectSystemObject, &property, 0, NULL, &dataSize, (void *) &deviceList ); + if ( result != noErr ) { + errorText_ = "RtApiCore::getDefaultOutputDevice: OS-X system error getting device IDs."; + error( RtAudioError::WARNING ); + return 0; + } + + for ( unsigned int i=0; i<nDevices; i++ ) + if ( id == deviceList[i] ) return i; + + errorText_ = "RtApiCore::getDefaultOutputDevice: No default device found!"; + error( RtAudioError::WARNING ); + return 0; +} + +RtAudio::DeviceInfo RtApiCore :: getDeviceInfo( unsigned int device ) +{ + RtAudio::DeviceInfo info; + info.probed = false; + + // Get device ID 
+ unsigned int nDevices = getDeviceCount(); + if ( nDevices == 0 ) { + errorText_ = "RtApiCore::getDeviceInfo: no devices found!"; + error( RtAudioError::INVALID_USE ); + return info; + } + + if ( device >= nDevices ) { + errorText_ = "RtApiCore::getDeviceInfo: device ID is invalid!"; + error( RtAudioError::INVALID_USE ); + return info; + } + + AudioDeviceID deviceList[ nDevices ]; + UInt32 dataSize = sizeof( AudioDeviceID ) * nDevices; + AudioObjectPropertyAddress property = { kAudioHardwarePropertyDevices, + kAudioObjectPropertyScopeGlobal, + kAudioObjectPropertyElementMaster }; + OSStatus result = AudioObjectGetPropertyData( kAudioObjectSystemObject, &property, + 0, NULL, &dataSize, (void *) &deviceList ); + if ( result != noErr ) { + errorText_ = "RtApiCore::getDeviceInfo: OS-X system error getting device IDs."; + error( RtAudioError::WARNING ); + return info; + } + + AudioDeviceID id = deviceList[ device ]; + + // Get the device name. + info.name.erase(); + CFStringRef cfname; + dataSize = sizeof( CFStringRef ); + property.mSelector = kAudioObjectPropertyManufacturer; + result = AudioObjectGetPropertyData( id, &property, 0, NULL, &dataSize, &cfname ); + if ( result != noErr ) { + errorStream_ << "RtApiCore::probeDeviceInfo: system error (" << getErrorCode( result ) << ") getting device manufacturer."; + errorText_ = errorStream_.str(); + error( RtAudioError::WARNING ); + return info; + } + + //const char *mname = CFStringGetCStringPtr( cfname, CFStringGetSystemEncoding() ); + int length = CFStringGetLength(cfname); + char *mname = (char *)malloc(length * 3 + 1); +#if defined( UNICODE ) || defined( _UNICODE ) + CFStringGetCString(cfname, mname, length * 3 + 1, kCFStringEncodingUTF8); +#else + CFStringGetCString(cfname, mname, length * 3 + 1, CFStringGetSystemEncoding()); +#endif + info.name.append( (const char *)mname, strlen(mname) ); + info.name.append( ": " ); + CFRelease( cfname ); + free(mname); + + property.mSelector = kAudioObjectPropertyName; + result = 
AudioObjectGetPropertyData( id, &property, 0, NULL, &dataSize, &cfname ); + if ( result != noErr ) { + errorStream_ << "RtApiCore::probeDeviceInfo: system error (" << getErrorCode( result ) << ") getting device name."; + errorText_ = errorStream_.str(); + error( RtAudioError::WARNING ); + return info; + } + + //const char *name = CFStringGetCStringPtr( cfname, CFStringGetSystemEncoding() ); + length = CFStringGetLength(cfname); + char *name = (char *)malloc(length * 3 + 1); +#if defined( UNICODE ) || defined( _UNICODE ) + CFStringGetCString(cfname, name, length * 3 + 1, kCFStringEncodingUTF8); +#else + CFStringGetCString(cfname, name, length * 3 + 1, CFStringGetSystemEncoding()); +#endif + info.name.append( (const char *)name, strlen(name) ); + CFRelease( cfname ); + free(name); + + // Get the output stream "configuration". + AudioBufferList *bufferList = nil; + property.mSelector = kAudioDevicePropertyStreamConfiguration; + property.mScope = kAudioDevicePropertyScopeOutput; + // property.mElement = kAudioObjectPropertyElementWildcard; + dataSize = 0; + result = AudioObjectGetPropertyDataSize( id, &property, 0, NULL, &dataSize ); + if ( result != noErr || dataSize == 0 ) { + errorStream_ << "RtApiCore::getDeviceInfo: system error (" << getErrorCode( result ) << ") getting output stream configuration info for device (" << device << ")."; + errorText_ = errorStream_.str(); + error( RtAudioError::WARNING ); + return info; + } + + // Allocate the AudioBufferList. 
+ bufferList = (AudioBufferList *) malloc( dataSize ); + if ( bufferList == NULL ) { + errorText_ = "RtApiCore::getDeviceInfo: memory error allocating output AudioBufferList."; + error( RtAudioError::WARNING ); + return info; + } + + result = AudioObjectGetPropertyData( id, &property, 0, NULL, &dataSize, bufferList ); + if ( result != noErr || dataSize == 0 ) { + free( bufferList ); + errorStream_ << "RtApiCore::getDeviceInfo: system error (" << getErrorCode( result ) << ") getting output stream configuration for device (" << device << ")."; + errorText_ = errorStream_.str(); + error( RtAudioError::WARNING ); + return info; + } + + // Get output channel information. + unsigned int i, nStreams = bufferList->mNumberBuffers; + for ( i=0; i<nStreams; i++ ) + info.outputChannels += bufferList->mBuffers[i].mNumberChannels; + free( bufferList ); + + // Get the input stream "configuration". + property.mScope = kAudioDevicePropertyScopeInput; + result = AudioObjectGetPropertyDataSize( id, &property, 0, NULL, &dataSize ); + if ( result != noErr || dataSize == 0 ) { + errorStream_ << "RtApiCore::getDeviceInfo: system error (" << getErrorCode( result ) << ") getting input stream configuration info for device (" << device << ")."; + errorText_ = errorStream_.str(); + error( RtAudioError::WARNING ); + return info; + } + + // Allocate the AudioBufferList. 
+ bufferList = (AudioBufferList *) malloc( dataSize ); + if ( bufferList == NULL ) { + errorText_ = "RtApiCore::getDeviceInfo: memory error allocating input AudioBufferList."; + error( RtAudioError::WARNING ); + return info; + } + + result = AudioObjectGetPropertyData( id, &property, 0, NULL, &dataSize, bufferList ); + if (result != noErr || dataSize == 0) { + free( bufferList ); + errorStream_ << "RtApiCore::getDeviceInfo: system error (" << getErrorCode( result ) << ") getting input stream configuration for device (" << device << ")."; + errorText_ = errorStream_.str(); + error( RtAudioError::WARNING ); + return info; + } + + // Get input channel information. + nStreams = bufferList->mNumberBuffers; + for ( i=0; i<nStreams; i++ ) + info.inputChannels += bufferList->mBuffers[i].mNumberChannels; + free( bufferList ); + + // If device opens for both playback and capture, we determine the channels. + if ( info.outputChannels > 0 && info.inputChannels > 0 ) + info.duplexChannels = (info.outputChannels > info.inputChannels) ? info.inputChannels : info.outputChannels; + + // Probe the device sample rates. + bool isInput = false; + if ( info.outputChannels == 0 ) isInput = true; + + // Determine the supported sample rates. 
+ property.mSelector = kAudioDevicePropertyAvailableNominalSampleRates; + if ( isInput == false ) property.mScope = kAudioDevicePropertyScopeOutput; + result = AudioObjectGetPropertyDataSize( id, &property, 0, NULL, &dataSize ); + if ( result != kAudioHardwareNoError || dataSize == 0 ) { + errorStream_ << "RtApiCore::getDeviceInfo: system error (" << getErrorCode( result ) << ") getting sample rate info."; + errorText_ = errorStream_.str(); + error( RtAudioError::WARNING ); + return info; + } + + UInt32 nRanges = dataSize / sizeof( AudioValueRange ); + AudioValueRange rangeList[ nRanges ]; + result = AudioObjectGetPropertyData( id, &property, 0, NULL, &dataSize, &rangeList ); + if ( result != kAudioHardwareNoError ) { + errorStream_ << "RtApiCore::getDeviceInfo: system error (" << getErrorCode( result ) << ") getting sample rates."; + errorText_ = errorStream_.str(); + error( RtAudioError::WARNING ); + return info; + } + + // The sample rate reporting mechanism is a bit of a mystery. It + // seems that it can either return individual rates or a range of + // rates. I assume that if the min / max range values are the same, + // then that represents a single supported rate and if the min / max + // range values are different, the device supports an arbitrary + // range of values (though there might be multiple ranges, so we'll + // use the most conservative range). 
+ Float64 minimumRate = 1.0, maximumRate = 10000000000.0; + bool haveValueRange = false; + info.sampleRates.clear(); + for ( UInt32 i=0; i<nRanges; i++ ) { + if ( rangeList[i].mMinimum == rangeList[i].mMaximum ) { + unsigned int tmpSr = (unsigned int) rangeList[i].mMinimum; + info.sampleRates.push_back( tmpSr ); + + if ( !info.preferredSampleRate || ( tmpSr <= 48000 && tmpSr > info.preferredSampleRate ) ) + info.preferredSampleRate = tmpSr; + + } else { + haveValueRange = true; + if ( rangeList[i].mMinimum > minimumRate ) minimumRate = rangeList[i].mMinimum; + if ( rangeList[i].mMaximum < maximumRate ) maximumRate = rangeList[i].mMaximum; + } + } + + if ( haveValueRange ) { + for ( unsigned int k=0; k<MAX_SAMPLE_RATES; k++ ) { + if ( SAMPLE_RATES[k] >= (unsigned int) minimumRate && SAMPLE_RATES[k] <= (unsigned int) maximumRate ) { + info.sampleRates.push_back( SAMPLE_RATES[k] ); + + if ( !info.preferredSampleRate || ( SAMPLE_RATES[k] <= 48000 && SAMPLE_RATES[k] > info.preferredSampleRate ) ) + info.preferredSampleRate = SAMPLE_RATES[k]; + } + } + } + + // Sort and remove any redundant values + std::sort( info.sampleRates.begin(), info.sampleRates.end() ); + info.sampleRates.erase( unique( info.sampleRates.begin(), info.sampleRates.end() ), info.sampleRates.end() ); + + if ( info.sampleRates.size() == 0 ) { + errorStream_ << "RtApiCore::probeDeviceInfo: No supported sample rates found for device (" << device << ")."; + errorText_ = errorStream_.str(); + error( RtAudioError::WARNING ); + return info; + } + + // CoreAudio always uses 32-bit floating point data for PCM streams. + // Thus, any other "physical" formats supported by the device are of + // no interest to the client. 
+ info.nativeFormats = RTAUDIO_FLOAT32; + + if ( info.outputChannels > 0 ) + if ( getDefaultOutputDevice() == device ) info.isDefaultOutput = true; + if ( info.inputChannels > 0 ) + if ( getDefaultInputDevice() == device ) info.isDefaultInput = true; + + info.probed = true; + return info; +} + +static OSStatus callbackHandler( AudioDeviceID inDevice, + const AudioTimeStamp* /*inNow*/, + const AudioBufferList* inInputData, + const AudioTimeStamp* /*inInputTime*/, + AudioBufferList* outOutputData, + const AudioTimeStamp* /*inOutputTime*/, + void* infoPointer ) +{ + CallbackInfo *info = (CallbackInfo *) infoPointer; + + RtApiCore *object = (RtApiCore *) info->object; + if ( object->callbackEvent( inDevice, inInputData, outOutputData ) == false ) + return kAudioHardwareUnspecifiedError; + else + return kAudioHardwareNoError; +} + +static OSStatus xrunListener( AudioObjectID /*inDevice*/, + UInt32 nAddresses, + const AudioObjectPropertyAddress properties[], + void* handlePointer ) +{ + CoreHandle *handle = (CoreHandle *) handlePointer; + for ( UInt32 i=0; i<nAddresses; i++ ) { + if ( properties[i].mSelector == kAudioDeviceProcessorOverload ) { + if ( properties[i].mScope == kAudioDevicePropertyScopeInput ) + handle->xrun[1] = true; + else + handle->xrun[0] = true; + } + } + + return kAudioHardwareNoError; +} + +static OSStatus rateListener( AudioObjectID inDevice, + UInt32 /*nAddresses*/, + const AudioObjectPropertyAddress /*properties*/[], + void* ratePointer ) +{ + Float64 *rate = (Float64 *) ratePointer; + UInt32 dataSize = sizeof( Float64 ); + AudioObjectPropertyAddress property = { kAudioDevicePropertyNominalSampleRate, + kAudioObjectPropertyScopeGlobal, + kAudioObjectPropertyElementMaster }; + AudioObjectGetPropertyData( inDevice, &property, 0, NULL, &dataSize, rate ); + return kAudioHardwareNoError; +} + +bool RtApiCore :: probeDeviceOpen( unsigned int device, StreamMode mode, unsigned int channels, + unsigned int firstChannel, unsigned int sampleRate, + 
RtAudioFormat format, unsigned int *bufferSize, + RtAudio::StreamOptions *options ) +{ + // Get device ID + unsigned int nDevices = getDeviceCount(); + if ( nDevices == 0 ) { + // This should not happen because a check is made before this function is called. + errorText_ = "RtApiCore::probeDeviceOpen: no devices found!"; + return FAILURE; + } + + if ( device >= nDevices ) { + // This should not happen because a check is made before this function is called. + errorText_ = "RtApiCore::probeDeviceOpen: device ID is invalid!"; + return FAILURE; + } + + AudioDeviceID deviceList[ nDevices ]; + UInt32 dataSize = sizeof( AudioDeviceID ) * nDevices; + AudioObjectPropertyAddress property = { kAudioHardwarePropertyDevices, + kAudioObjectPropertyScopeGlobal, + kAudioObjectPropertyElementMaster }; + OSStatus result = AudioObjectGetPropertyData( kAudioObjectSystemObject, &property, + 0, NULL, &dataSize, (void *) &deviceList ); + if ( result != noErr ) { + errorText_ = "RtApiCore::probeDeviceOpen: OS-X system error getting device IDs."; + return FAILURE; + } + + AudioDeviceID id = deviceList[ device ]; + + // Setup for stream mode. + bool isInput = false; + if ( mode == INPUT ) { + isInput = true; + property.mScope = kAudioDevicePropertyScopeInput; + } + else + property.mScope = kAudioDevicePropertyScopeOutput; + + // Get the stream "configuration". + AudioBufferList *bufferList = nil; + dataSize = 0; + property.mSelector = kAudioDevicePropertyStreamConfiguration; + result = AudioObjectGetPropertyDataSize( id, &property, 0, NULL, &dataSize ); + if ( result != noErr || dataSize == 0 ) { + errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") getting stream configuration info for device (" << device << ")."; + errorText_ = errorStream_.str(); + return FAILURE; + } + + // Allocate the AudioBufferList. 
+ bufferList = (AudioBufferList *) malloc( dataSize ); + if ( bufferList == NULL ) { + errorText_ = "RtApiCore::probeDeviceOpen: memory error allocating AudioBufferList."; + return FAILURE; + } + + result = AudioObjectGetPropertyData( id, &property, 0, NULL, &dataSize, bufferList ); + if (result != noErr || dataSize == 0) { + free( bufferList ); + errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") getting stream configuration for device (" << device << ")."; + errorText_ = errorStream_.str(); + return FAILURE; + } + + // Search for one or more streams that contain the desired number of + // channels. CoreAudio devices can have an arbitrary number of + // streams and each stream can have an arbitrary number of channels. + // For each stream, a single buffer of interleaved samples is + // provided. RtAudio prefers the use of one stream of interleaved + // data or multiple consecutive single-channel streams. However, we + // now support multiple consecutive multi-channel streams of + // interleaved data as well. + UInt32 iStream, offsetCounter = firstChannel; + UInt32 nStreams = bufferList->mNumberBuffers; + bool monoMode = false; + bool foundStream = false; + + // First check that the device supports the requested number of + // channels. + UInt32 deviceChannels = 0; + for ( iStream=0; iStream<nStreams; iStream++ ) + deviceChannels += bufferList->mBuffers[iStream].mNumberChannels; + + if ( deviceChannels < ( channels + firstChannel ) ) { + free( bufferList ); + errorStream_ << "RtApiCore::probeDeviceOpen: the device (" << device << ") does not support the requested channel count."; + errorText_ = errorStream_.str(); + return FAILURE; + } + + // Look for a single stream meeting our needs. 
+ UInt32 firstStream, streamCount = 1, streamChannels = 0, channelOffset = 0; + for ( iStream=0; iStream<nStreams; iStream++ ) { + streamChannels = bufferList->mBuffers[iStream].mNumberChannels; + if ( streamChannels >= channels + offsetCounter ) { + firstStream = iStream; + channelOffset = offsetCounter; + foundStream = true; + break; + } + if ( streamChannels > offsetCounter ) break; + offsetCounter -= streamChannels; + } + + // If we didn't find a single stream above, then we should be able + // to meet the channel specification with multiple streams. + if ( foundStream == false ) { + monoMode = true; + offsetCounter = firstChannel; + for ( iStream=0; iStream<nStreams; iStream++ ) { + streamChannels = bufferList->mBuffers[iStream].mNumberChannels; + if ( streamChannels > offsetCounter ) break; + offsetCounter -= streamChannels; + } + + firstStream = iStream; + channelOffset = offsetCounter; + Int32 channelCounter = channels + offsetCounter - streamChannels; + + if ( streamChannels > 1 ) monoMode = false; + while ( channelCounter > 0 ) { + streamChannels = bufferList->mBuffers[++iStream].mNumberChannels; + if ( streamChannels > 1 ) monoMode = false; + channelCounter -= streamChannels; + streamCount++; + } + } + + free( bufferList ); + + // Determine the buffer size. 
+ AudioValueRange bufferRange; + dataSize = sizeof( AudioValueRange ); + property.mSelector = kAudioDevicePropertyBufferFrameSizeRange; + result = AudioObjectGetPropertyData( id, &property, 0, NULL, &dataSize, &bufferRange ); + + if ( result != noErr ) { + errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") getting buffer size range for device (" << device << ")."; + errorText_ = errorStream_.str(); + return FAILURE; + } + + if ( bufferRange.mMinimum > *bufferSize ) *bufferSize = (unsigned long) bufferRange.mMinimum; + else if ( bufferRange.mMaximum < *bufferSize ) *bufferSize = (unsigned long) bufferRange.mMaximum; + if ( options && options->flags & RTAUDIO_MINIMIZE_LATENCY ) *bufferSize = (unsigned long) bufferRange.mMinimum; + + // Set the buffer size. For multiple streams, I'm assuming we only + // need to make this setting for the master channel. + UInt32 theSize = (UInt32) *bufferSize; + dataSize = sizeof( UInt32 ); + property.mSelector = kAudioDevicePropertyBufferFrameSize; + result = AudioObjectSetPropertyData( id, &property, 0, NULL, dataSize, &theSize ); + + if ( result != noErr ) { + errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") setting the buffer size for device (" << device << ")."; + errorText_ = errorStream_.str(); + return FAILURE; + } + + // If attempting to setup a duplex stream, the bufferSize parameter + // MUST be the same in both directions! + *bufferSize = theSize; + if ( stream_.mode == OUTPUT && mode == INPUT && *bufferSize != stream_.bufferSize ) { + errorStream_ << "RtApiCore::probeDeviceOpen: system error setting buffer size for duplex stream on device (" << device << ")."; + errorText_ = errorStream_.str(); + return FAILURE; + } + + stream_.bufferSize = *bufferSize; + stream_.nBuffers = 1; + + // Try to set "hog" mode ... it's not clear to me this is working. 
+ if ( options && options->flags & RTAUDIO_HOG_DEVICE ) { + pid_t hog_pid; + dataSize = sizeof( hog_pid ); + property.mSelector = kAudioDevicePropertyHogMode; + result = AudioObjectGetPropertyData( id, &property, 0, NULL, &dataSize, &hog_pid ); + if ( result != noErr ) { + errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") getting 'hog' state!"; + errorText_ = errorStream_.str(); + return FAILURE; + } + + if ( hog_pid != getpid() ) { + hog_pid = getpid(); + result = AudioObjectSetPropertyData( id, &property, 0, NULL, dataSize, &hog_pid ); + if ( result != noErr ) { + errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") setting 'hog' state!"; + errorText_ = errorStream_.str(); + return FAILURE; + } + } + } + + // Check and if necessary, change the sample rate for the device. + Float64 nominalRate; + dataSize = sizeof( Float64 ); + property.mSelector = kAudioDevicePropertyNominalSampleRate; + result = AudioObjectGetPropertyData( id, &property, 0, NULL, &dataSize, &nominalRate ); + if ( result != noErr ) { + errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") getting current sample rate."; + errorText_ = errorStream_.str(); + return FAILURE; + } + + // Only change the sample rate if off by more than 1 Hz. 
+ if ( fabs( nominalRate - (double)sampleRate ) > 1.0 ) { + + // Set a property listener for the sample rate change + Float64 reportedRate = 0.0; + AudioObjectPropertyAddress tmp = { kAudioDevicePropertyNominalSampleRate, kAudioObjectPropertyScopeGlobal, kAudioObjectPropertyElementMaster }; + result = AudioObjectAddPropertyListener( id, &tmp, rateListener, (void *) &reportedRate ); + if ( result != noErr ) { + errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") setting sample rate property listener for device (" << device << ")."; + errorText_ = errorStream_.str(); + return FAILURE; + } + + nominalRate = (Float64) sampleRate; + result = AudioObjectSetPropertyData( id, &property, 0, NULL, dataSize, &nominalRate ); + if ( result != noErr ) { + AudioObjectRemovePropertyListener( id, &tmp, rateListener, (void *) &reportedRate ); + errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") setting sample rate for device (" << device << ")."; + errorText_ = errorStream_.str(); + return FAILURE; + } + + // Now wait until the reported nominal rate is what we just set. + UInt32 microCounter = 0; + while ( reportedRate != nominalRate ) { + microCounter += 5000; + if ( microCounter > 5000000 ) break; + usleep( 5000 ); + } + + // Remove the property listener. + AudioObjectRemovePropertyListener( id, &tmp, rateListener, (void *) &reportedRate ); + + if ( microCounter > 5000000 ) { + errorStream_ << "RtApiCore::probeDeviceOpen: timeout waiting for sample rate update for device (" << device << ")."; + errorText_ = errorStream_.str(); + return FAILURE; + } + } + + // Now set the stream format for all streams. Also, check the + // physical format of the device and change that if necessary. 
+ AudioStreamBasicDescription description; + dataSize = sizeof( AudioStreamBasicDescription ); + property.mSelector = kAudioStreamPropertyVirtualFormat; + result = AudioObjectGetPropertyData( id, &property, 0, NULL, &dataSize, &description ); + if ( result != noErr ) { + errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") getting stream format for device (" << device << ")."; + errorText_ = errorStream_.str(); + return FAILURE; + } + + // Set the sample rate and data format id. However, only make the + // change if the sample rate is not within 1.0 of the desired + // rate and the format is not linear pcm. + bool updateFormat = false; + if ( fabs( description.mSampleRate - (Float64)sampleRate ) > 1.0 ) { + description.mSampleRate = (Float64) sampleRate; + updateFormat = true; + } + + if ( description.mFormatID != kAudioFormatLinearPCM ) { + description.mFormatID = kAudioFormatLinearPCM; + updateFormat = true; + } + + if ( updateFormat ) { + result = AudioObjectSetPropertyData( id, &property, 0, NULL, dataSize, &description ); + if ( result != noErr ) { + errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") setting sample rate or data format for device (" << device << ")."; + errorText_ = errorStream_.str(); + return FAILURE; + } + } + + // Now check the physical format. 
+ property.mSelector = kAudioStreamPropertyPhysicalFormat; + result = AudioObjectGetPropertyData( id, &property, 0, NULL, &dataSize, &description ); + if ( result != noErr ) { + errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") getting stream physical format for device (" << device << ")."; + errorText_ = errorStream_.str(); + return FAILURE; + } + + //std::cout << "Current physical stream format:" << std::endl; + //std::cout << " mBitsPerChan = " << description.mBitsPerChannel << std::endl; + //std::cout << " aligned high = " << (description.mFormatFlags & kAudioFormatFlagIsAlignedHigh) << ", isPacked = " << (description.mFormatFlags & kAudioFormatFlagIsPacked) << std::endl; + //std::cout << " bytesPerFrame = " << description.mBytesPerFrame << std::endl; + //std::cout << " sample rate = " << description.mSampleRate << std::endl; + + if ( description.mFormatID != kAudioFormatLinearPCM || description.mBitsPerChannel < 16 ) { + description.mFormatID = kAudioFormatLinearPCM; + //description.mSampleRate = (Float64) sampleRate; + AudioStreamBasicDescription testDescription = description; + UInt32 formatFlags; + + // We'll try higher bit rates first and then work our way down. 
+ std::vector< std::pair<UInt32, UInt32> > physicalFormats; + formatFlags = (description.mFormatFlags | kLinearPCMFormatFlagIsFloat) & ~kLinearPCMFormatFlagIsSignedInteger; + physicalFormats.push_back( std::pair<Float32, UInt32>( 32, formatFlags ) ); + formatFlags = (description.mFormatFlags | kLinearPCMFormatFlagIsSignedInteger | kAudioFormatFlagIsPacked) & ~kLinearPCMFormatFlagIsFloat; + physicalFormats.push_back( std::pair<Float32, UInt32>( 32, formatFlags ) ); + physicalFormats.push_back( std::pair<Float32, UInt32>( 24, formatFlags ) ); // 24-bit packed + formatFlags &= ~( kAudioFormatFlagIsPacked | kAudioFormatFlagIsAlignedHigh ); + physicalFormats.push_back( std::pair<Float32, UInt32>( 24.2, formatFlags ) ); // 24-bit in 4 bytes, aligned low + formatFlags |= kAudioFormatFlagIsAlignedHigh; + physicalFormats.push_back( std::pair<Float32, UInt32>( 24.4, formatFlags ) ); // 24-bit in 4 bytes, aligned high + formatFlags = (description.mFormatFlags | kLinearPCMFormatFlagIsSignedInteger | kAudioFormatFlagIsPacked) & ~kLinearPCMFormatFlagIsFloat; + physicalFormats.push_back( std::pair<Float32, UInt32>( 16, formatFlags ) ); + physicalFormats.push_back( std::pair<Float32, UInt32>( 8, formatFlags ) ); + + bool setPhysicalFormat = false; + for( unsigned int i=0; i<physicalFormats.size(); i++ ) { + testDescription = description; + testDescription.mBitsPerChannel = (UInt32) physicalFormats[i].first; + testDescription.mFormatFlags = physicalFormats[i].second; + if ( (24 == (UInt32)physicalFormats[i].first) && ~( physicalFormats[i].second & kAudioFormatFlagIsPacked ) ) + testDescription.mBytesPerFrame = 4 * testDescription.mChannelsPerFrame; + else + testDescription.mBytesPerFrame = testDescription.mBitsPerChannel/8 * testDescription.mChannelsPerFrame; + testDescription.mBytesPerPacket = testDescription.mBytesPerFrame * testDescription.mFramesPerPacket; + result = AudioObjectSetPropertyData( id, &property, 0, NULL, dataSize, &testDescription ); + if ( result == noErr ) { + 
setPhysicalFormat = true; + //std::cout << "Updated physical stream format:" << std::endl; + //std::cout << " mBitsPerChan = " << testDescription.mBitsPerChannel << std::endl; + //std::cout << " aligned high = " << (testDescription.mFormatFlags & kAudioFormatFlagIsAlignedHigh) << ", isPacked = " << (testDescription.mFormatFlags & kAudioFormatFlagIsPacked) << std::endl; + //std::cout << " bytesPerFrame = " << testDescription.mBytesPerFrame << std::endl; + //std::cout << " sample rate = " << testDescription.mSampleRate << std::endl; + break; + } + } + + if ( !setPhysicalFormat ) { + errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") setting physical data format for device (" << device << ")."; + errorText_ = errorStream_.str(); + return FAILURE; + } + } // done setting virtual/physical formats. + + // Get the stream / device latency. + UInt32 latency; + dataSize = sizeof( UInt32 ); + property.mSelector = kAudioDevicePropertyLatency; + if ( AudioObjectHasProperty( id, &property ) == true ) { + result = AudioObjectGetPropertyData( id, &property, 0, NULL, &dataSize, &latency ); + if ( result == kAudioHardwareNoError ) stream_.latency[ mode ] = latency; + else { + errorStream_ << "RtApiCore::probeDeviceOpen: system error (" << getErrorCode( result ) << ") getting device latency for device (" << device << ")."; + errorText_ = errorStream_.str(); + error( RtAudioError::WARNING ); + } + } + + // Byte-swapping: According to AudioHardware.h, the stream data will + // always be presented in native-endian format, so we should never + // need to byte swap. + stream_.doByteSwap[mode] = false; + + // From the CoreAudio documentation, PCM data must be supplied as + // 32-bit floats. 
+ stream_.userFormat = format; + stream_.deviceFormat[mode] = RTAUDIO_FLOAT32; + + if ( streamCount == 1 ) + stream_.nDeviceChannels[mode] = description.mChannelsPerFrame; + else // multiple streams + stream_.nDeviceChannels[mode] = channels; + stream_.nUserChannels[mode] = channels; + stream_.channelOffset[mode] = channelOffset; // offset within a CoreAudio stream + if ( options && options->flags & RTAUDIO_NONINTERLEAVED ) stream_.userInterleaved = false; + else stream_.userInterleaved = true; + stream_.deviceInterleaved[mode] = true; + if ( monoMode == true ) stream_.deviceInterleaved[mode] = false; + + // Set flags for buffer conversion. + stream_.doConvertBuffer[mode] = false; + if ( stream_.userFormat != stream_.deviceFormat[mode] ) + stream_.doConvertBuffer[mode] = true; + if ( stream_.nUserChannels[mode] < stream_.nDeviceChannels[mode] ) + stream_.doConvertBuffer[mode] = true; + if ( streamCount == 1 ) { + if ( stream_.nUserChannels[mode] > 1 && + stream_.userInterleaved != stream_.deviceInterleaved[mode] ) + stream_.doConvertBuffer[mode] = true; + } + else if ( monoMode && stream_.userInterleaved ) + stream_.doConvertBuffer[mode] = true; + + // Allocate our CoreHandle structure for the stream. + CoreHandle *handle = 0; + if ( stream_.apiHandle == 0 ) { + try { + handle = new CoreHandle; + } + catch ( std::bad_alloc& ) { + errorText_ = "RtApiCore::probeDeviceOpen: error allocating CoreHandle memory."; + goto error; + } + + if ( pthread_cond_init( &handle->condition, NULL ) ) { + errorText_ = "RtApiCore::probeDeviceOpen: error initializing pthread condition variable."; + goto error; + } + stream_.apiHandle = (void *) handle; + } + else + handle = (CoreHandle *) stream_.apiHandle; + handle->iStream[mode] = firstStream; + handle->nStreams[mode] = streamCount; + handle->id[mode] = id; + + // Allocate necessary internal buffers. 
+ unsigned long bufferBytes; + bufferBytes = stream_.nUserChannels[mode] * *bufferSize * formatBytes( stream_.userFormat ); + // stream_.userBuffer[mode] = (char *) calloc( bufferBytes, 1 ); + stream_.userBuffer[mode] = (char *) malloc( bufferBytes * sizeof(char) ); + memset( stream_.userBuffer[mode], 0, bufferBytes * sizeof(char) ); + if ( stream_.userBuffer[mode] == NULL ) { + errorText_ = "RtApiCore::probeDeviceOpen: error allocating user buffer memory."; + goto error; + } + + // If possible, we will make use of the CoreAudio stream buffers as + // "device buffers". However, we can't do this if using multiple + // streams. + if ( stream_.doConvertBuffer[mode] && handle->nStreams[mode] > 1 ) { + + bool makeBuffer = true; + bufferBytes = stream_.nDeviceChannels[mode] * formatBytes( stream_.deviceFormat[mode] ); + if ( mode == INPUT ) { + if ( stream_.mode == OUTPUT && stream_.deviceBuffer ) { + unsigned long bytesOut = stream_.nDeviceChannels[0] * formatBytes( stream_.deviceFormat[0] ); + if ( bufferBytes <= bytesOut ) makeBuffer = false; + } + } + + if ( makeBuffer ) { + bufferBytes *= *bufferSize; + if ( stream_.deviceBuffer ) free( stream_.deviceBuffer ); + stream_.deviceBuffer = (char *) calloc( bufferBytes, 1 ); + if ( stream_.deviceBuffer == NULL ) { + errorText_ = "RtApiCore::probeDeviceOpen: error allocating device buffer memory."; + goto error; + } + } + } + + stream_.sampleRate = sampleRate; + stream_.device[mode] = device; + stream_.state = STREAM_STOPPED; + stream_.callbackInfo.object = (void *) this; + + // Setup the buffer conversion information structure. + if ( stream_.doConvertBuffer[mode] ) { + if ( streamCount > 1 ) setConvertInfo( mode, 0 ); + else setConvertInfo( mode, channelOffset ); + } + + if ( mode == INPUT && stream_.mode == OUTPUT && stream_.device[0] == device ) + // Only one callback procedure per device. 
+ stream_.mode = DUPLEX; + else { +#if defined( MAC_OS_X_VERSION_10_5 ) && ( MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_5 ) + result = AudioDeviceCreateIOProcID( id, callbackHandler, (void *) &stream_.callbackInfo, &handle->procId[mode] ); +#else + // deprecated in favor of AudioDeviceCreateIOProcID() + result = AudioDeviceAddIOProc( id, callbackHandler, (void *) &stream_.callbackInfo ); +#endif + if ( result != noErr ) { + errorStream_ << "RtApiCore::probeDeviceOpen: system error setting callback for device (" << device << ")."; + errorText_ = errorStream_.str(); + goto error; + } + if ( stream_.mode == OUTPUT && mode == INPUT ) + stream_.mode = DUPLEX; + else + stream_.mode = mode; + } + + // Setup the device property listener for over/underload. + property.mSelector = kAudioDeviceProcessorOverload; + property.mScope = kAudioObjectPropertyScopeGlobal; + result = AudioObjectAddPropertyListener( id, &property, xrunListener, (void *) handle ); + + return SUCCESS; + + error: + if ( handle ) { + pthread_cond_destroy( &handle->condition ); + delete handle; + stream_.apiHandle = 0; + } + + for ( int i=0; i<2; i++ ) { + if ( stream_.userBuffer[i] ) { + free( stream_.userBuffer[i] ); + stream_.userBuffer[i] = 0; + } + } + + if ( stream_.deviceBuffer ) { + free( stream_.deviceBuffer ); + stream_.deviceBuffer = 0; + } + + stream_.state = STREAM_CLOSED; + return FAILURE; +} + +void RtApiCore :: closeStream( void ) +{ + if ( stream_.state == STREAM_CLOSED ) { + errorText_ = "RtApiCore::closeStream(): no open stream to close!"; + error( RtAudioError::WARNING ); + return; + } + + CoreHandle *handle = (CoreHandle *) stream_.apiHandle; + if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) { + if (handle) { + AudioObjectPropertyAddress property = { kAudioHardwarePropertyDevices, + kAudioObjectPropertyScopeGlobal, + kAudioObjectPropertyElementMaster }; + + property.mSelector = kAudioDeviceProcessorOverload; + property.mScope = kAudioObjectPropertyScopeGlobal; + if 
(AudioObjectRemovePropertyListener( handle->id[0], &property, xrunListener, (void *) handle ) != noErr) { + errorText_ = "RtApiCore::closeStream(): error removing property listener!"; + error( RtAudioError::WARNING ); + } + } + if ( stream_.state == STREAM_RUNNING ) + AudioDeviceStop( handle->id[0], callbackHandler ); +#if defined( MAC_OS_X_VERSION_10_5 ) && ( MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_5 ) + AudioDeviceDestroyIOProcID( handle->id[0], handle->procId[0] ); +#else + // deprecated in favor of AudioDeviceDestroyIOProcID() + AudioDeviceRemoveIOProc( handle->id[0], callbackHandler ); +#endif + } + + if ( stream_.mode == INPUT || ( stream_.mode == DUPLEX && stream_.device[0] != stream_.device[1] ) ) { + if (handle) { + AudioObjectPropertyAddress property = { kAudioHardwarePropertyDevices, + kAudioObjectPropertyScopeGlobal, + kAudioObjectPropertyElementMaster }; + + property.mSelector = kAudioDeviceProcessorOverload; + property.mScope = kAudioObjectPropertyScopeGlobal; + if (AudioObjectRemovePropertyListener( handle->id[1], &property, xrunListener, (void *) handle ) != noErr) { + errorText_ = "RtApiCore::closeStream(): error removing property listener!"; + error( RtAudioError::WARNING ); + } + } + if ( stream_.state == STREAM_RUNNING ) + AudioDeviceStop( handle->id[1], callbackHandler ); +#if defined( MAC_OS_X_VERSION_10_5 ) && ( MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_5 ) + AudioDeviceDestroyIOProcID( handle->id[1], handle->procId[1] ); +#else + // deprecated in favor of AudioDeviceDestroyIOProcID() + AudioDeviceRemoveIOProc( handle->id[1], callbackHandler ); +#endif + } + + for ( int i=0; i<2; i++ ) { + if ( stream_.userBuffer[i] ) { + free( stream_.userBuffer[i] ); + stream_.userBuffer[i] = 0; + } + } + + if ( stream_.deviceBuffer ) { + free( stream_.deviceBuffer ); + stream_.deviceBuffer = 0; + } + + // Destroy pthread condition variable. 
+ pthread_cond_destroy( &handle->condition ); + delete handle; + stream_.apiHandle = 0; + + stream_.mode = UNINITIALIZED; + stream_.state = STREAM_CLOSED; +} + +void RtApiCore :: startStream( void ) +{ + verifyStream(); + if ( stream_.state == STREAM_RUNNING ) { + errorText_ = "RtApiCore::startStream(): the stream is already running!"; + error( RtAudioError::WARNING ); + return; + } + + OSStatus result = noErr; + CoreHandle *handle = (CoreHandle *) stream_.apiHandle; + if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) { + + result = AudioDeviceStart( handle->id[0], callbackHandler ); + if ( result != noErr ) { + errorStream_ << "RtApiCore::startStream: system error (" << getErrorCode( result ) << ") starting callback procedure on device (" << stream_.device[0] << ")."; + errorText_ = errorStream_.str(); + goto unlock; + } + } + + if ( stream_.mode == INPUT || + ( stream_.mode == DUPLEX && stream_.device[0] != stream_.device[1] ) ) { + + result = AudioDeviceStart( handle->id[1], callbackHandler ); + if ( result != noErr ) { + errorStream_ << "RtApiCore::startStream: system error starting input callback procedure on device (" << stream_.device[1] << ")."; + errorText_ = errorStream_.str(); + goto unlock; + } + } + + handle->drainCounter = 0; + handle->internalDrain = false; + stream_.state = STREAM_RUNNING; + + unlock: + if ( result == noErr ) return; + error( RtAudioError::SYSTEM_ERROR ); +} + +void RtApiCore :: stopStream( void ) +{ + verifyStream(); + if ( stream_.state == STREAM_STOPPED ) { + errorText_ = "RtApiCore::stopStream(): the stream is already stopped!"; + error( RtAudioError::WARNING ); + return; + } + + OSStatus result = noErr; + CoreHandle *handle = (CoreHandle *) stream_.apiHandle; + if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) { + + if ( handle->drainCounter == 0 ) { + handle->drainCounter = 2; + pthread_cond_wait( &handle->condition, &stream_.mutex ); // block until signaled + } + + result = AudioDeviceStop( handle->id[0], 
callbackHandler ); + if ( result != noErr ) { + errorStream_ << "RtApiCore::stopStream: system error (" << getErrorCode( result ) << ") stopping callback procedure on device (" << stream_.device[0] << ")."; + errorText_ = errorStream_.str(); + goto unlock; + } + } + + if ( stream_.mode == INPUT || ( stream_.mode == DUPLEX && stream_.device[0] != stream_.device[1] ) ) { + + result = AudioDeviceStop( handle->id[1], callbackHandler ); + if ( result != noErr ) { + errorStream_ << "RtApiCore::stopStream: system error (" << getErrorCode( result ) << ") stopping input callback procedure on device (" << stream_.device[1] << ")."; + errorText_ = errorStream_.str(); + goto unlock; + } + } + + stream_.state = STREAM_STOPPED; + + unlock: + if ( result == noErr ) return; + error( RtAudioError::SYSTEM_ERROR ); +} + +void RtApiCore :: abortStream( void ) +{ + verifyStream(); + if ( stream_.state == STREAM_STOPPED ) { + errorText_ = "RtApiCore::abortStream(): the stream is already stopped!"; + error( RtAudioError::WARNING ); + return; + } + + CoreHandle *handle = (CoreHandle *) stream_.apiHandle; + handle->drainCounter = 2; + + stopStream(); +} + +// This function will be called by a spawned thread when the user +// callback function signals that the stream should be stopped or +// aborted. It is better to handle it this way because the +// callbackEvent() function probably should return before the AudioDeviceStop() +// function is called. +static void *coreStopStream( void *ptr ) +{ + CallbackInfo *info = (CallbackInfo *) ptr; + RtApiCore *object = (RtApiCore *) info->object; + + object->stopStream(); + pthread_exit( NULL ); +} + +bool RtApiCore :: callbackEvent( AudioDeviceID deviceId, + const AudioBufferList *inBufferList, + const AudioBufferList *outBufferList ) +{ + if ( stream_.state == STREAM_STOPPED || stream_.state == STREAM_STOPPING ) return SUCCESS; + if ( stream_.state == STREAM_CLOSED ) { + errorText_ = "RtApiCore::callbackEvent(): the stream is closed ... 
this shouldn't happen!"; + error( RtAudioError::WARNING ); + return FAILURE; + } + + CallbackInfo *info = (CallbackInfo *) &stream_.callbackInfo; + CoreHandle *handle = (CoreHandle *) stream_.apiHandle; + + // Check if we were draining the stream and signal is finished. + if ( handle->drainCounter > 3 ) { + ThreadHandle threadId; + + stream_.state = STREAM_STOPPING; + if ( handle->internalDrain == true ) + pthread_create( &threadId, NULL, coreStopStream, info ); + else // external call to stopStream() + pthread_cond_signal( &handle->condition ); + return SUCCESS; + } + + AudioDeviceID outputDevice = handle->id[0]; + + // Invoke user callback to get fresh output data UNLESS we are + // draining stream or duplex mode AND the input/output devices are + // different AND this function is called for the input device. + if ( handle->drainCounter == 0 && ( stream_.mode != DUPLEX || deviceId == outputDevice ) ) { + RtAudioCallback callback = (RtAudioCallback) info->callback; + double streamTime = getStreamTime(); + RtAudioStreamStatus status = 0; + if ( stream_.mode != INPUT && handle->xrun[0] == true ) { + status |= RTAUDIO_OUTPUT_UNDERFLOW; + handle->xrun[0] = false; + } + if ( stream_.mode != OUTPUT && handle->xrun[1] == true ) { + status |= RTAUDIO_INPUT_OVERFLOW; + handle->xrun[1] = false; + } + + int cbReturnValue = callback( stream_.userBuffer[0], stream_.userBuffer[1], + stream_.bufferSize, streamTime, status, info->userData ); + if ( cbReturnValue == 2 ) { + stream_.state = STREAM_STOPPING; + handle->drainCounter = 2; + abortStream(); + return SUCCESS; + } + else if ( cbReturnValue == 1 ) { + handle->drainCounter = 1; + handle->internalDrain = true; + } + } + + if ( stream_.mode == OUTPUT || ( stream_.mode == DUPLEX && deviceId == outputDevice ) ) { + + if ( handle->drainCounter > 1 ) { // write zeros to the output stream + + if ( handle->nStreams[0] == 1 ) { + memset( outBufferList->mBuffers[handle->iStream[0]].mData, + 0, + 
outBufferList->mBuffers[handle->iStream[0]].mDataByteSize ); + } + else { // fill multiple streams with zeros + for ( unsigned int i=0; i<handle->nStreams[0]; i++ ) { + memset( outBufferList->mBuffers[handle->iStream[0]+i].mData, + 0, + outBufferList->mBuffers[handle->iStream[0]+i].mDataByteSize ); + } + } + } + else if ( handle->nStreams[0] == 1 ) { + if ( stream_.doConvertBuffer[0] ) { // convert directly to CoreAudio stream buffer + convertBuffer( (char *) outBufferList->mBuffers[handle->iStream[0]].mData, + stream_.userBuffer[0], stream_.convertInfo[0] ); + } + else { // copy from user buffer + memcpy( outBufferList->mBuffers[handle->iStream[0]].mData, + stream_.userBuffer[0], + outBufferList->mBuffers[handle->iStream[0]].mDataByteSize ); + } + } + else { // fill multiple streams + Float32 *inBuffer = (Float32 *) stream_.userBuffer[0]; + if ( stream_.doConvertBuffer[0] ) { + convertBuffer( stream_.deviceBuffer, stream_.userBuffer[0], stream_.convertInfo[0] ); + inBuffer = (Float32 *) stream_.deviceBuffer; + } + + if ( stream_.deviceInterleaved[0] == false ) { // mono mode + UInt32 bufferBytes = outBufferList->mBuffers[handle->iStream[0]].mDataByteSize; + for ( unsigned int i=0; i<stream_.nUserChannels[0]; i++ ) { + memcpy( outBufferList->mBuffers[handle->iStream[0]+i].mData, + (void *)&inBuffer[i*stream_.bufferSize], bufferBytes ); + } + } + else { // fill multiple multi-channel streams with interleaved data + UInt32 streamChannels, channelsLeft, inJump, outJump, inOffset; + Float32 *out, *in; + + bool inInterleaved = ( stream_.userInterleaved ) ? 
true : false; + UInt32 inChannels = stream_.nUserChannels[0]; + if ( stream_.doConvertBuffer[0] ) { + inInterleaved = true; // device buffer will always be interleaved for nStreams > 1 and not mono mode + inChannels = stream_.nDeviceChannels[0]; + } + + if ( inInterleaved ) inOffset = 1; + else inOffset = stream_.bufferSize; + + channelsLeft = inChannels; + for ( unsigned int i=0; i<handle->nStreams[0]; i++ ) { + in = inBuffer; + out = (Float32 *) outBufferList->mBuffers[handle->iStream[0]+i].mData; + streamChannels = outBufferList->mBuffers[handle->iStream[0]+i].mNumberChannels; + + outJump = 0; + // Account for possible channel offset in first stream + if ( i == 0 && stream_.channelOffset[0] > 0 ) { + streamChannels -= stream_.channelOffset[0]; + outJump = stream_.channelOffset[0]; + out += outJump; + } + + // Account for possible unfilled channels at end of the last stream + if ( streamChannels > channelsLeft ) { + outJump = streamChannels - channelsLeft; + streamChannels = channelsLeft; + } + + // Determine input buffer offsets and skips + if ( inInterleaved ) { + inJump = inChannels; + in += inChannels - channelsLeft; + } + else { + inJump = 1; + in += (inChannels - channelsLeft) * inOffset; + } + + for ( unsigned int i=0; i<stream_.bufferSize; i++ ) { + for ( unsigned int j=0; j<streamChannels; j++ ) { + *out++ = in[j*inOffset]; + } + out += outJump; + in += inJump; + } + channelsLeft -= streamChannels; + } + } + } + } + + // Don't bother draining input + if ( handle->drainCounter ) { + handle->drainCounter++; + goto unlock; + } + + AudioDeviceID inputDevice; + inputDevice = handle->id[1]; + if ( stream_.mode == INPUT || ( stream_.mode == DUPLEX && deviceId == inputDevice ) ) { + + if ( handle->nStreams[1] == 1 ) { + if ( stream_.doConvertBuffer[1] ) { // convert directly from CoreAudio stream buffer + convertBuffer( stream_.userBuffer[1], + (char *) inBufferList->mBuffers[handle->iStream[1]].mData, + stream_.convertInfo[1] ); + } + else { // copy to user 
buffer + memcpy( stream_.userBuffer[1], + inBufferList->mBuffers[handle->iStream[1]].mData, + inBufferList->mBuffers[handle->iStream[1]].mDataByteSize ); + } + } + else { // read from multiple streams + Float32 *outBuffer = (Float32 *) stream_.userBuffer[1]; + if ( stream_.doConvertBuffer[1] ) outBuffer = (Float32 *) stream_.deviceBuffer; + + if ( stream_.deviceInterleaved[1] == false ) { // mono mode + UInt32 bufferBytes = inBufferList->mBuffers[handle->iStream[1]].mDataByteSize; + for ( unsigned int i=0; i<stream_.nUserChannels[1]; i++ ) { + memcpy( (void *)&outBuffer[i*stream_.bufferSize], + inBufferList->mBuffers[handle->iStream[1]+i].mData, bufferBytes ); + } + } + else { // read from multiple multi-channel streams + UInt32 streamChannels, channelsLeft, inJump, outJump, outOffset; + Float32 *out, *in; + + bool outInterleaved = ( stream_.userInterleaved ) ? true : false; + UInt32 outChannels = stream_.nUserChannels[1]; + if ( stream_.doConvertBuffer[1] ) { + outInterleaved = true; // device buffer will always be interleaved for nStreams > 1 and not mono mode + outChannels = stream_.nDeviceChannels[1]; + } + + if ( outInterleaved ) outOffset = 1; + else outOffset = stream_.bufferSize; + + channelsLeft = outChannels; + for ( unsigned int i=0; i<handle->nStreams[1]; i++ ) { + out = outBuffer; + in = (Float32 *) inBufferList->mBuffers[handle->iStream[1]+i].mData; + streamChannels = inBufferList->mBuffers[handle->iStream[1]+i].mNumberChannels; + + inJump = 0; + // Account for possible channel offset in first stream + if ( i == 0 && stream_.channelOffset[1] > 0 ) { + streamChannels -= stream_.channelOffset[1]; + inJump = stream_.channelOffset[1]; + in += inJump; + } + + // Account for possible unread channels at end of the last stream + if ( streamChannels > channelsLeft ) { + inJump = streamChannels - channelsLeft; + streamChannels = channelsLeft; + } + + // Determine output buffer offsets and skips + if ( outInterleaved ) { + outJump = outChannels; + out += 
outChannels - channelsLeft; + } + else { + outJump = 1; + out += (outChannels - channelsLeft) * outOffset; + } + + for ( unsigned int i=0; i<stream_.bufferSize; i++ ) { + for ( unsigned int j=0; j<streamChannels; j++ ) { + out[j*outOffset] = *in++; + } + out += outJump; + in += inJump; + } + channelsLeft -= streamChannels; + } + } + + if ( stream_.doConvertBuffer[1] ) { // convert from our internal "device" buffer + convertBuffer( stream_.userBuffer[1], + stream_.deviceBuffer, + stream_.convertInfo[1] ); + } + } + } + + unlock: + //MUTEX_UNLOCK( &stream_.mutex ); + + RtApi::tickStreamTime(); + return SUCCESS; +} + +const char* RtApiCore :: getErrorCode( OSStatus code ) +{ + switch( code ) { + + case kAudioHardwareNotRunningError: + return "kAudioHardwareNotRunningError"; + + case kAudioHardwareUnspecifiedError: + return "kAudioHardwareUnspecifiedError"; + + case kAudioHardwareUnknownPropertyError: + return "kAudioHardwareUnknownPropertyError"; + + case kAudioHardwareBadPropertySizeError: + return "kAudioHardwareBadPropertySizeError"; + + case kAudioHardwareIllegalOperationError: + return "kAudioHardwareIllegalOperationError"; + + case kAudioHardwareBadObjectError: + return "kAudioHardwareBadObjectError"; + + case kAudioHardwareBadDeviceError: + return "kAudioHardwareBadDeviceError"; + + case kAudioHardwareBadStreamError: + return "kAudioHardwareBadStreamError"; + + case kAudioHardwareUnsupportedOperationError: + return "kAudioHardwareUnsupportedOperationError"; + + case kAudioDeviceUnsupportedFormatError: + return "kAudioDeviceUnsupportedFormatError"; + + case kAudioDevicePermissionsError: + return "kAudioDevicePermissionsError"; + + default: + return "CoreAudio unknown error"; + } +} + + //******************** End of __MACOSX_CORE__ *********************// +#endif + +#if defined(__UNIX_JACK__) + +// JACK is a low-latency audio server, originally written for the +// GNU/Linux operating system and now also ported to OS-X. 
It can +// connect a number of different applications to an audio device, as +// well as allowing them to share audio between themselves. +// +// When using JACK with RtAudio, "devices" refer to JACK clients that +// have ports connected to the server. The JACK server is typically +// started in a terminal as follows: +// +// .jackd -d alsa -d hw:0 +// +// or through an interface program such as qjackctl. Many of the +// parameters normally set for a stream are fixed by the JACK server +// and can be specified when the JACK server is started. In +// particular, +// +// .jackd -d alsa -d hw:0 -r 44100 -p 512 -n 4 +// +// specifies a sample rate of 44100 Hz, a buffer size of 512 sample +// frames, and number of buffers = 4. Once the server is running, it +// is not possible to override these values. If the values are not +// specified in the command-line, the JACK server uses default values. +// +// The JACK server does not have to be running when an instance of +// RtApiJack is created, though the function getDeviceCount() will +// report 0 devices found until JACK has been started. When no +// devices are available (i.e., the JACK server is not running), a +// stream cannot be opened. + +#include <jack/jack.h> +#include <unistd.h> +#include <cstdio> + +// A structure to hold various information related to the Jack API +// implementation. +struct JackHandle { + jack_client_t *client; + jack_port_t **ports[2]; + std::string deviceName[2]; + bool xrun[2]; + pthread_cond_t condition; + int drainCounter; // Tracks callback counts when draining + bool internalDrain; // Indicates if stop is initiated from callback or not. + + JackHandle() + :client(0), drainCounter(0), internalDrain(false) { ports[0] = 0; ports[1] = 0; xrun[0] = false; xrun[1] = false; } +}; + +static void jackSilentError( const char * ) {}; + +RtApiJack :: RtApiJack() +{ + // Nothing to do here. +#if !defined(__RTAUDIO_DEBUG__) + // Turn off Jack's internal error reporting. 
+ jack_set_error_function( &jackSilentError ); +#endif +} + +RtApiJack :: ~RtApiJack() +{ + if ( stream_.state != STREAM_CLOSED ) closeStream(); +} + +unsigned int RtApiJack :: getDeviceCount( void ) +{ + // See if we can become a jack client. + jack_options_t options = (jack_options_t) ( JackNoStartServer ); //JackNullOption; + jack_status_t *status = NULL; + jack_client_t *client = jack_client_open( "RtApiJackCount", options, status ); + if ( client == 0 ) return 0; + + const char **ports; + std::string port, previousPort; + unsigned int nChannels = 0, nDevices = 0; + ports = jack_get_ports( client, NULL, NULL, 0 ); + if ( ports ) { + // Parse the port names up to the first colon (:). + size_t iColon = 0; + do { + port = (char *) ports[ nChannels ]; + iColon = port.find(":"); + if ( iColon != std::string::npos ) { + port = port.substr( 0, iColon + 1 ); + if ( port != previousPort ) { + nDevices++; + previousPort = port; + } + } + } while ( ports[++nChannels] ); + free( ports ); + } + + jack_client_close( client ); + return nDevices; +} + +RtAudio::DeviceInfo RtApiJack :: getDeviceInfo( unsigned int device ) +{ + RtAudio::DeviceInfo info; + info.probed = false; + + jack_options_t options = (jack_options_t) ( JackNoStartServer ); //JackNullOption + jack_status_t *status = NULL; + jack_client_t *client = jack_client_open( "RtApiJackInfo", options, status ); + if ( client == 0 ) { + errorText_ = "RtApiJack::getDeviceInfo: Jack server not found or connection error!"; + error( RtAudioError::WARNING ); + return info; + } + + const char **ports; + std::string port, previousPort; + unsigned int nPorts = 0, nDevices = 0; + ports = jack_get_ports( client, NULL, NULL, 0 ); + if ( ports ) { + // Parse the port names up to the first colon (:). 
+ size_t iColon = 0; + do { + port = (char *) ports[ nPorts ]; + iColon = port.find(":"); + if ( iColon != std::string::npos ) { + port = port.substr( 0, iColon ); + if ( port != previousPort ) { + if ( nDevices == device ) info.name = port; + nDevices++; + previousPort = port; + } + } + } while ( ports[++nPorts] ); + free( ports ); + } + + if ( device >= nDevices ) { + jack_client_close( client ); + errorText_ = "RtApiJack::getDeviceInfo: device ID is invalid!"; + error( RtAudioError::INVALID_USE ); + return info; + } + + // Get the current jack server sample rate. + info.sampleRates.clear(); + + info.preferredSampleRate = jack_get_sample_rate( client ); + info.sampleRates.push_back( info.preferredSampleRate ); + + // Count the available ports containing the client name as device + // channels. Jack "input ports" equal RtAudio output channels. + unsigned int nChannels = 0; + ports = jack_get_ports( client, info.name.c_str(), NULL, JackPortIsInput ); + if ( ports ) { + while ( ports[ nChannels ] ) nChannels++; + free( ports ); + info.outputChannels = nChannels; + } + + // Jack "output ports" equal RtAudio input channels. + nChannels = 0; + ports = jack_get_ports( client, info.name.c_str(), NULL, JackPortIsOutput ); + if ( ports ) { + while ( ports[ nChannels ] ) nChannels++; + free( ports ); + info.inputChannels = nChannels; + } + + if ( info.outputChannels == 0 && info.inputChannels == 0 ) { + jack_client_close(client); + errorText_ = "RtApiJack::getDeviceInfo: error determining Jack input/output channels!"; + error( RtAudioError::WARNING ); + return info; + } + + // If device opens for both playback and capture, we determine the channels. + if ( info.outputChannels > 0 && info.inputChannels > 0 ) + info.duplexChannels = (info.outputChannels > info.inputChannels) ? info.inputChannels : info.outputChannels; + + // Jack always uses 32-bit floats. + info.nativeFormats = RTAUDIO_FLOAT32; + + // Jack doesn't provide default devices so we'll use the first available one. 
+ if ( device == 0 && info.outputChannels > 0 ) + info.isDefaultOutput = true; + if ( device == 0 && info.inputChannels > 0 ) + info.isDefaultInput = true; + + jack_client_close(client); + info.probed = true; + return info; +} + +static int jackCallbackHandler( jack_nframes_t nframes, void *infoPointer ) +{ + CallbackInfo *info = (CallbackInfo *) infoPointer; + + RtApiJack *object = (RtApiJack *) info->object; + if ( object->callbackEvent( (unsigned long) nframes ) == false ) return 1; + + return 0; +} + +// This function will be called by a spawned thread when the Jack +// server signals that it is shutting down. It is necessary to handle +// it this way because the jackShutdown() function must return before +// the jack_deactivate() function (in closeStream()) will return. +static void *jackCloseStream( void *ptr ) +{ + CallbackInfo *info = (CallbackInfo *) ptr; + RtApiJack *object = (RtApiJack *) info->object; + + object->closeStream(); + + pthread_exit( NULL ); +} +static void jackShutdown( void *infoPointer ) +{ + CallbackInfo *info = (CallbackInfo *) infoPointer; + RtApiJack *object = (RtApiJack *) info->object; + + // Check current stream state. If stopped, then we'll assume this + // was called as a result of a call to RtApiJack::stopStream (the + // deactivation of a client handle causes this function to be called). + // If not, we'll assume the Jack server is shutting down or some + // other problem occurred and we should close the stream. + if ( object->isStreamRunning() == false ) return; + + ThreadHandle threadId; + pthread_create( &threadId, NULL, jackCloseStream, info ); + std::cerr << "\nRtApiJack: the Jack server is shutting down this client ... 
stream stopped and closed!!\n" << std::endl; +} + +static int jackXrun( void *infoPointer ) +{ + JackHandle *handle = (JackHandle *) infoPointer; + + if ( handle->ports[0] ) handle->xrun[0] = true; + if ( handle->ports[1] ) handle->xrun[1] = true; + + return 0; +} + +bool RtApiJack :: probeDeviceOpen( unsigned int device, StreamMode mode, unsigned int channels, + unsigned int firstChannel, unsigned int sampleRate, + RtAudioFormat format, unsigned int *bufferSize, + RtAudio::StreamOptions *options ) +{ + JackHandle *handle = (JackHandle *) stream_.apiHandle; + + // Look for jack server and try to become a client (only do once per stream). + jack_client_t *client = 0; + if ( mode == OUTPUT || ( mode == INPUT && stream_.mode != OUTPUT ) ) { + jack_options_t jackoptions = (jack_options_t) ( JackNoStartServer ); //JackNullOption; + jack_status_t *status = NULL; + if ( options && !options->streamName.empty() ) + client = jack_client_open( options->streamName.c_str(), jackoptions, status ); + else + client = jack_client_open( "RtApiJack", jackoptions, status ); + if ( client == 0 ) { + errorText_ = "RtApiJack::probeDeviceOpen: Jack server not found or connection error!"; + error( RtAudioError::WARNING ); + return FAILURE; + } + } + else { + // The handle must have been created on an earlier pass. + client = handle->client; + } + + const char **ports; + std::string port, previousPort, deviceName; + unsigned int nPorts = 0, nDevices = 0; + ports = jack_get_ports( client, NULL, NULL, 0 ); + if ( ports ) { + // Parse the port names up to the first colon (:). 
+ size_t iColon = 0; + do { + port = (char *) ports[ nPorts ]; + iColon = port.find(":"); + if ( iColon != std::string::npos ) { + port = port.substr( 0, iColon ); + if ( port != previousPort ) { + if ( nDevices == device ) deviceName = port; + nDevices++; + previousPort = port; + } + } + } while ( ports[++nPorts] ); + free( ports ); + } + + if ( device >= nDevices ) { + errorText_ = "RtApiJack::probeDeviceOpen: device ID is invalid!"; + return FAILURE; + } + + // Count the available ports containing the client name as device + // channels. Jack "input ports" equal RtAudio output channels. + unsigned int nChannels = 0; + unsigned long flag = JackPortIsInput; + if ( mode == INPUT ) flag = JackPortIsOutput; + ports = jack_get_ports( client, deviceName.c_str(), NULL, flag ); + if ( ports ) { + while ( ports[ nChannels ] ) nChannels++; + free( ports ); + } + + // Compare the jack ports for specified client to the requested number of channels. + if ( nChannels < (channels + firstChannel) ) { + errorStream_ << "RtApiJack::probeDeviceOpen: requested number of channels (" << channels << ") + offset (" << firstChannel << ") not found for specified device (" << device << ":" << deviceName << ")."; + errorText_ = errorStream_.str(); + return FAILURE; + } + + // Check the jack server sample rate. + unsigned int jackRate = jack_get_sample_rate( client ); + if ( sampleRate != jackRate ) { + jack_client_close( client ); + errorStream_ << "RtApiJack::probeDeviceOpen: the requested sample rate (" << sampleRate << ") is different than the JACK server rate (" << jackRate << ")."; + errorText_ = errorStream_.str(); + return FAILURE; + } + stream_.sampleRate = jackRate; + + // Get the latency of the JACK port. + ports = jack_get_ports( client, deviceName.c_str(), NULL, flag ); + if ( ports[ firstChannel ] ) { + // Added by Ge Wang + jack_latency_callback_mode_t cbmode = (mode == INPUT ? 
JackCaptureLatency : JackPlaybackLatency); + // the range (usually the min and max are equal) + jack_latency_range_t latrange; latrange.min = latrange.max = 0; + // get the latency range + jack_port_get_latency_range( jack_port_by_name( client, ports[firstChannel] ), cbmode, &latrange ); + // be optimistic, use the min! + stream_.latency[mode] = latrange.min; + //stream_.latency[mode] = jack_port_get_latency( jack_port_by_name( client, ports[ firstChannel ] ) ); + } + free( ports ); + + // The jack server always uses 32-bit floating-point data. + stream_.deviceFormat[mode] = RTAUDIO_FLOAT32; + stream_.userFormat = format; + + if ( options && options->flags & RTAUDIO_NONINTERLEAVED ) stream_.userInterleaved = false; + else stream_.userInterleaved = true; + + // Jack always uses non-interleaved buffers. + stream_.deviceInterleaved[mode] = false; + + // Jack always provides host byte-ordered data. + stream_.doByteSwap[mode] = false; + + // Get the buffer size. The buffer size and number of buffers + // (periods) is set when the jack server is started. + stream_.bufferSize = (int) jack_get_buffer_size( client ); + *bufferSize = stream_.bufferSize; + + stream_.nDeviceChannels[mode] = channels; + stream_.nUserChannels[mode] = channels; + + // Set flags for buffer conversion. + stream_.doConvertBuffer[mode] = false; + if ( stream_.userFormat != stream_.deviceFormat[mode] ) + stream_.doConvertBuffer[mode] = true; + if ( stream_.userInterleaved != stream_.deviceInterleaved[mode] && + stream_.nUserChannels[mode] > 1 ) + stream_.doConvertBuffer[mode] = true; + + // Allocate our JackHandle structure for the stream. 
+ if ( handle == 0 ) { + try { + handle = new JackHandle; + } + catch ( std::bad_alloc& ) { + errorText_ = "RtApiJack::probeDeviceOpen: error allocating JackHandle memory."; + goto error; + } + + if ( pthread_cond_init(&handle->condition, NULL) ) { + errorText_ = "RtApiJack::probeDeviceOpen: error initializing pthread condition variable."; + goto error; + } + stream_.apiHandle = (void *) handle; + handle->client = client; + } + handle->deviceName[mode] = deviceName; + + // Allocate necessary internal buffers. + unsigned long bufferBytes; + bufferBytes = stream_.nUserChannels[mode] * *bufferSize * formatBytes( stream_.userFormat ); + stream_.userBuffer[mode] = (char *) calloc( bufferBytes, 1 ); + if ( stream_.userBuffer[mode] == NULL ) { + errorText_ = "RtApiJack::probeDeviceOpen: error allocating user buffer memory."; + goto error; + } + + if ( stream_.doConvertBuffer[mode] ) { + + bool makeBuffer = true; + if ( mode == OUTPUT ) + bufferBytes = stream_.nDeviceChannels[0] * formatBytes( stream_.deviceFormat[0] ); + else { // mode == INPUT + bufferBytes = stream_.nDeviceChannels[1] * formatBytes( stream_.deviceFormat[1] ); + if ( stream_.mode == OUTPUT && stream_.deviceBuffer ) { + unsigned long bytesOut = stream_.nDeviceChannels[0] * formatBytes(stream_.deviceFormat[0]); + if ( bufferBytes < bytesOut ) makeBuffer = false; + } + } + + if ( makeBuffer ) { + bufferBytes *= *bufferSize; + if ( stream_.deviceBuffer ) free( stream_.deviceBuffer ); + stream_.deviceBuffer = (char *) calloc( bufferBytes, 1 ); + if ( stream_.deviceBuffer == NULL ) { + errorText_ = "RtApiJack::probeDeviceOpen: error allocating device buffer memory."; + goto error; + } + } + } + + // Allocate memory for the Jack ports (channels) identifiers. 
+ handle->ports[mode] = (jack_port_t **) malloc ( sizeof (jack_port_t *) * channels ); + if ( handle->ports[mode] == NULL ) { + errorText_ = "RtApiJack::probeDeviceOpen: error allocating port memory."; + goto error; + } + + stream_.device[mode] = device; + stream_.channelOffset[mode] = firstChannel; + stream_.state = STREAM_STOPPED; + stream_.callbackInfo.object = (void *) this; + + if ( stream_.mode == OUTPUT && mode == INPUT ) + // We had already set up the stream for output. + stream_.mode = DUPLEX; + else { + stream_.mode = mode; + jack_set_process_callback( handle->client, jackCallbackHandler, (void *) &stream_.callbackInfo ); + jack_set_xrun_callback( handle->client, jackXrun, (void *) &handle ); + jack_on_shutdown( handle->client, jackShutdown, (void *) &stream_.callbackInfo ); + } + + // Register our ports. + char label[64]; + if ( mode == OUTPUT ) { + for ( unsigned int i=0; i<stream_.nUserChannels[0]; i++ ) { + snprintf( label, 64, "outport %d", i ); + handle->ports[0][i] = jack_port_register( handle->client, (const char *)label, + JACK_DEFAULT_AUDIO_TYPE, JackPortIsOutput, 0 ); + } + } + else { + for ( unsigned int i=0; i<stream_.nUserChannels[1]; i++ ) { + snprintf( label, 64, "inport %d", i ); + handle->ports[1][i] = jack_port_register( handle->client, (const char *)label, + JACK_DEFAULT_AUDIO_TYPE, JackPortIsInput, 0 ); + } + } + + // Setup the buffer conversion information structure. We don't use + // buffers to do channel offsets, so we override that parameter + // here. 
+ if ( stream_.doConvertBuffer[mode] ) setConvertInfo( mode, 0 ); + + return SUCCESS; + + error: + if ( handle ) { + pthread_cond_destroy( &handle->condition ); + jack_client_close( handle->client ); + + if ( handle->ports[0] ) free( handle->ports[0] ); + if ( handle->ports[1] ) free( handle->ports[1] ); + + delete handle; + stream_.apiHandle = 0; + } + + for ( int i=0; i<2; i++ ) { + if ( stream_.userBuffer[i] ) { + free( stream_.userBuffer[i] ); + stream_.userBuffer[i] = 0; + } + } + + if ( stream_.deviceBuffer ) { + free( stream_.deviceBuffer ); + stream_.deviceBuffer = 0; + } + + return FAILURE; +} + +void RtApiJack :: closeStream( void ) +{ + if ( stream_.state == STREAM_CLOSED ) { + errorText_ = "RtApiJack::closeStream(): no open stream to close!"; + error( RtAudioError::WARNING ); + return; + } + + JackHandle *handle = (JackHandle *) stream_.apiHandle; + if ( handle ) { + + if ( stream_.state == STREAM_RUNNING ) + jack_deactivate( handle->client ); + + jack_client_close( handle->client ); + } + + if ( handle ) { + if ( handle->ports[0] ) free( handle->ports[0] ); + if ( handle->ports[1] ) free( handle->ports[1] ); + pthread_cond_destroy( &handle->condition ); + delete handle; + stream_.apiHandle = 0; + } + + for ( int i=0; i<2; i++ ) { + if ( stream_.userBuffer[i] ) { + free( stream_.userBuffer[i] ); + stream_.userBuffer[i] = 0; + } + } + + if ( stream_.deviceBuffer ) { + free( stream_.deviceBuffer ); + stream_.deviceBuffer = 0; + } + + stream_.mode = UNINITIALIZED; + stream_.state = STREAM_CLOSED; +} + +void RtApiJack :: startStream( void ) +{ + verifyStream(); + if ( stream_.state == STREAM_RUNNING ) { + errorText_ = "RtApiJack::startStream(): the stream is already running!"; + error( RtAudioError::WARNING ); + return; + } + + JackHandle *handle = (JackHandle *) stream_.apiHandle; + int result = jack_activate( handle->client ); + if ( result ) { + errorText_ = "RtApiJack::startStream(): unable to activate JACK client!"; + goto unlock; + } + + const char 
**ports; + + // Get the list of available ports. + if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) { + result = 1; + ports = jack_get_ports( handle->client, handle->deviceName[0].c_str(), NULL, JackPortIsInput); + if ( ports == NULL) { + errorText_ = "RtApiJack::startStream(): error determining available JACK input ports!"; + goto unlock; + } + + // Now make the port connections. Since RtAudio wasn't designed to + // allow the user to select particular channels of a device, we'll + // just open the first "nChannels" ports with offset. + for ( unsigned int i=0; i<stream_.nUserChannels[0]; i++ ) { + result = 1; + if ( ports[ stream_.channelOffset[0] + i ] ) + result = jack_connect( handle->client, jack_port_name( handle->ports[0][i] ), ports[ stream_.channelOffset[0] + i ] ); + if ( result ) { + free( ports ); + errorText_ = "RtApiJack::startStream(): error connecting output ports!"; + goto unlock; + } + } + free(ports); + } + + if ( stream_.mode == INPUT || stream_.mode == DUPLEX ) { + result = 1; + ports = jack_get_ports( handle->client, handle->deviceName[1].c_str(), NULL, JackPortIsOutput ); + if ( ports == NULL) { + errorText_ = "RtApiJack::startStream(): error determining available JACK output ports!"; + goto unlock; + } + + // Now make the port connections. See note above. 
+ for ( unsigned int i=0; i<stream_.nUserChannels[1]; i++ ) { + result = 1; + if ( ports[ stream_.channelOffset[1] + i ] ) + result = jack_connect( handle->client, ports[ stream_.channelOffset[1] + i ], jack_port_name( handle->ports[1][i] ) ); + if ( result ) { + free( ports ); + errorText_ = "RtApiJack::startStream(): error connecting input ports!"; + goto unlock; + } + } + free(ports); + } + + handle->drainCounter = 0; + handle->internalDrain = false; + stream_.state = STREAM_RUNNING; + + unlock: + if ( result == 0 ) return; + error( RtAudioError::SYSTEM_ERROR ); +} + +void RtApiJack :: stopStream( void ) +{ + verifyStream(); + if ( stream_.state == STREAM_STOPPED ) { + errorText_ = "RtApiJack::stopStream(): the stream is already stopped!"; + error( RtAudioError::WARNING ); + return; + } + + JackHandle *handle = (JackHandle *) stream_.apiHandle; + if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) { + + if ( handle->drainCounter == 0 ) { + handle->drainCounter = 2; + pthread_cond_wait( &handle->condition, &stream_.mutex ); // block until signaled + } + } + + jack_deactivate( handle->client ); + stream_.state = STREAM_STOPPED; +} + +void RtApiJack :: abortStream( void ) +{ + verifyStream(); + if ( stream_.state == STREAM_STOPPED ) { + errorText_ = "RtApiJack::abortStream(): the stream is already stopped!"; + error( RtAudioError::WARNING ); + return; + } + + JackHandle *handle = (JackHandle *) stream_.apiHandle; + handle->drainCounter = 2; + + stopStream(); +} + +// This function will be called by a spawned thread when the user +// callback function signals that the stream should be stopped or +// aborted. It is necessary to handle it this way because the +// callbackEvent() function must return before the jack_deactivate() +// function will return. 
+static void *jackStopStream( void *ptr ) +{ + CallbackInfo *info = (CallbackInfo *) ptr; + RtApiJack *object = (RtApiJack *) info->object; + + object->stopStream(); + pthread_exit( NULL ); +} + +bool RtApiJack :: callbackEvent( unsigned long nframes ) +{ + if ( stream_.state == STREAM_STOPPED || stream_.state == STREAM_STOPPING ) return SUCCESS; + if ( stream_.state == STREAM_CLOSED ) { + errorText_ = "RtApiCore::callbackEvent(): the stream is closed ... this shouldn't happen!"; + error( RtAudioError::WARNING ); + return FAILURE; + } + if ( stream_.bufferSize != nframes ) { + errorText_ = "RtApiCore::callbackEvent(): the JACK buffer size has changed ... cannot process!"; + error( RtAudioError::WARNING ); + return FAILURE; + } + + CallbackInfo *info = (CallbackInfo *) &stream_.callbackInfo; + JackHandle *handle = (JackHandle *) stream_.apiHandle; + + // Check if we were draining the stream and signal is finished. + if ( handle->drainCounter > 3 ) { + ThreadHandle threadId; + + stream_.state = STREAM_STOPPING; + if ( handle->internalDrain == true ) + pthread_create( &threadId, NULL, jackStopStream, info ); + else + pthread_cond_signal( &handle->condition ); + return SUCCESS; + } + + // Invoke user callback first, to get fresh output data. 
+ if ( handle->drainCounter == 0 ) { + RtAudioCallback callback = (RtAudioCallback) info->callback; + double streamTime = getStreamTime(); + RtAudioStreamStatus status = 0; + if ( stream_.mode != INPUT && handle->xrun[0] == true ) { + status |= RTAUDIO_OUTPUT_UNDERFLOW; + handle->xrun[0] = false; + } + if ( stream_.mode != OUTPUT && handle->xrun[1] == true ) { + status |= RTAUDIO_INPUT_OVERFLOW; + handle->xrun[1] = false; + } + int cbReturnValue = callback( stream_.userBuffer[0], stream_.userBuffer[1], + stream_.bufferSize, streamTime, status, info->userData ); + if ( cbReturnValue == 2 ) { + stream_.state = STREAM_STOPPING; + handle->drainCounter = 2; + ThreadHandle id; + pthread_create( &id, NULL, jackStopStream, info ); + return SUCCESS; + } + else if ( cbReturnValue == 1 ) { + handle->drainCounter = 1; + handle->internalDrain = true; + } + } + + jack_default_audio_sample_t *jackbuffer; + unsigned long bufferBytes = nframes * sizeof( jack_default_audio_sample_t ); + if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) { + + if ( handle->drainCounter > 1 ) { // write zeros to the output stream + + for ( unsigned int i=0; i<stream_.nDeviceChannels[0]; i++ ) { + jackbuffer = (jack_default_audio_sample_t *) jack_port_get_buffer( handle->ports[0][i], (jack_nframes_t) nframes ); + memset( jackbuffer, 0, bufferBytes ); + } + + } + else if ( stream_.doConvertBuffer[0] ) { + + convertBuffer( stream_.deviceBuffer, stream_.userBuffer[0], stream_.convertInfo[0] ); + + for ( unsigned int i=0; i<stream_.nDeviceChannels[0]; i++ ) { + jackbuffer = (jack_default_audio_sample_t *) jack_port_get_buffer( handle->ports[0][i], (jack_nframes_t) nframes ); + memcpy( jackbuffer, &stream_.deviceBuffer[i*bufferBytes], bufferBytes ); + } + } + else { // no buffer conversion + for ( unsigned int i=0; i<stream_.nUserChannels[0]; i++ ) { + jackbuffer = (jack_default_audio_sample_t *) jack_port_get_buffer( handle->ports[0][i], (jack_nframes_t) nframes ); + memcpy( jackbuffer, 
&stream_.userBuffer[0][i*bufferBytes], bufferBytes ); + } + } + } + + // Don't bother draining input + if ( handle->drainCounter ) { + handle->drainCounter++; + goto unlock; + } + + if ( stream_.mode == INPUT || stream_.mode == DUPLEX ) { + + if ( stream_.doConvertBuffer[1] ) { + for ( unsigned int i=0; i<stream_.nDeviceChannels[1]; i++ ) { + jackbuffer = (jack_default_audio_sample_t *) jack_port_get_buffer( handle->ports[1][i], (jack_nframes_t) nframes ); + memcpy( &stream_.deviceBuffer[i*bufferBytes], jackbuffer, bufferBytes ); + } + convertBuffer( stream_.userBuffer[1], stream_.deviceBuffer, stream_.convertInfo[1] ); + } + else { // no buffer conversion + for ( unsigned int i=0; i<stream_.nUserChannels[1]; i++ ) { + jackbuffer = (jack_default_audio_sample_t *) jack_port_get_buffer( handle->ports[1][i], (jack_nframes_t) nframes ); + memcpy( &stream_.userBuffer[1][i*bufferBytes], jackbuffer, bufferBytes ); + } + } + } + + unlock: + RtApi::tickStreamTime(); + return SUCCESS; +} + //******************** End of __UNIX_JACK__ *********************// +#endif + +#if defined(__WINDOWS_ASIO__) // ASIO API on Windows + +// The ASIO API is designed around a callback scheme, so this +// implementation is similar to that used for OS-X CoreAudio and Linux +// Jack. The primary constraint with ASIO is that it only allows +// access to a single driver at a time. Thus, it is not possible to +// have more than one simultaneous RtAudio stream. +// +// This implementation also requires a number of external ASIO files +// and a few global variables. The ASIO callback scheme does not +// allow for the passing of user data, so we must create a global +// pointer to our callbackInfo structure. +// +// On unix systems, we make use of a pthread condition variable. +// Since there is no equivalent in Windows, I hacked something based +// on information found in +// http://www.cs.wustl.edu/~schmidt/win32-cv-1.html. 
+ +#include "asiosys.h" +#include "asio.h" +#include "iasiothiscallresolver.h" +#include "asiodrivers.h" +#include <cmath> + +static AsioDrivers drivers; +static ASIOCallbacks asioCallbacks; +static ASIODriverInfo driverInfo; +static CallbackInfo *asioCallbackInfo; +static bool asioXRun; + +struct AsioHandle { + int drainCounter; // Tracks callback counts when draining + bool internalDrain; // Indicates if stop is initiated from callback or not. + ASIOBufferInfo *bufferInfos; + HANDLE condition; + + AsioHandle() + :drainCounter(0), internalDrain(false), bufferInfos(0) {} +}; + +// Function declarations (definitions at end of section) +static const char* getAsioErrorString( ASIOError result ); +static void sampleRateChanged( ASIOSampleRate sRate ); +static long asioMessages( long selector, long value, void* message, double* opt ); + +RtApiAsio :: RtApiAsio() +{ + // ASIO cannot run on a multi-threaded appartment. You can call + // CoInitialize beforehand, but it must be for appartment threading + // (in which case, CoInitilialize will return S_FALSE here). + coInitialized_ = false; + HRESULT hr = CoInitialize( NULL ); + if ( FAILED(hr) ) { + errorText_ = "RtApiAsio::ASIO requires a single-threaded appartment. Call CoInitializeEx(0,COINIT_APARTMENTTHREADED)"; + error( RtAudioError::WARNING ); + } + coInitialized_ = true; + + drivers.removeCurrentDriver(); + driverInfo.asioVersion = 2; + + // See note in DirectSound implementation about GetDesktopWindow(). 
+ driverInfo.sysRef = GetForegroundWindow(); +} + +RtApiAsio :: ~RtApiAsio() +{ + if ( stream_.state != STREAM_CLOSED ) closeStream(); + if ( coInitialized_ ) CoUninitialize(); +} + +unsigned int RtApiAsio :: getDeviceCount( void ) +{ + return (unsigned int) drivers.asioGetNumDev(); +} + +RtAudio::DeviceInfo RtApiAsio :: getDeviceInfo( unsigned int device ) +{ + RtAudio::DeviceInfo info; + info.probed = false; + + // Get device ID + unsigned int nDevices = getDeviceCount(); + if ( nDevices == 0 ) { + errorText_ = "RtApiAsio::getDeviceInfo: no devices found!"; + error( RtAudioError::INVALID_USE ); + return info; + } + + if ( device >= nDevices ) { + errorText_ = "RtApiAsio::getDeviceInfo: device ID is invalid!"; + error( RtAudioError::INVALID_USE ); + return info; + } + + // If a stream is already open, we cannot probe other devices. Thus, use the saved results. + if ( stream_.state != STREAM_CLOSED ) { + if ( device >= devices_.size() ) { + errorText_ = "RtApiAsio::getDeviceInfo: device ID was not present before stream was opened."; + error( RtAudioError::WARNING ); + return info; + } + return devices_[ device ]; + } + + char driverName[32]; + ASIOError result = drivers.asioGetDriverName( (int) device, driverName, 32 ); + if ( result != ASE_OK ) { + errorStream_ << "RtApiAsio::getDeviceInfo: unable to get driver name (" << getAsioErrorString( result ) << ")."; + errorText_ = errorStream_.str(); + error( RtAudioError::WARNING ); + return info; + } + + info.name = driverName; + + if ( !drivers.loadDriver( driverName ) ) { + errorStream_ << "RtApiAsio::getDeviceInfo: unable to load driver (" << driverName << ")."; + errorText_ = errorStream_.str(); + error( RtAudioError::WARNING ); + return info; + } + + result = ASIOInit( &driverInfo ); + if ( result != ASE_OK ) { + errorStream_ << "RtApiAsio::getDeviceInfo: error (" << getAsioErrorString( result ) << ") initializing driver (" << driverName << ")."; + errorText_ = errorStream_.str(); + error( RtAudioError::WARNING ); 
+ return info; + } + + // Determine the device channel information. + long inputChannels, outputChannels; + result = ASIOGetChannels( &inputChannels, &outputChannels ); + if ( result != ASE_OK ) { + drivers.removeCurrentDriver(); + errorStream_ << "RtApiAsio::getDeviceInfo: error (" << getAsioErrorString( result ) << ") getting channel count (" << driverName << ")."; + errorText_ = errorStream_.str(); + error( RtAudioError::WARNING ); + return info; + } + + info.outputChannels = outputChannels; + info.inputChannels = inputChannels; + if ( info.outputChannels > 0 && info.inputChannels > 0 ) + info.duplexChannels = (info.outputChannels > info.inputChannels) ? info.inputChannels : info.outputChannels; + + // Determine the supported sample rates. + info.sampleRates.clear(); + for ( unsigned int i=0; i<MAX_SAMPLE_RATES; i++ ) { + result = ASIOCanSampleRate( (ASIOSampleRate) SAMPLE_RATES[i] ); + if ( result == ASE_OK ) { + info.sampleRates.push_back( SAMPLE_RATES[i] ); + + if ( !info.preferredSampleRate || ( SAMPLE_RATES[i] <= 48000 && SAMPLE_RATES[i] > info.preferredSampleRate ) ) + info.preferredSampleRate = SAMPLE_RATES[i]; + } + } + + // Determine supported data types ... just check first channel and assume rest are the same. 
+ ASIOChannelInfo channelInfo; + channelInfo.channel = 0; + channelInfo.isInput = true; + if ( info.inputChannels <= 0 ) channelInfo.isInput = false; + result = ASIOGetChannelInfo( &channelInfo ); + if ( result != ASE_OK ) { + drivers.removeCurrentDriver(); + errorStream_ << "RtApiAsio::getDeviceInfo: error (" << getAsioErrorString( result ) << ") getting driver channel info (" << driverName << ")."; + errorText_ = errorStream_.str(); + error( RtAudioError::WARNING ); + return info; + } + + info.nativeFormats = 0; + if ( channelInfo.type == ASIOSTInt16MSB || channelInfo.type == ASIOSTInt16LSB ) + info.nativeFormats |= RTAUDIO_SINT16; + else if ( channelInfo.type == ASIOSTInt32MSB || channelInfo.type == ASIOSTInt32LSB ) + info.nativeFormats |= RTAUDIO_SINT32; + else if ( channelInfo.type == ASIOSTFloat32MSB || channelInfo.type == ASIOSTFloat32LSB ) + info.nativeFormats |= RTAUDIO_FLOAT32; + else if ( channelInfo.type == ASIOSTFloat64MSB || channelInfo.type == ASIOSTFloat64LSB ) + info.nativeFormats |= RTAUDIO_FLOAT64; + else if ( channelInfo.type == ASIOSTInt24MSB || channelInfo.type == ASIOSTInt24LSB ) + info.nativeFormats |= RTAUDIO_SINT24; + + if ( info.outputChannels > 0 ) + if ( getDefaultOutputDevice() == device ) info.isDefaultOutput = true; + if ( info.inputChannels > 0 ) + if ( getDefaultInputDevice() == device ) info.isDefaultInput = true; + + info.probed = true; + drivers.removeCurrentDriver(); + return info; +} + +static void bufferSwitch( long index, ASIOBool /*processNow*/ ) +{ + RtApiAsio *object = (RtApiAsio *) asioCallbackInfo->object; + object->callbackEvent( index ); +} + +void RtApiAsio :: saveDeviceInfo( void ) +{ + devices_.clear(); + + unsigned int nDevices = getDeviceCount(); + devices_.resize( nDevices ); + for ( unsigned int i=0; i<nDevices; i++ ) + devices_[i] = getDeviceInfo( i ); +} + +bool RtApiAsio :: probeDeviceOpen( unsigned int device, StreamMode mode, unsigned int channels, + unsigned int firstChannel, unsigned int sampleRate, + 
RtAudioFormat format, unsigned int *bufferSize, + RtAudio::StreamOptions *options ) +{//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + bool isDuplexInput = mode == INPUT && stream_.mode == OUTPUT; + + // For ASIO, a duplex stream MUST use the same driver. + if ( isDuplexInput && stream_.device[0] != device ) { + errorText_ = "RtApiAsio::probeDeviceOpen: an ASIO duplex stream must use the same device for input and output!"; + return FAILURE; + } + + char driverName[32]; + ASIOError result = drivers.asioGetDriverName( (int) device, driverName, 32 ); + if ( result != ASE_OK ) { + errorStream_ << "RtApiAsio::probeDeviceOpen: unable to get driver name (" << getAsioErrorString( result ) << ")."; + errorText_ = errorStream_.str(); + return FAILURE; + } + + // Only load the driver once for duplex stream. + if ( !isDuplexInput ) { + // The getDeviceInfo() function will not work when a stream is open + // because ASIO does not allow multiple devices to run at the same + // time. Thus, we'll probe the system before opening a stream and + // save the results for use by getDeviceInfo(). + this->saveDeviceInfo(); + + if ( !drivers.loadDriver( driverName ) ) { + errorStream_ << "RtApiAsio::probeDeviceOpen: unable to load driver (" << driverName << ")."; + errorText_ = errorStream_.str(); + return FAILURE; + } + + result = ASIOInit( &driverInfo ); + if ( result != ASE_OK ) { + errorStream_ << "RtApiAsio::probeDeviceOpen: error (" << getAsioErrorString( result ) << ") initializing driver (" << driverName << ")."; + errorText_ = errorStream_.str(); + return FAILURE; + } + } + + // keep them before any "goto error", they are used for error cleanup + goto device boundary checks + bool buffersAllocated = false; + AsioHandle *handle = (AsioHandle *) stream_.apiHandle; + unsigned int nChannels; + + + // Check the device channel count. 
+ long inputChannels, outputChannels; + result = ASIOGetChannels( &inputChannels, &outputChannels ); + if ( result != ASE_OK ) { + errorStream_ << "RtApiAsio::probeDeviceOpen: error (" << getAsioErrorString( result ) << ") getting channel count (" << driverName << ")."; + errorText_ = errorStream_.str(); + goto error; + } + + if ( ( mode == OUTPUT && (channels+firstChannel) > (unsigned int) outputChannels) || + ( mode == INPUT && (channels+firstChannel) > (unsigned int) inputChannels) ) { + errorStream_ << "RtApiAsio::probeDeviceOpen: driver (" << driverName << ") does not support requested channel count (" << channels << ") + offset (" << firstChannel << ")."; + errorText_ = errorStream_.str(); + goto error; + } + stream_.nDeviceChannels[mode] = channels; + stream_.nUserChannels[mode] = channels; + stream_.channelOffset[mode] = firstChannel; + + // Verify the sample rate is supported. + result = ASIOCanSampleRate( (ASIOSampleRate) sampleRate ); + if ( result != ASE_OK ) { + errorStream_ << "RtApiAsio::probeDeviceOpen: driver (" << driverName << ") does not support requested sample rate (" << sampleRate << ")."; + errorText_ = errorStream_.str(); + goto error; + } + + // Get the current sample rate + ASIOSampleRate currentRate; + result = ASIOGetSampleRate( &currentRate ); + if ( result != ASE_OK ) { + errorStream_ << "RtApiAsio::probeDeviceOpen: driver (" << driverName << ") error getting sample rate."; + errorText_ = errorStream_.str(); + goto error; + } + + // Set the sample rate only if necessary + if ( currentRate != sampleRate ) { + result = ASIOSetSampleRate( (ASIOSampleRate) sampleRate ); + if ( result != ASE_OK ) { + errorStream_ << "RtApiAsio::probeDeviceOpen: driver (" << driverName << ") error setting sample rate (" << sampleRate << ")."; + errorText_ = errorStream_.str(); + goto error; + } + } + + // Determine the driver data type. 
+ ASIOChannelInfo channelInfo; + channelInfo.channel = 0; + if ( mode == OUTPUT ) channelInfo.isInput = false; + else channelInfo.isInput = true; + result = ASIOGetChannelInfo( &channelInfo ); + if ( result != ASE_OK ) { + errorStream_ << "RtApiAsio::probeDeviceOpen: driver (" << driverName << ") error (" << getAsioErrorString( result ) << ") getting data format."; + errorText_ = errorStream_.str(); + goto error; + } + + // Assuming WINDOWS host is always little-endian. + stream_.doByteSwap[mode] = false; + stream_.userFormat = format; + stream_.deviceFormat[mode] = 0; + if ( channelInfo.type == ASIOSTInt16MSB || channelInfo.type == ASIOSTInt16LSB ) { + stream_.deviceFormat[mode] = RTAUDIO_SINT16; + if ( channelInfo.type == ASIOSTInt16MSB ) stream_.doByteSwap[mode] = true; + } + else if ( channelInfo.type == ASIOSTInt32MSB || channelInfo.type == ASIOSTInt32LSB ) { + stream_.deviceFormat[mode] = RTAUDIO_SINT32; + if ( channelInfo.type == ASIOSTInt32MSB ) stream_.doByteSwap[mode] = true; + } + else if ( channelInfo.type == ASIOSTFloat32MSB || channelInfo.type == ASIOSTFloat32LSB ) { + stream_.deviceFormat[mode] = RTAUDIO_FLOAT32; + if ( channelInfo.type == ASIOSTFloat32MSB ) stream_.doByteSwap[mode] = true; + } + else if ( channelInfo.type == ASIOSTFloat64MSB || channelInfo.type == ASIOSTFloat64LSB ) { + stream_.deviceFormat[mode] = RTAUDIO_FLOAT64; + if ( channelInfo.type == ASIOSTFloat64MSB ) stream_.doByteSwap[mode] = true; + } + else if ( channelInfo.type == ASIOSTInt24MSB || channelInfo.type == ASIOSTInt24LSB ) { + stream_.deviceFormat[mode] = RTAUDIO_SINT24; + if ( channelInfo.type == ASIOSTInt24MSB ) stream_.doByteSwap[mode] = true; + } + + if ( stream_.deviceFormat[mode] == 0 ) { + errorStream_ << "RtApiAsio::probeDeviceOpen: driver (" << driverName << ") data format not supported by RtAudio."; + errorText_ = errorStream_.str(); + goto error; + } + + // Set the buffer size. 
For a duplex stream, this will end up + // setting the buffer size based on the input constraints, which + // should be ok. + long minSize, maxSize, preferSize, granularity; + result = ASIOGetBufferSize( &minSize, &maxSize, &preferSize, &granularity ); + if ( result != ASE_OK ) { + errorStream_ << "RtApiAsio::probeDeviceOpen: driver (" << driverName << ") error (" << getAsioErrorString( result ) << ") getting buffer size."; + errorText_ = errorStream_.str(); + goto error; + } + + if ( isDuplexInput ) { + // When this is the duplex input (output was opened before), then we have to use the same + // buffersize as the output, because it might use the preferred buffer size, which most + // likely wasn't passed as input to this. The buffer sizes have to be identically anyway, + // So instead of throwing an error, make them equal. The caller uses the reference + // to the "bufferSize" param as usual to set up processing buffers. + + *bufferSize = stream_.bufferSize; + + } else { + if ( *bufferSize == 0 ) *bufferSize = preferSize; + else if ( *bufferSize < (unsigned int) minSize ) *bufferSize = (unsigned int) minSize; + else if ( *bufferSize > (unsigned int) maxSize ) *bufferSize = (unsigned int) maxSize; + else if ( granularity == -1 ) { + // Make sure bufferSize is a power of two. 
+ int log2_of_min_size = 0; + int log2_of_max_size = 0; + + for ( unsigned int i = 0; i < sizeof(long) * 8; i++ ) { + if ( minSize & ((long)1 << i) ) log2_of_min_size = i; + if ( maxSize & ((long)1 << i) ) log2_of_max_size = i; + } + + long min_delta = std::abs( (long)*bufferSize - ((long)1 << log2_of_min_size) ); + int min_delta_num = log2_of_min_size; + + for (int i = log2_of_min_size + 1; i <= log2_of_max_size; i++) { + long current_delta = std::abs( (long)*bufferSize - ((long)1 << i) ); + if (current_delta < min_delta) { + min_delta = current_delta; + min_delta_num = i; + } + } + + *bufferSize = ( (unsigned int)1 << min_delta_num ); + if ( *bufferSize < (unsigned int) minSize ) *bufferSize = (unsigned int) minSize; + else if ( *bufferSize > (unsigned int) maxSize ) *bufferSize = (unsigned int) maxSize; + } + else if ( granularity != 0 ) { + // Set to an even multiple of granularity, rounding up. + *bufferSize = (*bufferSize + granularity-1) / granularity * granularity; + } + } + + /* + // we don't use it anymore, see above! + // Just left it here for the case... + if ( isDuplexInput && stream_.bufferSize != *bufferSize ) { + errorText_ = "RtApiAsio::probeDeviceOpen: input/output buffersize discrepancy!"; + goto error; + } + */ + + stream_.bufferSize = *bufferSize; + stream_.nBuffers = 2; + + if ( options && options->flags & RTAUDIO_NONINTERLEAVED ) stream_.userInterleaved = false; + else stream_.userInterleaved = true; + + // ASIO always uses non-interleaved buffers. + stream_.deviceInterleaved[mode] = false; + + // Allocate, if necessary, our AsioHandle structure for the stream. + if ( handle == 0 ) { + try { + handle = new AsioHandle; + } + catch ( std::bad_alloc& ) { + errorText_ = "RtApiAsio::probeDeviceOpen: error allocating AsioHandle memory."; + goto error; + } + handle->bufferInfos = 0; + + // Create a manual-reset event. 
+ handle->condition = CreateEvent( NULL, // no security + TRUE, // manual-reset + FALSE, // non-signaled initially + NULL ); // unnamed + stream_.apiHandle = (void *) handle; + } + + // Create the ASIO internal buffers. Since RtAudio sets up input + // and output separately, we'll have to dispose of previously + // created output buffers for a duplex stream. + if ( mode == INPUT && stream_.mode == OUTPUT ) { + ASIODisposeBuffers(); + if ( handle->bufferInfos ) free( handle->bufferInfos ); + } + + // Allocate, initialize, and save the bufferInfos in our stream callbackInfo structure. + unsigned int i; + nChannels = stream_.nDeviceChannels[0] + stream_.nDeviceChannels[1]; + handle->bufferInfos = (ASIOBufferInfo *) malloc( nChannels * sizeof(ASIOBufferInfo) ); + if ( handle->bufferInfos == NULL ) { + errorStream_ << "RtApiAsio::probeDeviceOpen: error allocating bufferInfo memory for driver (" << driverName << ")."; + errorText_ = errorStream_.str(); + goto error; + } + + ASIOBufferInfo *infos; + infos = handle->bufferInfos; + for ( i=0; i<stream_.nDeviceChannels[0]; i++, infos++ ) { + infos->isInput = ASIOFalse; + infos->channelNum = i + stream_.channelOffset[0]; + infos->buffers[0] = infos->buffers[1] = 0; + } + for ( i=0; i<stream_.nDeviceChannels[1]; i++, infos++ ) { + infos->isInput = ASIOTrue; + infos->channelNum = i + stream_.channelOffset[1]; + infos->buffers[0] = infos->buffers[1] = 0; + } + + // prepare for callbacks + stream_.sampleRate = sampleRate; + stream_.device[mode] = device; + stream_.mode = isDuplexInput ? DUPLEX : mode; + + // store this class instance before registering callbacks, that are going to use it + asioCallbackInfo = &stream_.callbackInfo; + stream_.callbackInfo.object = (void *) this; + + // Set up the ASIO callback structure and create the ASIO data buffers. 
+ asioCallbacks.bufferSwitch = &bufferSwitch; + asioCallbacks.sampleRateDidChange = &sampleRateChanged; + asioCallbacks.asioMessage = &asioMessages; + asioCallbacks.bufferSwitchTimeInfo = NULL; + result = ASIOCreateBuffers( handle->bufferInfos, nChannels, stream_.bufferSize, &asioCallbacks ); + if ( result != ASE_OK ) { + // Standard method failed. This can happen with strict/misbehaving drivers that return valid buffer size ranges + // but only accept the preferred buffer size as parameter for ASIOCreateBuffers. eg. Creatives ASIO driver + // in that case, let's be naïve and try that instead + *bufferSize = preferSize; + stream_.bufferSize = *bufferSize; + result = ASIOCreateBuffers( handle->bufferInfos, nChannels, stream_.bufferSize, &asioCallbacks ); + } + + if ( result != ASE_OK ) { + errorStream_ << "RtApiAsio::probeDeviceOpen: driver (" << driverName << ") error (" << getAsioErrorString( result ) << ") creating buffers."; + errorText_ = errorStream_.str(); + goto error; + } + buffersAllocated = true; + stream_.state = STREAM_STOPPED; + + // Set flags for buffer conversion. 
+ stream_.doConvertBuffer[mode] = false; + if ( stream_.userFormat != stream_.deviceFormat[mode] ) + stream_.doConvertBuffer[mode] = true; + if ( stream_.userInterleaved != stream_.deviceInterleaved[mode] && + stream_.nUserChannels[mode] > 1 ) + stream_.doConvertBuffer[mode] = true; + + // Allocate necessary internal buffers + unsigned long bufferBytes; + bufferBytes = stream_.nUserChannels[mode] * *bufferSize * formatBytes( stream_.userFormat ); + stream_.userBuffer[mode] = (char *) calloc( bufferBytes, 1 ); + if ( stream_.userBuffer[mode] == NULL ) { + errorText_ = "RtApiAsio::probeDeviceOpen: error allocating user buffer memory."; + goto error; + } + + if ( stream_.doConvertBuffer[mode] ) { + + bool makeBuffer = true; + bufferBytes = stream_.nDeviceChannels[mode] * formatBytes( stream_.deviceFormat[mode] ); + if ( isDuplexInput && stream_.deviceBuffer ) { + unsigned long bytesOut = stream_.nDeviceChannels[0] * formatBytes( stream_.deviceFormat[0] ); + if ( bufferBytes <= bytesOut ) makeBuffer = false; + } + + if ( makeBuffer ) { + bufferBytes *= *bufferSize; + if ( stream_.deviceBuffer ) free( stream_.deviceBuffer ); + stream_.deviceBuffer = (char *) calloc( bufferBytes, 1 ); + if ( stream_.deviceBuffer == NULL ) { + errorText_ = "RtApiAsio::probeDeviceOpen: error allocating device buffer memory."; + goto error; + } + } + } + + // Determine device latencies + long inputLatency, outputLatency; + result = ASIOGetLatencies( &inputLatency, &outputLatency ); + if ( result != ASE_OK ) { + errorStream_ << "RtApiAsio::probeDeviceOpen: driver (" << driverName << ") error (" << getAsioErrorString( result ) << ") getting latency."; + errorText_ = errorStream_.str(); + error( RtAudioError::WARNING); // warn but don't fail + } + else { + stream_.latency[0] = outputLatency; + stream_.latency[1] = inputLatency; + } + + // Setup the buffer conversion information structure. We don't use + // buffers to do channel offsets, so we override that parameter + // here. 
+ if ( stream_.doConvertBuffer[mode] ) setConvertInfo( mode, 0 ); + + return SUCCESS; + + error: + if ( !isDuplexInput ) { + // the cleanup for error in the duplex input, is done by RtApi::openStream + // So we clean up for single channel only + + if ( buffersAllocated ) + ASIODisposeBuffers(); + + drivers.removeCurrentDriver(); + + if ( handle ) { + CloseHandle( handle->condition ); + if ( handle->bufferInfos ) + free( handle->bufferInfos ); + + delete handle; + stream_.apiHandle = 0; + } + + + if ( stream_.userBuffer[mode] ) { + free( stream_.userBuffer[mode] ); + stream_.userBuffer[mode] = 0; + } + + if ( stream_.deviceBuffer ) { + free( stream_.deviceBuffer ); + stream_.deviceBuffer = 0; + } + } + + return FAILURE; +}//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +void RtApiAsio :: closeStream() +{ + if ( stream_.state == STREAM_CLOSED ) { + errorText_ = "RtApiAsio::closeStream(): no open stream to close!"; + error( RtAudioError::WARNING ); + return; + } + + if ( stream_.state == STREAM_RUNNING ) { + stream_.state = STREAM_STOPPED; + ASIOStop(); + } + ASIODisposeBuffers(); + drivers.removeCurrentDriver(); + + AsioHandle *handle = (AsioHandle *) stream_.apiHandle; + if ( handle ) { + CloseHandle( handle->condition ); + if ( handle->bufferInfos ) + free( handle->bufferInfos ); + delete handle; + stream_.apiHandle = 0; + } + + for ( int i=0; i<2; i++ ) { + if ( stream_.userBuffer[i] ) { + free( stream_.userBuffer[i] ); + stream_.userBuffer[i] = 0; + } + } + + if ( stream_.deviceBuffer ) { + free( stream_.deviceBuffer ); + stream_.deviceBuffer = 0; + } + + stream_.mode = UNINITIALIZED; + stream_.state = STREAM_CLOSED; +} + +bool stopThreadCalled = false; + +void RtApiAsio :: startStream() +{ + verifyStream(); + if ( stream_.state == STREAM_RUNNING ) { + errorText_ = "RtApiAsio::startStream(): the stream is already running!"; + 
error( RtAudioError::WARNING ); + return; + } + + AsioHandle *handle = (AsioHandle *) stream_.apiHandle; + ASIOError result = ASIOStart(); + if ( result != ASE_OK ) { + errorStream_ << "RtApiAsio::startStream: error (" << getAsioErrorString( result ) << ") starting device."; + errorText_ = errorStream_.str(); + goto unlock; + } + + handle->drainCounter = 0; + handle->internalDrain = false; + ResetEvent( handle->condition ); + stream_.state = STREAM_RUNNING; + asioXRun = false; + + unlock: + stopThreadCalled = false; + + if ( result == ASE_OK ) return; + error( RtAudioError::SYSTEM_ERROR ); +} + +void RtApiAsio :: stopStream() +{ + verifyStream(); + if ( stream_.state == STREAM_STOPPED ) { + errorText_ = "RtApiAsio::stopStream(): the stream is already stopped!"; + error( RtAudioError::WARNING ); + return; + } + + AsioHandle *handle = (AsioHandle *) stream_.apiHandle; + if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) { + if ( handle->drainCounter == 0 ) { + handle->drainCounter = 2; + WaitForSingleObject( handle->condition, INFINITE ); // block until signaled + } + } + + stream_.state = STREAM_STOPPED; + + ASIOError result = ASIOStop(); + if ( result != ASE_OK ) { + errorStream_ << "RtApiAsio::stopStream: error (" << getAsioErrorString( result ) << ") stopping device."; + errorText_ = errorStream_.str(); + } + + if ( result == ASE_OK ) return; + error( RtAudioError::SYSTEM_ERROR ); +} + +void RtApiAsio :: abortStream() +{ + verifyStream(); + if ( stream_.state == STREAM_STOPPED ) { + errorText_ = "RtApiAsio::abortStream(): the stream is already stopped!"; + error( RtAudioError::WARNING ); + return; + } + + // The following lines were commented-out because some behavior was + // noted where the device buffers need to be zeroed to avoid + // continuing sound, even when the device buffers are completely + // disposed. So now, calling abort is the same as calling stop. 
+ // AsioHandle *handle = (AsioHandle *) stream_.apiHandle; + // handle->drainCounter = 2; + stopStream(); +} + +// This function will be called by a spawned thread when the user +// callback function signals that the stream should be stopped or +// aborted. It is necessary to handle it this way because the +// callbackEvent() function must return before the ASIOStop() +// function will return. +static unsigned __stdcall asioStopStream( void *ptr ) +{ + CallbackInfo *info = (CallbackInfo *) ptr; + RtApiAsio *object = (RtApiAsio *) info->object; + + object->stopStream(); + _endthreadex( 0 ); + return 0; +} + +bool RtApiAsio :: callbackEvent( long bufferIndex ) +{ + if ( stream_.state == STREAM_STOPPED || stream_.state == STREAM_STOPPING ) return SUCCESS; + if ( stream_.state == STREAM_CLOSED ) { + errorText_ = "RtApiAsio::callbackEvent(): the stream is closed ... this shouldn't happen!"; + error( RtAudioError::WARNING ); + return FAILURE; + } + + CallbackInfo *info = (CallbackInfo *) &stream_.callbackInfo; + AsioHandle *handle = (AsioHandle *) stream_.apiHandle; + + // Check if we were draining the stream and signal if finished. + if ( handle->drainCounter > 3 ) { + + stream_.state = STREAM_STOPPING; + if ( handle->internalDrain == false ) + SetEvent( handle->condition ); + else { // spawn a thread to stop the stream + unsigned threadId; + stream_.callbackInfo.thread = _beginthreadex( NULL, 0, &asioStopStream, + &stream_.callbackInfo, 0, &threadId ); + } + return SUCCESS; + } + + // Invoke user callback to get fresh output data UNLESS we are + // draining stream. 
+ if ( handle->drainCounter == 0 ) { + RtAudioCallback callback = (RtAudioCallback) info->callback; + double streamTime = getStreamTime(); + RtAudioStreamStatus status = 0; + if ( stream_.mode != INPUT && asioXRun == true ) { + status |= RTAUDIO_OUTPUT_UNDERFLOW; + asioXRun = false; + } + if ( stream_.mode != OUTPUT && asioXRun == true ) { + status |= RTAUDIO_INPUT_OVERFLOW; + asioXRun = false; + } + int cbReturnValue = callback( stream_.userBuffer[0], stream_.userBuffer[1], + stream_.bufferSize, streamTime, status, info->userData ); + if ( cbReturnValue == 2 ) { + stream_.state = STREAM_STOPPING; + handle->drainCounter = 2; + unsigned threadId; + stream_.callbackInfo.thread = _beginthreadex( NULL, 0, &asioStopStream, + &stream_.callbackInfo, 0, &threadId ); + return SUCCESS; + } + else if ( cbReturnValue == 1 ) { + handle->drainCounter = 1; + handle->internalDrain = true; + } + } + + unsigned int nChannels, bufferBytes, i, j; + nChannels = stream_.nDeviceChannels[0] + stream_.nDeviceChannels[1]; + if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) { + + bufferBytes = stream_.bufferSize * formatBytes( stream_.deviceFormat[0] ); + + if ( handle->drainCounter > 1 ) { // write zeros to the output stream + + for ( i=0, j=0; i<nChannels; i++ ) { + if ( handle->bufferInfos[i].isInput != ASIOTrue ) + memset( handle->bufferInfos[i].buffers[bufferIndex], 0, bufferBytes ); + } + + } + else if ( stream_.doConvertBuffer[0] ) { + + convertBuffer( stream_.deviceBuffer, stream_.userBuffer[0], stream_.convertInfo[0] ); + if ( stream_.doByteSwap[0] ) + byteSwapBuffer( stream_.deviceBuffer, + stream_.bufferSize * stream_.nDeviceChannels[0], + stream_.deviceFormat[0] ); + + for ( i=0, j=0; i<nChannels; i++ ) { + if ( handle->bufferInfos[i].isInput != ASIOTrue ) + memcpy( handle->bufferInfos[i].buffers[bufferIndex], + &stream_.deviceBuffer[j++*bufferBytes], bufferBytes ); + } + + } + else { + + if ( stream_.doByteSwap[0] ) + byteSwapBuffer( stream_.userBuffer[0], + 
stream_.bufferSize * stream_.nUserChannels[0], + stream_.userFormat ); + + for ( i=0, j=0; i<nChannels; i++ ) { + if ( handle->bufferInfos[i].isInput != ASIOTrue ) + memcpy( handle->bufferInfos[i].buffers[bufferIndex], + &stream_.userBuffer[0][bufferBytes*j++], bufferBytes ); + } + + } + } + + // Don't bother draining input + if ( handle->drainCounter ) { + handle->drainCounter++; + goto unlock; + } + + if ( stream_.mode == INPUT || stream_.mode == DUPLEX ) { + + bufferBytes = stream_.bufferSize * formatBytes(stream_.deviceFormat[1]); + + if (stream_.doConvertBuffer[1]) { + + // Always interleave ASIO input data. + for ( i=0, j=0; i<nChannels; i++ ) { + if ( handle->bufferInfos[i].isInput == ASIOTrue ) + memcpy( &stream_.deviceBuffer[j++*bufferBytes], + handle->bufferInfos[i].buffers[bufferIndex], + bufferBytes ); + } + + if ( stream_.doByteSwap[1] ) + byteSwapBuffer( stream_.deviceBuffer, + stream_.bufferSize * stream_.nDeviceChannels[1], + stream_.deviceFormat[1] ); + convertBuffer( stream_.userBuffer[1], stream_.deviceBuffer, stream_.convertInfo[1] ); + + } + else { + for ( i=0, j=0; i<nChannels; i++ ) { + if ( handle->bufferInfos[i].isInput == ASIOTrue ) { + memcpy( &stream_.userBuffer[1][bufferBytes*j++], + handle->bufferInfos[i].buffers[bufferIndex], + bufferBytes ); + } + } + + if ( stream_.doByteSwap[1] ) + byteSwapBuffer( stream_.userBuffer[1], + stream_.bufferSize * stream_.nUserChannels[1], + stream_.userFormat ); + } + } + + unlock: + // The following call was suggested by Malte Clasen. While the API + // documentation indicates it should not be required, some device + // drivers apparently do not function correctly without it. + ASIOOutputReady(); + + RtApi::tickStreamTime(); + return SUCCESS; +} + +static void sampleRateChanged( ASIOSampleRate sRate ) +{ + // The ASIO documentation says that this usually only happens during + // external sync. 
Audio processing is not stopped by the driver, + // actual sample rate might not have even changed, maybe only the + // sample rate status of an AES/EBU or S/PDIF digital input at the + // audio device. + + RtApi *object = (RtApi *) asioCallbackInfo->object; + try { + object->stopStream(); + } + catch ( RtAudioError &exception ) { + std::cerr << "\nRtApiAsio: sampleRateChanged() error (" << exception.getMessage() << ")!\n" << std::endl; + return; + } + + std::cerr << "\nRtApiAsio: driver reports sample rate changed to " << sRate << " ... stream stopped!!!\n" << std::endl; +} + +static long asioMessages( long selector, long value, void* /*message*/, double* /*opt*/ ) +{ + long ret = 0; + + switch( selector ) { + case kAsioSelectorSupported: + if ( value == kAsioResetRequest + || value == kAsioEngineVersion + || value == kAsioResyncRequest + || value == kAsioLatenciesChanged + // The following three were added for ASIO 2.0, you don't + // necessarily have to support them. + || value == kAsioSupportsTimeInfo + || value == kAsioSupportsTimeCode + || value == kAsioSupportsInputMonitor) + ret = 1L; + break; + case kAsioResetRequest: + // Defer the task and perform the reset of the driver during the + // next "safe" situation. You cannot reset the driver right now, + // as this code is called from the driver. Reset the driver is + // done by completely destruct is. I.e. ASIOStop(), + // ASIODisposeBuffers(), Destruction Afterwards you initialize the + // driver again. + std::cerr << "\nRtApiAsio: driver reset requested!!!" << std::endl; + ret = 1L; + break; + case kAsioResyncRequest: + // This informs the application that the driver encountered some + // non-fatal data loss. It is used for synchronization purposes + // of different media. Added mainly to work around the Win16Mutex + // problems in Windows 95/98 with the Windows Multimedia system, + // which could lose data because the Mutex was held too long by + // another thread. 
However a driver can issue it in other + // situations, too. + // std::cerr << "\nRtApiAsio: driver resync requested!!!" << std::endl; + asioXRun = true; + ret = 1L; + break; + case kAsioLatenciesChanged: + // This will inform the host application that the drivers were + // latencies changed. Beware, it this does not mean that the + // buffer sizes have changed! You might need to update internal + // delay data. + std::cerr << "\nRtApiAsio: driver latency may have changed!!!" << std::endl; + ret = 1L; + break; + case kAsioEngineVersion: + // Return the supported ASIO version of the host application. If + // a host application does not implement this selector, ASIO 1.0 + // is assumed by the driver. + ret = 2L; + break; + case kAsioSupportsTimeInfo: + // Informs the driver whether the + // asioCallbacks.bufferSwitchTimeInfo() callback is supported. + // For compatibility with ASIO 1.0 drivers the host application + // should always support the "old" bufferSwitch method, too. + ret = 0; + break; + case kAsioSupportsTimeCode: + // Informs the driver whether application is interested in time + // code info. If an application does not need to know about time + // code, the driver has less work to do. + ret = 0; + break; + } + return ret; +} + +static const char* getAsioErrorString( ASIOError result ) +{ + struct Messages + { + ASIOError value; + const char*message; + }; + + static const Messages m[] = + { + { ASE_NotPresent, "Hardware input or output is not present or available." }, + { ASE_HWMalfunction, "Hardware is malfunctioning." }, + { ASE_InvalidParameter, "Invalid input parameter." }, + { ASE_InvalidMode, "Invalid mode." }, + { ASE_SPNotAdvancing, "Sample position not advancing." }, + { ASE_NoClock, "Sample clock or rate cannot be determined or is not present." }, + { ASE_NoMemory, "Not enough memory to complete the request." 
} + }; + + for ( unsigned int i = 0; i < sizeof(m)/sizeof(m[0]); ++i ) + if ( m[i].value == result ) return m[i].message; + + return "Unknown error."; +} + +//******************** End of __WINDOWS_ASIO__ *********************// +#endif + + +#if defined(__WINDOWS_WASAPI__) // Windows WASAPI API + +// Authored by Marcus Tomlinson <themarcustomlinson@gmail.com>, April 2014 +// - Introduces support for the Windows WASAPI API +// - Aims to deliver bit streams to and from hardware at the lowest possible latency, via the absolute minimum buffer sizes required +// - Provides flexible stream configuration to an otherwise strict and inflexible WASAPI interface +// - Includes automatic internal conversion of sample rate and buffer size between hardware and the user + +#ifndef INITGUID + #define INITGUID +#endif +#include <audioclient.h> +#include <avrt.h> +#include <mmdeviceapi.h> +#include <functiondiscoverykeys_devpkey.h> + +//============================================================================= + +#define SAFE_RELEASE( objectPtr )\ +if ( objectPtr )\ +{\ + objectPtr->Release();\ + objectPtr = NULL;\ +} + +typedef HANDLE ( __stdcall *TAvSetMmThreadCharacteristicsPtr )( LPCWSTR TaskName, LPDWORD TaskIndex ); + +//----------------------------------------------------------------------------- + +// WASAPI dictates stream sample rate, format, channel count, and in some cases, buffer size. +// Therefore we must perform all necessary conversions to user buffers in order to satisfy these +// requirements. WasapiBuffer ring buffers are used between HwIn->UserIn and UserOut->HwOut to +// provide intermediate storage for read / write synchronization. 
+class WasapiBuffer +{ +public: + WasapiBuffer() + : buffer_( NULL ), + bufferSize_( 0 ), + inIndex_( 0 ), + outIndex_( 0 ) {} + + ~WasapiBuffer() { + free( buffer_ ); + } + + // sets the length of the internal ring buffer + void setBufferSize( unsigned int bufferSize, unsigned int formatBytes ) { + free( buffer_ ); + + buffer_ = ( char* ) calloc( bufferSize, formatBytes ); + + bufferSize_ = bufferSize; + inIndex_ = 0; + outIndex_ = 0; + } + + // attempt to push a buffer into the ring buffer at the current "in" index + bool pushBuffer( char* buffer, unsigned int bufferSize, RtAudioFormat format ) + { + if ( !buffer || // incoming buffer is NULL + bufferSize == 0 || // incoming buffer has no data + bufferSize > bufferSize_ ) // incoming buffer too large + { + return false; + } + + unsigned int relOutIndex = outIndex_; + unsigned int inIndexEnd = inIndex_ + bufferSize; + if ( relOutIndex < inIndex_ && inIndexEnd >= bufferSize_ ) { + relOutIndex += bufferSize_; + } + + // "in" index can end on the "out" index but cannot begin at it + if ( inIndex_ <= relOutIndex && inIndexEnd > relOutIndex ) { + return false; // not enough space between "in" index and "out" index + } + + // copy buffer from external to internal + int fromZeroSize = inIndex_ + bufferSize - bufferSize_; + fromZeroSize = fromZeroSize < 0 ? 
0 : fromZeroSize; + int fromInSize = bufferSize - fromZeroSize; + + switch( format ) + { + case RTAUDIO_SINT8: + memcpy( &( ( char* ) buffer_ )[inIndex_], buffer, fromInSize * sizeof( char ) ); + memcpy( buffer_, &( ( char* ) buffer )[fromInSize], fromZeroSize * sizeof( char ) ); + break; + case RTAUDIO_SINT16: + memcpy( &( ( short* ) buffer_ )[inIndex_], buffer, fromInSize * sizeof( short ) ); + memcpy( buffer_, &( ( short* ) buffer )[fromInSize], fromZeroSize * sizeof( short ) ); + break; + case RTAUDIO_SINT24: + memcpy( &( ( S24* ) buffer_ )[inIndex_], buffer, fromInSize * sizeof( S24 ) ); + memcpy( buffer_, &( ( S24* ) buffer )[fromInSize], fromZeroSize * sizeof( S24 ) ); + break; + case RTAUDIO_SINT32: + memcpy( &( ( int* ) buffer_ )[inIndex_], buffer, fromInSize * sizeof( int ) ); + memcpy( buffer_, &( ( int* ) buffer )[fromInSize], fromZeroSize * sizeof( int ) ); + break; + case RTAUDIO_FLOAT32: + memcpy( &( ( float* ) buffer_ )[inIndex_], buffer, fromInSize * sizeof( float ) ); + memcpy( buffer_, &( ( float* ) buffer )[fromInSize], fromZeroSize * sizeof( float ) ); + break; + case RTAUDIO_FLOAT64: + memcpy( &( ( double* ) buffer_ )[inIndex_], buffer, fromInSize * sizeof( double ) ); + memcpy( buffer_, &( ( double* ) buffer )[fromInSize], fromZeroSize * sizeof( double ) ); + break; + } + + // update "in" index + inIndex_ += bufferSize; + inIndex_ %= bufferSize_; + + return true; + } + + // attempt to pull a buffer from the ring buffer from the current "out" index + bool pullBuffer( char* buffer, unsigned int bufferSize, RtAudioFormat format ) + { + if ( !buffer || // incoming buffer is NULL + bufferSize == 0 || // incoming buffer has no data + bufferSize > bufferSize_ ) // incoming buffer too large + { + return false; + } + + unsigned int relInIndex = inIndex_; + unsigned int outIndexEnd = outIndex_ + bufferSize; + if ( relInIndex < outIndex_ && outIndexEnd >= bufferSize_ ) { + relInIndex += bufferSize_; + } + + // "out" index can begin at and end on the 
"in" index + if ( outIndex_ < relInIndex && outIndexEnd > relInIndex ) { + return false; // not enough space between "out" index and "in" index + } + + // copy buffer from internal to external + int fromZeroSize = outIndex_ + bufferSize - bufferSize_; + fromZeroSize = fromZeroSize < 0 ? 0 : fromZeroSize; + int fromOutSize = bufferSize - fromZeroSize; + + switch( format ) + { + case RTAUDIO_SINT8: + memcpy( buffer, &( ( char* ) buffer_ )[outIndex_], fromOutSize * sizeof( char ) ); + memcpy( &( ( char* ) buffer )[fromOutSize], buffer_, fromZeroSize * sizeof( char ) ); + break; + case RTAUDIO_SINT16: + memcpy( buffer, &( ( short* ) buffer_ )[outIndex_], fromOutSize * sizeof( short ) ); + memcpy( &( ( short* ) buffer )[fromOutSize], buffer_, fromZeroSize * sizeof( short ) ); + break; + case RTAUDIO_SINT24: + memcpy( buffer, &( ( S24* ) buffer_ )[outIndex_], fromOutSize * sizeof( S24 ) ); + memcpy( &( ( S24* ) buffer )[fromOutSize], buffer_, fromZeroSize * sizeof( S24 ) ); + break; + case RTAUDIO_SINT32: + memcpy( buffer, &( ( int* ) buffer_ )[outIndex_], fromOutSize * sizeof( int ) ); + memcpy( &( ( int* ) buffer )[fromOutSize], buffer_, fromZeroSize * sizeof( int ) ); + break; + case RTAUDIO_FLOAT32: + memcpy( buffer, &( ( float* ) buffer_ )[outIndex_], fromOutSize * sizeof( float ) ); + memcpy( &( ( float* ) buffer )[fromOutSize], buffer_, fromZeroSize * sizeof( float ) ); + break; + case RTAUDIO_FLOAT64: + memcpy( buffer, &( ( double* ) buffer_ )[outIndex_], fromOutSize * sizeof( double ) ); + memcpy( &( ( double* ) buffer )[fromOutSize], buffer_, fromZeroSize * sizeof( double ) ); + break; + } + + // update "out" index + outIndex_ += bufferSize; + outIndex_ %= bufferSize_; + + return true; + } + +private: + char* buffer_; + unsigned int bufferSize_; + unsigned int inIndex_; + unsigned int outIndex_; +}; + +//----------------------------------------------------------------------------- + +// In order to satisfy WASAPI's buffer requirements, we need a means of 
converting sample rate +// between HW and the user. The convertBufferWasapi function is used to perform this conversion +// between HwIn->UserIn and UserOut->HwOut during the stream callback loop. +// This sample rate converter favors speed over quality, and works best with conversions between +// one rate and its multiple. +void convertBufferWasapi( char* outBuffer, + const char* inBuffer, + const unsigned int& channelCount, + const unsigned int& inSampleRate, + const unsigned int& outSampleRate, + const unsigned int& inSampleCount, + unsigned int& outSampleCount, + const RtAudioFormat& format ) +{ + // calculate the new outSampleCount and relative sampleStep + float sampleRatio = ( float ) outSampleRate / inSampleRate; + float sampleStep = 1.0f / sampleRatio; + float inSampleFraction = 0.0f; + + outSampleCount = ( unsigned int ) roundf( inSampleCount * sampleRatio ); + + // frame-by-frame, copy each relative input sample into it's corresponding output sample + for ( unsigned int outSample = 0; outSample < outSampleCount; outSample++ ) + { + unsigned int inSample = ( unsigned int ) inSampleFraction; + + switch ( format ) + { + case RTAUDIO_SINT8: + memcpy( &( ( char* ) outBuffer )[ outSample * channelCount ], &( ( char* ) inBuffer )[ inSample * channelCount ], channelCount * sizeof( char ) ); + break; + case RTAUDIO_SINT16: + memcpy( &( ( short* ) outBuffer )[ outSample * channelCount ], &( ( short* ) inBuffer )[ inSample * channelCount ], channelCount * sizeof( short ) ); + break; + case RTAUDIO_SINT24: + memcpy( &( ( S24* ) outBuffer )[ outSample * channelCount ], &( ( S24* ) inBuffer )[ inSample * channelCount ], channelCount * sizeof( S24 ) ); + break; + case RTAUDIO_SINT32: + memcpy( &( ( int* ) outBuffer )[ outSample * channelCount ], &( ( int* ) inBuffer )[ inSample * channelCount ], channelCount * sizeof( int ) ); + break; + case RTAUDIO_FLOAT32: + memcpy( &( ( float* ) outBuffer )[ outSample * channelCount ], &( ( float* ) inBuffer )[ inSample * 
channelCount ], channelCount * sizeof( float ) ); + break; + case RTAUDIO_FLOAT64: + memcpy( &( ( double* ) outBuffer )[ outSample * channelCount ], &( ( double* ) inBuffer )[ inSample * channelCount ], channelCount * sizeof( double ) ); + break; + } + + // jump to next in sample + inSampleFraction += sampleStep; + } +} + +//----------------------------------------------------------------------------- + +// A structure to hold various information related to the WASAPI implementation. +struct WasapiHandle +{ + IAudioClient* captureAudioClient; + IAudioClient* renderAudioClient; + IAudioCaptureClient* captureClient; + IAudioRenderClient* renderClient; + HANDLE captureEvent; + HANDLE renderEvent; + + WasapiHandle() + : captureAudioClient( NULL ), + renderAudioClient( NULL ), + captureClient( NULL ), + renderClient( NULL ), + captureEvent( NULL ), + renderEvent( NULL ) {} +}; + +//============================================================================= + +RtApiWasapi::RtApiWasapi() + : coInitialized_( false ), deviceEnumerator_( NULL ) +{ + // WASAPI can run either apartment or multi-threaded + HRESULT hr = CoInitialize( NULL ); + if ( !FAILED( hr ) ) + coInitialized_ = true; + + // Instantiate device enumerator + hr = CoCreateInstance( __uuidof( MMDeviceEnumerator ), NULL, + CLSCTX_ALL, __uuidof( IMMDeviceEnumerator ), + ( void** ) &deviceEnumerator_ ); + + if ( FAILED( hr ) ) { + errorText_ = "RtApiWasapi::RtApiWasapi: Unable to instantiate device enumerator"; + error( RtAudioError::DRIVER_ERROR ); + } +} + +//----------------------------------------------------------------------------- + +RtApiWasapi::~RtApiWasapi() +{ + if ( stream_.state != STREAM_CLOSED ) + closeStream(); + + SAFE_RELEASE( deviceEnumerator_ ); + + // If this object previously called CoInitialize() + if ( coInitialized_ ) + CoUninitialize(); +} + +//============================================================================= + +unsigned int RtApiWasapi::getDeviceCount( void ) +{ + unsigned 
int captureDeviceCount = 0; + unsigned int renderDeviceCount = 0; + + IMMDeviceCollection* captureDevices = NULL; + IMMDeviceCollection* renderDevices = NULL; + + // Count capture devices + errorText_.clear(); + HRESULT hr = deviceEnumerator_->EnumAudioEndpoints( eCapture, DEVICE_STATE_ACTIVE, &captureDevices ); + if ( FAILED( hr ) ) { + errorText_ = "RtApiWasapi::getDeviceCount: Unable to retrieve capture device collection."; + goto Exit; + } + + hr = captureDevices->GetCount( &captureDeviceCount ); + if ( FAILED( hr ) ) { + errorText_ = "RtApiWasapi::getDeviceCount: Unable to retrieve capture device count."; + goto Exit; + } + + // Count render devices + hr = deviceEnumerator_->EnumAudioEndpoints( eRender, DEVICE_STATE_ACTIVE, &renderDevices ); + if ( FAILED( hr ) ) { + errorText_ = "RtApiWasapi::getDeviceCount: Unable to retrieve render device collection."; + goto Exit; + } + + hr = renderDevices->GetCount( &renderDeviceCount ); + if ( FAILED( hr ) ) { + errorText_ = "RtApiWasapi::getDeviceCount: Unable to retrieve render device count."; + goto Exit; + } + +Exit: + // release all references + SAFE_RELEASE( captureDevices ); + SAFE_RELEASE( renderDevices ); + + if ( errorText_.empty() ) + return captureDeviceCount + renderDeviceCount; + + error( RtAudioError::DRIVER_ERROR ); + return 0; +} + +//----------------------------------------------------------------------------- + +RtAudio::DeviceInfo RtApiWasapi::getDeviceInfo( unsigned int device ) +{ + RtAudio::DeviceInfo info; + unsigned int captureDeviceCount = 0; + unsigned int renderDeviceCount = 0; + std::string defaultDeviceName; + bool isCaptureDevice = false; + + PROPVARIANT deviceNameProp; + PROPVARIANT defaultDeviceNameProp; + + IMMDeviceCollection* captureDevices = NULL; + IMMDeviceCollection* renderDevices = NULL; + IMMDevice* devicePtr = NULL; + IMMDevice* defaultDevicePtr = NULL; + IAudioClient* audioClient = NULL; + IPropertyStore* devicePropStore = NULL; + IPropertyStore* defaultDevicePropStore = NULL; 
+ + WAVEFORMATEX* deviceFormat = NULL; + WAVEFORMATEX* closestMatchFormat = NULL; + + // probed + info.probed = false; + + // Count capture devices + errorText_.clear(); + RtAudioError::Type errorType = RtAudioError::DRIVER_ERROR; + HRESULT hr = deviceEnumerator_->EnumAudioEndpoints( eCapture, DEVICE_STATE_ACTIVE, &captureDevices ); + if ( FAILED( hr ) ) { + errorText_ = "RtApiWasapi::getDeviceInfo: Unable to retrieve capture device collection."; + goto Exit; + } + + hr = captureDevices->GetCount( &captureDeviceCount ); + if ( FAILED( hr ) ) { + errorText_ = "RtApiWasapi::getDeviceInfo: Unable to retrieve capture device count."; + goto Exit; + } + + // Count render devices + hr = deviceEnumerator_->EnumAudioEndpoints( eRender, DEVICE_STATE_ACTIVE, &renderDevices ); + if ( FAILED( hr ) ) { + errorText_ = "RtApiWasapi::getDeviceInfo: Unable to retrieve render device collection."; + goto Exit; + } + + hr = renderDevices->GetCount( &renderDeviceCount ); + if ( FAILED( hr ) ) { + errorText_ = "RtApiWasapi::getDeviceInfo: Unable to retrieve render device count."; + goto Exit; + } + + // validate device index + if ( device >= captureDeviceCount + renderDeviceCount ) { + errorText_ = "RtApiWasapi::getDeviceInfo: Invalid device index."; + errorType = RtAudioError::INVALID_USE; + goto Exit; + } + + // determine whether index falls within capture or render devices + if ( device >= renderDeviceCount ) { + hr = captureDevices->Item( device - renderDeviceCount, &devicePtr ); + if ( FAILED( hr ) ) { + errorText_ = "RtApiWasapi::getDeviceInfo: Unable to retrieve capture device handle."; + goto Exit; + } + isCaptureDevice = true; + } + else { + hr = renderDevices->Item( device, &devicePtr ); + if ( FAILED( hr ) ) { + errorText_ = "RtApiWasapi::getDeviceInfo: Unable to retrieve render device handle."; + goto Exit; + } + isCaptureDevice = false; + } + + // get default device name + if ( isCaptureDevice ) { + hr = deviceEnumerator_->GetDefaultAudioEndpoint( eCapture, eConsole, 
&defaultDevicePtr ); + if ( FAILED( hr ) ) { + errorText_ = "RtApiWasapi::getDeviceInfo: Unable to retrieve default capture device handle."; + goto Exit; + } + } + else { + hr = deviceEnumerator_->GetDefaultAudioEndpoint( eRender, eConsole, &defaultDevicePtr ); + if ( FAILED( hr ) ) { + errorText_ = "RtApiWasapi::getDeviceInfo: Unable to retrieve default render device handle."; + goto Exit; + } + } + + hr = defaultDevicePtr->OpenPropertyStore( STGM_READ, &defaultDevicePropStore ); + if ( FAILED( hr ) ) { + errorText_ = "RtApiWasapi::getDeviceInfo: Unable to open default device property store."; + goto Exit; + } + PropVariantInit( &defaultDeviceNameProp ); + + hr = defaultDevicePropStore->GetValue( PKEY_Device_FriendlyName, &defaultDeviceNameProp ); + if ( FAILED( hr ) ) { + errorText_ = "RtApiWasapi::getDeviceInfo: Unable to retrieve default device property: PKEY_Device_FriendlyName."; + goto Exit; + } + + defaultDeviceName = convertCharPointerToStdString(defaultDeviceNameProp.pwszVal); + + // name + hr = devicePtr->OpenPropertyStore( STGM_READ, &devicePropStore ); + if ( FAILED( hr ) ) { + errorText_ = "RtApiWasapi::getDeviceInfo: Unable to open device property store."; + goto Exit; + } + + PropVariantInit( &deviceNameProp ); + + hr = devicePropStore->GetValue( PKEY_Device_FriendlyName, &deviceNameProp ); + if ( FAILED( hr ) ) { + errorText_ = "RtApiWasapi::getDeviceInfo: Unable to retrieve device property: PKEY_Device_FriendlyName."; + goto Exit; + } + + info.name =convertCharPointerToStdString(deviceNameProp.pwszVal); + + // is default + if ( isCaptureDevice ) { + info.isDefaultInput = info.name == defaultDeviceName; + info.isDefaultOutput = false; + } + else { + info.isDefaultInput = false; + info.isDefaultOutput = info.name == defaultDeviceName; + } + + // channel count + hr = devicePtr->Activate( __uuidof( IAudioClient ), CLSCTX_ALL, NULL, ( void** ) &audioClient ); + if ( FAILED( hr ) ) { + errorText_ = "RtApiWasapi::getDeviceInfo: Unable to retrieve device 
audio client."; + goto Exit; + } + + hr = audioClient->GetMixFormat( &deviceFormat ); + if ( FAILED( hr ) ) { + errorText_ = "RtApiWasapi::getDeviceInfo: Unable to retrieve device mix format."; + goto Exit; + } + + if ( isCaptureDevice ) { + info.inputChannels = deviceFormat->nChannels; + info.outputChannels = 0; + info.duplexChannels = 0; + } + else { + info.inputChannels = 0; + info.outputChannels = deviceFormat->nChannels; + info.duplexChannels = 0; + } + + // sample rates + info.sampleRates.clear(); + + // allow support for all sample rates as we have a built-in sample rate converter + for ( unsigned int i = 0; i < MAX_SAMPLE_RATES; i++ ) { + info.sampleRates.push_back( SAMPLE_RATES[i] ); + } + info.preferredSampleRate = deviceFormat->nSamplesPerSec; + + // native format + info.nativeFormats = 0; + + if ( deviceFormat->wFormatTag == WAVE_FORMAT_IEEE_FLOAT || + ( deviceFormat->wFormatTag == WAVE_FORMAT_EXTENSIBLE && + ( ( WAVEFORMATEXTENSIBLE* ) deviceFormat )->SubFormat == KSDATAFORMAT_SUBTYPE_IEEE_FLOAT ) ) + { + if ( deviceFormat->wBitsPerSample == 32 ) { + info.nativeFormats |= RTAUDIO_FLOAT32; + } + else if ( deviceFormat->wBitsPerSample == 64 ) { + info.nativeFormats |= RTAUDIO_FLOAT64; + } + } + else if ( deviceFormat->wFormatTag == WAVE_FORMAT_PCM || + ( deviceFormat->wFormatTag == WAVE_FORMAT_EXTENSIBLE && + ( ( WAVEFORMATEXTENSIBLE* ) deviceFormat )->SubFormat == KSDATAFORMAT_SUBTYPE_PCM ) ) + { + if ( deviceFormat->wBitsPerSample == 8 ) { + info.nativeFormats |= RTAUDIO_SINT8; + } + else if ( deviceFormat->wBitsPerSample == 16 ) { + info.nativeFormats |= RTAUDIO_SINT16; + } + else if ( deviceFormat->wBitsPerSample == 24 ) { + info.nativeFormats |= RTAUDIO_SINT24; + } + else if ( deviceFormat->wBitsPerSample == 32 ) { + info.nativeFormats |= RTAUDIO_SINT32; + } + } + + // probed + info.probed = true; + +Exit: + // release all references + PropVariantClear( &deviceNameProp ); + PropVariantClear( &defaultDeviceNameProp ); + + SAFE_RELEASE( captureDevices 
); + SAFE_RELEASE( renderDevices ); + SAFE_RELEASE( devicePtr ); + SAFE_RELEASE( defaultDevicePtr ); + SAFE_RELEASE( audioClient ); + SAFE_RELEASE( devicePropStore ); + SAFE_RELEASE( defaultDevicePropStore ); + + CoTaskMemFree( deviceFormat ); + CoTaskMemFree( closestMatchFormat ); + + if ( !errorText_.empty() ) + error( errorType ); + return info; +} + +//----------------------------------------------------------------------------- + +unsigned int RtApiWasapi::getDefaultOutputDevice( void ) +{ + for ( unsigned int i = 0; i < getDeviceCount(); i++ ) { + if ( getDeviceInfo( i ).isDefaultOutput ) { + return i; + } + } + + return 0; +} + +//----------------------------------------------------------------------------- + +unsigned int RtApiWasapi::getDefaultInputDevice( void ) +{ + for ( unsigned int i = 0; i < getDeviceCount(); i++ ) { + if ( getDeviceInfo( i ).isDefaultInput ) { + return i; + } + } + + return 0; +} + +//----------------------------------------------------------------------------- + +void RtApiWasapi::closeStream( void ) +{ + if ( stream_.state == STREAM_CLOSED ) { + errorText_ = "RtApiWasapi::closeStream: No open stream to close."; + error( RtAudioError::WARNING ); + return; + } + + if ( stream_.state != STREAM_STOPPED ) + stopStream(); + + // clean up stream memory + SAFE_RELEASE( ( ( WasapiHandle* ) stream_.apiHandle )->captureAudioClient ) + SAFE_RELEASE( ( ( WasapiHandle* ) stream_.apiHandle )->renderAudioClient ) + + SAFE_RELEASE( ( ( WasapiHandle* ) stream_.apiHandle )->captureClient ) + SAFE_RELEASE( ( ( WasapiHandle* ) stream_.apiHandle )->renderClient ) + + if ( ( ( WasapiHandle* ) stream_.apiHandle )->captureEvent ) + CloseHandle( ( ( WasapiHandle* ) stream_.apiHandle )->captureEvent ); + + if ( ( ( WasapiHandle* ) stream_.apiHandle )->renderEvent ) + CloseHandle( ( ( WasapiHandle* ) stream_.apiHandle )->renderEvent ); + + delete ( WasapiHandle* ) stream_.apiHandle; + stream_.apiHandle = NULL; + + for ( int i = 0; i < 2; i++ ) { + if ( 
stream_.userBuffer[i] ) { + free( stream_.userBuffer[i] ); + stream_.userBuffer[i] = 0; + } + } + + if ( stream_.deviceBuffer ) { + free( stream_.deviceBuffer ); + stream_.deviceBuffer = 0; + } + + // update stream state + stream_.state = STREAM_CLOSED; +} + +//----------------------------------------------------------------------------- + +void RtApiWasapi::startStream( void ) +{ + verifyStream(); + + if ( stream_.state == STREAM_RUNNING ) { + errorText_ = "RtApiWasapi::startStream: The stream is already running."; + error( RtAudioError::WARNING ); + return; + } + + // update stream state + stream_.state = STREAM_RUNNING; + + // create WASAPI stream thread + stream_.callbackInfo.thread = ( ThreadHandle ) CreateThread( NULL, 0, runWasapiThread, this, CREATE_SUSPENDED, NULL ); + + if ( !stream_.callbackInfo.thread ) { + errorText_ = "RtApiWasapi::startStream: Unable to instantiate callback thread."; + error( RtAudioError::THREAD_ERROR ); + } + else { + SetThreadPriority( ( void* ) stream_.callbackInfo.thread, stream_.callbackInfo.priority ); + ResumeThread( ( void* ) stream_.callbackInfo.thread ); + } +} + +//----------------------------------------------------------------------------- + +void RtApiWasapi::stopStream( void ) +{ + verifyStream(); + + if ( stream_.state == STREAM_STOPPED ) { + errorText_ = "RtApiWasapi::stopStream: The stream is already stopped."; + error( RtAudioError::WARNING ); + return; + } + + // inform stream thread by setting stream state to STREAM_STOPPING + stream_.state = STREAM_STOPPING; + + // wait until stream thread is stopped + while( stream_.state != STREAM_STOPPED ) { + Sleep( 1 ); + } + + // Wait for the last buffer to play before stopping. 
+ Sleep( 1000 * stream_.bufferSize / stream_.sampleRate ); + + // stop capture client if applicable + if ( ( ( WasapiHandle* ) stream_.apiHandle )->captureAudioClient ) { + HRESULT hr = ( ( WasapiHandle* ) stream_.apiHandle )->captureAudioClient->Stop(); + if ( FAILED( hr ) ) { + errorText_ = "RtApiWasapi::stopStream: Unable to stop capture stream."; + error( RtAudioError::DRIVER_ERROR ); + return; + } + } + + // stop render client if applicable + if ( ( ( WasapiHandle* ) stream_.apiHandle )->renderAudioClient ) { + HRESULT hr = ( ( WasapiHandle* ) stream_.apiHandle )->renderAudioClient->Stop(); + if ( FAILED( hr ) ) { + errorText_ = "RtApiWasapi::stopStream: Unable to stop render stream."; + error( RtAudioError::DRIVER_ERROR ); + return; + } + } + + // close thread handle + if ( stream_.callbackInfo.thread && !CloseHandle( ( void* ) stream_.callbackInfo.thread ) ) { + errorText_ = "RtApiWasapi::stopStream: Unable to close callback thread."; + error( RtAudioError::THREAD_ERROR ); + return; + } + + stream_.callbackInfo.thread = (ThreadHandle) NULL; +} + +//----------------------------------------------------------------------------- + +void RtApiWasapi::abortStream( void ) +{ + verifyStream(); + + if ( stream_.state == STREAM_STOPPED ) { + errorText_ = "RtApiWasapi::abortStream: The stream is already stopped."; + error( RtAudioError::WARNING ); + return; + } + + // inform stream thread by setting stream state to STREAM_STOPPING + stream_.state = STREAM_STOPPING; + + // wait until stream thread is stopped + while ( stream_.state != STREAM_STOPPED ) { + Sleep( 1 ); + } + + // stop capture client if applicable + if ( ( ( WasapiHandle* ) stream_.apiHandle )->captureAudioClient ) { + HRESULT hr = ( ( WasapiHandle* ) stream_.apiHandle )->captureAudioClient->Stop(); + if ( FAILED( hr ) ) { + errorText_ = "RtApiWasapi::abortStream: Unable to stop capture stream."; + error( RtAudioError::DRIVER_ERROR ); + return; + } + } + + // stop render client if applicable + if ( ( ( 
WasapiHandle* ) stream_.apiHandle )->renderAudioClient ) { + HRESULT hr = ( ( WasapiHandle* ) stream_.apiHandle )->renderAudioClient->Stop(); + if ( FAILED( hr ) ) { + errorText_ = "RtApiWasapi::abortStream: Unable to stop render stream."; + error( RtAudioError::DRIVER_ERROR ); + return; + } + } + + // close thread handle + if ( stream_.callbackInfo.thread && !CloseHandle( ( void* ) stream_.callbackInfo.thread ) ) { + errorText_ = "RtApiWasapi::abortStream: Unable to close callback thread."; + error( RtAudioError::THREAD_ERROR ); + return; + } + + stream_.callbackInfo.thread = (ThreadHandle) NULL; +} + +//----------------------------------------------------------------------------- + +bool RtApiWasapi::probeDeviceOpen( unsigned int device, StreamMode mode, unsigned int channels, + unsigned int firstChannel, unsigned int sampleRate, + RtAudioFormat format, unsigned int* bufferSize, + RtAudio::StreamOptions* options ) +{ + bool methodResult = FAILURE; + unsigned int captureDeviceCount = 0; + unsigned int renderDeviceCount = 0; + + IMMDeviceCollection* captureDevices = NULL; + IMMDeviceCollection* renderDevices = NULL; + IMMDevice* devicePtr = NULL; + WAVEFORMATEX* deviceFormat = NULL; + unsigned int bufferBytes; + stream_.state = STREAM_STOPPED; + + // create API Handle if not already created + if ( !stream_.apiHandle ) + stream_.apiHandle = ( void* ) new WasapiHandle(); + + // Count capture devices + errorText_.clear(); + RtAudioError::Type errorType = RtAudioError::DRIVER_ERROR; + HRESULT hr = deviceEnumerator_->EnumAudioEndpoints( eCapture, DEVICE_STATE_ACTIVE, &captureDevices ); + if ( FAILED( hr ) ) { + errorText_ = "RtApiWasapi::probeDeviceOpen: Unable to retrieve capture device collection."; + goto Exit; + } + + hr = captureDevices->GetCount( &captureDeviceCount ); + if ( FAILED( hr ) ) { + errorText_ = "RtApiWasapi::probeDeviceOpen: Unable to retrieve capture device count."; + goto Exit; + } + + // Count render devices + hr = 
deviceEnumerator_->EnumAudioEndpoints( eRender, DEVICE_STATE_ACTIVE, &renderDevices ); + if ( FAILED( hr ) ) { + errorText_ = "RtApiWasapi::probeDeviceOpen: Unable to retrieve render device collection."; + goto Exit; + } + + hr = renderDevices->GetCount( &renderDeviceCount ); + if ( FAILED( hr ) ) { + errorText_ = "RtApiWasapi::probeDeviceOpen: Unable to retrieve render device count."; + goto Exit; + } + + // validate device index + if ( device >= captureDeviceCount + renderDeviceCount ) { + errorType = RtAudioError::INVALID_USE; + errorText_ = "RtApiWasapi::probeDeviceOpen: Invalid device index."; + goto Exit; + } + + // determine whether index falls within capture or render devices + if ( device >= renderDeviceCount ) { + if ( mode != INPUT ) { + errorType = RtAudioError::INVALID_USE; + errorText_ = "RtApiWasapi::probeDeviceOpen: Capture device selected as output device."; + goto Exit; + } + + // retrieve captureAudioClient from devicePtr + IAudioClient*& captureAudioClient = ( ( WasapiHandle* ) stream_.apiHandle )->captureAudioClient; + + hr = captureDevices->Item( device - renderDeviceCount, &devicePtr ); + if ( FAILED( hr ) ) { + errorText_ = "RtApiWasapi::probeDeviceOpen: Unable to retrieve capture device handle."; + goto Exit; + } + + hr = devicePtr->Activate( __uuidof( IAudioClient ), CLSCTX_ALL, + NULL, ( void** ) &captureAudioClient ); + if ( FAILED( hr ) ) { + errorText_ = "RtApiWasapi::probeDeviceOpen: Unable to retrieve device audio client."; + goto Exit; + } + + hr = captureAudioClient->GetMixFormat( &deviceFormat ); + if ( FAILED( hr ) ) { + errorText_ = "RtApiWasapi::probeDeviceOpen: Unable to retrieve device mix format."; + goto Exit; + } + + stream_.nDeviceChannels[mode] = deviceFormat->nChannels; + captureAudioClient->GetStreamLatency( ( long long* ) &stream_.latency[mode] ); + } + else { + if ( mode != OUTPUT ) { + errorType = RtAudioError::INVALID_USE; + errorText_ = "RtApiWasapi::probeDeviceOpen: Render device selected as input device."; + 
goto Exit; + } + + // retrieve renderAudioClient from devicePtr + IAudioClient*& renderAudioClient = ( ( WasapiHandle* ) stream_.apiHandle )->renderAudioClient; + + hr = renderDevices->Item( device, &devicePtr ); + if ( FAILED( hr ) ) { + errorText_ = "RtApiWasapi::probeDeviceOpen: Unable to retrieve render device handle."; + goto Exit; + } + + hr = devicePtr->Activate( __uuidof( IAudioClient ), CLSCTX_ALL, + NULL, ( void** ) &renderAudioClient ); + if ( FAILED( hr ) ) { + errorText_ = "RtApiWasapi::probeDeviceOpen: Unable to retrieve device audio client."; + goto Exit; + } + + hr = renderAudioClient->GetMixFormat( &deviceFormat ); + if ( FAILED( hr ) ) { + errorText_ = "RtApiWasapi::probeDeviceOpen: Unable to retrieve device mix format."; + goto Exit; + } + + stream_.nDeviceChannels[mode] = deviceFormat->nChannels; + renderAudioClient->GetStreamLatency( ( long long* ) &stream_.latency[mode] ); + } + + // fill stream data + if ( ( stream_.mode == OUTPUT && mode == INPUT ) || + ( stream_.mode == INPUT && mode == OUTPUT ) ) { + stream_.mode = DUPLEX; + } + else { + stream_.mode = mode; + } + + stream_.device[mode] = device; + stream_.doByteSwap[mode] = false; + stream_.sampleRate = sampleRate; + stream_.bufferSize = *bufferSize; + stream_.nBuffers = 1; + stream_.nUserChannels[mode] = channels; + stream_.channelOffset[mode] = firstChannel; + stream_.userFormat = format; + stream_.deviceFormat[mode] = getDeviceInfo( device ).nativeFormats; + + if ( options && options->flags & RTAUDIO_NONINTERLEAVED ) + stream_.userInterleaved = false; + else + stream_.userInterleaved = true; + stream_.deviceInterleaved[mode] = true; + + // Set flags for buffer conversion. 
+ stream_.doConvertBuffer[mode] = false; + if ( stream_.userFormat != stream_.deviceFormat[mode] || + stream_.nUserChannels != stream_.nDeviceChannels ) + stream_.doConvertBuffer[mode] = true; + else if ( stream_.userInterleaved != stream_.deviceInterleaved[mode] && + stream_.nUserChannels[mode] > 1 ) + stream_.doConvertBuffer[mode] = true; + + if ( stream_.doConvertBuffer[mode] ) + setConvertInfo( mode, 0 ); + + // Allocate necessary internal buffers + bufferBytes = stream_.nUserChannels[mode] * stream_.bufferSize * formatBytes( stream_.userFormat ); + + stream_.userBuffer[mode] = ( char* ) calloc( bufferBytes, 1 ); + if ( !stream_.userBuffer[mode] ) { + errorType = RtAudioError::MEMORY_ERROR; + errorText_ = "RtApiWasapi::probeDeviceOpen: Error allocating user buffer memory."; + goto Exit; + } + + if ( options && options->flags & RTAUDIO_SCHEDULE_REALTIME ) + stream_.callbackInfo.priority = 15; + else + stream_.callbackInfo.priority = 0; + + ///! TODO: RTAUDIO_MINIMIZE_LATENCY // Provide stream buffers directly to callback + ///! 
TODO: RTAUDIO_HOG_DEVICE // Exclusive mode + + methodResult = SUCCESS; + +Exit: + //clean up + SAFE_RELEASE( captureDevices ); + SAFE_RELEASE( renderDevices ); + SAFE_RELEASE( devicePtr ); + CoTaskMemFree( deviceFormat ); + + // if method failed, close the stream + if ( methodResult == FAILURE ) + closeStream(); + + if ( !errorText_.empty() ) + error( errorType ); + return methodResult; +} + +//============================================================================= + +DWORD WINAPI RtApiWasapi::runWasapiThread( void* wasapiPtr ) +{ + if ( wasapiPtr ) + ( ( RtApiWasapi* ) wasapiPtr )->wasapiThread(); + + return 0; +} + +DWORD WINAPI RtApiWasapi::stopWasapiThread( void* wasapiPtr ) +{ + if ( wasapiPtr ) + ( ( RtApiWasapi* ) wasapiPtr )->stopStream(); + + return 0; +} + +DWORD WINAPI RtApiWasapi::abortWasapiThread( void* wasapiPtr ) +{ + if ( wasapiPtr ) + ( ( RtApiWasapi* ) wasapiPtr )->abortStream(); + + return 0; +} + +//----------------------------------------------------------------------------- + +void RtApiWasapi::wasapiThread() +{ + // as this is a new thread, we must CoInitialize it + CoInitialize( NULL ); + + HRESULT hr; + + IAudioClient* captureAudioClient = ( ( WasapiHandle* ) stream_.apiHandle )->captureAudioClient; + IAudioClient* renderAudioClient = ( ( WasapiHandle* ) stream_.apiHandle )->renderAudioClient; + IAudioCaptureClient* captureClient = ( ( WasapiHandle* ) stream_.apiHandle )->captureClient; + IAudioRenderClient* renderClient = ( ( WasapiHandle* ) stream_.apiHandle )->renderClient; + HANDLE captureEvent = ( ( WasapiHandle* ) stream_.apiHandle )->captureEvent; + HANDLE renderEvent = ( ( WasapiHandle* ) stream_.apiHandle )->renderEvent; + + WAVEFORMATEX* captureFormat = NULL; + WAVEFORMATEX* renderFormat = NULL; + float captureSrRatio = 0.0f; + float renderSrRatio = 0.0f; + WasapiBuffer captureBuffer; + WasapiBuffer renderBuffer; + + // declare local stream variables + RtAudioCallback callback = ( RtAudioCallback ) 
stream_.callbackInfo.callback; + BYTE* streamBuffer = NULL; + unsigned long captureFlags = 0; + unsigned int bufferFrameCount = 0; + unsigned int numFramesPadding = 0; + unsigned int convBufferSize = 0; + bool callbackPushed = false; + bool callbackPulled = false; + bool callbackStopped = false; + int callbackResult = 0; + + // convBuffer is used to store converted buffers between WASAPI and the user + char* convBuffer = NULL; + unsigned int convBuffSize = 0; + unsigned int deviceBuffSize = 0; + + errorText_.clear(); + RtAudioError::Type errorType = RtAudioError::DRIVER_ERROR; + + // Attempt to assign "Pro Audio" characteristic to thread + HMODULE AvrtDll = LoadLibrary( (LPCTSTR) "AVRT.dll" ); + if ( AvrtDll ) { + DWORD taskIndex = 0; + TAvSetMmThreadCharacteristicsPtr AvSetMmThreadCharacteristicsPtr = ( TAvSetMmThreadCharacteristicsPtr ) GetProcAddress( AvrtDll, "AvSetMmThreadCharacteristicsW" ); + AvSetMmThreadCharacteristicsPtr( L"Pro Audio", &taskIndex ); + FreeLibrary( AvrtDll ); + } + + // start capture stream if applicable + if ( captureAudioClient ) { + hr = captureAudioClient->GetMixFormat( &captureFormat ); + if ( FAILED( hr ) ) { + errorText_ = "RtApiWasapi::wasapiThread: Unable to retrieve device mix format."; + goto Exit; + } + + captureSrRatio = ( ( float ) captureFormat->nSamplesPerSec / stream_.sampleRate ); + + // initialize capture stream according to desire buffer size + float desiredBufferSize = stream_.bufferSize * captureSrRatio; + REFERENCE_TIME desiredBufferPeriod = ( REFERENCE_TIME ) ( ( float ) desiredBufferSize * 10000000 / captureFormat->nSamplesPerSec ); + + if ( !captureClient ) { + hr = captureAudioClient->Initialize( AUDCLNT_SHAREMODE_SHARED, + AUDCLNT_STREAMFLAGS_EVENTCALLBACK, + desiredBufferPeriod, + desiredBufferPeriod, + captureFormat, + NULL ); + if ( FAILED( hr ) ) { + errorText_ = "RtApiWasapi::wasapiThread: Unable to initialize capture audio client."; + goto Exit; + } + + hr = captureAudioClient->GetService( __uuidof( 
IAudioCaptureClient ), + ( void** ) &captureClient ); + if ( FAILED( hr ) ) { + errorText_ = "RtApiWasapi::wasapiThread: Unable to retrieve capture client handle."; + goto Exit; + } + + // configure captureEvent to trigger on every available capture buffer + captureEvent = CreateEvent( NULL, FALSE, FALSE, NULL ); + if ( !captureEvent ) { + errorType = RtAudioError::SYSTEM_ERROR; + errorText_ = "RtApiWasapi::wasapiThread: Unable to create capture event."; + goto Exit; + } + + hr = captureAudioClient->SetEventHandle( captureEvent ); + if ( FAILED( hr ) ) { + errorText_ = "RtApiWasapi::wasapiThread: Unable to set capture event handle."; + goto Exit; + } + + ( ( WasapiHandle* ) stream_.apiHandle )->captureClient = captureClient; + ( ( WasapiHandle* ) stream_.apiHandle )->captureEvent = captureEvent; + } + + unsigned int inBufferSize = 0; + hr = captureAudioClient->GetBufferSize( &inBufferSize ); + if ( FAILED( hr ) ) { + errorText_ = "RtApiWasapi::wasapiThread: Unable to get capture buffer size."; + goto Exit; + } + + // scale outBufferSize according to stream->user sample rate ratio + unsigned int outBufferSize = ( unsigned int ) ( stream_.bufferSize * captureSrRatio ) * stream_.nDeviceChannels[INPUT]; + inBufferSize *= stream_.nDeviceChannels[INPUT]; + + // set captureBuffer size + captureBuffer.setBufferSize( inBufferSize + outBufferSize, formatBytes( stream_.deviceFormat[INPUT] ) ); + + // reset the capture stream + hr = captureAudioClient->Reset(); + if ( FAILED( hr ) ) { + errorText_ = "RtApiWasapi::wasapiThread: Unable to reset capture stream."; + goto Exit; + } + + // start the capture stream + hr = captureAudioClient->Start(); + if ( FAILED( hr ) ) { + errorText_ = "RtApiWasapi::wasapiThread: Unable to start capture stream."; + goto Exit; + } + } + + // start render stream if applicable + if ( renderAudioClient ) { + hr = renderAudioClient->GetMixFormat( &renderFormat ); + if ( FAILED( hr ) ) { + errorText_ = "RtApiWasapi::wasapiThread: Unable to retrieve 
device mix format."; + goto Exit; + } + + renderSrRatio = ( ( float ) renderFormat->nSamplesPerSec / stream_.sampleRate ); + + // initialize render stream according to desire buffer size + float desiredBufferSize = stream_.bufferSize * renderSrRatio; + REFERENCE_TIME desiredBufferPeriod = ( REFERENCE_TIME ) ( ( float ) desiredBufferSize * 10000000 / renderFormat->nSamplesPerSec ); + + if ( !renderClient ) { + hr = renderAudioClient->Initialize( AUDCLNT_SHAREMODE_SHARED, + AUDCLNT_STREAMFLAGS_EVENTCALLBACK, + desiredBufferPeriod, + desiredBufferPeriod, + renderFormat, + NULL ); + if ( FAILED( hr ) ) { + errorText_ = "RtApiWasapi::wasapiThread: Unable to initialize render audio client."; + goto Exit; + } + + hr = renderAudioClient->GetService( __uuidof( IAudioRenderClient ), + ( void** ) &renderClient ); + if ( FAILED( hr ) ) { + errorText_ = "RtApiWasapi::wasapiThread: Unable to retrieve render client handle."; + goto Exit; + } + + // configure renderEvent to trigger on every available render buffer + renderEvent = CreateEvent( NULL, FALSE, FALSE, NULL ); + if ( !renderEvent ) { + errorType = RtAudioError::SYSTEM_ERROR; + errorText_ = "RtApiWasapi::wasapiThread: Unable to create render event."; + goto Exit; + } + + hr = renderAudioClient->SetEventHandle( renderEvent ); + if ( FAILED( hr ) ) { + errorText_ = "RtApiWasapi::wasapiThread: Unable to set render event handle."; + goto Exit; + } + + ( ( WasapiHandle* ) stream_.apiHandle )->renderClient = renderClient; + ( ( WasapiHandle* ) stream_.apiHandle )->renderEvent = renderEvent; + } + + unsigned int outBufferSize = 0; + hr = renderAudioClient->GetBufferSize( &outBufferSize ); + if ( FAILED( hr ) ) { + errorText_ = "RtApiWasapi::wasapiThread: Unable to get render buffer size."; + goto Exit; + } + + // scale inBufferSize according to user->stream sample rate ratio + unsigned int inBufferSize = ( unsigned int ) ( stream_.bufferSize * renderSrRatio ) * stream_.nDeviceChannels[OUTPUT]; + outBufferSize *= 
stream_.nDeviceChannels[OUTPUT]; + + // set renderBuffer size + renderBuffer.setBufferSize( inBufferSize + outBufferSize, formatBytes( stream_.deviceFormat[OUTPUT] ) ); + + // reset the render stream + hr = renderAudioClient->Reset(); + if ( FAILED( hr ) ) { + errorText_ = "RtApiWasapi::wasapiThread: Unable to reset render stream."; + goto Exit; + } + + // start the render stream + hr = renderAudioClient->Start(); + if ( FAILED( hr ) ) { + errorText_ = "RtApiWasapi::wasapiThread: Unable to start render stream."; + goto Exit; + } + } + + if ( stream_.mode == INPUT ) { + convBuffSize = ( size_t ) ( stream_.bufferSize * captureSrRatio ) * stream_.nDeviceChannels[INPUT] * formatBytes( stream_.deviceFormat[INPUT] ); + deviceBuffSize = stream_.bufferSize * stream_.nDeviceChannels[INPUT] * formatBytes( stream_.deviceFormat[INPUT] ); + } + else if ( stream_.mode == OUTPUT ) { + convBuffSize = ( size_t ) ( stream_.bufferSize * renderSrRatio ) * stream_.nDeviceChannels[OUTPUT] * formatBytes( stream_.deviceFormat[OUTPUT] ); + deviceBuffSize = stream_.bufferSize * stream_.nDeviceChannels[OUTPUT] * formatBytes( stream_.deviceFormat[OUTPUT] ); + } + else if ( stream_.mode == DUPLEX ) { + convBuffSize = std::max( ( size_t ) ( stream_.bufferSize * captureSrRatio ) * stream_.nDeviceChannels[INPUT] * formatBytes( stream_.deviceFormat[INPUT] ), + ( size_t ) ( stream_.bufferSize * renderSrRatio ) * stream_.nDeviceChannels[OUTPUT] * formatBytes( stream_.deviceFormat[OUTPUT] ) ); + deviceBuffSize = std::max( stream_.bufferSize * stream_.nDeviceChannels[INPUT] * formatBytes( stream_.deviceFormat[INPUT] ), + stream_.bufferSize * stream_.nDeviceChannels[OUTPUT] * formatBytes( stream_.deviceFormat[OUTPUT] ) ); + } + + convBuffer = ( char* ) malloc( convBuffSize ); + stream_.deviceBuffer = ( char* ) malloc( deviceBuffSize ); + if ( !convBuffer || !stream_.deviceBuffer ) { + errorType = RtAudioError::MEMORY_ERROR; + errorText_ = "RtApiWasapi::wasapiThread: Error allocating device buffer 
memory."; + goto Exit; + } + + // stream process loop + while ( stream_.state != STREAM_STOPPING ) { + if ( !callbackPulled ) { + // Callback Input + // ============== + // 1. Pull callback buffer from inputBuffer + // 2. If 1. was successful: Convert callback buffer to user sample rate and channel count + // Convert callback buffer to user format + + if ( captureAudioClient ) { + // Pull callback buffer from inputBuffer + callbackPulled = captureBuffer.pullBuffer( convBuffer, + ( unsigned int ) ( stream_.bufferSize * captureSrRatio ) * stream_.nDeviceChannels[INPUT], + stream_.deviceFormat[INPUT] ); + + if ( callbackPulled ) { + // Convert callback buffer to user sample rate + convertBufferWasapi( stream_.deviceBuffer, + convBuffer, + stream_.nDeviceChannels[INPUT], + captureFormat->nSamplesPerSec, + stream_.sampleRate, + ( unsigned int ) ( stream_.bufferSize * captureSrRatio ), + convBufferSize, + stream_.deviceFormat[INPUT] ); + + if ( stream_.doConvertBuffer[INPUT] ) { + // Convert callback buffer to user format + convertBuffer( stream_.userBuffer[INPUT], + stream_.deviceBuffer, + stream_.convertInfo[INPUT] ); + } + else { + // no further conversion, simple copy deviceBuffer to userBuffer + memcpy( stream_.userBuffer[INPUT], + stream_.deviceBuffer, + stream_.bufferSize * stream_.nUserChannels[INPUT] * formatBytes( stream_.userFormat ) ); + } + } + } + else { + // if there is no capture stream, set callbackPulled flag + callbackPulled = true; + } + + // Execute Callback + // ================ + // 1. Execute user callback method + // 2. Handle return value from callback + + // if callback has not requested the stream to stop + if ( callbackPulled && !callbackStopped ) { + // Execute user callback method + callbackResult = callback( stream_.userBuffer[OUTPUT], + stream_.userBuffer[INPUT], + stream_.bufferSize, + getStreamTime(), + captureFlags & AUDCLNT_BUFFERFLAGS_DATA_DISCONTINUITY ? 
RTAUDIO_INPUT_OVERFLOW : 0, + stream_.callbackInfo.userData ); + + // Handle return value from callback + if ( callbackResult == 1 ) { + // instantiate a thread to stop this thread + HANDLE threadHandle = CreateThread( NULL, 0, stopWasapiThread, this, 0, NULL ); + if ( !threadHandle ) { + errorType = RtAudioError::THREAD_ERROR; + errorText_ = "RtApiWasapi::wasapiThread: Unable to instantiate stream stop thread."; + goto Exit; + } + else if ( !CloseHandle( threadHandle ) ) { + errorType = RtAudioError::THREAD_ERROR; + errorText_ = "RtApiWasapi::wasapiThread: Unable to close stream stop thread handle."; + goto Exit; + } + + callbackStopped = true; + } + else if ( callbackResult == 2 ) { + // instantiate a thread to stop this thread + HANDLE threadHandle = CreateThread( NULL, 0, abortWasapiThread, this, 0, NULL ); + if ( !threadHandle ) { + errorType = RtAudioError::THREAD_ERROR; + errorText_ = "RtApiWasapi::wasapiThread: Unable to instantiate stream abort thread."; + goto Exit; + } + else if ( !CloseHandle( threadHandle ) ) { + errorType = RtAudioError::THREAD_ERROR; + errorText_ = "RtApiWasapi::wasapiThread: Unable to close stream abort thread handle."; + goto Exit; + } + + callbackStopped = true; + } + } + } + + // Callback Output + // =============== + // 1. Convert callback buffer to stream format + // 2. Convert callback buffer to stream sample rate and channel count + // 3. 
Push callback buffer into outputBuffer + + if ( renderAudioClient && callbackPulled ) { + if ( stream_.doConvertBuffer[OUTPUT] ) { + // Convert callback buffer to stream format + convertBuffer( stream_.deviceBuffer, + stream_.userBuffer[OUTPUT], + stream_.convertInfo[OUTPUT] ); + + } + + // Convert callback buffer to stream sample rate + convertBufferWasapi( convBuffer, + stream_.deviceBuffer, + stream_.nDeviceChannels[OUTPUT], + stream_.sampleRate, + renderFormat->nSamplesPerSec, + stream_.bufferSize, + convBufferSize, + stream_.deviceFormat[OUTPUT] ); + + // Push callback buffer into outputBuffer + callbackPushed = renderBuffer.pushBuffer( convBuffer, + convBufferSize * stream_.nDeviceChannels[OUTPUT], + stream_.deviceFormat[OUTPUT] ); + } + else { + // if there is no render stream, set callbackPushed flag + callbackPushed = true; + } + + // Stream Capture + // ============== + // 1. Get capture buffer from stream + // 2. Push capture buffer into inputBuffer + // 3. If 2. was successful: Release capture buffer + + if ( captureAudioClient ) { + // if the callback input buffer was not pulled from captureBuffer, wait for next capture event + if ( !callbackPulled ) { + WaitForSingleObject( captureEvent, INFINITE ); + } + + // Get capture buffer from stream + hr = captureClient->GetBuffer( &streamBuffer, + &bufferFrameCount, + &captureFlags, NULL, NULL ); + if ( FAILED( hr ) ) { + errorText_ = "RtApiWasapi::wasapiThread: Unable to retrieve capture buffer."; + goto Exit; + } + + if ( bufferFrameCount != 0 ) { + // Push capture buffer into inputBuffer + if ( captureBuffer.pushBuffer( ( char* ) streamBuffer, + bufferFrameCount * stream_.nDeviceChannels[INPUT], + stream_.deviceFormat[INPUT] ) ) + { + // Release capture buffer + hr = captureClient->ReleaseBuffer( bufferFrameCount ); + if ( FAILED( hr ) ) { + errorText_ = "RtApiWasapi::wasapiThread: Unable to release capture buffer."; + goto Exit; + } + } + else + { + // Inform WASAPI that capture was unsuccessful + hr = 
captureClient->ReleaseBuffer( 0 ); + if ( FAILED( hr ) ) { + errorText_ = "RtApiWasapi::wasapiThread: Unable to release capture buffer."; + goto Exit; + } + } + } + else + { + // Inform WASAPI that capture was unsuccessful + hr = captureClient->ReleaseBuffer( 0 ); + if ( FAILED( hr ) ) { + errorText_ = "RtApiWasapi::wasapiThread: Unable to release capture buffer."; + goto Exit; + } + } + } + + // Stream Render + // ============= + // 1. Get render buffer from stream + // 2. Pull next buffer from outputBuffer + // 3. If 2. was successful: Fill render buffer with next buffer + // Release render buffer + + if ( renderAudioClient ) { + // if the callback output buffer was not pushed to renderBuffer, wait for next render event + if ( callbackPulled && !callbackPushed ) { + WaitForSingleObject( renderEvent, INFINITE ); + } + + // Get render buffer from stream + hr = renderAudioClient->GetBufferSize( &bufferFrameCount ); + if ( FAILED( hr ) ) { + errorText_ = "RtApiWasapi::wasapiThread: Unable to retrieve render buffer size."; + goto Exit; + } + + hr = renderAudioClient->GetCurrentPadding( &numFramesPadding ); + if ( FAILED( hr ) ) { + errorText_ = "RtApiWasapi::wasapiThread: Unable to retrieve render buffer padding."; + goto Exit; + } + + bufferFrameCount -= numFramesPadding; + + if ( bufferFrameCount != 0 ) { + hr = renderClient->GetBuffer( bufferFrameCount, &streamBuffer ); + if ( FAILED( hr ) ) { + errorText_ = "RtApiWasapi::wasapiThread: Unable to retrieve render buffer."; + goto Exit; + } + + // Pull next buffer from outputBuffer + // Fill render buffer with next buffer + if ( renderBuffer.pullBuffer( ( char* ) streamBuffer, + bufferFrameCount * stream_.nDeviceChannels[OUTPUT], + stream_.deviceFormat[OUTPUT] ) ) + { + // Release render buffer + hr = renderClient->ReleaseBuffer( bufferFrameCount, 0 ); + if ( FAILED( hr ) ) { + errorText_ = "RtApiWasapi::wasapiThread: Unable to release render buffer."; + goto Exit; + } + } + else + { + // Inform WASAPI that render was 
unsuccessful + hr = renderClient->ReleaseBuffer( 0, 0 ); + if ( FAILED( hr ) ) { + errorText_ = "RtApiWasapi::wasapiThread: Unable to release render buffer."; + goto Exit; + } + } + } + else + { + // Inform WASAPI that render was unsuccessful + hr = renderClient->ReleaseBuffer( 0, 0 ); + if ( FAILED( hr ) ) { + errorText_ = "RtApiWasapi::wasapiThread: Unable to release render buffer."; + goto Exit; + } + } + } + + // if the callback buffer was pushed renderBuffer reset callbackPulled flag + if ( callbackPushed ) { + callbackPulled = false; + } + + // tick stream time + RtApi::tickStreamTime(); + } + +Exit: + // clean up + CoTaskMemFree( captureFormat ); + CoTaskMemFree( renderFormat ); + + free ( convBuffer ); + + CoUninitialize(); + + // update stream state + stream_.state = STREAM_STOPPED; + + if ( errorText_.empty() ) + return; + else + error( errorType ); +} + +//******************** End of __WINDOWS_WASAPI__ *********************// +#endif + + +#if defined(__WINDOWS_DS__) // Windows DirectSound API + +// Modified by Robin Davies, October 2005 +// - Improvements to DirectX pointer chasing. +// - Bug fix for non-power-of-two Asio granularity used by Edirol PCR-A30. +// - Auto-call CoInitialize for DSOUND and ASIO platforms. +// Various revisions for RtAudio 4.0 by Gary Scavone, April 2007 +// Changed device query structure for RtAudio 4.0.7, January 2010 + +#include <dsound.h> +#include <assert.h> +#include <algorithm> + +#if defined(__MINGW32__) + // missing from latest mingw winapi +#define WAVE_FORMAT_96M08 0x00010000 /* 96 kHz, Mono, 8-bit */ +#define WAVE_FORMAT_96S08 0x00020000 /* 96 kHz, Stereo, 8-bit */ +#define WAVE_FORMAT_96M16 0x00040000 /* 96 kHz, Mono, 16-bit */ +#define WAVE_FORMAT_96S16 0x00080000 /* 96 kHz, Stereo, 16-bit */ +#endif + +#define MINIMUM_DEVICE_BUFFER_SIZE 32768 + +#ifdef _MSC_VER // if Microsoft Visual C++ +#pragma comment( lib, "winmm.lib" ) // then, auto-link winmm.lib. Otherwise, it has to be added manually. 
+#endif + +static inline DWORD dsPointerBetween( DWORD pointer, DWORD laterPointer, DWORD earlierPointer, DWORD bufferSize ) +{ + if ( pointer > bufferSize ) pointer -= bufferSize; + if ( laterPointer < earlierPointer ) laterPointer += bufferSize; + if ( pointer < earlierPointer ) pointer += bufferSize; + return pointer >= earlierPointer && pointer < laterPointer; +} + +// A structure to hold various information related to the DirectSound +// API implementation. +struct DsHandle { + unsigned int drainCounter; // Tracks callback counts when draining + bool internalDrain; // Indicates if stop is initiated from callback or not. + void *id[2]; + void *buffer[2]; + bool xrun[2]; + UINT bufferPointer[2]; + DWORD dsBufferSize[2]; + DWORD dsPointerLeadTime[2]; // the number of bytes ahead of the safe pointer to lead by. + HANDLE condition; + + DsHandle() + :drainCounter(0), internalDrain(false) { id[0] = 0; id[1] = 0; buffer[0] = 0; buffer[1] = 0; xrun[0] = false; xrun[1] = false; bufferPointer[0] = 0; bufferPointer[1] = 0; } +}; + +// Declarations for utility functions, callbacks, and structures +// specific to the DirectSound implementation. +static BOOL CALLBACK deviceQueryCallback( LPGUID lpguid, + LPCTSTR description, + LPCTSTR module, + LPVOID lpContext ); + +static const char* getErrorString( int code ); + +static unsigned __stdcall callbackHandler( void *ptr ); + +struct DsDevice { + LPGUID id[2]; + bool validId[2]; + bool found; + std::string name; + + DsDevice() + : found(false) { validId[0] = false; validId[1] = false; } +}; + +struct DsProbeData { + bool isInput; + std::vector<struct DsDevice>* dsDevices; +}; + +RtApiDs :: RtApiDs() +{ + // Dsound will run both-threaded. If CoInitialize fails, then just + // accept whatever the mainline chose for a threading model. + coInitialized_ = false; + HRESULT hr = CoInitialize( NULL ); + if ( !FAILED( hr ) ) coInitialized_ = true; +} + +RtApiDs :: ~RtApiDs() +{ + if ( coInitialized_ ) CoUninitialize(); // balanced call. 
+ if ( stream_.state != STREAM_CLOSED ) closeStream(); +} + +// The DirectSound default output is always the first device. +unsigned int RtApiDs :: getDefaultOutputDevice( void ) +{ + return 0; +} + +// The DirectSound default input is always the first input device, +// which is the first capture device enumerated. +unsigned int RtApiDs :: getDefaultInputDevice( void ) +{ + return 0; +} + +unsigned int RtApiDs :: getDeviceCount( void ) +{ + // Set query flag for previously found devices to false, so that we + // can check for any devices that have disappeared. + for ( unsigned int i=0; i<dsDevices.size(); i++ ) + dsDevices[i].found = false; + + // Query DirectSound devices. + struct DsProbeData probeInfo; + probeInfo.isInput = false; + probeInfo.dsDevices = &dsDevices; + HRESULT result = DirectSoundEnumerate( (LPDSENUMCALLBACK) deviceQueryCallback, &probeInfo ); + if ( FAILED( result ) ) { + errorStream_ << "RtApiDs::getDeviceCount: error (" << getErrorString( result ) << ") enumerating output devices!"; + errorText_ = errorStream_.str(); + error( RtAudioError::WARNING ); + } + + // Query DirectSoundCapture devices. + probeInfo.isInput = true; + result = DirectSoundCaptureEnumerate( (LPDSENUMCALLBACK) deviceQueryCallback, &probeInfo ); + if ( FAILED( result ) ) { + errorStream_ << "RtApiDs::getDeviceCount: error (" << getErrorString( result ) << ") enumerating input devices!"; + errorText_ = errorStream_.str(); + error( RtAudioError::WARNING ); + } + + // Clean out any devices that may have disappeared (code update submitted by Eli Zehngut). 
+ for ( unsigned int i=0; i<dsDevices.size(); ) { + if ( dsDevices[i].found == false ) dsDevices.erase( dsDevices.begin() + i ); + else i++; + } + + return static_cast<unsigned int>(dsDevices.size()); +} + +RtAudio::DeviceInfo RtApiDs :: getDeviceInfo( unsigned int device ) +{ + RtAudio::DeviceInfo info; + info.probed = false; + + if ( dsDevices.size() == 0 ) { + // Force a query of all devices + getDeviceCount(); + if ( dsDevices.size() == 0 ) { + errorText_ = "RtApiDs::getDeviceInfo: no devices found!"; + error( RtAudioError::INVALID_USE ); + return info; + } + } + + if ( device >= dsDevices.size() ) { + errorText_ = "RtApiDs::getDeviceInfo: device ID is invalid!"; + error( RtAudioError::INVALID_USE ); + return info; + } + + HRESULT result; + if ( dsDevices[ device ].validId[0] == false ) goto probeInput; + + LPDIRECTSOUND output; + DSCAPS outCaps; + result = DirectSoundCreate( dsDevices[ device ].id[0], &output, NULL ); + if ( FAILED( result ) ) { + errorStream_ << "RtApiDs::getDeviceInfo: error (" << getErrorString( result ) << ") opening output device (" << dsDevices[ device ].name << ")!"; + errorText_ = errorStream_.str(); + error( RtAudioError::WARNING ); + goto probeInput; + } + + outCaps.dwSize = sizeof( outCaps ); + result = output->GetCaps( &outCaps ); + if ( FAILED( result ) ) { + output->Release(); + errorStream_ << "RtApiDs::getDeviceInfo: error (" << getErrorString( result ) << ") getting capabilities!"; + errorText_ = errorStream_.str(); + error( RtAudioError::WARNING ); + goto probeInput; + } + + // Get output channel information. + info.outputChannels = ( outCaps.dwFlags & DSCAPS_PRIMARYSTEREO ) ? 2 : 1; + + // Get sample rate information. 
+ info.sampleRates.clear(); + for ( unsigned int k=0; k<MAX_SAMPLE_RATES; k++ ) { + if ( SAMPLE_RATES[k] >= (unsigned int) outCaps.dwMinSecondarySampleRate && + SAMPLE_RATES[k] <= (unsigned int) outCaps.dwMaxSecondarySampleRate ) { + info.sampleRates.push_back( SAMPLE_RATES[k] ); + + if ( !info.preferredSampleRate || ( SAMPLE_RATES[k] <= 48000 && SAMPLE_RATES[k] > info.preferredSampleRate ) ) + info.preferredSampleRate = SAMPLE_RATES[k]; + } + } + + // Get format information. + if ( outCaps.dwFlags & DSCAPS_PRIMARY16BIT ) info.nativeFormats |= RTAUDIO_SINT16; + if ( outCaps.dwFlags & DSCAPS_PRIMARY8BIT ) info.nativeFormats |= RTAUDIO_SINT8; + + output->Release(); + + if ( getDefaultOutputDevice() == device ) + info.isDefaultOutput = true; + + if ( dsDevices[ device ].validId[1] == false ) { + info.name = dsDevices[ device ].name; + info.probed = true; + return info; + } + + probeInput: + + LPDIRECTSOUNDCAPTURE input; + result = DirectSoundCaptureCreate( dsDevices[ device ].id[1], &input, NULL ); + if ( FAILED( result ) ) { + errorStream_ << "RtApiDs::getDeviceInfo: error (" << getErrorString( result ) << ") opening input device (" << dsDevices[ device ].name << ")!"; + errorText_ = errorStream_.str(); + error( RtAudioError::WARNING ); + return info; + } + + DSCCAPS inCaps; + inCaps.dwSize = sizeof( inCaps ); + result = input->GetCaps( &inCaps ); + if ( FAILED( result ) ) { + input->Release(); + errorStream_ << "RtApiDs::getDeviceInfo: error (" << getErrorString( result ) << ") getting object capabilities (" << dsDevices[ device ].name << ")!"; + errorText_ = errorStream_.str(); + error( RtAudioError::WARNING ); + return info; + } + + // Get input channel information. + info.inputChannels = inCaps.dwChannels; + + // Get sample rate and format information. 
+ std::vector<unsigned int> rates; + if ( inCaps.dwChannels >= 2 ) { + if ( inCaps.dwFormats & WAVE_FORMAT_1S16 ) info.nativeFormats |= RTAUDIO_SINT16; + if ( inCaps.dwFormats & WAVE_FORMAT_2S16 ) info.nativeFormats |= RTAUDIO_SINT16; + if ( inCaps.dwFormats & WAVE_FORMAT_4S16 ) info.nativeFormats |= RTAUDIO_SINT16; + if ( inCaps.dwFormats & WAVE_FORMAT_96S16 ) info.nativeFormats |= RTAUDIO_SINT16; + if ( inCaps.dwFormats & WAVE_FORMAT_1S08 ) info.nativeFormats |= RTAUDIO_SINT8; + if ( inCaps.dwFormats & WAVE_FORMAT_2S08 ) info.nativeFormats |= RTAUDIO_SINT8; + if ( inCaps.dwFormats & WAVE_FORMAT_4S08 ) info.nativeFormats |= RTAUDIO_SINT8; + if ( inCaps.dwFormats & WAVE_FORMAT_96S08 ) info.nativeFormats |= RTAUDIO_SINT8; + + if ( info.nativeFormats & RTAUDIO_SINT16 ) { + if ( inCaps.dwFormats & WAVE_FORMAT_1S16 ) rates.push_back( 11025 ); + if ( inCaps.dwFormats & WAVE_FORMAT_2S16 ) rates.push_back( 22050 ); + if ( inCaps.dwFormats & WAVE_FORMAT_4S16 ) rates.push_back( 44100 ); + if ( inCaps.dwFormats & WAVE_FORMAT_96S16 ) rates.push_back( 96000 ); + } + else if ( info.nativeFormats & RTAUDIO_SINT8 ) { + if ( inCaps.dwFormats & WAVE_FORMAT_1S08 ) rates.push_back( 11025 ); + if ( inCaps.dwFormats & WAVE_FORMAT_2S08 ) rates.push_back( 22050 ); + if ( inCaps.dwFormats & WAVE_FORMAT_4S08 ) rates.push_back( 44100 ); + if ( inCaps.dwFormats & WAVE_FORMAT_96S08 ) rates.push_back( 96000 ); + } + } + else if ( inCaps.dwChannels == 1 ) { + if ( inCaps.dwFormats & WAVE_FORMAT_1M16 ) info.nativeFormats |= RTAUDIO_SINT16; + if ( inCaps.dwFormats & WAVE_FORMAT_2M16 ) info.nativeFormats |= RTAUDIO_SINT16; + if ( inCaps.dwFormats & WAVE_FORMAT_4M16 ) info.nativeFormats |= RTAUDIO_SINT16; + if ( inCaps.dwFormats & WAVE_FORMAT_96M16 ) info.nativeFormats |= RTAUDIO_SINT16; + if ( inCaps.dwFormats & WAVE_FORMAT_1M08 ) info.nativeFormats |= RTAUDIO_SINT8; + if ( inCaps.dwFormats & WAVE_FORMAT_2M08 ) info.nativeFormats |= RTAUDIO_SINT8; + if ( inCaps.dwFormats & WAVE_FORMAT_4M08 ) 
info.nativeFormats |= RTAUDIO_SINT8; + if ( inCaps.dwFormats & WAVE_FORMAT_96M08 ) info.nativeFormats |= RTAUDIO_SINT8; + + if ( info.nativeFormats & RTAUDIO_SINT16 ) { + if ( inCaps.dwFormats & WAVE_FORMAT_1M16 ) rates.push_back( 11025 ); + if ( inCaps.dwFormats & WAVE_FORMAT_2M16 ) rates.push_back( 22050 ); + if ( inCaps.dwFormats & WAVE_FORMAT_4M16 ) rates.push_back( 44100 ); + if ( inCaps.dwFormats & WAVE_FORMAT_96M16 ) rates.push_back( 96000 ); + } + else if ( info.nativeFormats & RTAUDIO_SINT8 ) { + if ( inCaps.dwFormats & WAVE_FORMAT_1M08 ) rates.push_back( 11025 ); + if ( inCaps.dwFormats & WAVE_FORMAT_2M08 ) rates.push_back( 22050 ); + if ( inCaps.dwFormats & WAVE_FORMAT_4M08 ) rates.push_back( 44100 ); + if ( inCaps.dwFormats & WAVE_FORMAT_96M08 ) rates.push_back( 96000 ); + } + } + else info.inputChannels = 0; // technically, this would be an error + + input->Release(); + + if ( info.inputChannels == 0 ) return info; + + // Copy the supported rates to the info structure but avoid duplication. + bool found; + for ( unsigned int i=0; i<rates.size(); i++ ) { + found = false; + for ( unsigned int j=0; j<info.sampleRates.size(); j++ ) { + if ( rates[i] == info.sampleRates[j] ) { + found = true; + break; + } + } + if ( found == false ) info.sampleRates.push_back( rates[i] ); + } + std::sort( info.sampleRates.begin(), info.sampleRates.end() ); + + // If device opens for both playback and capture, we determine the channels. + if ( info.outputChannels > 0 && info.inputChannels > 0 ) + info.duplexChannels = (info.outputChannels > info.inputChannels) ? info.inputChannels : info.outputChannels; + + if ( device == 0 ) info.isDefaultInput = true; + + // Copy name and return. 
+ info.name = dsDevices[ device ].name; + info.probed = true; + return info; +} + +bool RtApiDs :: probeDeviceOpen( unsigned int device, StreamMode mode, unsigned int channels, + unsigned int firstChannel, unsigned int sampleRate, + RtAudioFormat format, unsigned int *bufferSize, + RtAudio::StreamOptions *options ) +{ + if ( channels + firstChannel > 2 ) { + errorText_ = "RtApiDs::probeDeviceOpen: DirectSound does not support more than 2 channels per device."; + return FAILURE; + } + + size_t nDevices = dsDevices.size(); + if ( nDevices == 0 ) { + // This should not happen because a check is made before this function is called. + errorText_ = "RtApiDs::probeDeviceOpen: no devices found!"; + return FAILURE; + } + + if ( device >= nDevices ) { + // This should not happen because a check is made before this function is called. + errorText_ = "RtApiDs::probeDeviceOpen: device ID is invalid!"; + return FAILURE; + } + + if ( mode == OUTPUT ) { + if ( dsDevices[ device ].validId[0] == false ) { + errorStream_ << "RtApiDs::probeDeviceOpen: device (" << device << ") does not support output!"; + errorText_ = errorStream_.str(); + return FAILURE; + } + } + else { // mode == INPUT + if ( dsDevices[ device ].validId[1] == false ) { + errorStream_ << "RtApiDs::probeDeviceOpen: device (" << device << ") does not support input!"; + errorText_ = errorStream_.str(); + return FAILURE; + } + } + + // According to a note in PortAudio, using GetDesktopWindow() + // instead of GetForegroundWindow() is supposed to avoid problems + // that occur when the application's window is not the foreground + // window. Also, if the application window closes before the + // DirectSound buffer, DirectSound can crash. In the past, I had + // problems when using GetDesktopWindow() but it seems fine now + // (January 2010). I'll leave it commented here. 
+ // HWND hWnd = GetForegroundWindow(); + HWND hWnd = GetDesktopWindow(); + + // Check the numberOfBuffers parameter and limit the lowest value to + // two. This is a judgement call and a value of two is probably too + // low for capture, but it should work for playback. + int nBuffers = 0; + if ( options ) nBuffers = options->numberOfBuffers; + if ( options && options->flags & RTAUDIO_MINIMIZE_LATENCY ) nBuffers = 2; + if ( nBuffers < 2 ) nBuffers = 3; + + // Check the lower range of the user-specified buffer size and set + // (arbitrarily) to a lower bound of 32. + if ( *bufferSize < 32 ) *bufferSize = 32; + + // Create the wave format structure. The data format setting will + // be determined later. + WAVEFORMATEX waveFormat; + ZeroMemory( &waveFormat, sizeof(WAVEFORMATEX) ); + waveFormat.wFormatTag = WAVE_FORMAT_PCM; + waveFormat.nChannels = channels + firstChannel; + waveFormat.nSamplesPerSec = (unsigned long) sampleRate; + + // Determine the device buffer size. By default, we'll use the value + // defined above (32K), but we will grow it to make allowances for + // very large software buffer sizes. 
+ DWORD dsBufferSize = MINIMUM_DEVICE_BUFFER_SIZE; + DWORD dsPointerLeadTime = 0; + + void *ohandle = 0, *bhandle = 0; + HRESULT result; + if ( mode == OUTPUT ) { + + LPDIRECTSOUND output; + result = DirectSoundCreate( dsDevices[ device ].id[0], &output, NULL ); + if ( FAILED( result ) ) { + errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") opening output device (" << dsDevices[ device ].name << ")!"; + errorText_ = errorStream_.str(); + return FAILURE; + } + + DSCAPS outCaps; + outCaps.dwSize = sizeof( outCaps ); + result = output->GetCaps( &outCaps ); + if ( FAILED( result ) ) { + output->Release(); + errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") getting capabilities (" << dsDevices[ device ].name << ")!"; + errorText_ = errorStream_.str(); + return FAILURE; + } + + // Check channel information. + if ( channels + firstChannel == 2 && !( outCaps.dwFlags & DSCAPS_PRIMARYSTEREO ) ) { + errorStream_ << "RtApiDs::getDeviceInfo: the output device (" << dsDevices[ device ].name << ") does not support stereo playback."; + errorText_ = errorStream_.str(); + return FAILURE; + } + + // Check format information. Use 16-bit format unless not + // supported or user requests 8-bit. + if ( outCaps.dwFlags & DSCAPS_PRIMARY16BIT && + !( format == RTAUDIO_SINT8 && outCaps.dwFlags & DSCAPS_PRIMARY8BIT ) ) { + waveFormat.wBitsPerSample = 16; + stream_.deviceFormat[mode] = RTAUDIO_SINT16; + } + else { + waveFormat.wBitsPerSample = 8; + stream_.deviceFormat[mode] = RTAUDIO_SINT8; + } + stream_.userFormat = format; + + // Update wave format structure and buffer information. 
+ waveFormat.nBlockAlign = waveFormat.nChannels * waveFormat.wBitsPerSample / 8; + waveFormat.nAvgBytesPerSec = waveFormat.nSamplesPerSec * waveFormat.nBlockAlign; + dsPointerLeadTime = nBuffers * (*bufferSize) * (waveFormat.wBitsPerSample / 8) * channels; + + // If the user wants an even bigger buffer, increase the device buffer size accordingly. + while ( dsPointerLeadTime * 2U > dsBufferSize ) + dsBufferSize *= 2; + + // Set cooperative level to DSSCL_EXCLUSIVE ... sound stops when window focus changes. + // result = output->SetCooperativeLevel( hWnd, DSSCL_EXCLUSIVE ); + // Set cooperative level to DSSCL_PRIORITY ... sound remains when window focus changes. + result = output->SetCooperativeLevel( hWnd, DSSCL_PRIORITY ); + if ( FAILED( result ) ) { + output->Release(); + errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") setting cooperative level (" << dsDevices[ device ].name << ")!"; + errorText_ = errorStream_.str(); + return FAILURE; + } + + // Even though we will write to the secondary buffer, we need to + // access the primary buffer to set the correct output format + // (since the default is 8-bit, 22 kHz!). Setup the DS primary + // buffer description. + DSBUFFERDESC bufferDescription; + ZeroMemory( &bufferDescription, sizeof( DSBUFFERDESC ) ); + bufferDescription.dwSize = sizeof( DSBUFFERDESC ); + bufferDescription.dwFlags = DSBCAPS_PRIMARYBUFFER; + + // Obtain the primary buffer + LPDIRECTSOUNDBUFFER buffer; + result = output->CreateSoundBuffer( &bufferDescription, &buffer, NULL ); + if ( FAILED( result ) ) { + output->Release(); + errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") accessing primary buffer (" << dsDevices[ device ].name << ")!"; + errorText_ = errorStream_.str(); + return FAILURE; + } + + // Set the primary DS buffer sound format. 
+ result = buffer->SetFormat( &waveFormat ); + if ( FAILED( result ) ) { + output->Release(); + errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") setting primary buffer format (" << dsDevices[ device ].name << ")!"; + errorText_ = errorStream_.str(); + return FAILURE; + } + + // Setup the secondary DS buffer description. + ZeroMemory( &bufferDescription, sizeof( DSBUFFERDESC ) ); + bufferDescription.dwSize = sizeof( DSBUFFERDESC ); + bufferDescription.dwFlags = ( DSBCAPS_STICKYFOCUS | + DSBCAPS_GLOBALFOCUS | + DSBCAPS_GETCURRENTPOSITION2 | + DSBCAPS_LOCHARDWARE ); // Force hardware mixing + bufferDescription.dwBufferBytes = dsBufferSize; + bufferDescription.lpwfxFormat = &waveFormat; + + // Try to create the secondary DS buffer. If that doesn't work, + // try to use software mixing. Otherwise, there's a problem. + result = output->CreateSoundBuffer( &bufferDescription, &buffer, NULL ); + if ( FAILED( result ) ) { + bufferDescription.dwFlags = ( DSBCAPS_STICKYFOCUS | + DSBCAPS_GLOBALFOCUS | + DSBCAPS_GETCURRENTPOSITION2 | + DSBCAPS_LOCSOFTWARE ); // Force software mixing + result = output->CreateSoundBuffer( &bufferDescription, &buffer, NULL ); + if ( FAILED( result ) ) { + output->Release(); + errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") creating secondary buffer (" << dsDevices[ device ].name << ")!"; + errorText_ = errorStream_.str(); + return FAILURE; + } + } + + // Get the buffer size ... might be different from what we specified. 
+ DSBCAPS dsbcaps; + dsbcaps.dwSize = sizeof( DSBCAPS ); + result = buffer->GetCaps( &dsbcaps ); + if ( FAILED( result ) ) { + output->Release(); + buffer->Release(); + errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") getting buffer settings (" << dsDevices[ device ].name << ")!"; + errorText_ = errorStream_.str(); + return FAILURE; + } + + dsBufferSize = dsbcaps.dwBufferBytes; + + // Lock the DS buffer + LPVOID audioPtr; + DWORD dataLen; + result = buffer->Lock( 0, dsBufferSize, &audioPtr, &dataLen, NULL, NULL, 0 ); + if ( FAILED( result ) ) { + output->Release(); + buffer->Release(); + errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") locking buffer (" << dsDevices[ device ].name << ")!"; + errorText_ = errorStream_.str(); + return FAILURE; + } + + // Zero the DS buffer + ZeroMemory( audioPtr, dataLen ); + + // Unlock the DS buffer + result = buffer->Unlock( audioPtr, dataLen, NULL, 0 ); + if ( FAILED( result ) ) { + output->Release(); + buffer->Release(); + errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") unlocking buffer (" << dsDevices[ device ].name << ")!"; + errorText_ = errorStream_.str(); + return FAILURE; + } + + ohandle = (void *) output; + bhandle = (void *) buffer; + } + + if ( mode == INPUT ) { + + LPDIRECTSOUNDCAPTURE input; + result = DirectSoundCaptureCreate( dsDevices[ device ].id[1], &input, NULL ); + if ( FAILED( result ) ) { + errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") opening input device (" << dsDevices[ device ].name << ")!"; + errorText_ = errorStream_.str(); + return FAILURE; + } + + DSCCAPS inCaps; + inCaps.dwSize = sizeof( inCaps ); + result = input->GetCaps( &inCaps ); + if ( FAILED( result ) ) { + input->Release(); + errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") getting input capabilities (" << dsDevices[ device ].name << ")!"; + errorText_ = 
errorStream_.str(); + return FAILURE; + } + + // Check channel information. + if ( inCaps.dwChannels < channels + firstChannel ) { + errorText_ = "RtApiDs::getDeviceInfo: the input device does not support requested input channels."; + return FAILURE; + } + + // Check format information. Use 16-bit format unless user + // requests 8-bit. + DWORD deviceFormats; + if ( channels + firstChannel == 2 ) { + deviceFormats = WAVE_FORMAT_1S08 | WAVE_FORMAT_2S08 | WAVE_FORMAT_4S08 | WAVE_FORMAT_96S08; + if ( format == RTAUDIO_SINT8 && inCaps.dwFormats & deviceFormats ) { + waveFormat.wBitsPerSample = 8; + stream_.deviceFormat[mode] = RTAUDIO_SINT8; + } + else { // assume 16-bit is supported + waveFormat.wBitsPerSample = 16; + stream_.deviceFormat[mode] = RTAUDIO_SINT16; + } + } + else { // channel == 1 + deviceFormats = WAVE_FORMAT_1M08 | WAVE_FORMAT_2M08 | WAVE_FORMAT_4M08 | WAVE_FORMAT_96M08; + if ( format == RTAUDIO_SINT8 && inCaps.dwFormats & deviceFormats ) { + waveFormat.wBitsPerSample = 8; + stream_.deviceFormat[mode] = RTAUDIO_SINT8; + } + else { // assume 16-bit is supported + waveFormat.wBitsPerSample = 16; + stream_.deviceFormat[mode] = RTAUDIO_SINT16; + } + } + stream_.userFormat = format; + + // Update wave format structure and buffer information. + waveFormat.nBlockAlign = waveFormat.nChannels * waveFormat.wBitsPerSample / 8; + waveFormat.nAvgBytesPerSec = waveFormat.nSamplesPerSec * waveFormat.nBlockAlign; + dsPointerLeadTime = nBuffers * (*bufferSize) * (waveFormat.wBitsPerSample / 8) * channels; + + // If the user wants an even bigger buffer, increase the device buffer size accordingly. + while ( dsPointerLeadTime * 2U > dsBufferSize ) + dsBufferSize *= 2; + + // Setup the secondary DS buffer description. 
+ DSCBUFFERDESC bufferDescription; + ZeroMemory( &bufferDescription, sizeof( DSCBUFFERDESC ) ); + bufferDescription.dwSize = sizeof( DSCBUFFERDESC ); + bufferDescription.dwFlags = 0; + bufferDescription.dwReserved = 0; + bufferDescription.dwBufferBytes = dsBufferSize; + bufferDescription.lpwfxFormat = &waveFormat; + + // Create the capture buffer. + LPDIRECTSOUNDCAPTUREBUFFER buffer; + result = input->CreateCaptureBuffer( &bufferDescription, &buffer, NULL ); + if ( FAILED( result ) ) { + input->Release(); + errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") creating input buffer (" << dsDevices[ device ].name << ")!"; + errorText_ = errorStream_.str(); + return FAILURE; + } + + // Get the buffer size ... might be different from what we specified. + DSCBCAPS dscbcaps; + dscbcaps.dwSize = sizeof( DSCBCAPS ); + result = buffer->GetCaps( &dscbcaps ); + if ( FAILED( result ) ) { + input->Release(); + buffer->Release(); + errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") getting buffer settings (" << dsDevices[ device ].name << ")!"; + errorText_ = errorStream_.str(); + return FAILURE; + } + + dsBufferSize = dscbcaps.dwBufferBytes; + + // NOTE: We could have a problem here if this is a duplex stream + // and the play and capture hardware buffer sizes are different + // (I'm actually not sure if that is a problem or not). + // Currently, we are not verifying that. 
+ + // Lock the capture buffer + LPVOID audioPtr; + DWORD dataLen; + result = buffer->Lock( 0, dsBufferSize, &audioPtr, &dataLen, NULL, NULL, 0 ); + if ( FAILED( result ) ) { + input->Release(); + buffer->Release(); + errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") locking input buffer (" << dsDevices[ device ].name << ")!"; + errorText_ = errorStream_.str(); + return FAILURE; + } + + // Zero the buffer + ZeroMemory( audioPtr, dataLen ); + + // Unlock the buffer + result = buffer->Unlock( audioPtr, dataLen, NULL, 0 ); + if ( FAILED( result ) ) { + input->Release(); + buffer->Release(); + errorStream_ << "RtApiDs::probeDeviceOpen: error (" << getErrorString( result ) << ") unlocking input buffer (" << dsDevices[ device ].name << ")!"; + errorText_ = errorStream_.str(); + return FAILURE; + } + + ohandle = (void *) input; + bhandle = (void *) buffer; + } + + // Set various stream parameters + DsHandle *handle = 0; + stream_.nDeviceChannels[mode] = channels + firstChannel; + stream_.nUserChannels[mode] = channels; + stream_.bufferSize = *bufferSize; + stream_.channelOffset[mode] = firstChannel; + stream_.deviceInterleaved[mode] = true; + if ( options && options->flags & RTAUDIO_NONINTERLEAVED ) stream_.userInterleaved = false; + else stream_.userInterleaved = true; + + // Set flag for buffer conversion + stream_.doConvertBuffer[mode] = false; + if (stream_.nUserChannels[mode] != stream_.nDeviceChannels[mode]) + stream_.doConvertBuffer[mode] = true; + if (stream_.userFormat != stream_.deviceFormat[mode]) + stream_.doConvertBuffer[mode] = true; + if ( stream_.userInterleaved != stream_.deviceInterleaved[mode] && + stream_.nUserChannels[mode] > 1 ) + stream_.doConvertBuffer[mode] = true; + + // Allocate necessary internal buffers + long bufferBytes = stream_.nUserChannels[mode] * *bufferSize * formatBytes( stream_.userFormat ); + stream_.userBuffer[mode] = (char *) calloc( bufferBytes, 1 ); + if ( stream_.userBuffer[mode] == NULL ) { 
+ errorText_ = "RtApiDs::probeDeviceOpen: error allocating user buffer memory."; + goto error; + } + + if ( stream_.doConvertBuffer[mode] ) { + + bool makeBuffer = true; + bufferBytes = stream_.nDeviceChannels[mode] * formatBytes( stream_.deviceFormat[mode] ); + if ( mode == INPUT ) { + if ( stream_.mode == OUTPUT && stream_.deviceBuffer ) { + unsigned long bytesOut = stream_.nDeviceChannels[0] * formatBytes( stream_.deviceFormat[0] ); + if ( bufferBytes <= (long) bytesOut ) makeBuffer = false; + } + } + + if ( makeBuffer ) { + bufferBytes *= *bufferSize; + if ( stream_.deviceBuffer ) free( stream_.deviceBuffer ); + stream_.deviceBuffer = (char *) calloc( bufferBytes, 1 ); + if ( stream_.deviceBuffer == NULL ) { + errorText_ = "RtApiDs::probeDeviceOpen: error allocating device buffer memory."; + goto error; + } + } + } + + // Allocate our DsHandle structures for the stream. + if ( stream_.apiHandle == 0 ) { + try { + handle = new DsHandle; + } + catch ( std::bad_alloc& ) { + errorText_ = "RtApiDs::probeDeviceOpen: error allocating AsioHandle memory."; + goto error; + } + + // Create a manual-reset event. + handle->condition = CreateEvent( NULL, // no security + TRUE, // manual-reset + FALSE, // non-signaled initially + NULL ); // unnamed + stream_.apiHandle = (void *) handle; + } + else + handle = (DsHandle *) stream_.apiHandle; + handle->id[mode] = ohandle; + handle->buffer[mode] = bhandle; + handle->dsBufferSize[mode] = dsBufferSize; + handle->dsPointerLeadTime[mode] = dsPointerLeadTime; + + stream_.device[mode] = device; + stream_.state = STREAM_STOPPED; + if ( stream_.mode == OUTPUT && mode == INPUT ) + // We had already set up an output stream. + stream_.mode = DUPLEX; + else + stream_.mode = mode; + stream_.nBuffers = nBuffers; + stream_.sampleRate = sampleRate; + + // Setup the buffer conversion information structure. + if ( stream_.doConvertBuffer[mode] ) setConvertInfo( mode, firstChannel ); + + // Setup the callback thread. 
+ if ( stream_.callbackInfo.isRunning == false ) { + unsigned threadId; + stream_.callbackInfo.isRunning = true; + stream_.callbackInfo.object = (void *) this; + stream_.callbackInfo.thread = _beginthreadex( NULL, 0, &callbackHandler, + &stream_.callbackInfo, 0, &threadId ); + if ( stream_.callbackInfo.thread == 0 ) { + errorText_ = "RtApiDs::probeDeviceOpen: error creating callback thread!"; + goto error; + } + + // Boost DS thread priority + SetThreadPriority( (HANDLE) stream_.callbackInfo.thread, THREAD_PRIORITY_HIGHEST ); + } + return SUCCESS; + + error: + if ( handle ) { + if ( handle->buffer[0] ) { // the object pointer can be NULL and valid + LPDIRECTSOUND object = (LPDIRECTSOUND) handle->id[0]; + LPDIRECTSOUNDBUFFER buffer = (LPDIRECTSOUNDBUFFER) handle->buffer[0]; + if ( buffer ) buffer->Release(); + object->Release(); + } + if ( handle->buffer[1] ) { + LPDIRECTSOUNDCAPTURE object = (LPDIRECTSOUNDCAPTURE) handle->id[1]; + LPDIRECTSOUNDCAPTUREBUFFER buffer = (LPDIRECTSOUNDCAPTUREBUFFER) handle->buffer[1]; + if ( buffer ) buffer->Release(); + object->Release(); + } + CloseHandle( handle->condition ); + delete handle; + stream_.apiHandle = 0; + } + + for ( int i=0; i<2; i++ ) { + if ( stream_.userBuffer[i] ) { + free( stream_.userBuffer[i] ); + stream_.userBuffer[i] = 0; + } + } + + if ( stream_.deviceBuffer ) { + free( stream_.deviceBuffer ); + stream_.deviceBuffer = 0; + } + + stream_.state = STREAM_CLOSED; + return FAILURE; +} + +void RtApiDs :: closeStream() +{ + if ( stream_.state == STREAM_CLOSED ) { + errorText_ = "RtApiDs::closeStream(): no open stream to close!"; + error( RtAudioError::WARNING ); + return; + } + + // Stop the callback thread. 
+ stream_.callbackInfo.isRunning = false; + WaitForSingleObject( (HANDLE) stream_.callbackInfo.thread, INFINITE ); + CloseHandle( (HANDLE) stream_.callbackInfo.thread ); + + DsHandle *handle = (DsHandle *) stream_.apiHandle; + if ( handle ) { + if ( handle->buffer[0] ) { // the object pointer can be NULL and valid + LPDIRECTSOUND object = (LPDIRECTSOUND) handle->id[0]; + LPDIRECTSOUNDBUFFER buffer = (LPDIRECTSOUNDBUFFER) handle->buffer[0]; + if ( buffer ) { + buffer->Stop(); + buffer->Release(); + } + object->Release(); + } + if ( handle->buffer[1] ) { + LPDIRECTSOUNDCAPTURE object = (LPDIRECTSOUNDCAPTURE) handle->id[1]; + LPDIRECTSOUNDCAPTUREBUFFER buffer = (LPDIRECTSOUNDCAPTUREBUFFER) handle->buffer[1]; + if ( buffer ) { + buffer->Stop(); + buffer->Release(); + } + object->Release(); + } + CloseHandle( handle->condition ); + delete handle; + stream_.apiHandle = 0; + } + + for ( int i=0; i<2; i++ ) { + if ( stream_.userBuffer[i] ) { + free( stream_.userBuffer[i] ); + stream_.userBuffer[i] = 0; + } + } + + if ( stream_.deviceBuffer ) { + free( stream_.deviceBuffer ); + stream_.deviceBuffer = 0; + } + + stream_.mode = UNINITIALIZED; + stream_.state = STREAM_CLOSED; +} + +void RtApiDs :: startStream() +{ + verifyStream(); + if ( stream_.state == STREAM_RUNNING ) { + errorText_ = "RtApiDs::startStream(): the stream is already running!"; + error( RtAudioError::WARNING ); + return; + } + + DsHandle *handle = (DsHandle *) stream_.apiHandle; + + // Increase scheduler frequency on lesser windows (a side-effect of + // increasing timer accuracy). On greater windows (Win2K or later), + // this is already in effect. + timeBeginPeriod( 1 ); + + buffersRolling = false; + duplexPrerollBytes = 0; + + if ( stream_.mode == DUPLEX ) { + // 0.5 seconds of silence in DUPLEX mode while the devices spin up and synchronize. 
+ duplexPrerollBytes = (int) ( 0.5 * stream_.sampleRate * formatBytes( stream_.deviceFormat[1] ) * stream_.nDeviceChannels[1] ); + } + + HRESULT result = 0; + if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) { + + LPDIRECTSOUNDBUFFER buffer = (LPDIRECTSOUNDBUFFER) handle->buffer[0]; + result = buffer->Play( 0, 0, DSBPLAY_LOOPING ); + if ( FAILED( result ) ) { + errorStream_ << "RtApiDs::startStream: error (" << getErrorString( result ) << ") starting output buffer!"; + errorText_ = errorStream_.str(); + goto unlock; + } + } + + if ( stream_.mode == INPUT || stream_.mode == DUPLEX ) { + + LPDIRECTSOUNDCAPTUREBUFFER buffer = (LPDIRECTSOUNDCAPTUREBUFFER) handle->buffer[1]; + result = buffer->Start( DSCBSTART_LOOPING ); + if ( FAILED( result ) ) { + errorStream_ << "RtApiDs::startStream: error (" << getErrorString( result ) << ") starting input buffer!"; + errorText_ = errorStream_.str(); + goto unlock; + } + } + + handle->drainCounter = 0; + handle->internalDrain = false; + ResetEvent( handle->condition ); + stream_.state = STREAM_RUNNING; + + unlock: + if ( FAILED( result ) ) error( RtAudioError::SYSTEM_ERROR ); +} + +void RtApiDs :: stopStream() +{ + verifyStream(); + if ( stream_.state == STREAM_STOPPED ) { + errorText_ = "RtApiDs::stopStream(): the stream is already stopped!"; + error( RtAudioError::WARNING ); + return; + } + + HRESULT result = 0; + LPVOID audioPtr; + DWORD dataLen; + DsHandle *handle = (DsHandle *) stream_.apiHandle; + if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) { + if ( handle->drainCounter == 0 ) { + handle->drainCounter = 2; + WaitForSingleObject( handle->condition, INFINITE ); // block until signaled + } + + stream_.state = STREAM_STOPPED; + + MUTEX_LOCK( &stream_.mutex ); + + // Stop the buffer and clear memory + LPDIRECTSOUNDBUFFER buffer = (LPDIRECTSOUNDBUFFER) handle->buffer[0]; + result = buffer->Stop(); + if ( FAILED( result ) ) { + errorStream_ << "RtApiDs::stopStream: error (" << getErrorString( result ) << ") 
stopping output buffer!"; + errorText_ = errorStream_.str(); + goto unlock; + } + + // Lock the buffer and clear it so that if we start to play again, + // we won't have old data playing. + result = buffer->Lock( 0, handle->dsBufferSize[0], &audioPtr, &dataLen, NULL, NULL, 0 ); + if ( FAILED( result ) ) { + errorStream_ << "RtApiDs::stopStream: error (" << getErrorString( result ) << ") locking output buffer!"; + errorText_ = errorStream_.str(); + goto unlock; + } + + // Zero the DS buffer + ZeroMemory( audioPtr, dataLen ); + + // Unlock the DS buffer + result = buffer->Unlock( audioPtr, dataLen, NULL, 0 ); + if ( FAILED( result ) ) { + errorStream_ << "RtApiDs::stopStream: error (" << getErrorString( result ) << ") unlocking output buffer!"; + errorText_ = errorStream_.str(); + goto unlock; + } + + // If we start playing again, we must begin at beginning of buffer. + handle->bufferPointer[0] = 0; + } + + if ( stream_.mode == INPUT || stream_.mode == DUPLEX ) { + LPDIRECTSOUNDCAPTUREBUFFER buffer = (LPDIRECTSOUNDCAPTUREBUFFER) handle->buffer[1]; + audioPtr = NULL; + dataLen = 0; + + stream_.state = STREAM_STOPPED; + + if ( stream_.mode != DUPLEX ) + MUTEX_LOCK( &stream_.mutex ); + + result = buffer->Stop(); + if ( FAILED( result ) ) { + errorStream_ << "RtApiDs::stopStream: error (" << getErrorString( result ) << ") stopping input buffer!"; + errorText_ = errorStream_.str(); + goto unlock; + } + + // Lock the buffer and clear it so that if we start to play again, + // we won't have old data playing. 
+ result = buffer->Lock( 0, handle->dsBufferSize[1], &audioPtr, &dataLen, NULL, NULL, 0 ); + if ( FAILED( result ) ) { + errorStream_ << "RtApiDs::stopStream: error (" << getErrorString( result ) << ") locking input buffer!"; + errorText_ = errorStream_.str(); + goto unlock; + } + + // Zero the DS buffer + ZeroMemory( audioPtr, dataLen ); + + // Unlock the DS buffer + result = buffer->Unlock( audioPtr, dataLen, NULL, 0 ); + if ( FAILED( result ) ) { + errorStream_ << "RtApiDs::stopStream: error (" << getErrorString( result ) << ") unlocking input buffer!"; + errorText_ = errorStream_.str(); + goto unlock; + } + + // If we start recording again, we must begin at beginning of buffer. + handle->bufferPointer[1] = 0; + } + + unlock: + timeEndPeriod( 1 ); // revert to normal scheduler frequency on lesser windows. + MUTEX_UNLOCK( &stream_.mutex ); + + if ( FAILED( result ) ) error( RtAudioError::SYSTEM_ERROR ); +} + +void RtApiDs :: abortStream() +{ + verifyStream(); + if ( stream_.state == STREAM_STOPPED ) { + errorText_ = "RtApiDs::abortStream(): the stream is already stopped!"; + error( RtAudioError::WARNING ); + return; + } + + DsHandle *handle = (DsHandle *) stream_.apiHandle; + handle->drainCounter = 2; + + stopStream(); +} + +void RtApiDs :: callbackEvent() +{ + if ( stream_.state == STREAM_STOPPED || stream_.state == STREAM_STOPPING ) { + Sleep( 50 ); // sleep 50 milliseconds + return; + } + + if ( stream_.state == STREAM_CLOSED ) { + errorText_ = "RtApiDs::callbackEvent(): the stream is closed ... this shouldn't happen!"; + error( RtAudioError::WARNING ); + return; + } + + CallbackInfo *info = (CallbackInfo *) &stream_.callbackInfo; + DsHandle *handle = (DsHandle *) stream_.apiHandle; + + // Check if we were draining the stream and signal is finished. 
+ if ( handle->drainCounter > stream_.nBuffers + 2 ) { + + stream_.state = STREAM_STOPPING; + if ( handle->internalDrain == false ) + SetEvent( handle->condition ); + else + stopStream(); + return; + } + + // Invoke user callback to get fresh output data UNLESS we are + // draining stream. + if ( handle->drainCounter == 0 ) { + RtAudioCallback callback = (RtAudioCallback) info->callback; + double streamTime = getStreamTime(); + RtAudioStreamStatus status = 0; + if ( stream_.mode != INPUT && handle->xrun[0] == true ) { + status |= RTAUDIO_OUTPUT_UNDERFLOW; + handle->xrun[0] = false; + } + if ( stream_.mode != OUTPUT && handle->xrun[1] == true ) { + status |= RTAUDIO_INPUT_OVERFLOW; + handle->xrun[1] = false; + } + int cbReturnValue = callback( stream_.userBuffer[0], stream_.userBuffer[1], + stream_.bufferSize, streamTime, status, info->userData ); + if ( cbReturnValue == 2 ) { + stream_.state = STREAM_STOPPING; + handle->drainCounter = 2; + abortStream(); + return; + } + else if ( cbReturnValue == 1 ) { + handle->drainCounter = 1; + handle->internalDrain = true; + } + } + + HRESULT result; + DWORD currentWritePointer, safeWritePointer; + DWORD currentReadPointer, safeReadPointer; + UINT nextWritePointer; + + LPVOID buffer1 = NULL; + LPVOID buffer2 = NULL; + DWORD bufferSize1 = 0; + DWORD bufferSize2 = 0; + + char *buffer; + long bufferBytes; + + MUTEX_LOCK( &stream_.mutex ); + if ( stream_.state == STREAM_STOPPED ) { + MUTEX_UNLOCK( &stream_.mutex ); + return; + } + + if ( buffersRolling == false ) { + if ( stream_.mode == DUPLEX ) { + //assert( handle->dsBufferSize[0] == handle->dsBufferSize[1] ); + + // It takes a while for the devices to get rolling. As a result, + // there's no guarantee that the capture and write device pointers + // will move in lockstep. Wait here for both devices to start + // rolling, and then set our buffer pointers accordingly. + // e.g. Crystal Drivers: the capture buffer starts up 5700 to 9600 + // bytes later than the write buffer. 
+ + // Stub: a serious risk of having a pre-emptive scheduling round + // take place between the two GetCurrentPosition calls... but I'm + // really not sure how to solve the problem. Temporarily boost to + // Realtime priority, maybe; but I'm not sure what priority the + // DirectSound service threads run at. We *should* be roughly + // within a ms or so of correct. + + LPDIRECTSOUNDBUFFER dsWriteBuffer = (LPDIRECTSOUNDBUFFER) handle->buffer[0]; + LPDIRECTSOUNDCAPTUREBUFFER dsCaptureBuffer = (LPDIRECTSOUNDCAPTUREBUFFER) handle->buffer[1]; + + DWORD startSafeWritePointer, startSafeReadPointer; + + result = dsWriteBuffer->GetCurrentPosition( NULL, &startSafeWritePointer ); + if ( FAILED( result ) ) { + errorStream_ << "RtApiDs::callbackEvent: error (" << getErrorString( result ) << ") getting current write position!"; + errorText_ = errorStream_.str(); + MUTEX_UNLOCK( &stream_.mutex ); + error( RtAudioError::SYSTEM_ERROR ); + return; + } + result = dsCaptureBuffer->GetCurrentPosition( NULL, &startSafeReadPointer ); + if ( FAILED( result ) ) { + errorStream_ << "RtApiDs::callbackEvent: error (" << getErrorString( result ) << ") getting current read position!"; + errorText_ = errorStream_.str(); + MUTEX_UNLOCK( &stream_.mutex ); + error( RtAudioError::SYSTEM_ERROR ); + return; + } + while ( true ) { + result = dsWriteBuffer->GetCurrentPosition( NULL, &safeWritePointer ); + if ( FAILED( result ) ) { + errorStream_ << "RtApiDs::callbackEvent: error (" << getErrorString( result ) << ") getting current write position!"; + errorText_ = errorStream_.str(); + MUTEX_UNLOCK( &stream_.mutex ); + error( RtAudioError::SYSTEM_ERROR ); + return; + } + result = dsCaptureBuffer->GetCurrentPosition( NULL, &safeReadPointer ); + if ( FAILED( result ) ) { + errorStream_ << "RtApiDs::callbackEvent: error (" << getErrorString( result ) << ") getting current read position!"; + errorText_ = errorStream_.str(); + MUTEX_UNLOCK( &stream_.mutex ); + error( RtAudioError::SYSTEM_ERROR ); + return; 
+ } + if ( safeWritePointer != startSafeWritePointer && safeReadPointer != startSafeReadPointer ) break; + Sleep( 1 ); + } + + //assert( handle->dsBufferSize[0] == handle->dsBufferSize[1] ); + + handle->bufferPointer[0] = safeWritePointer + handle->dsPointerLeadTime[0]; + if ( handle->bufferPointer[0] >= handle->dsBufferSize[0] ) handle->bufferPointer[0] -= handle->dsBufferSize[0]; + handle->bufferPointer[1] = safeReadPointer; + } + else if ( stream_.mode == OUTPUT ) { + + // Set the proper nextWritePosition after initial startup. + LPDIRECTSOUNDBUFFER dsWriteBuffer = (LPDIRECTSOUNDBUFFER) handle->buffer[0]; + result = dsWriteBuffer->GetCurrentPosition( ¤tWritePointer, &safeWritePointer ); + if ( FAILED( result ) ) { + errorStream_ << "RtApiDs::callbackEvent: error (" << getErrorString( result ) << ") getting current write position!"; + errorText_ = errorStream_.str(); + MUTEX_UNLOCK( &stream_.mutex ); + error( RtAudioError::SYSTEM_ERROR ); + return; + } + handle->bufferPointer[0] = safeWritePointer + handle->dsPointerLeadTime[0]; + if ( handle->bufferPointer[0] >= handle->dsBufferSize[0] ) handle->bufferPointer[0] -= handle->dsBufferSize[0]; + } + + buffersRolling = true; + } + + if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) { + + LPDIRECTSOUNDBUFFER dsBuffer = (LPDIRECTSOUNDBUFFER) handle->buffer[0]; + + if ( handle->drainCounter > 1 ) { // write zeros to the output stream + bufferBytes = stream_.bufferSize * stream_.nUserChannels[0]; + bufferBytes *= formatBytes( stream_.userFormat ); + memset( stream_.userBuffer[0], 0, bufferBytes ); + } + + // Setup parameters and do buffer conversion if necessary. 
+ if ( stream_.doConvertBuffer[0] ) { + buffer = stream_.deviceBuffer; + convertBuffer( buffer, stream_.userBuffer[0], stream_.convertInfo[0] ); + bufferBytes = stream_.bufferSize * stream_.nDeviceChannels[0]; + bufferBytes *= formatBytes( stream_.deviceFormat[0] ); + } + else { + buffer = stream_.userBuffer[0]; + bufferBytes = stream_.bufferSize * stream_.nUserChannels[0]; + bufferBytes *= formatBytes( stream_.userFormat ); + } + + // No byte swapping necessary in DirectSound implementation. + + // Ahhh ... windoze. 16-bit data is signed but 8-bit data is + // unsigned. So, we need to convert our signed 8-bit data here to + // unsigned. + if ( stream_.deviceFormat[0] == RTAUDIO_SINT8 ) + for ( int i=0; i<bufferBytes; i++ ) buffer[i] = (unsigned char) ( buffer[i] + 128 ); + + DWORD dsBufferSize = handle->dsBufferSize[0]; + nextWritePointer = handle->bufferPointer[0]; + + DWORD endWrite, leadPointer; + while ( true ) { + // Find out where the read and "safe write" pointers are. + result = dsBuffer->GetCurrentPosition( ¤tWritePointer, &safeWritePointer ); + if ( FAILED( result ) ) { + errorStream_ << "RtApiDs::callbackEvent: error (" << getErrorString( result ) << ") getting current write position!"; + errorText_ = errorStream_.str(); + error( RtAudioError::SYSTEM_ERROR ); + return; + } + + // We will copy our output buffer into the region between + // safeWritePointer and leadPointer. If leadPointer is not + // beyond the next endWrite position, wait until it is. + leadPointer = safeWritePointer + handle->dsPointerLeadTime[0]; + //std::cout << "safeWritePointer = " << safeWritePointer << ", leadPointer = " << leadPointer << ", nextWritePointer = " << nextWritePointer << std::endl; + if ( leadPointer > dsBufferSize ) leadPointer -= dsBufferSize; + if ( leadPointer < nextWritePointer ) leadPointer += dsBufferSize; // unwrap offset + endWrite = nextWritePointer + bufferBytes; + + // Check whether the entire write region is behind the play pointer. 
+ if ( leadPointer >= endWrite ) break; + + // If we are here, then we must wait until the leadPointer advances + // beyond the end of our next write region. We use the + // Sleep() function to suspend operation until that happens. + double millis = ( endWrite - leadPointer ) * 1000.0; + millis /= ( formatBytes( stream_.deviceFormat[0]) * stream_.nDeviceChannels[0] * stream_.sampleRate); + if ( millis < 1.0 ) millis = 1.0; + Sleep( (DWORD) millis ); + } + + if ( dsPointerBetween( nextWritePointer, safeWritePointer, currentWritePointer, dsBufferSize ) + || dsPointerBetween( endWrite, safeWritePointer, currentWritePointer, dsBufferSize ) ) { + // We've strayed into the forbidden zone ... resync the read pointer. + handle->xrun[0] = true; + nextWritePointer = safeWritePointer + handle->dsPointerLeadTime[0] - bufferBytes; + if ( nextWritePointer >= dsBufferSize ) nextWritePointer -= dsBufferSize; + handle->bufferPointer[0] = nextWritePointer; + endWrite = nextWritePointer + bufferBytes; + } + + // Lock free space in the buffer + result = dsBuffer->Lock( nextWritePointer, bufferBytes, &buffer1, + &bufferSize1, &buffer2, &bufferSize2, 0 ); + if ( FAILED( result ) ) { + errorStream_ << "RtApiDs::callbackEvent: error (" << getErrorString( result ) << ") locking buffer during playback!"; + errorText_ = errorStream_.str(); + MUTEX_UNLOCK( &stream_.mutex ); + error( RtAudioError::SYSTEM_ERROR ); + return; + } + + // Copy our buffer into the DS buffer + CopyMemory( buffer1, buffer, bufferSize1 ); + if ( buffer2 != NULL ) CopyMemory( buffer2, buffer+bufferSize1, bufferSize2 ); + + // Update our buffer offset and unlock sound buffer + dsBuffer->Unlock( buffer1, bufferSize1, buffer2, bufferSize2 ); + if ( FAILED( result ) ) { + errorStream_ << "RtApiDs::callbackEvent: error (" << getErrorString( result ) << ") unlocking buffer during playback!"; + errorText_ = errorStream_.str(); + MUTEX_UNLOCK( &stream_.mutex ); + error( RtAudioError::SYSTEM_ERROR ); + return; + } + 
nextWritePointer = ( nextWritePointer + bufferSize1 + bufferSize2 ) % dsBufferSize; + handle->bufferPointer[0] = nextWritePointer; + } + + // Don't bother draining input + if ( handle->drainCounter ) { + handle->drainCounter++; + goto unlock; + } + + if ( stream_.mode == INPUT || stream_.mode == DUPLEX ) { + + // Setup parameters. + if ( stream_.doConvertBuffer[1] ) { + buffer = stream_.deviceBuffer; + bufferBytes = stream_.bufferSize * stream_.nDeviceChannels[1]; + bufferBytes *= formatBytes( stream_.deviceFormat[1] ); + } + else { + buffer = stream_.userBuffer[1]; + bufferBytes = stream_.bufferSize * stream_.nUserChannels[1]; + bufferBytes *= formatBytes( stream_.userFormat ); + } + + LPDIRECTSOUNDCAPTUREBUFFER dsBuffer = (LPDIRECTSOUNDCAPTUREBUFFER) handle->buffer[1]; + long nextReadPointer = handle->bufferPointer[1]; + DWORD dsBufferSize = handle->dsBufferSize[1]; + + // Find out where the write and "safe read" pointers are. + result = dsBuffer->GetCurrentPosition( ¤tReadPointer, &safeReadPointer ); + if ( FAILED( result ) ) { + errorStream_ << "RtApiDs::callbackEvent: error (" << getErrorString( result ) << ") getting current read position!"; + errorText_ = errorStream_.str(); + MUTEX_UNLOCK( &stream_.mutex ); + error( RtAudioError::SYSTEM_ERROR ); + return; + } + + if ( safeReadPointer < (DWORD)nextReadPointer ) safeReadPointer += dsBufferSize; // unwrap offset + DWORD endRead = nextReadPointer + bufferBytes; + + // Handling depends on whether we are INPUT or DUPLEX. + // If we're in INPUT mode then waiting is a good thing. If we're in DUPLEX mode, + // then a wait here will drag the write pointers into the forbidden zone. + // + // In DUPLEX mode, rather than wait, we will back off the read pointer until + // it's in a safe position. This causes dropouts, but it seems to be the only + // practical way to sync up the read and write pointers reliably, given the + // the very complex relationship between phase and increment of the read and write + // pointers. 
+ // + // In order to minimize audible dropouts in DUPLEX mode, we will + // provide a pre-roll period of 0.5 seconds in which we return + // zeros from the read buffer while the pointers sync up. + + if ( stream_.mode == DUPLEX ) { + if ( safeReadPointer < endRead ) { + if ( duplexPrerollBytes <= 0 ) { + // Pre-roll time over. Be more agressive. + int adjustment = endRead-safeReadPointer; + + handle->xrun[1] = true; + // Two cases: + // - large adjustments: we've probably run out of CPU cycles, so just resync exactly, + // and perform fine adjustments later. + // - small adjustments: back off by twice as much. + if ( adjustment >= 2*bufferBytes ) + nextReadPointer = safeReadPointer-2*bufferBytes; + else + nextReadPointer = safeReadPointer-bufferBytes-adjustment; + + if ( nextReadPointer < 0 ) nextReadPointer += dsBufferSize; + + } + else { + // In pre=roll time. Just do it. + nextReadPointer = safeReadPointer - bufferBytes; + while ( nextReadPointer < 0 ) nextReadPointer += dsBufferSize; + } + endRead = nextReadPointer + bufferBytes; + } + } + else { // mode == INPUT + while ( safeReadPointer < endRead && stream_.callbackInfo.isRunning ) { + // See comments for playback. + double millis = (endRead - safeReadPointer) * 1000.0; + millis /= ( formatBytes(stream_.deviceFormat[1]) * stream_.nDeviceChannels[1] * stream_.sampleRate); + if ( millis < 1.0 ) millis = 1.0; + Sleep( (DWORD) millis ); + + // Wake up and find out where we are now. 
+ result = dsBuffer->GetCurrentPosition( ¤tReadPointer, &safeReadPointer ); + if ( FAILED( result ) ) { + errorStream_ << "RtApiDs::callbackEvent: error (" << getErrorString( result ) << ") getting current read position!"; + errorText_ = errorStream_.str(); + MUTEX_UNLOCK( &stream_.mutex ); + error( RtAudioError::SYSTEM_ERROR ); + return; + } + + if ( safeReadPointer < (DWORD)nextReadPointer ) safeReadPointer += dsBufferSize; // unwrap offset + } + } + + // Lock free space in the buffer + result = dsBuffer->Lock( nextReadPointer, bufferBytes, &buffer1, + &bufferSize1, &buffer2, &bufferSize2, 0 ); + if ( FAILED( result ) ) { + errorStream_ << "RtApiDs::callbackEvent: error (" << getErrorString( result ) << ") locking capture buffer!"; + errorText_ = errorStream_.str(); + MUTEX_UNLOCK( &stream_.mutex ); + error( RtAudioError::SYSTEM_ERROR ); + return; + } + + if ( duplexPrerollBytes <= 0 ) { + // Copy our buffer into the DS buffer + CopyMemory( buffer, buffer1, bufferSize1 ); + if ( buffer2 != NULL ) CopyMemory( buffer+bufferSize1, buffer2, bufferSize2 ); + } + else { + memset( buffer, 0, bufferSize1 ); + if ( buffer2 != NULL ) memset( buffer + bufferSize1, 0, bufferSize2 ); + duplexPrerollBytes -= bufferSize1 + bufferSize2; + } + + // Update our buffer offset and unlock sound buffer + nextReadPointer = ( nextReadPointer + bufferSize1 + bufferSize2 ) % dsBufferSize; + dsBuffer->Unlock( buffer1, bufferSize1, buffer2, bufferSize2 ); + if ( FAILED( result ) ) { + errorStream_ << "RtApiDs::callbackEvent: error (" << getErrorString( result ) << ") unlocking capture buffer!"; + errorText_ = errorStream_.str(); + MUTEX_UNLOCK( &stream_.mutex ); + error( RtAudioError::SYSTEM_ERROR ); + return; + } + handle->bufferPointer[1] = nextReadPointer; + + // No byte swapping necessary in DirectSound implementation. + + // If necessary, convert 8-bit data from unsigned to signed. 
+ if ( stream_.deviceFormat[1] == RTAUDIO_SINT8 ) + for ( int j=0; j<bufferBytes; j++ ) buffer[j] = (signed char) ( buffer[j] - 128 ); + + // Do buffer conversion if necessary. + if ( stream_.doConvertBuffer[1] ) + convertBuffer( stream_.userBuffer[1], stream_.deviceBuffer, stream_.convertInfo[1] ); + } + + unlock: + MUTEX_UNLOCK( &stream_.mutex ); + RtApi::tickStreamTime(); +} + +// Definitions for utility functions and callbacks +// specific to the DirectSound implementation. + +static unsigned __stdcall callbackHandler( void *ptr ) +{ + CallbackInfo *info = (CallbackInfo *) ptr; + RtApiDs *object = (RtApiDs *) info->object; + bool* isRunning = &info->isRunning; + + while ( *isRunning == true ) { + object->callbackEvent(); + } + + _endthreadex( 0 ); + return 0; +} + +static BOOL CALLBACK deviceQueryCallback( LPGUID lpguid, + LPCTSTR description, + LPCTSTR /*module*/, + LPVOID lpContext ) +{ + struct DsProbeData& probeInfo = *(struct DsProbeData*) lpContext; + std::vector<struct DsDevice>& dsDevices = *probeInfo.dsDevices; + + HRESULT hr; + bool validDevice = false; + if ( probeInfo.isInput == true ) { + DSCCAPS caps; + LPDIRECTSOUNDCAPTURE object; + + hr = DirectSoundCaptureCreate( lpguid, &object, NULL ); + if ( hr != DS_OK ) return TRUE; + + caps.dwSize = sizeof(caps); + hr = object->GetCaps( &caps ); + if ( hr == DS_OK ) { + if ( caps.dwChannels > 0 && caps.dwFormats > 0 ) + validDevice = true; + } + object->Release(); + } + else { + DSCAPS caps; + LPDIRECTSOUND object; + hr = DirectSoundCreate( lpguid, &object, NULL ); + if ( hr != DS_OK ) return TRUE; + + caps.dwSize = sizeof(caps); + hr = object->GetCaps( &caps ); + if ( hr == DS_OK ) { + if ( caps.dwFlags & DSCAPS_PRIMARYMONO || caps.dwFlags & DSCAPS_PRIMARYSTEREO ) + validDevice = true; + } + object->Release(); + } + + // If good device, then save its name and guid. 
+ std::string name = convertCharPointerToStdString( description ); + //if ( name == "Primary Sound Driver" || name == "Primary Sound Capture Driver" ) + if ( lpguid == NULL ) + name = "Default Device"; + if ( validDevice ) { + for ( unsigned int i=0; i<dsDevices.size(); i++ ) { + if ( dsDevices[i].name == name ) { + dsDevices[i].found = true; + if ( probeInfo.isInput ) { + dsDevices[i].id[1] = lpguid; + dsDevices[i].validId[1] = true; + } + else { + dsDevices[i].id[0] = lpguid; + dsDevices[i].validId[0] = true; + } + return TRUE; + } + } + + DsDevice device; + device.name = name; + device.found = true; + if ( probeInfo.isInput ) { + device.id[1] = lpguid; + device.validId[1] = true; + } + else { + device.id[0] = lpguid; + device.validId[0] = true; + } + dsDevices.push_back( device ); + } + + return TRUE; +} + +static const char* getErrorString( int code ) +{ + switch ( code ) { + + case DSERR_ALLOCATED: + return "Already allocated"; + + case DSERR_CONTROLUNAVAIL: + return "Control unavailable"; + + case DSERR_INVALIDPARAM: + return "Invalid parameter"; + + case DSERR_INVALIDCALL: + return "Invalid call"; + + case DSERR_GENERIC: + return "Generic error"; + + case DSERR_PRIOLEVELNEEDED: + return "Priority level needed"; + + case DSERR_OUTOFMEMORY: + return "Out of memory"; + + case DSERR_BADFORMAT: + return "The sample rate or the channel format is not supported"; + + case DSERR_UNSUPPORTED: + return "Not supported"; + + case DSERR_NODRIVER: + return "No driver"; + + case DSERR_ALREADYINITIALIZED: + return "Already initialized"; + + case DSERR_NOAGGREGATION: + return "No aggregation"; + + case DSERR_BUFFERLOST: + return "Buffer lost"; + + case DSERR_OTHERAPPHASPRIO: + return "Another application already has priority"; + + case DSERR_UNINITIALIZED: + return "Uninitialized"; + + default: + return "DirectSound unknown error"; + } +} +//******************** End of __WINDOWS_DS__ *********************// +#endif + + +#if defined(__LINUX_ALSA__) + +#include 
<alsa/asoundlib.h> +#include <unistd.h> + + // A structure to hold various information related to the ALSA API + // implementation. +struct AlsaHandle { + snd_pcm_t *handles[2]; + bool synchronized; + bool xrun[2]; + pthread_cond_t runnable_cv; + bool runnable; + + AlsaHandle() + :synchronized(false), runnable(false) { xrun[0] = false; xrun[1] = false; } +}; + +static void *alsaCallbackHandler( void * ptr ); + +RtApiAlsa :: RtApiAlsa() +{ + // Nothing to do here. +} + +RtApiAlsa :: ~RtApiAlsa() +{ + if ( stream_.state != STREAM_CLOSED ) closeStream(); +} + +unsigned int RtApiAlsa :: getDeviceCount( void ) +{ + unsigned nDevices = 0; + int result, subdevice, card; + char name[64]; + snd_ctl_t *handle; + + // Count cards and devices + card = -1; + snd_card_next( &card ); + while ( card >= 0 ) { + sprintf( name, "hw:%d", card ); + result = snd_ctl_open( &handle, name, 0 ); + if ( result < 0 ) { + errorStream_ << "RtApiAlsa::getDeviceCount: control open, card = " << card << ", " << snd_strerror( result ) << "."; + errorText_ = errorStream_.str(); + error( RtAudioError::WARNING ); + goto nextcard; + } + subdevice = -1; + while( 1 ) { + result = snd_ctl_pcm_next_device( handle, &subdevice ); + if ( result < 0 ) { + errorStream_ << "RtApiAlsa::getDeviceCount: control next device, card = " << card << ", " << snd_strerror( result ) << "."; + errorText_ = errorStream_.str(); + error( RtAudioError::WARNING ); + break; + } + if ( subdevice < 0 ) + break; + nDevices++; + } + nextcard: + snd_ctl_close( handle ); + snd_card_next( &card ); + } + + result = snd_ctl_open( &handle, "default", 0 ); + if (result == 0) { + nDevices++; + snd_ctl_close( handle ); + } + + return nDevices; +} + +RtAudio::DeviceInfo RtApiAlsa :: getDeviceInfo( unsigned int device ) +{ + RtAudio::DeviceInfo info; + info.probed = false; + + unsigned nDevices = 0; + int result, subdevice, card; + char name[64]; + snd_ctl_t *chandle; + + // Count cards and devices + card = -1; + subdevice = -1; + snd_card_next( 
&card ); + while ( card >= 0 ) { + sprintf( name, "hw:%d", card ); + result = snd_ctl_open( &chandle, name, SND_CTL_NONBLOCK ); + if ( result < 0 ) { + errorStream_ << "RtApiAlsa::getDeviceInfo: control open, card = " << card << ", " << snd_strerror( result ) << "."; + errorText_ = errorStream_.str(); + error( RtAudioError::WARNING ); + goto nextcard; + } + subdevice = -1; + while( 1 ) { + result = snd_ctl_pcm_next_device( chandle, &subdevice ); + if ( result < 0 ) { + errorStream_ << "RtApiAlsa::getDeviceInfo: control next device, card = " << card << ", " << snd_strerror( result ) << "."; + errorText_ = errorStream_.str(); + error( RtAudioError::WARNING ); + break; + } + if ( subdevice < 0 ) break; + if ( nDevices == device ) { + sprintf( name, "hw:%d,%d", card, subdevice ); + goto foundDevice; + } + nDevices++; + } + nextcard: + snd_ctl_close( chandle ); + snd_card_next( &card ); + } + + result = snd_ctl_open( &chandle, "default", SND_CTL_NONBLOCK ); + if ( result == 0 ) { + if ( nDevices == device ) { + strcpy( name, "default" ); + goto foundDevice; + } + nDevices++; + } + + if ( nDevices == 0 ) { + errorText_ = "RtApiAlsa::getDeviceInfo: no devices found!"; + error( RtAudioError::INVALID_USE ); + return info; + } + + if ( device >= nDevices ) { + errorText_ = "RtApiAlsa::getDeviceInfo: device ID is invalid!"; + error( RtAudioError::INVALID_USE ); + return info; + } + + foundDevice: + + // If a stream is already open, we cannot probe the stream devices. + // Thus, use the saved results. 
+ if ( stream_.state != STREAM_CLOSED && + ( stream_.device[0] == device || stream_.device[1] == device ) ) { + snd_ctl_close( chandle ); + if ( device >= devices_.size() ) { + errorText_ = "RtApiAlsa::getDeviceInfo: device ID was not present before stream was opened."; + error( RtAudioError::WARNING ); + return info; + } + return devices_[ device ]; + } + + int openMode = SND_PCM_ASYNC; + snd_pcm_stream_t stream; + snd_pcm_info_t *pcminfo; + snd_pcm_info_alloca( &pcminfo ); + snd_pcm_t *phandle; + snd_pcm_hw_params_t *params; + snd_pcm_hw_params_alloca( &params ); + + // First try for playback unless default device (which has subdev -1) + stream = SND_PCM_STREAM_PLAYBACK; + snd_pcm_info_set_stream( pcminfo, stream ); + if ( subdevice != -1 ) { + snd_pcm_info_set_device( pcminfo, subdevice ); + snd_pcm_info_set_subdevice( pcminfo, 0 ); + + result = snd_ctl_pcm_info( chandle, pcminfo ); + if ( result < 0 ) { + // Device probably doesn't support playback. + goto captureProbe; + } + } + + result = snd_pcm_open( &phandle, name, stream, openMode | SND_PCM_NONBLOCK ); + if ( result < 0 ) { + errorStream_ << "RtApiAlsa::getDeviceInfo: snd_pcm_open error for device (" << name << "), " << snd_strerror( result ) << "."; + errorText_ = errorStream_.str(); + error( RtAudioError::WARNING ); + goto captureProbe; + } + + // The device is open ... fill the parameter structure. + result = snd_pcm_hw_params_any( phandle, params ); + if ( result < 0 ) { + snd_pcm_close( phandle ); + errorStream_ << "RtApiAlsa::getDeviceInfo: snd_pcm_hw_params error for device (" << name << "), " << snd_strerror( result ) << "."; + errorText_ = errorStream_.str(); + error( RtAudioError::WARNING ); + goto captureProbe; + } + + // Get output channel information. 
+ unsigned int value; + result = snd_pcm_hw_params_get_channels_max( params, &value ); + if ( result < 0 ) { + snd_pcm_close( phandle ); + errorStream_ << "RtApiAlsa::getDeviceInfo: error getting device (" << name << ") output channels, " << snd_strerror( result ) << "."; + errorText_ = errorStream_.str(); + error( RtAudioError::WARNING ); + goto captureProbe; + } + info.outputChannels = value; + snd_pcm_close( phandle ); + + captureProbe: + stream = SND_PCM_STREAM_CAPTURE; + snd_pcm_info_set_stream( pcminfo, stream ); + + // Now try for capture unless default device (with subdev = -1) + if ( subdevice != -1 ) { + result = snd_ctl_pcm_info( chandle, pcminfo ); + snd_ctl_close( chandle ); + if ( result < 0 ) { + // Device probably doesn't support capture. + if ( info.outputChannels == 0 ) return info; + goto probeParameters; + } + } + else + snd_ctl_close( chandle ); + + result = snd_pcm_open( &phandle, name, stream, openMode | SND_PCM_NONBLOCK); + if ( result < 0 ) { + errorStream_ << "RtApiAlsa::getDeviceInfo: snd_pcm_open error for device (" << name << "), " << snd_strerror( result ) << "."; + errorText_ = errorStream_.str(); + error( RtAudioError::WARNING ); + if ( info.outputChannels == 0 ) return info; + goto probeParameters; + } + + // The device is open ... fill the parameter structure. 
+ result = snd_pcm_hw_params_any( phandle, params ); + if ( result < 0 ) { + snd_pcm_close( phandle ); + errorStream_ << "RtApiAlsa::getDeviceInfo: snd_pcm_hw_params error for device (" << name << "), " << snd_strerror( result ) << "."; + errorText_ = errorStream_.str(); + error( RtAudioError::WARNING ); + if ( info.outputChannels == 0 ) return info; + goto probeParameters; + } + + result = snd_pcm_hw_params_get_channels_max( params, &value ); + if ( result < 0 ) { + snd_pcm_close( phandle ); + errorStream_ << "RtApiAlsa::getDeviceInfo: error getting device (" << name << ") input channels, " << snd_strerror( result ) << "."; + errorText_ = errorStream_.str(); + error( RtAudioError::WARNING ); + if ( info.outputChannels == 0 ) return info; + goto probeParameters; + } + info.inputChannels = value; + snd_pcm_close( phandle ); + + // If device opens for both playback and capture, we determine the channels. + if ( info.outputChannels > 0 && info.inputChannels > 0 ) + info.duplexChannels = (info.outputChannels > info.inputChannels) ? info.inputChannels : info.outputChannels; + + // ALSA doesn't provide default devices so we'll use the first available one. + if ( device == 0 && info.outputChannels > 0 ) + info.isDefaultOutput = true; + if ( device == 0 && info.inputChannels > 0 ) + info.isDefaultInput = true; + + probeParameters: + // At this point, we just need to figure out the supported data + // formats and sample rates. We'll proceed by opening the device in + // the direction with the maximum number of channels, or playback if + // they are equal. This might limit our sample rate options, but so + // be it. 
+ + if ( info.outputChannels >= info.inputChannels ) + stream = SND_PCM_STREAM_PLAYBACK; + else + stream = SND_PCM_STREAM_CAPTURE; + snd_pcm_info_set_stream( pcminfo, stream ); + + result = snd_pcm_open( &phandle, name, stream, openMode | SND_PCM_NONBLOCK); + if ( result < 0 ) { + errorStream_ << "RtApiAlsa::getDeviceInfo: snd_pcm_open error for device (" << name << "), " << snd_strerror( result ) << "."; + errorText_ = errorStream_.str(); + error( RtAudioError::WARNING ); + return info; + } + + // The device is open ... fill the parameter structure. + result = snd_pcm_hw_params_any( phandle, params ); + if ( result < 0 ) { + snd_pcm_close( phandle ); + errorStream_ << "RtApiAlsa::getDeviceInfo: snd_pcm_hw_params error for device (" << name << "), " << snd_strerror( result ) << "."; + errorText_ = errorStream_.str(); + error( RtAudioError::WARNING ); + return info; + } + + // Test our discrete set of sample rate values. + info.sampleRates.clear(); + for ( unsigned int i=0; i<MAX_SAMPLE_RATES; i++ ) { + if ( snd_pcm_hw_params_test_rate( phandle, params, SAMPLE_RATES[i], 0 ) == 0 ) { + info.sampleRates.push_back( SAMPLE_RATES[i] ); + + if ( !info.preferredSampleRate || ( SAMPLE_RATES[i] <= 48000 && SAMPLE_RATES[i] > info.preferredSampleRate ) ) + info.preferredSampleRate = SAMPLE_RATES[i]; + } + } + if ( info.sampleRates.size() == 0 ) { + snd_pcm_close( phandle ); + errorStream_ << "RtApiAlsa::getDeviceInfo: no supported sample rates found for device (" << name << ")."; + errorText_ = errorStream_.str(); + error( RtAudioError::WARNING ); + return info; + } + + // Probe the supported data formats ... 
we don't care about endian-ness just yet + snd_pcm_format_t format; + info.nativeFormats = 0; + format = SND_PCM_FORMAT_S8; + if ( snd_pcm_hw_params_test_format( phandle, params, format ) == 0 ) + info.nativeFormats |= RTAUDIO_SINT8; + format = SND_PCM_FORMAT_S16; + if ( snd_pcm_hw_params_test_format( phandle, params, format ) == 0 ) + info.nativeFormats |= RTAUDIO_SINT16; + format = SND_PCM_FORMAT_S24; + if ( snd_pcm_hw_params_test_format( phandle, params, format ) == 0 ) + info.nativeFormats |= RTAUDIO_SINT24; + format = SND_PCM_FORMAT_S32; + if ( snd_pcm_hw_params_test_format( phandle, params, format ) == 0 ) + info.nativeFormats |= RTAUDIO_SINT32; + format = SND_PCM_FORMAT_FLOAT; + if ( snd_pcm_hw_params_test_format( phandle, params, format ) == 0 ) + info.nativeFormats |= RTAUDIO_FLOAT32; + format = SND_PCM_FORMAT_FLOAT64; + if ( snd_pcm_hw_params_test_format( phandle, params, format ) == 0 ) + info.nativeFormats |= RTAUDIO_FLOAT64; + + // Check that we have at least one supported format + if ( info.nativeFormats == 0 ) { + snd_pcm_close( phandle ); + errorStream_ << "RtApiAlsa::getDeviceInfo: pcm device (" << name << ") data format not supported by RtAudio."; + errorText_ = errorStream_.str(); + error( RtAudioError::WARNING ); + return info; + } + + // Get the device name + char *cardname; + result = snd_card_get_name( card, &cardname ); + if ( result >= 0 ) { + sprintf( name, "hw:%s,%d", cardname, subdevice ); + free( cardname ); + } + info.name = name; + + // That's all ... 
close the device and return + snd_pcm_close( phandle ); + info.probed = true; + return info; +} + +void RtApiAlsa :: saveDeviceInfo( void ) +{ + devices_.clear(); + + unsigned int nDevices = getDeviceCount(); + devices_.resize( nDevices ); + for ( unsigned int i=0; i<nDevices; i++ ) + devices_[i] = getDeviceInfo( i ); +} + +bool RtApiAlsa :: probeDeviceOpen( unsigned int device, StreamMode mode, unsigned int channels, + unsigned int firstChannel, unsigned int sampleRate, + RtAudioFormat format, unsigned int *bufferSize, + RtAudio::StreamOptions *options ) + +{ +#if defined(__RTAUDIO_DEBUG__) + snd_output_t *out; + snd_output_stdio_attach(&out, stderr, 0); +#endif + + // I'm not using the "plug" interface ... too much inconsistent behavior. + + unsigned nDevices = 0; + int result, subdevice, card; + char name[64]; + snd_ctl_t *chandle; + + if ( options && options->flags & RTAUDIO_ALSA_USE_DEFAULT ) + snprintf(name, sizeof(name), "%s", "default"); + else { + // Count cards and devices + card = -1; + snd_card_next( &card ); + while ( card >= 0 ) { + sprintf( name, "hw:%d", card ); + result = snd_ctl_open( &chandle, name, SND_CTL_NONBLOCK ); + if ( result < 0 ) { + errorStream_ << "RtApiAlsa::probeDeviceOpen: control open, card = " << card << ", " << snd_strerror( result ) << "."; + errorText_ = errorStream_.str(); + return FAILURE; + } + subdevice = -1; + while( 1 ) { + result = snd_ctl_pcm_next_device( chandle, &subdevice ); + if ( result < 0 ) break; + if ( subdevice < 0 ) break; + if ( nDevices == device ) { + sprintf( name, "hw:%d,%d", card, subdevice ); + snd_ctl_close( chandle ); + goto foundDevice; + } + nDevices++; + } + snd_ctl_close( chandle ); + snd_card_next( &card ); + } + + result = snd_ctl_open( &chandle, "default", SND_CTL_NONBLOCK ); + if ( result == 0 ) { + if ( nDevices == device ) { + strcpy( name, "default" ); + goto foundDevice; + } + nDevices++; + } + + if ( nDevices == 0 ) { + // This should not happen because a check is made before this 
function is called. + errorText_ = "RtApiAlsa::probeDeviceOpen: no devices found!"; + return FAILURE; + } + + if ( device >= nDevices ) { + // This should not happen because a check is made before this function is called. + errorText_ = "RtApiAlsa::probeDeviceOpen: device ID is invalid!"; + return FAILURE; + } + } + + foundDevice: + + // The getDeviceInfo() function will not work for a device that is + // already open. Thus, we'll probe the system before opening a + // stream and save the results for use by getDeviceInfo(). + if ( mode == OUTPUT || ( mode == INPUT && stream_.mode != OUTPUT ) ) // only do once + this->saveDeviceInfo(); + + snd_pcm_stream_t stream; + if ( mode == OUTPUT ) + stream = SND_PCM_STREAM_PLAYBACK; + else + stream = SND_PCM_STREAM_CAPTURE; + + snd_pcm_t *phandle; + int openMode = SND_PCM_ASYNC; + result = snd_pcm_open( &phandle, name, stream, openMode ); + if ( result < 0 ) { + if ( mode == OUTPUT ) + errorStream_ << "RtApiAlsa::probeDeviceOpen: pcm device (" << name << ") won't open for output."; + else + errorStream_ << "RtApiAlsa::probeDeviceOpen: pcm device (" << name << ") won't open for input."; + errorText_ = errorStream_.str(); + return FAILURE; + } + + // Fill the parameter structure. + snd_pcm_hw_params_t *hw_params; + snd_pcm_hw_params_alloca( &hw_params ); + result = snd_pcm_hw_params_any( phandle, hw_params ); + if ( result < 0 ) { + snd_pcm_close( phandle ); + errorStream_ << "RtApiAlsa::probeDeviceOpen: error getting pcm device (" << name << ") parameters, " << snd_strerror( result ) << "."; + errorText_ = errorStream_.str(); + return FAILURE; + } + +#if defined(__RTAUDIO_DEBUG__) + fprintf( stderr, "\nRtApiAlsa: dump hardware params just after device open:\n\n" ); + snd_pcm_hw_params_dump( hw_params, out ); +#endif + + // Set access ... check user preference. 
+ if ( options && options->flags & RTAUDIO_NONINTERLEAVED ) { + stream_.userInterleaved = false; + result = snd_pcm_hw_params_set_access( phandle, hw_params, SND_PCM_ACCESS_RW_NONINTERLEAVED ); + if ( result < 0 ) { + result = snd_pcm_hw_params_set_access( phandle, hw_params, SND_PCM_ACCESS_RW_INTERLEAVED ); + stream_.deviceInterleaved[mode] = true; + } + else + stream_.deviceInterleaved[mode] = false; + } + else { + stream_.userInterleaved = true; + result = snd_pcm_hw_params_set_access( phandle, hw_params, SND_PCM_ACCESS_RW_INTERLEAVED ); + if ( result < 0 ) { + result = snd_pcm_hw_params_set_access( phandle, hw_params, SND_PCM_ACCESS_RW_NONINTERLEAVED ); + stream_.deviceInterleaved[mode] = false; + } + else + stream_.deviceInterleaved[mode] = true; + } + + if ( result < 0 ) { + snd_pcm_close( phandle ); + errorStream_ << "RtApiAlsa::probeDeviceOpen: error setting pcm device (" << name << ") access, " << snd_strerror( result ) << "."; + errorText_ = errorStream_.str(); + return FAILURE; + } + + // Determine how to set the device format. + stream_.userFormat = format; + snd_pcm_format_t deviceFormat = SND_PCM_FORMAT_UNKNOWN; + + if ( format == RTAUDIO_SINT8 ) + deviceFormat = SND_PCM_FORMAT_S8; + else if ( format == RTAUDIO_SINT16 ) + deviceFormat = SND_PCM_FORMAT_S16; + else if ( format == RTAUDIO_SINT24 ) + deviceFormat = SND_PCM_FORMAT_S24; + else if ( format == RTAUDIO_SINT32 ) + deviceFormat = SND_PCM_FORMAT_S32; + else if ( format == RTAUDIO_FLOAT32 ) + deviceFormat = SND_PCM_FORMAT_FLOAT; + else if ( format == RTAUDIO_FLOAT64 ) + deviceFormat = SND_PCM_FORMAT_FLOAT64; + + if ( snd_pcm_hw_params_test_format(phandle, hw_params, deviceFormat) == 0) { + stream_.deviceFormat[mode] = format; + goto setFormat; + } + + // The user requested format is not natively supported by the device. 
+ deviceFormat = SND_PCM_FORMAT_FLOAT64; + if ( snd_pcm_hw_params_test_format( phandle, hw_params, deviceFormat ) == 0 ) { + stream_.deviceFormat[mode] = RTAUDIO_FLOAT64; + goto setFormat; + } + + deviceFormat = SND_PCM_FORMAT_FLOAT; + if ( snd_pcm_hw_params_test_format(phandle, hw_params, deviceFormat ) == 0 ) { + stream_.deviceFormat[mode] = RTAUDIO_FLOAT32; + goto setFormat; + } + + deviceFormat = SND_PCM_FORMAT_S32; + if ( snd_pcm_hw_params_test_format(phandle, hw_params, deviceFormat ) == 0 ) { + stream_.deviceFormat[mode] = RTAUDIO_SINT32; + goto setFormat; + } + + deviceFormat = SND_PCM_FORMAT_S24; + if ( snd_pcm_hw_params_test_format(phandle, hw_params, deviceFormat ) == 0 ) { + stream_.deviceFormat[mode] = RTAUDIO_SINT24; + goto setFormat; + } + + deviceFormat = SND_PCM_FORMAT_S16; + if ( snd_pcm_hw_params_test_format(phandle, hw_params, deviceFormat ) == 0 ) { + stream_.deviceFormat[mode] = RTAUDIO_SINT16; + goto setFormat; + } + + deviceFormat = SND_PCM_FORMAT_S8; + if ( snd_pcm_hw_params_test_format(phandle, hw_params, deviceFormat ) == 0 ) { + stream_.deviceFormat[mode] = RTAUDIO_SINT8; + goto setFormat; + } + + // If we get here, no supported format was found. + snd_pcm_close( phandle ); + errorStream_ << "RtApiAlsa::probeDeviceOpen: pcm device " << device << " data format not supported by RtAudio."; + errorText_ = errorStream_.str(); + return FAILURE; + + setFormat: + result = snd_pcm_hw_params_set_format( phandle, hw_params, deviceFormat ); + if ( result < 0 ) { + snd_pcm_close( phandle ); + errorStream_ << "RtApiAlsa::probeDeviceOpen: error setting pcm device (" << name << ") data format, " << snd_strerror( result ) << "."; + errorText_ = errorStream_.str(); + return FAILURE; + } + + // Determine whether byte-swaping is necessary. 
+ stream_.doByteSwap[mode] = false; + if ( deviceFormat != SND_PCM_FORMAT_S8 ) { + result = snd_pcm_format_cpu_endian( deviceFormat ); + if ( result == 0 ) + stream_.doByteSwap[mode] = true; + else if (result < 0) { + snd_pcm_close( phandle ); + errorStream_ << "RtApiAlsa::probeDeviceOpen: error getting pcm device (" << name << ") endian-ness, " << snd_strerror( result ) << "."; + errorText_ = errorStream_.str(); + return FAILURE; + } + } + + // Set the sample rate. + result = snd_pcm_hw_params_set_rate_near( phandle, hw_params, (unsigned int*) &sampleRate, 0 ); + if ( result < 0 ) { + snd_pcm_close( phandle ); + errorStream_ << "RtApiAlsa::probeDeviceOpen: error setting sample rate on device (" << name << "), " << snd_strerror( result ) << "."; + errorText_ = errorStream_.str(); + return FAILURE; + } + + // Determine the number of channels for this device. We support a possible + // minimum device channel number > than the value requested by the user. + stream_.nUserChannels[mode] = channels; + unsigned int value; + result = snd_pcm_hw_params_get_channels_max( hw_params, &value ); + unsigned int deviceChannels = value; + if ( result < 0 || deviceChannels < channels + firstChannel ) { + snd_pcm_close( phandle ); + errorStream_ << "RtApiAlsa::probeDeviceOpen: requested channel parameters not supported by device (" << name << "), " << snd_strerror( result ) << "."; + errorText_ = errorStream_.str(); + return FAILURE; + } + + result = snd_pcm_hw_params_get_channels_min( hw_params, &value ); + if ( result < 0 ) { + snd_pcm_close( phandle ); + errorStream_ << "RtApiAlsa::probeDeviceOpen: error getting minimum channels for device (" << name << "), " << snd_strerror( result ) << "."; + errorText_ = errorStream_.str(); + return FAILURE; + } + deviceChannels = value; + if ( deviceChannels < channels + firstChannel ) deviceChannels = channels + firstChannel; + stream_.nDeviceChannels[mode] = deviceChannels; + + // Set the device channels. 
+ result = snd_pcm_hw_params_set_channels( phandle, hw_params, deviceChannels ); + if ( result < 0 ) { + snd_pcm_close( phandle ); + errorStream_ << "RtApiAlsa::probeDeviceOpen: error setting channels for device (" << name << "), " << snd_strerror( result ) << "."; + errorText_ = errorStream_.str(); + return FAILURE; + } + + // Set the buffer (or period) size. + int dir = 0; + snd_pcm_uframes_t periodSize = *bufferSize; + result = snd_pcm_hw_params_set_period_size_near( phandle, hw_params, &periodSize, &dir ); + if ( result < 0 ) { + snd_pcm_close( phandle ); + errorStream_ << "RtApiAlsa::probeDeviceOpen: error setting period size for device (" << name << "), " << snd_strerror( result ) << "."; + errorText_ = errorStream_.str(); + return FAILURE; + } + *bufferSize = periodSize; + + // Set the buffer number, which in ALSA is referred to as the "period". + unsigned int periods = 0; + if ( options && options->flags & RTAUDIO_MINIMIZE_LATENCY ) periods = 2; + if ( options && options->numberOfBuffers > 0 ) periods = options->numberOfBuffers; + if ( periods < 2 ) periods = 4; // a fairly safe default value + result = snd_pcm_hw_params_set_periods_near( phandle, hw_params, &periods, &dir ); + if ( result < 0 ) { + snd_pcm_close( phandle ); + errorStream_ << "RtApiAlsa::probeDeviceOpen: error setting periods for device (" << name << "), " << snd_strerror( result ) << "."; + errorText_ = errorStream_.str(); + return FAILURE; + } + + // If attempting to setup a duplex stream, the bufferSize parameter + // MUST be the same in both directions! 
+ if ( stream_.mode == OUTPUT && mode == INPUT && *bufferSize != stream_.bufferSize ) { + snd_pcm_close( phandle ); + errorStream_ << "RtApiAlsa::probeDeviceOpen: system error setting buffer size for duplex stream on device (" << name << ")."; + errorText_ = errorStream_.str(); + return FAILURE; + } + + stream_.bufferSize = *bufferSize; + + // Install the hardware configuration + result = snd_pcm_hw_params( phandle, hw_params ); + if ( result < 0 ) { + snd_pcm_close( phandle ); + errorStream_ << "RtApiAlsa::probeDeviceOpen: error installing hardware configuration on device (" << name << "), " << snd_strerror( result ) << "."; + errorText_ = errorStream_.str(); + return FAILURE; + } + +#if defined(__RTAUDIO_DEBUG__) + fprintf(stderr, "\nRtApiAlsa: dump hardware params after installation:\n\n"); + snd_pcm_hw_params_dump( hw_params, out ); +#endif + + // Set the software configuration to fill buffers with zeros and prevent device stopping on xruns. + snd_pcm_sw_params_t *sw_params = NULL; + snd_pcm_sw_params_alloca( &sw_params ); + snd_pcm_sw_params_current( phandle, sw_params ); + snd_pcm_sw_params_set_start_threshold( phandle, sw_params, *bufferSize ); + snd_pcm_sw_params_set_stop_threshold( phandle, sw_params, ULONG_MAX ); + snd_pcm_sw_params_set_silence_threshold( phandle, sw_params, 0 ); + + // The following two settings were suggested by Theo Veenker + //snd_pcm_sw_params_set_avail_min( phandle, sw_params, *bufferSize ); + //snd_pcm_sw_params_set_xfer_align( phandle, sw_params, 1 ); + + // here are two options for a fix + //snd_pcm_sw_params_set_silence_size( phandle, sw_params, ULONG_MAX ); + snd_pcm_uframes_t val; + snd_pcm_sw_params_get_boundary( sw_params, &val ); + snd_pcm_sw_params_set_silence_size( phandle, sw_params, val ); + + result = snd_pcm_sw_params( phandle, sw_params ); + if ( result < 0 ) { + snd_pcm_close( phandle ); + errorStream_ << "RtApiAlsa::probeDeviceOpen: error installing software configuration on device (" << name << "), " << 
snd_strerror( result ) << "."; + errorText_ = errorStream_.str(); + return FAILURE; + } + +#if defined(__RTAUDIO_DEBUG__) + fprintf(stderr, "\nRtApiAlsa: dump software params after installation:\n\n"); + snd_pcm_sw_params_dump( sw_params, out ); +#endif + + // Set flags for buffer conversion + stream_.doConvertBuffer[mode] = false; + if ( stream_.userFormat != stream_.deviceFormat[mode] ) + stream_.doConvertBuffer[mode] = true; + if ( stream_.nUserChannels[mode] < stream_.nDeviceChannels[mode] ) + stream_.doConvertBuffer[mode] = true; + if ( stream_.userInterleaved != stream_.deviceInterleaved[mode] && + stream_.nUserChannels[mode] > 1 ) + stream_.doConvertBuffer[mode] = true; + + // Allocate the ApiHandle if necessary and then save. + AlsaHandle *apiInfo = 0; + if ( stream_.apiHandle == 0 ) { + try { + apiInfo = (AlsaHandle *) new AlsaHandle; + } + catch ( std::bad_alloc& ) { + errorText_ = "RtApiAlsa::probeDeviceOpen: error allocating AlsaHandle memory."; + goto error; + } + + if ( pthread_cond_init( &apiInfo->runnable_cv, NULL ) ) { + errorText_ = "RtApiAlsa::probeDeviceOpen: error initializing pthread condition variable."; + goto error; + } + + stream_.apiHandle = (void *) apiInfo; + apiInfo->handles[0] = 0; + apiInfo->handles[1] = 0; + } + else { + apiInfo = (AlsaHandle *) stream_.apiHandle; + } + apiInfo->handles[mode] = phandle; + phandle = 0; + + // Allocate necessary internal buffers. 
+ unsigned long bufferBytes; + bufferBytes = stream_.nUserChannels[mode] * *bufferSize * formatBytes( stream_.userFormat ); + stream_.userBuffer[mode] = (char *) calloc( bufferBytes, 1 ); + if ( stream_.userBuffer[mode] == NULL ) { + errorText_ = "RtApiAlsa::probeDeviceOpen: error allocating user buffer memory."; + goto error; + } + + if ( stream_.doConvertBuffer[mode] ) { + + bool makeBuffer = true; + bufferBytes = stream_.nDeviceChannels[mode] * formatBytes( stream_.deviceFormat[mode] ); + if ( mode == INPUT ) { + if ( stream_.mode == OUTPUT && stream_.deviceBuffer ) { + unsigned long bytesOut = stream_.nDeviceChannels[0] * formatBytes( stream_.deviceFormat[0] ); + if ( bufferBytes <= bytesOut ) makeBuffer = false; + } + } + + if ( makeBuffer ) { + bufferBytes *= *bufferSize; + if ( stream_.deviceBuffer ) free( stream_.deviceBuffer ); + stream_.deviceBuffer = (char *) calloc( bufferBytes, 1 ); + if ( stream_.deviceBuffer == NULL ) { + errorText_ = "RtApiAlsa::probeDeviceOpen: error allocating device buffer memory."; + goto error; + } + } + } + + stream_.sampleRate = sampleRate; + stream_.nBuffers = periods; + stream_.device[mode] = device; + stream_.state = STREAM_STOPPED; + + // Setup the buffer conversion information structure. + if ( stream_.doConvertBuffer[mode] ) setConvertInfo( mode, firstChannel ); + + // Setup thread if necessary. + if ( stream_.mode == OUTPUT && mode == INPUT ) { + // We had already set up an output stream. + stream_.mode = DUPLEX; + // Link the streams if possible. + apiInfo->synchronized = false; + if ( snd_pcm_link( apiInfo->handles[0], apiInfo->handles[1] ) == 0 ) + apiInfo->synchronized = true; + else { + errorText_ = "RtApiAlsa::probeDeviceOpen: unable to synchronize input and output devices."; + error( RtAudioError::WARNING ); + } + } + else { + stream_.mode = mode; + + // Setup callback thread. 
+ stream_.callbackInfo.object = (void *) this; + + // Set the thread attributes for joinable and realtime scheduling + // priority (optional). The higher priority will only take affect + // if the program is run as root or suid. Note, under Linux + // processes with CAP_SYS_NICE privilege, a user can change + // scheduling policy and priority (thus need not be root). See + // POSIX "capabilities". + pthread_attr_t attr; + pthread_attr_init( &attr ); + pthread_attr_setdetachstate( &attr, PTHREAD_CREATE_JOINABLE ); + +#ifdef SCHED_RR // Undefined with some OSes (eg: NetBSD 1.6.x with GNU Pthread) + if ( options && options->flags & RTAUDIO_SCHEDULE_REALTIME ) { + // We previously attempted to increase the audio callback priority + // to SCHED_RR here via the attributes. However, while no errors + // were reported in doing so, it did not work. So, now this is + // done in the alsaCallbackHandler function. + stream_.callbackInfo.doRealtime = true; + int priority = options->priority; + int min = sched_get_priority_min( SCHED_RR ); + int max = sched_get_priority_max( SCHED_RR ); + if ( priority < min ) priority = min; + else if ( priority > max ) priority = max; + stream_.callbackInfo.priority = priority; + } +#endif + + stream_.callbackInfo.isRunning = true; + result = pthread_create( &stream_.callbackInfo.thread, &attr, alsaCallbackHandler, &stream_.callbackInfo ); + pthread_attr_destroy( &attr ); + if ( result ) { + stream_.callbackInfo.isRunning = false; + errorText_ = "RtApiAlsa::error creating callback thread!"; + goto error; + } + } + + return SUCCESS; + + error: + if ( apiInfo ) { + pthread_cond_destroy( &apiInfo->runnable_cv ); + if ( apiInfo->handles[0] ) snd_pcm_close( apiInfo->handles[0] ); + if ( apiInfo->handles[1] ) snd_pcm_close( apiInfo->handles[1] ); + delete apiInfo; + stream_.apiHandle = 0; + } + + if ( phandle) snd_pcm_close( phandle ); + + for ( int i=0; i<2; i++ ) { + if ( stream_.userBuffer[i] ) { + free( stream_.userBuffer[i] ); + 
stream_.userBuffer[i] = 0; + } + } + + if ( stream_.deviceBuffer ) { + free( stream_.deviceBuffer ); + stream_.deviceBuffer = 0; + } + + stream_.state = STREAM_CLOSED; + return FAILURE; +} + +void RtApiAlsa :: closeStream() +{ + if ( stream_.state == STREAM_CLOSED ) { + errorText_ = "RtApiAlsa::closeStream(): no open stream to close!"; + error( RtAudioError::WARNING ); + return; + } + + AlsaHandle *apiInfo = (AlsaHandle *) stream_.apiHandle; + stream_.callbackInfo.isRunning = false; + MUTEX_LOCK( &stream_.mutex ); + if ( stream_.state == STREAM_STOPPED ) { + apiInfo->runnable = true; + pthread_cond_signal( &apiInfo->runnable_cv ); + } + MUTEX_UNLOCK( &stream_.mutex ); + pthread_join( stream_.callbackInfo.thread, NULL ); + + if ( stream_.state == STREAM_RUNNING ) { + stream_.state = STREAM_STOPPED; + if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) + snd_pcm_drop( apiInfo->handles[0] ); + if ( stream_.mode == INPUT || stream_.mode == DUPLEX ) + snd_pcm_drop( apiInfo->handles[1] ); + } + + if ( apiInfo ) { + pthread_cond_destroy( &apiInfo->runnable_cv ); + if ( apiInfo->handles[0] ) snd_pcm_close( apiInfo->handles[0] ); + if ( apiInfo->handles[1] ) snd_pcm_close( apiInfo->handles[1] ); + delete apiInfo; + stream_.apiHandle = 0; + } + + for ( int i=0; i<2; i++ ) { + if ( stream_.userBuffer[i] ) { + free( stream_.userBuffer[i] ); + stream_.userBuffer[i] = 0; + } + } + + if ( stream_.deviceBuffer ) { + free( stream_.deviceBuffer ); + stream_.deviceBuffer = 0; + } + + stream_.mode = UNINITIALIZED; + stream_.state = STREAM_CLOSED; +} + +void RtApiAlsa :: startStream() +{ + // This method calls snd_pcm_prepare if the device isn't already in that state. 
+ + verifyStream(); + if ( stream_.state == STREAM_RUNNING ) { + errorText_ = "RtApiAlsa::startStream(): the stream is already running!"; + error( RtAudioError::WARNING ); + return; + } + + MUTEX_LOCK( &stream_.mutex ); + + int result = 0; + snd_pcm_state_t state; + AlsaHandle *apiInfo = (AlsaHandle *) stream_.apiHandle; + snd_pcm_t **handle = (snd_pcm_t **) apiInfo->handles; + if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) { + state = snd_pcm_state( handle[0] ); + if ( state != SND_PCM_STATE_PREPARED ) { + result = snd_pcm_prepare( handle[0] ); + if ( result < 0 ) { + errorStream_ << "RtApiAlsa::startStream: error preparing output pcm device, " << snd_strerror( result ) << "."; + errorText_ = errorStream_.str(); + goto unlock; + } + } + } + + if ( ( stream_.mode == INPUT || stream_.mode == DUPLEX ) && !apiInfo->synchronized ) { + result = snd_pcm_drop(handle[1]); // fix to remove stale data received since device has been open + state = snd_pcm_state( handle[1] ); + if ( state != SND_PCM_STATE_PREPARED ) { + result = snd_pcm_prepare( handle[1] ); + if ( result < 0 ) { + errorStream_ << "RtApiAlsa::startStream: error preparing input pcm device, " << snd_strerror( result ) << "."; + errorText_ = errorStream_.str(); + goto unlock; + } + } + } + + stream_.state = STREAM_RUNNING; + + unlock: + apiInfo->runnable = true; + pthread_cond_signal( &apiInfo->runnable_cv ); + MUTEX_UNLOCK( &stream_.mutex ); + + if ( result >= 0 ) return; + error( RtAudioError::SYSTEM_ERROR ); +} + +void RtApiAlsa :: stopStream() +{ + verifyStream(); + if ( stream_.state == STREAM_STOPPED ) { + errorText_ = "RtApiAlsa::stopStream(): the stream is already stopped!"; + error( RtAudioError::WARNING ); + return; + } + + stream_.state = STREAM_STOPPED; + MUTEX_LOCK( &stream_.mutex ); + + int result = 0; + AlsaHandle *apiInfo = (AlsaHandle *) stream_.apiHandle; + snd_pcm_t **handle = (snd_pcm_t **) apiInfo->handles; + if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) { + if ( 
apiInfo->synchronized ) + result = snd_pcm_drop( handle[0] ); + else + result = snd_pcm_drain( handle[0] ); + if ( result < 0 ) { + errorStream_ << "RtApiAlsa::stopStream: error draining output pcm device, " << snd_strerror( result ) << "."; + errorText_ = errorStream_.str(); + goto unlock; + } + } + + if ( ( stream_.mode == INPUT || stream_.mode == DUPLEX ) && !apiInfo->synchronized ) { + result = snd_pcm_drop( handle[1] ); + if ( result < 0 ) { + errorStream_ << "RtApiAlsa::stopStream: error stopping input pcm device, " << snd_strerror( result ) << "."; + errorText_ = errorStream_.str(); + goto unlock; + } + } + + unlock: + apiInfo->runnable = false; // fixes high CPU usage when stopped + MUTEX_UNLOCK( &stream_.mutex ); + + if ( result >= 0 ) return; + error( RtAudioError::SYSTEM_ERROR ); +} + +void RtApiAlsa :: abortStream() +{ + verifyStream(); + if ( stream_.state == STREAM_STOPPED ) { + errorText_ = "RtApiAlsa::abortStream(): the stream is already stopped!"; + error( RtAudioError::WARNING ); + return; + } + + stream_.state = STREAM_STOPPED; + MUTEX_LOCK( &stream_.mutex ); + + int result = 0; + AlsaHandle *apiInfo = (AlsaHandle *) stream_.apiHandle; + snd_pcm_t **handle = (snd_pcm_t **) apiInfo->handles; + if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) { + result = snd_pcm_drop( handle[0] ); + if ( result < 0 ) { + errorStream_ << "RtApiAlsa::abortStream: error aborting output pcm device, " << snd_strerror( result ) << "."; + errorText_ = errorStream_.str(); + goto unlock; + } + } + + if ( ( stream_.mode == INPUT || stream_.mode == DUPLEX ) && !apiInfo->synchronized ) { + result = snd_pcm_drop( handle[1] ); + if ( result < 0 ) { + errorStream_ << "RtApiAlsa::abortStream: error aborting input pcm device, " << snd_strerror( result ) << "."; + errorText_ = errorStream_.str(); + goto unlock; + } + } + + unlock: + apiInfo->runnable = false; // fixes high CPU usage when stopped + MUTEX_UNLOCK( &stream_.mutex ); + + if ( result >= 0 ) return; + error( 
RtAudioError::SYSTEM_ERROR ); +} + +void RtApiAlsa :: callbackEvent() +{ + AlsaHandle *apiInfo = (AlsaHandle *) stream_.apiHandle; + if ( stream_.state == STREAM_STOPPED ) { + MUTEX_LOCK( &stream_.mutex ); + while ( !apiInfo->runnable ) + pthread_cond_wait( &apiInfo->runnable_cv, &stream_.mutex ); + + if ( stream_.state != STREAM_RUNNING ) { + MUTEX_UNLOCK( &stream_.mutex ); + return; + } + MUTEX_UNLOCK( &stream_.mutex ); + } + + if ( stream_.state == STREAM_CLOSED ) { + errorText_ = "RtApiAlsa::callbackEvent(): the stream is closed ... this shouldn't happen!"; + error( RtAudioError::WARNING ); + return; + } + + int doStopStream = 0; + RtAudioCallback callback = (RtAudioCallback) stream_.callbackInfo.callback; + double streamTime = getStreamTime(); + RtAudioStreamStatus status = 0; + if ( stream_.mode != INPUT && apiInfo->xrun[0] == true ) { + status |= RTAUDIO_OUTPUT_UNDERFLOW; + apiInfo->xrun[0] = false; + } + if ( stream_.mode != OUTPUT && apiInfo->xrun[1] == true ) { + status |= RTAUDIO_INPUT_OVERFLOW; + apiInfo->xrun[1] = false; + } + doStopStream = callback( stream_.userBuffer[0], stream_.userBuffer[1], + stream_.bufferSize, streamTime, status, stream_.callbackInfo.userData ); + + if ( doStopStream == 2 ) { + abortStream(); + return; + } + + MUTEX_LOCK( &stream_.mutex ); + + // The state might change while waiting on a mutex. + if ( stream_.state == STREAM_STOPPED ) goto unlock; + + int result; + char *buffer; + int channels; + snd_pcm_t **handle; + snd_pcm_sframes_t frames; + RtAudioFormat format; + handle = (snd_pcm_t **) apiInfo->handles; + + if ( stream_.mode == INPUT || stream_.mode == DUPLEX ) { + + // Setup parameters. + if ( stream_.doConvertBuffer[1] ) { + buffer = stream_.deviceBuffer; + channels = stream_.nDeviceChannels[1]; + format = stream_.deviceFormat[1]; + } + else { + buffer = stream_.userBuffer[1]; + channels = stream_.nUserChannels[1]; + format = stream_.userFormat; + } + + // Read samples from device in interleaved/non-interleaved format. 
+ if ( stream_.deviceInterleaved[1] ) + result = snd_pcm_readi( handle[1], buffer, stream_.bufferSize ); + else { + void *bufs[channels]; + size_t offset = stream_.bufferSize * formatBytes( format ); + for ( int i=0; i<channels; i++ ) + bufs[i] = (void *) (buffer + (i * offset)); + result = snd_pcm_readn( handle[1], bufs, stream_.bufferSize ); + } + + if ( result < (int) stream_.bufferSize ) { + // Either an error or overrun occured. + if ( result == -EPIPE ) { + snd_pcm_state_t state = snd_pcm_state( handle[1] ); + if ( state == SND_PCM_STATE_XRUN ) { + apiInfo->xrun[1] = true; + result = snd_pcm_prepare( handle[1] ); + if ( result < 0 ) { + errorStream_ << "RtApiAlsa::callbackEvent: error preparing device after overrun, " << snd_strerror( result ) << "."; + errorText_ = errorStream_.str(); + } + } + else { + errorStream_ << "RtApiAlsa::callbackEvent: error, current state is " << snd_pcm_state_name( state ) << ", " << snd_strerror( result ) << "."; + errorText_ = errorStream_.str(); + } + } + else { + errorStream_ << "RtApiAlsa::callbackEvent: audio read error, " << snd_strerror( result ) << "."; + errorText_ = errorStream_.str(); + } + error( RtAudioError::WARNING ); + goto tryOutput; + } + + // Do byte swapping if necessary. + if ( stream_.doByteSwap[1] ) + byteSwapBuffer( buffer, stream_.bufferSize * channels, format ); + + // Do buffer conversion if necessary. + if ( stream_.doConvertBuffer[1] ) + convertBuffer( stream_.userBuffer[1], stream_.deviceBuffer, stream_.convertInfo[1] ); + + // Check stream latency + result = snd_pcm_delay( handle[1], &frames ); + if ( result == 0 && frames > 0 ) stream_.latency[1] = frames; + } + + tryOutput: + + if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) { + + // Setup parameters and do buffer conversion if necessary. 
+ if ( stream_.doConvertBuffer[0] ) { + buffer = stream_.deviceBuffer; + convertBuffer( buffer, stream_.userBuffer[0], stream_.convertInfo[0] ); + channels = stream_.nDeviceChannels[0]; + format = stream_.deviceFormat[0]; + } + else { + buffer = stream_.userBuffer[0]; + channels = stream_.nUserChannels[0]; + format = stream_.userFormat; + } + + // Do byte swapping if necessary. + if ( stream_.doByteSwap[0] ) + byteSwapBuffer(buffer, stream_.bufferSize * channels, format); + + // Write samples to device in interleaved/non-interleaved format. + if ( stream_.deviceInterleaved[0] ) + result = snd_pcm_writei( handle[0], buffer, stream_.bufferSize ); + else { + void *bufs[channels]; + size_t offset = stream_.bufferSize * formatBytes( format ); + for ( int i=0; i<channels; i++ ) + bufs[i] = (void *) (buffer + (i * offset)); + result = snd_pcm_writen( handle[0], bufs, stream_.bufferSize ); + } + + if ( result < (int) stream_.bufferSize ) { + // Either an error or underrun occured. + if ( result == -EPIPE ) { + snd_pcm_state_t state = snd_pcm_state( handle[0] ); + if ( state == SND_PCM_STATE_XRUN ) { + apiInfo->xrun[0] = true; + result = snd_pcm_prepare( handle[0] ); + if ( result < 0 ) { + errorStream_ << "RtApiAlsa::callbackEvent: error preparing device after underrun, " << snd_strerror( result ) << "."; + errorText_ = errorStream_.str(); + } + else + errorText_ = "RtApiAlsa::callbackEvent: audio write error, underrun."; + } + else { + errorStream_ << "RtApiAlsa::callbackEvent: error, current state is " << snd_pcm_state_name( state ) << ", " << snd_strerror( result ) << "."; + errorText_ = errorStream_.str(); + } + } + else { + errorStream_ << "RtApiAlsa::callbackEvent: audio write error, " << snd_strerror( result ) << "."; + errorText_ = errorStream_.str(); + } + error( RtAudioError::WARNING ); + goto unlock; + } + + // Check stream latency + result = snd_pcm_delay( handle[0], &frames ); + if ( result == 0 && frames > 0 ) stream_.latency[0] = frames; + } + + unlock: + 
MUTEX_UNLOCK( &stream_.mutex ); + + RtApi::tickStreamTime(); + if ( doStopStream == 1 ) this->stopStream(); +} + +static void *alsaCallbackHandler( void *ptr ) +{ + CallbackInfo *info = (CallbackInfo *) ptr; + RtApiAlsa *object = (RtApiAlsa *) info->object; + bool *isRunning = &info->isRunning; + +#ifdef SCHED_RR // Undefined with some OSes (eg: NetBSD 1.6.x with GNU Pthread) + if ( &info->doRealtime ) { + pthread_t tID = pthread_self(); // ID of this thread + sched_param prio = { info->priority }; // scheduling priority of thread + pthread_setschedparam( tID, SCHED_RR, &prio ); + } +#endif + + while ( *isRunning == true ) { + pthread_testcancel(); + object->callbackEvent(); + } + + pthread_exit( NULL ); +} + +//******************** End of __LINUX_ALSA__ *********************// +#endif + +#if defined(__LINUX_PULSE__) + +// Code written by Peter Meerwald, pmeerw@pmeerw.net +// and Tristan Matthews. + +#include <pulse/error.h> +#include <pulse/simple.h> +#include <cstdio> + +static const unsigned int SUPPORTED_SAMPLERATES[] = { 8000, 16000, 22050, 32000, + 44100, 48000, 96000, 0}; + +struct rtaudio_pa_format_mapping_t { + RtAudioFormat rtaudio_format; + pa_sample_format_t pa_format; +}; + +static const rtaudio_pa_format_mapping_t supported_sampleformats[] = { + {RTAUDIO_SINT16, PA_SAMPLE_S16LE}, + {RTAUDIO_SINT32, PA_SAMPLE_S32LE}, + {RTAUDIO_FLOAT32, PA_SAMPLE_FLOAT32LE}, + {0, PA_SAMPLE_INVALID}}; + +struct PulseAudioHandle { + pa_simple *s_play; + pa_simple *s_rec; + pthread_t thread; + pthread_cond_t runnable_cv; + bool runnable; + PulseAudioHandle() : s_play(0), s_rec(0), runnable(false) { } +}; + +RtApiPulse::~RtApiPulse() +{ + if ( stream_.state != STREAM_CLOSED ) + closeStream(); +} + +unsigned int RtApiPulse::getDeviceCount( void ) +{ + return 1; +} + +RtAudio::DeviceInfo RtApiPulse::getDeviceInfo( unsigned int /*device*/ ) +{ + RtAudio::DeviceInfo info; + info.probed = true; + info.name = "PulseAudio"; + info.outputChannels = 2; + info.inputChannels = 2; + 
info.duplexChannels = 2; + info.isDefaultOutput = true; + info.isDefaultInput = true; + + for ( const unsigned int *sr = SUPPORTED_SAMPLERATES; *sr; ++sr ) + info.sampleRates.push_back( *sr ); + + info.preferredSampleRate = 48000; + info.nativeFormats = RTAUDIO_SINT16 | RTAUDIO_SINT32 | RTAUDIO_FLOAT32; + + return info; +} + +static void *pulseaudio_callback( void * user ) +{ + CallbackInfo *cbi = static_cast<CallbackInfo *>( user ); + RtApiPulse *context = static_cast<RtApiPulse *>( cbi->object ); + volatile bool *isRunning = &cbi->isRunning; + + while ( *isRunning ) { + pthread_testcancel(); + context->callbackEvent(); + } + + pthread_exit( NULL ); +} + +void RtApiPulse::closeStream( void ) +{ + PulseAudioHandle *pah = static_cast<PulseAudioHandle *>( stream_.apiHandle ); + + stream_.callbackInfo.isRunning = false; + if ( pah ) { + MUTEX_LOCK( &stream_.mutex ); + if ( stream_.state == STREAM_STOPPED ) { + pah->runnable = true; + pthread_cond_signal( &pah->runnable_cv ); + } + MUTEX_UNLOCK( &stream_.mutex ); + + pthread_join( pah->thread, 0 ); + if ( pah->s_play ) { + pa_simple_flush( pah->s_play, NULL ); + pa_simple_free( pah->s_play ); + } + if ( pah->s_rec ) + pa_simple_free( pah->s_rec ); + + pthread_cond_destroy( &pah->runnable_cv ); + delete pah; + stream_.apiHandle = 0; + } + + if ( stream_.userBuffer[0] ) { + free( stream_.userBuffer[0] ); + stream_.userBuffer[0] = 0; + } + if ( stream_.userBuffer[1] ) { + free( stream_.userBuffer[1] ); + stream_.userBuffer[1] = 0; + } + + stream_.state = STREAM_CLOSED; + stream_.mode = UNINITIALIZED; +} + +void RtApiPulse::callbackEvent( void ) +{ + PulseAudioHandle *pah = static_cast<PulseAudioHandle *>( stream_.apiHandle ); + + if ( stream_.state == STREAM_STOPPED ) { + MUTEX_LOCK( &stream_.mutex ); + while ( !pah->runnable ) + pthread_cond_wait( &pah->runnable_cv, &stream_.mutex ); + + if ( stream_.state != STREAM_RUNNING ) { + MUTEX_UNLOCK( &stream_.mutex ); + return; + } + MUTEX_UNLOCK( &stream_.mutex ); + } + + if ( 
stream_.state == STREAM_CLOSED ) { + errorText_ = "RtApiPulse::callbackEvent(): the stream is closed ... " + "this shouldn't happen!"; + error( RtAudioError::WARNING ); + return; + } + + RtAudioCallback callback = (RtAudioCallback) stream_.callbackInfo.callback; + double streamTime = getStreamTime(); + RtAudioStreamStatus status = 0; + int doStopStream = callback( stream_.userBuffer[OUTPUT], stream_.userBuffer[INPUT], + stream_.bufferSize, streamTime, status, + stream_.callbackInfo.userData ); + + if ( doStopStream == 2 ) { + abortStream(); + return; + } + + MUTEX_LOCK( &stream_.mutex ); + void *pulse_in = stream_.doConvertBuffer[INPUT] ? stream_.deviceBuffer : stream_.userBuffer[INPUT]; + void *pulse_out = stream_.doConvertBuffer[OUTPUT] ? stream_.deviceBuffer : stream_.userBuffer[OUTPUT]; + + if ( stream_.state != STREAM_RUNNING ) + goto unlock; + + int pa_error; + size_t bytes; + if (stream_.mode == OUTPUT || stream_.mode == DUPLEX ) { + if ( stream_.doConvertBuffer[OUTPUT] ) { + convertBuffer( stream_.deviceBuffer, + stream_.userBuffer[OUTPUT], + stream_.convertInfo[OUTPUT] ); + bytes = stream_.nDeviceChannels[OUTPUT] * stream_.bufferSize * + formatBytes( stream_.deviceFormat[OUTPUT] ); + } else + bytes = stream_.nUserChannels[OUTPUT] * stream_.bufferSize * + formatBytes( stream_.userFormat ); + + if ( pa_simple_write( pah->s_play, pulse_out, bytes, &pa_error ) < 0 ) { + errorStream_ << "RtApiPulse::callbackEvent: audio write error, " << + pa_strerror( pa_error ) << "."; + errorText_ = errorStream_.str(); + error( RtAudioError::WARNING ); + } + } + + if ( stream_.mode == INPUT || stream_.mode == DUPLEX) { + if ( stream_.doConvertBuffer[INPUT] ) + bytes = stream_.nDeviceChannels[INPUT] * stream_.bufferSize * + formatBytes( stream_.deviceFormat[INPUT] ); + else + bytes = stream_.nUserChannels[INPUT] * stream_.bufferSize * + formatBytes( stream_.userFormat ); + + if ( pa_simple_read( pah->s_rec, pulse_in, bytes, &pa_error ) < 0 ) { + errorStream_ << 
"RtApiPulse::callbackEvent: audio read error, " << + pa_strerror( pa_error ) << "."; + errorText_ = errorStream_.str(); + error( RtAudioError::WARNING ); + } + if ( stream_.doConvertBuffer[INPUT] ) { + convertBuffer( stream_.userBuffer[INPUT], + stream_.deviceBuffer, + stream_.convertInfo[INPUT] ); + } + } + + unlock: + MUTEX_UNLOCK( &stream_.mutex ); + RtApi::tickStreamTime(); + + if ( doStopStream == 1 ) + stopStream(); +} + +void RtApiPulse::startStream( void ) +{ + PulseAudioHandle *pah = static_cast<PulseAudioHandle *>( stream_.apiHandle ); + + if ( stream_.state == STREAM_CLOSED ) { + errorText_ = "RtApiPulse::startStream(): the stream is not open!"; + error( RtAudioError::INVALID_USE ); + return; + } + if ( stream_.state == STREAM_RUNNING ) { + errorText_ = "RtApiPulse::startStream(): the stream is already running!"; + error( RtAudioError::WARNING ); + return; + } + + MUTEX_LOCK( &stream_.mutex ); + + stream_.state = STREAM_RUNNING; + + pah->runnable = true; + pthread_cond_signal( &pah->runnable_cv ); + MUTEX_UNLOCK( &stream_.mutex ); +} + +void RtApiPulse::stopStream( void ) +{ + PulseAudioHandle *pah = static_cast<PulseAudioHandle *>( stream_.apiHandle ); + + if ( stream_.state == STREAM_CLOSED ) { + errorText_ = "RtApiPulse::stopStream(): the stream is not open!"; + error( RtAudioError::INVALID_USE ); + return; + } + if ( stream_.state == STREAM_STOPPED ) { + errorText_ = "RtApiPulse::stopStream(): the stream is already stopped!"; + error( RtAudioError::WARNING ); + return; + } + + stream_.state = STREAM_STOPPED; + MUTEX_LOCK( &stream_.mutex ); + + if ( pah && pah->s_play ) { + int pa_error; + if ( pa_simple_drain( pah->s_play, &pa_error ) < 0 ) { + errorStream_ << "RtApiPulse::stopStream: error draining output device, " << + pa_strerror( pa_error ) << "."; + errorText_ = errorStream_.str(); + MUTEX_UNLOCK( &stream_.mutex ); + error( RtAudioError::SYSTEM_ERROR ); + return; + } + } + + stream_.state = STREAM_STOPPED; + MUTEX_UNLOCK( &stream_.mutex ); +} + 
+void RtApiPulse::abortStream( void ) +{ + PulseAudioHandle *pah = static_cast<PulseAudioHandle*>( stream_.apiHandle ); + + if ( stream_.state == STREAM_CLOSED ) { + errorText_ = "RtApiPulse::abortStream(): the stream is not open!"; + error( RtAudioError::INVALID_USE ); + return; + } + if ( stream_.state == STREAM_STOPPED ) { + errorText_ = "RtApiPulse::abortStream(): the stream is already stopped!"; + error( RtAudioError::WARNING ); + return; + } + + stream_.state = STREAM_STOPPED; + MUTEX_LOCK( &stream_.mutex ); + + if ( pah && pah->s_play ) { + int pa_error; + if ( pa_simple_flush( pah->s_play, &pa_error ) < 0 ) { + errorStream_ << "RtApiPulse::abortStream: error flushing output device, " << + pa_strerror( pa_error ) << "."; + errorText_ = errorStream_.str(); + MUTEX_UNLOCK( &stream_.mutex ); + error( RtAudioError::SYSTEM_ERROR ); + return; + } + } + + stream_.state = STREAM_STOPPED; + MUTEX_UNLOCK( &stream_.mutex ); +} + +bool RtApiPulse::probeDeviceOpen( unsigned int device, StreamMode mode, + unsigned int channels, unsigned int firstChannel, + unsigned int sampleRate, RtAudioFormat format, + unsigned int *bufferSize, RtAudio::StreamOptions *options ) +{ + PulseAudioHandle *pah = 0; + unsigned long bufferBytes = 0; + pa_sample_spec ss; + + if ( device != 0 ) return false; + if ( mode != INPUT && mode != OUTPUT ) return false; + if ( channels != 1 && channels != 2 ) { + errorText_ = "RtApiPulse::probeDeviceOpen: unsupported number of channels."; + return false; + } + ss.channels = channels; + + if ( firstChannel != 0 ) return false; + + bool sr_found = false; + for ( const unsigned int *sr = SUPPORTED_SAMPLERATES; *sr; ++sr ) { + if ( sampleRate == *sr ) { + sr_found = true; + stream_.sampleRate = sampleRate; + ss.rate = sampleRate; + break; + } + } + if ( !sr_found ) { + errorText_ = "RtApiPulse::probeDeviceOpen: unsupported sample rate."; + return false; + } + + bool sf_found = 0; + for ( const rtaudio_pa_format_mapping_t *sf = supported_sampleformats; + 
sf->rtaudio_format && sf->pa_format != PA_SAMPLE_INVALID; ++sf ) { + if ( format == sf->rtaudio_format ) { + sf_found = true; + stream_.userFormat = sf->rtaudio_format; + stream_.deviceFormat[mode] = stream_.userFormat; + ss.format = sf->pa_format; + break; + } + } + if ( !sf_found ) { // Use internal data format conversion. + stream_.userFormat = format; + stream_.deviceFormat[mode] = RTAUDIO_FLOAT32; + ss.format = PA_SAMPLE_FLOAT32LE; + } + + // Set other stream parameters. + if ( options && options->flags & RTAUDIO_NONINTERLEAVED ) stream_.userInterleaved = false; + else stream_.userInterleaved = true; + stream_.deviceInterleaved[mode] = true; + stream_.nBuffers = 1; + stream_.doByteSwap[mode] = false; + stream_.nUserChannels[mode] = channels; + stream_.nDeviceChannels[mode] = channels + firstChannel; + stream_.channelOffset[mode] = 0; + std::string streamName = "RtAudio"; + + // Set flags for buffer conversion. + stream_.doConvertBuffer[mode] = false; + if ( stream_.userFormat != stream_.deviceFormat[mode] ) + stream_.doConvertBuffer[mode] = true; + if ( stream_.nUserChannels[mode] < stream_.nDeviceChannels[mode] ) + stream_.doConvertBuffer[mode] = true; + + // Allocate necessary internal buffers. 
+ bufferBytes = stream_.nUserChannels[mode] * *bufferSize * formatBytes( stream_.userFormat ); + stream_.userBuffer[mode] = (char *) calloc( bufferBytes, 1 ); + if ( stream_.userBuffer[mode] == NULL ) { + errorText_ = "RtApiPulse::probeDeviceOpen: error allocating user buffer memory."; + goto error; + } + stream_.bufferSize = *bufferSize; + + if ( stream_.doConvertBuffer[mode] ) { + + bool makeBuffer = true; + bufferBytes = stream_.nDeviceChannels[mode] * formatBytes( stream_.deviceFormat[mode] ); + if ( mode == INPUT ) { + if ( stream_.mode == OUTPUT && stream_.deviceBuffer ) { + unsigned long bytesOut = stream_.nDeviceChannels[0] * formatBytes( stream_.deviceFormat[0] ); + if ( bufferBytes <= bytesOut ) makeBuffer = false; + } + } + + if ( makeBuffer ) { + bufferBytes *= *bufferSize; + if ( stream_.deviceBuffer ) free( stream_.deviceBuffer ); + stream_.deviceBuffer = (char *) calloc( bufferBytes, 1 ); + if ( stream_.deviceBuffer == NULL ) { + errorText_ = "RtApiPulse::probeDeviceOpen: error allocating device buffer memory."; + goto error; + } + } + } + + stream_.device[mode] = device; + + // Setup the buffer conversion information structure. 
+ if ( stream_.doConvertBuffer[mode] ) setConvertInfo( mode, firstChannel ); + + if ( !stream_.apiHandle ) { + PulseAudioHandle *pah = new PulseAudioHandle; + if ( !pah ) { + errorText_ = "RtApiPulse::probeDeviceOpen: error allocating memory for handle."; + goto error; + } + + stream_.apiHandle = pah; + if ( pthread_cond_init( &pah->runnable_cv, NULL ) != 0 ) { + errorText_ = "RtApiPulse::probeDeviceOpen: error creating condition variable."; + goto error; + } + } + pah = static_cast<PulseAudioHandle *>( stream_.apiHandle ); + + int error; + if ( options && !options->streamName.empty() ) streamName = options->streamName; + switch ( mode ) { + case INPUT: + pa_buffer_attr buffer_attr; + buffer_attr.fragsize = bufferBytes; + buffer_attr.maxlength = -1; + + pah->s_rec = pa_simple_new( NULL, streamName.c_str(), PA_STREAM_RECORD, NULL, "Record", &ss, NULL, &buffer_attr, &error ); + if ( !pah->s_rec ) { + errorText_ = "RtApiPulse::probeDeviceOpen: error connecting input to PulseAudio server."; + goto error; + } + break; + case OUTPUT: + pah->s_play = pa_simple_new( NULL, "RtAudio", PA_STREAM_PLAYBACK, NULL, "Playback", &ss, NULL, NULL, &error ); + if ( !pah->s_play ) { + errorText_ = "RtApiPulse::probeDeviceOpen: error connecting output to PulseAudio server."; + goto error; + } + break; + default: + goto error; + } + + if ( stream_.mode == UNINITIALIZED ) + stream_.mode = mode; + else if ( stream_.mode == mode ) + goto error; + else + stream_.mode = DUPLEX; + + if ( !stream_.callbackInfo.isRunning ) { + stream_.callbackInfo.object = this; + stream_.callbackInfo.isRunning = true; + if ( pthread_create( &pah->thread, NULL, pulseaudio_callback, (void *)&stream_.callbackInfo) != 0 ) { + errorText_ = "RtApiPulse::probeDeviceOpen: error creating thread."; + goto error; + } + } + + stream_.state = STREAM_STOPPED; + return true; + + error: + if ( pah && stream_.callbackInfo.isRunning ) { + pthread_cond_destroy( &pah->runnable_cv ); + delete pah; + stream_.apiHandle = 0; + } + + 
for ( int i=0; i<2; i++ ) { + if ( stream_.userBuffer[i] ) { + free( stream_.userBuffer[i] ); + stream_.userBuffer[i] = 0; + } + } + + if ( stream_.deviceBuffer ) { + free( stream_.deviceBuffer ); + stream_.deviceBuffer = 0; + } + + return FAILURE; +} + +//******************** End of __LINUX_PULSE__ *********************// +#endif + +#if defined(__LINUX_OSS__) + +#include <unistd.h> +#include <sys/ioctl.h> +#include <unistd.h> +#include <fcntl.h> +#include <sys/soundcard.h> +#include <errno.h> +#include <math.h> + +static void *ossCallbackHandler(void * ptr); + +// A structure to hold various information related to the OSS API +// implementation. +struct OssHandle { + int id[2]; // device ids + bool xrun[2]; + bool triggered; + pthread_cond_t runnable; + + OssHandle() + :triggered(false) { id[0] = 0; id[1] = 0; xrun[0] = false; xrun[1] = false; } +}; + +RtApiOss :: RtApiOss() +{ + // Nothing to do here. +} + +RtApiOss :: ~RtApiOss() +{ + if ( stream_.state != STREAM_CLOSED ) closeStream(); +} + +unsigned int RtApiOss :: getDeviceCount( void ) +{ + int mixerfd = open( "/dev/mixer", O_RDWR, 0 ); + if ( mixerfd == -1 ) { + errorText_ = "RtApiOss::getDeviceCount: error opening '/dev/mixer'."; + error( RtAudioError::WARNING ); + return 0; + } + + oss_sysinfo sysinfo; + if ( ioctl( mixerfd, SNDCTL_SYSINFO, &sysinfo ) == -1 ) { + close( mixerfd ); + errorText_ = "RtApiOss::getDeviceCount: error getting sysinfo, OSS version >= 4.0 is required."; + error( RtAudioError::WARNING ); + return 0; + } + + close( mixerfd ); + return sysinfo.numaudios; +} + +RtAudio::DeviceInfo RtApiOss :: getDeviceInfo( unsigned int device ) +{ + RtAudio::DeviceInfo info; + info.probed = false; + + int mixerfd = open( "/dev/mixer", O_RDWR, 0 ); + if ( mixerfd == -1 ) { + errorText_ = "RtApiOss::getDeviceInfo: error opening '/dev/mixer'."; + error( RtAudioError::WARNING ); + return info; + } + + oss_sysinfo sysinfo; + int result = ioctl( mixerfd, SNDCTL_SYSINFO, &sysinfo ); + if ( result == -1 ) { 
+ close( mixerfd ); + errorText_ = "RtApiOss::getDeviceInfo: error getting sysinfo, OSS version >= 4.0 is required."; + error( RtAudioError::WARNING ); + return info; + } + + unsigned nDevices = sysinfo.numaudios; + if ( nDevices == 0 ) { + close( mixerfd ); + errorText_ = "RtApiOss::getDeviceInfo: no devices found!"; + error( RtAudioError::INVALID_USE ); + return info; + } + + if ( device >= nDevices ) { + close( mixerfd ); + errorText_ = "RtApiOss::getDeviceInfo: device ID is invalid!"; + error( RtAudioError::INVALID_USE ); + return info; + } + + oss_audioinfo ainfo; + ainfo.dev = device; + result = ioctl( mixerfd, SNDCTL_AUDIOINFO, &ainfo ); + close( mixerfd ); + if ( result == -1 ) { + errorStream_ << "RtApiOss::getDeviceInfo: error getting device (" << ainfo.name << ") info."; + errorText_ = errorStream_.str(); + error( RtAudioError::WARNING ); + return info; + } + + // Probe channels + if ( ainfo.caps & PCM_CAP_OUTPUT ) info.outputChannels = ainfo.max_channels; + if ( ainfo.caps & PCM_CAP_INPUT ) info.inputChannels = ainfo.max_channels; + if ( ainfo.caps & PCM_CAP_DUPLEX ) { + if ( info.outputChannels > 0 && info.inputChannels > 0 && ainfo.caps & PCM_CAP_DUPLEX ) + info.duplexChannels = (info.outputChannels > info.inputChannels) ? info.inputChannels : info.outputChannels; + } + + // Probe data formats ... 
do for input + unsigned long mask = ainfo.iformats; + if ( mask & AFMT_S16_LE || mask & AFMT_S16_BE ) + info.nativeFormats |= RTAUDIO_SINT16; + if ( mask & AFMT_S8 ) + info.nativeFormats |= RTAUDIO_SINT8; + if ( mask & AFMT_S32_LE || mask & AFMT_S32_BE ) + info.nativeFormats |= RTAUDIO_SINT32; + if ( mask & AFMT_FLOAT ) + info.nativeFormats |= RTAUDIO_FLOAT32; + if ( mask & AFMT_S24_LE || mask & AFMT_S24_BE ) + info.nativeFormats |= RTAUDIO_SINT24; + + // Check that we have at least one supported format + if ( info.nativeFormats == 0 ) { + errorStream_ << "RtApiOss::getDeviceInfo: device (" << ainfo.name << ") data format not supported by RtAudio."; + errorText_ = errorStream_.str(); + error( RtAudioError::WARNING ); + return info; + } + + // Probe the supported sample rates. + info.sampleRates.clear(); + if ( ainfo.nrates ) { + for ( unsigned int i=0; i<ainfo.nrates; i++ ) { + for ( unsigned int k=0; k<MAX_SAMPLE_RATES; k++ ) { + if ( ainfo.rates[i] == SAMPLE_RATES[k] ) { + info.sampleRates.push_back( SAMPLE_RATES[k] ); + + if ( !info.preferredSampleRate || ( SAMPLE_RATES[k] <= 48000 && SAMPLE_RATES[k] > info.preferredSampleRate ) ) + info.preferredSampleRate = SAMPLE_RATES[k]; + + break; + } + } + } + } + else { + // Check min and max rate values; + for ( unsigned int k=0; k<MAX_SAMPLE_RATES; k++ ) { + if ( ainfo.min_rate <= (int) SAMPLE_RATES[k] && ainfo.max_rate >= (int) SAMPLE_RATES[k] ) { + info.sampleRates.push_back( SAMPLE_RATES[k] ); + + if ( !info.preferredSampleRate || ( SAMPLE_RATES[k] <= 48000 && SAMPLE_RATES[k] > info.preferredSampleRate ) ) + info.preferredSampleRate = SAMPLE_RATES[k]; + } + } + } + + if ( info.sampleRates.size() == 0 ) { + errorStream_ << "RtApiOss::getDeviceInfo: no supported sample rates found for device (" << ainfo.name << ")."; + errorText_ = errorStream_.str(); + error( RtAudioError::WARNING ); + } + else { + info.probed = true; + info.name = ainfo.name; + } + + return info; +} + + +bool RtApiOss :: probeDeviceOpen( unsigned 
int device, StreamMode mode, unsigned int channels, + unsigned int firstChannel, unsigned int sampleRate, + RtAudioFormat format, unsigned int *bufferSize, + RtAudio::StreamOptions *options ) +{ + int mixerfd = open( "/dev/mixer", O_RDWR, 0 ); + if ( mixerfd == -1 ) { + errorText_ = "RtApiOss::probeDeviceOpen: error opening '/dev/mixer'."; + return FAILURE; + } + + oss_sysinfo sysinfo; + int result = ioctl( mixerfd, SNDCTL_SYSINFO, &sysinfo ); + if ( result == -1 ) { + close( mixerfd ); + errorText_ = "RtApiOss::probeDeviceOpen: error getting sysinfo, OSS version >= 4.0 is required."; + return FAILURE; + } + + unsigned nDevices = sysinfo.numaudios; + if ( nDevices == 0 ) { + // This should not happen because a check is made before this function is called. + close( mixerfd ); + errorText_ = "RtApiOss::probeDeviceOpen: no devices found!"; + return FAILURE; + } + + if ( device >= nDevices ) { + // This should not happen because a check is made before this function is called. + close( mixerfd ); + errorText_ = "RtApiOss::probeDeviceOpen: device ID is invalid!"; + return FAILURE; + } + + oss_audioinfo ainfo; + ainfo.dev = device; + result = ioctl( mixerfd, SNDCTL_AUDIOINFO, &ainfo ); + close( mixerfd ); + if ( result == -1 ) { + errorStream_ << "RtApiOss::getDeviceInfo: error getting device (" << ainfo.name << ") info."; + errorText_ = errorStream_.str(); + return FAILURE; + } + + // Check if device supports input or output + if ( ( mode == OUTPUT && !( ainfo.caps & PCM_CAP_OUTPUT ) ) || + ( mode == INPUT && !( ainfo.caps & PCM_CAP_INPUT ) ) ) { + if ( mode == OUTPUT ) + errorStream_ << "RtApiOss::probeDeviceOpen: device (" << ainfo.name << ") does not support output."; + else + errorStream_ << "RtApiOss::probeDeviceOpen: device (" << ainfo.name << ") does not support input."; + errorText_ = errorStream_.str(); + return FAILURE; + } + + int flags = 0; + OssHandle *handle = (OssHandle *) stream_.apiHandle; + if ( mode == OUTPUT ) + flags |= O_WRONLY; + else { // mode == 
INPUT + if (stream_.mode == OUTPUT && stream_.device[0] == device) { + // We just set the same device for playback ... close and reopen for duplex (OSS only). + close( handle->id[0] ); + handle->id[0] = 0; + if ( !( ainfo.caps & PCM_CAP_DUPLEX ) ) { + errorStream_ << "RtApiOss::probeDeviceOpen: device (" << ainfo.name << ") does not support duplex mode."; + errorText_ = errorStream_.str(); + return FAILURE; + } + // Check that the number previously set channels is the same. + if ( stream_.nUserChannels[0] != channels ) { + errorStream_ << "RtApiOss::probeDeviceOpen: input/output channels must be equal for OSS duplex device (" << ainfo.name << ")."; + errorText_ = errorStream_.str(); + return FAILURE; + } + flags |= O_RDWR; + } + else + flags |= O_RDONLY; + } + + // Set exclusive access if specified. + if ( options && options->flags & RTAUDIO_HOG_DEVICE ) flags |= O_EXCL; + + // Try to open the device. + int fd; + fd = open( ainfo.devnode, flags, 0 ); + if ( fd == -1 ) { + if ( errno == EBUSY ) + errorStream_ << "RtApiOss::probeDeviceOpen: device (" << ainfo.name << ") is busy."; + else + errorStream_ << "RtApiOss::probeDeviceOpen: error opening device (" << ainfo.name << ")."; + errorText_ = errorStream_.str(); + return FAILURE; + } + + // For duplex operation, specifically set this mode (this doesn't seem to work). + /* + if ( flags | O_RDWR ) { + result = ioctl( fd, SNDCTL_DSP_SETDUPLEX, NULL ); + if ( result == -1) { + errorStream_ << "RtApiOss::probeDeviceOpen: error setting duplex mode for device (" << ainfo.name << ")."; + errorText_ = errorStream_.str(); + return FAILURE; + } + } + */ + + // Check the device channel support. 
+ stream_.nUserChannels[mode] = channels; + if ( ainfo.max_channels < (int)(channels + firstChannel) ) { + close( fd ); + errorStream_ << "RtApiOss::probeDeviceOpen: the device (" << ainfo.name << ") does not support requested channel parameters."; + errorText_ = errorStream_.str(); + return FAILURE; + } + + // Set the number of channels. + int deviceChannels = channels + firstChannel; + result = ioctl( fd, SNDCTL_DSP_CHANNELS, &deviceChannels ); + if ( result == -1 || deviceChannels < (int)(channels + firstChannel) ) { + close( fd ); + errorStream_ << "RtApiOss::probeDeviceOpen: error setting channel parameters on device (" << ainfo.name << ")."; + errorText_ = errorStream_.str(); + return FAILURE; + } + stream_.nDeviceChannels[mode] = deviceChannels; + + // Get the data format mask + int mask; + result = ioctl( fd, SNDCTL_DSP_GETFMTS, &mask ); + if ( result == -1 ) { + close( fd ); + errorStream_ << "RtApiOss::probeDeviceOpen: error getting device (" << ainfo.name << ") data formats."; + errorText_ = errorStream_.str(); + return FAILURE; + } + + // Determine how to set the device format. 
+ stream_.userFormat = format; + int deviceFormat = -1; + stream_.doByteSwap[mode] = false; + if ( format == RTAUDIO_SINT8 ) { + if ( mask & AFMT_S8 ) { + deviceFormat = AFMT_S8; + stream_.deviceFormat[mode] = RTAUDIO_SINT8; + } + } + else if ( format == RTAUDIO_SINT16 ) { + if ( mask & AFMT_S16_NE ) { + deviceFormat = AFMT_S16_NE; + stream_.deviceFormat[mode] = RTAUDIO_SINT16; + } + else if ( mask & AFMT_S16_OE ) { + deviceFormat = AFMT_S16_OE; + stream_.deviceFormat[mode] = RTAUDIO_SINT16; + stream_.doByteSwap[mode] = true; + } + } + else if ( format == RTAUDIO_SINT24 ) { + if ( mask & AFMT_S24_NE ) { + deviceFormat = AFMT_S24_NE; + stream_.deviceFormat[mode] = RTAUDIO_SINT24; + } + else if ( mask & AFMT_S24_OE ) { + deviceFormat = AFMT_S24_OE; + stream_.deviceFormat[mode] = RTAUDIO_SINT24; + stream_.doByteSwap[mode] = true; + } + } + else if ( format == RTAUDIO_SINT32 ) { + if ( mask & AFMT_S32_NE ) { + deviceFormat = AFMT_S32_NE; + stream_.deviceFormat[mode] = RTAUDIO_SINT32; + } + else if ( mask & AFMT_S32_OE ) { + deviceFormat = AFMT_S32_OE; + stream_.deviceFormat[mode] = RTAUDIO_SINT32; + stream_.doByteSwap[mode] = true; + } + } + + if ( deviceFormat == -1 ) { + // The user requested format is not natively supported by the device. 
+ if ( mask & AFMT_S16_NE ) { + deviceFormat = AFMT_S16_NE; + stream_.deviceFormat[mode] = RTAUDIO_SINT16; + } + else if ( mask & AFMT_S32_NE ) { + deviceFormat = AFMT_S32_NE; + stream_.deviceFormat[mode] = RTAUDIO_SINT32; + } + else if ( mask & AFMT_S24_NE ) { + deviceFormat = AFMT_S24_NE; + stream_.deviceFormat[mode] = RTAUDIO_SINT24; + } + else if ( mask & AFMT_S16_OE ) { + deviceFormat = AFMT_S16_OE; + stream_.deviceFormat[mode] = RTAUDIO_SINT16; + stream_.doByteSwap[mode] = true; + } + else if ( mask & AFMT_S32_OE ) { + deviceFormat = AFMT_S32_OE; + stream_.deviceFormat[mode] = RTAUDIO_SINT32; + stream_.doByteSwap[mode] = true; + } + else if ( mask & AFMT_S24_OE ) { + deviceFormat = AFMT_S24_OE; + stream_.deviceFormat[mode] = RTAUDIO_SINT24; + stream_.doByteSwap[mode] = true; + } + else if ( mask & AFMT_S8) { + deviceFormat = AFMT_S8; + stream_.deviceFormat[mode] = RTAUDIO_SINT8; + } + } + + if ( stream_.deviceFormat[mode] == 0 ) { + // This really shouldn't happen ... + close( fd ); + errorStream_ << "RtApiOss::probeDeviceOpen: device (" << ainfo.name << ") data format not supported by RtAudio."; + errorText_ = errorStream_.str(); + return FAILURE; + } + + // Set the data format. + int temp = deviceFormat; + result = ioctl( fd, SNDCTL_DSP_SETFMT, &deviceFormat ); + if ( result == -1 || deviceFormat != temp ) { + close( fd ); + errorStream_ << "RtApiOss::probeDeviceOpen: error setting data format on device (" << ainfo.name << ")."; + errorText_ = errorStream_.str(); + return FAILURE; + } + + // Attempt to set the buffer size. According to OSS, the minimum + // number of buffers is two. The supposed minimum buffer size is 16 + // bytes, so that will be our lower bound. The argument to this + // call is in the form 0xMMMMSSSS (hex), where the buffer size (in + // bytes) is given as 2^SSSS and the number of buffers as 2^MMMM. + // We'll check the actual value used near the end of the setup + // procedure. 
+ int ossBufferBytes = *bufferSize * formatBytes( stream_.deviceFormat[mode] ) * deviceChannels; + if ( ossBufferBytes < 16 ) ossBufferBytes = 16; + int buffers = 0; + if ( options ) buffers = options->numberOfBuffers; + if ( options && options->flags & RTAUDIO_MINIMIZE_LATENCY ) buffers = 2; + if ( buffers < 2 ) buffers = 3; + temp = ((int) buffers << 16) + (int)( log10( (double)ossBufferBytes ) / log10( 2.0 ) ); + result = ioctl( fd, SNDCTL_DSP_SETFRAGMENT, &temp ); + if ( result == -1 ) { + close( fd ); + errorStream_ << "RtApiOss::probeDeviceOpen: error setting buffer size on device (" << ainfo.name << ")."; + errorText_ = errorStream_.str(); + return FAILURE; + } + stream_.nBuffers = buffers; + + // Save buffer size (in sample frames). + *bufferSize = ossBufferBytes / ( formatBytes(stream_.deviceFormat[mode]) * deviceChannels ); + stream_.bufferSize = *bufferSize; + + // Set the sample rate. + int srate = sampleRate; + result = ioctl( fd, SNDCTL_DSP_SPEED, &srate ); + if ( result == -1 ) { + close( fd ); + errorStream_ << "RtApiOss::probeDeviceOpen: error setting sample rate (" << sampleRate << ") on device (" << ainfo.name << ")."; + errorText_ = errorStream_.str(); + return FAILURE; + } + + // Verify the sample rate setup worked. + if ( abs( srate - sampleRate ) > 100 ) { + close( fd ); + errorStream_ << "RtApiOss::probeDeviceOpen: device (" << ainfo.name << ") does not support sample rate (" << sampleRate << ")."; + errorText_ = errorStream_.str(); + return FAILURE; + } + stream_.sampleRate = sampleRate; + + if ( mode == INPUT && stream_.mode == OUTPUT && stream_.device[0] == device) { + // We're doing duplex setup here. + stream_.deviceFormat[0] = stream_.deviceFormat[1]; + stream_.nDeviceChannels[0] = deviceChannels; + } + + // Set interleaving parameters. 
+ stream_.userInterleaved = true; + stream_.deviceInterleaved[mode] = true; + if ( options && options->flags & RTAUDIO_NONINTERLEAVED ) + stream_.userInterleaved = false; + + // Set flags for buffer conversion + stream_.doConvertBuffer[mode] = false; + if ( stream_.userFormat != stream_.deviceFormat[mode] ) + stream_.doConvertBuffer[mode] = true; + if ( stream_.nUserChannels[mode] < stream_.nDeviceChannels[mode] ) + stream_.doConvertBuffer[mode] = true; + if ( stream_.userInterleaved != stream_.deviceInterleaved[mode] && + stream_.nUserChannels[mode] > 1 ) + stream_.doConvertBuffer[mode] = true; + + // Allocate the stream handles if necessary and then save. + if ( stream_.apiHandle == 0 ) { + try { + handle = new OssHandle; + } + catch ( std::bad_alloc& ) { + errorText_ = "RtApiOss::probeDeviceOpen: error allocating OssHandle memory."; + goto error; + } + + if ( pthread_cond_init( &handle->runnable, NULL ) ) { + errorText_ = "RtApiOss::probeDeviceOpen: error initializing pthread condition variable."; + goto error; + } + + stream_.apiHandle = (void *) handle; + } + else { + handle = (OssHandle *) stream_.apiHandle; + } + handle->id[mode] = fd; + + // Allocate necessary internal buffers. 
+ unsigned long bufferBytes; + bufferBytes = stream_.nUserChannels[mode] * *bufferSize * formatBytes( stream_.userFormat ); + stream_.userBuffer[mode] = (char *) calloc( bufferBytes, 1 ); + if ( stream_.userBuffer[mode] == NULL ) { + errorText_ = "RtApiOss::probeDeviceOpen: error allocating user buffer memory."; + goto error; + } + + if ( stream_.doConvertBuffer[mode] ) { + + // A single device buffer serves both directions. When input is added to an + // existing output stream, the current buffer is kept if its per-frame size + // already covers what the input side needs. + bool makeBuffer = true; + bufferBytes = stream_.nDeviceChannels[mode] * formatBytes( stream_.deviceFormat[mode] ); + if ( mode == INPUT ) { + if ( stream_.mode == OUTPUT && stream_.deviceBuffer ) { + unsigned long bytesOut = stream_.nDeviceChannels[0] * formatBytes( stream_.deviceFormat[0] ); + if ( bufferBytes <= bytesOut ) makeBuffer = false; + } + } + + if ( makeBuffer ) { + bufferBytes *= *bufferSize; + if ( stream_.deviceBuffer ) free( stream_.deviceBuffer ); + stream_.deviceBuffer = (char *) calloc( bufferBytes, 1 ); + if ( stream_.deviceBuffer == NULL ) { + errorText_ = "RtApiOss::probeDeviceOpen: error allocating device buffer memory."; + goto error; + } + } + } + + stream_.device[mode] = device; + stream_.state = STREAM_STOPPED; + + // Setup the buffer conversion information structure. + if ( stream_.doConvertBuffer[mode] ) setConvertInfo( mode, firstChannel ); + + // Setup thread if necessary. + if ( stream_.mode == OUTPUT && mode == INPUT ) { + // We had already set up an output stream. + stream_.mode = DUPLEX; + if ( stream_.device[0] == device ) handle->id[0] = fd; + } + else { + stream_.mode = mode; + + // Setup callback thread. + stream_.callbackInfo.object = (void *) this; + + // Set the thread attributes for joinable and realtime scheduling + // priority. The higher priority will only take effect if the + // program is run as root or suid. 
+ pthread_attr_t attr; + pthread_attr_init( &attr ); + pthread_attr_setdetachstate( &attr, PTHREAD_CREATE_JOINABLE ); +#ifdef SCHED_RR // Undefined with some OSes (eg: NetBSD 1.6.x with GNU Pthread) + if ( options && options->flags & RTAUDIO_SCHEDULE_REALTIME ) { + struct sched_param param; + int priority = options->priority; + int min = sched_get_priority_min( SCHED_RR ); + int max = sched_get_priority_max( SCHED_RR ); + if ( priority < min ) priority = min; + else if ( priority > max ) priority = max; + param.sched_priority = priority; + pthread_attr_setschedparam( &attr, ¶m ); + pthread_attr_setschedpolicy( &attr, SCHED_RR ); + } + else + pthread_attr_setschedpolicy( &attr, SCHED_OTHER ); +#else + pthread_attr_setschedpolicy( &attr, SCHED_OTHER ); +#endif + + stream_.callbackInfo.isRunning = true; + result = pthread_create( &stream_.callbackInfo.thread, &attr, ossCallbackHandler, &stream_.callbackInfo ); + pthread_attr_destroy( &attr ); + if ( result ) { + stream_.callbackInfo.isRunning = false; + errorText_ = "RtApiOss::error creating callback thread!"; + goto error; + } + } + + return SUCCESS; + + error: + if ( handle ) { + pthread_cond_destroy( &handle->runnable ); + if ( handle->id[0] ) close( handle->id[0] ); + if ( handle->id[1] ) close( handle->id[1] ); + delete handle; + stream_.apiHandle = 0; + } + + for ( int i=0; i<2; i++ ) { + if ( stream_.userBuffer[i] ) { + free( stream_.userBuffer[i] ); + stream_.userBuffer[i] = 0; + } + } + + if ( stream_.deviceBuffer ) { + free( stream_.deviceBuffer ); + stream_.deviceBuffer = 0; + } + + return FAILURE; +} + +void RtApiOss :: closeStream() +{ + if ( stream_.state == STREAM_CLOSED ) { + errorText_ = "RtApiOss::closeStream(): no open stream to close!"; + error( RtAudioError::WARNING ); + return; + } + + OssHandle *handle = (OssHandle *) stream_.apiHandle; + stream_.callbackInfo.isRunning = false; + MUTEX_LOCK( &stream_.mutex ); + if ( stream_.state == STREAM_STOPPED ) + pthread_cond_signal( &handle->runnable ); + 
MUTEX_UNLOCK( &stream_.mutex ); + // Wait for the callback thread to finish before releasing the handle and buffers. + pthread_join( stream_.callbackInfo.thread, NULL ); + + if ( stream_.state == STREAM_RUNNING ) { + if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) + ioctl( handle->id[0], SNDCTL_DSP_HALT, 0 ); + else + ioctl( handle->id[1], SNDCTL_DSP_HALT, 0 ); + stream_.state = STREAM_STOPPED; + } + + if ( handle ) { + pthread_cond_destroy( &handle->runnable ); + if ( handle->id[0] ) close( handle->id[0] ); + if ( handle->id[1] ) close( handle->id[1] ); + delete handle; + stream_.apiHandle = 0; + } + + for ( int i=0; i<2; i++ ) { + if ( stream_.userBuffer[i] ) { + free( stream_.userBuffer[i] ); + stream_.userBuffer[i] = 0; + } + } + + if ( stream_.deviceBuffer ) { + free( stream_.deviceBuffer ); + stream_.deviceBuffer = 0; + } + + stream_.mode = UNINITIALIZED; + stream_.state = STREAM_CLOSED; +} + +// Marks the stream as running and signals handle->runnable. Issues a warning +// and returns without changes if the stream is already running. +void RtApiOss :: startStream() +{ + verifyStream(); + if ( stream_.state == STREAM_RUNNING ) { + errorText_ = "RtApiOss::startStream(): the stream is already running!"; + error( RtAudioError::WARNING ); + return; + } + + MUTEX_LOCK( &stream_.mutex ); + + stream_.state = STREAM_RUNNING; + + // No need to do anything else here ... OSS automatically starts + // when fed samples. + + MUTEX_UNLOCK( &stream_.mutex ); + + OssHandle *handle = (OssHandle *) stream_.apiHandle; + pthread_cond_signal( &handle->runnable ); +} + +// Stops a running stream. Issues a warning and returns if the stream is +// already stopped. +void RtApiOss :: stopStream() +{ + verifyStream(); + if ( stream_.state == STREAM_STOPPED ) { + errorText_ = "RtApiOss::stopStream(): the stream is already stopped!"; + error( RtAudioError::WARNING ); + return; + } + + MUTEX_LOCK( &stream_.mutex ); + + // The state might change while waiting on a mutex. + if ( stream_.state == STREAM_STOPPED ) { + MUTEX_UNLOCK( &stream_.mutex ); + return; + } + + int result = 0; + OssHandle *handle = (OssHandle *) stream_.apiHandle; + if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) { + + // Flush the output with zeros a few times. 
+ char *buffer; + int samples; + RtAudioFormat format; + + if ( stream_.doConvertBuffer[0] ) { + buffer = stream_.deviceBuffer; + samples = stream_.bufferSize * stream_.nDeviceChannels[0]; + format = stream_.deviceFormat[0]; + } + else { + buffer = stream_.userBuffer[0]; + samples = stream_.bufferSize * stream_.nUserChannels[0]; + format = stream_.userFormat; + } + + memset( buffer, 0, samples * formatBytes(format) ); + for ( unsigned int i=0; i<stream_.nBuffers+1; i++ ) { + result = write( handle->id[0], buffer, samples * formatBytes(format) ); + if ( result == -1 ) { + errorText_ = "RtApiOss::stopStream: audio write error."; + error( RtAudioError::WARNING ); + } + } + + result = ioctl( handle->id[0], SNDCTL_DSP_HALT, 0 ); + if ( result == -1 ) { + errorStream_ << "RtApiOss::stopStream: system error stopping callback procedure on device (" << stream_.device[0] << ")."; + errorText_ = errorStream_.str(); + goto unlock; + } + handle->triggered = false; + } + + if ( stream_.mode == INPUT || ( stream_.mode == DUPLEX && handle->id[0] != handle->id[1] ) ) { + result = ioctl( handle->id[1], SNDCTL_DSP_HALT, 0 ); + if ( result == -1 ) { + errorStream_ << "RtApiOss::stopStream: system error stopping input callback procedure on device (" << stream_.device[0] << ")."; + errorText_ = errorStream_.str(); + goto unlock; + } + } + + unlock: + stream_.state = STREAM_STOPPED; + MUTEX_UNLOCK( &stream_.mutex ); + + if ( result != -1 ) return; + error( RtAudioError::SYSTEM_ERROR ); +} + +void RtApiOss :: abortStream() +{ + verifyStream(); + if ( stream_.state == STREAM_STOPPED ) { + errorText_ = "RtApiOss::abortStream(): the stream is already stopped!"; + error( RtAudioError::WARNING ); + return; + } + + MUTEX_LOCK( &stream_.mutex ); + + // The state might change while waiting on a mutex. 
+ if ( stream_.state == STREAM_STOPPED ) { + MUTEX_UNLOCK( &stream_.mutex ); + return; + } + + int result = 0; + OssHandle *handle = (OssHandle *) stream_.apiHandle; + if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) { + result = ioctl( handle->id[0], SNDCTL_DSP_HALT, 0 ); + if ( result == -1 ) { + errorStream_ << "RtApiOss::abortStream: system error stopping callback procedure on device (" << stream_.device[0] << ")."; + errorText_ = errorStream_.str(); + goto unlock; + } + handle->triggered = false; + } + + if ( stream_.mode == INPUT || ( stream_.mode == DUPLEX && handle->id[0] != handle->id[1] ) ) { + result = ioctl( handle->id[1], SNDCTL_DSP_HALT, 0 ); + if ( result == -1 ) { + errorStream_ << "RtApiOss::abortStream: system error stopping input callback procedure on device (" << stream_.device[0] << ")."; + errorText_ = errorStream_.str(); + goto unlock; + } + } + + unlock: + stream_.state = STREAM_STOPPED; + MUTEX_UNLOCK( &stream_.mutex ); + + if ( result != -1 ) return; + error( RtAudioError::SYSTEM_ERROR ); +} + +void RtApiOss :: callbackEvent() +{ + OssHandle *handle = (OssHandle *) stream_.apiHandle; + if ( stream_.state == STREAM_STOPPED ) { + MUTEX_LOCK( &stream_.mutex ); + pthread_cond_wait( &handle->runnable, &stream_.mutex ); + if ( stream_.state != STREAM_RUNNING ) { + MUTEX_UNLOCK( &stream_.mutex ); + return; + } + MUTEX_UNLOCK( &stream_.mutex ); + } + + if ( stream_.state == STREAM_CLOSED ) { + errorText_ = "RtApiOss::callbackEvent(): the stream is closed ... this shouldn't happen!"; + error( RtAudioError::WARNING ); + return; + } + + // Invoke user callback to get fresh output data. 
+ int doStopStream = 0; + RtAudioCallback callback = (RtAudioCallback) stream_.callbackInfo.callback; + double streamTime = getStreamTime(); + RtAudioStreamStatus status = 0; + if ( stream_.mode != INPUT && handle->xrun[0] == true ) { + status |= RTAUDIO_OUTPUT_UNDERFLOW; + handle->xrun[0] = false; + } + if ( stream_.mode != OUTPUT && handle->xrun[1] == true ) { + status |= RTAUDIO_INPUT_OVERFLOW; + handle->xrun[1] = false; + } + doStopStream = callback( stream_.userBuffer[0], stream_.userBuffer[1], + stream_.bufferSize, streamTime, status, stream_.callbackInfo.userData ); + if ( doStopStream == 2 ) { + this->abortStream(); + return; + } + + MUTEX_LOCK( &stream_.mutex ); + + // The state might change while waiting on a mutex. + if ( stream_.state == STREAM_STOPPED ) goto unlock; + + int result; + char *buffer; + int samples; + RtAudioFormat format; + + if ( stream_.mode == OUTPUT || stream_.mode == DUPLEX ) { + + // Setup parameters and do buffer conversion if necessary. + if ( stream_.doConvertBuffer[0] ) { + buffer = stream_.deviceBuffer; + convertBuffer( buffer, stream_.userBuffer[0], stream_.convertInfo[0] ); + samples = stream_.bufferSize * stream_.nDeviceChannels[0]; + format = stream_.deviceFormat[0]; + } + else { + buffer = stream_.userBuffer[0]; + samples = stream_.bufferSize * stream_.nUserChannels[0]; + format = stream_.userFormat; + } + + // Do byte swapping if necessary. + if ( stream_.doByteSwap[0] ) + byteSwapBuffer( buffer, samples, format ); + + if ( stream_.mode == DUPLEX && handle->triggered == false ) { + int trig = 0; + ioctl( handle->id[0], SNDCTL_DSP_SETTRIGGER, &trig ); + result = write( handle->id[0], buffer, samples * formatBytes(format) ); + trig = PCM_ENABLE_INPUT|PCM_ENABLE_OUTPUT; + ioctl( handle->id[0], SNDCTL_DSP_SETTRIGGER, &trig ); + handle->triggered = true; + } + else + // Write samples to device. 
+ result = write( handle->id[0], buffer, samples * formatBytes(format) ); + + if ( result == -1 ) { + // We'll assume this is an underrun, though there isn't a + // specific means for determining that. + handle->xrun[0] = true; + errorText_ = "RtApiOss::callbackEvent: audio write error."; + error( RtAudioError::WARNING ); + // Continue on to input section. + } + } + + if ( stream_.mode == INPUT || stream_.mode == DUPLEX ) { + + // Setup parameters. + if ( stream_.doConvertBuffer[1] ) { + buffer = stream_.deviceBuffer; + samples = stream_.bufferSize * stream_.nDeviceChannels[1]; + format = stream_.deviceFormat[1]; + } + else { + buffer = stream_.userBuffer[1]; + samples = stream_.bufferSize * stream_.nUserChannels[1]; + format = stream_.userFormat; + } + + // Read samples from device. + result = read( handle->id[1], buffer, samples * formatBytes(format) ); + + if ( result == -1 ) { + // We'll assume this is an overrun, though there isn't a + // specific means for determining that. + handle->xrun[1] = true; + errorText_ = "RtApiOss::callbackEvent: audio read error."; + error( RtAudioError::WARNING ); + goto unlock; + } + + // Do byte swapping if necessary. + if ( stream_.doByteSwap[1] ) + byteSwapBuffer( buffer, samples, format ); + + // Do buffer conversion if necessary. 
+ if ( stream_.doConvertBuffer[1] ) + convertBuffer( stream_.userBuffer[1], stream_.deviceBuffer, stream_.convertInfo[1] ); + } + + unlock: + MUTEX_UNLOCK( &stream_.mutex ); + + RtApi::tickStreamTime(); + // A return value of 1 from the user callback requests a stop once this buffer has been handled. + if ( doStopStream == 1 ) this->stopStream(); +} + +// Thread entry point. ptr is a CallbackInfo whose object member is the +// RtApiOss instance; callbackEvent() is called repeatedly until +// info->isRunning becomes false. +static void *ossCallbackHandler( void *ptr ) +{ + CallbackInfo *info = (CallbackInfo *) ptr; + RtApiOss *object = (RtApiOss *) info->object; + bool *isRunning = &info->isRunning; + + while ( *isRunning == true ) { + pthread_testcancel(); + object->callbackEvent(); + } + + pthread_exit( NULL ); +} + +//******************** End of __LINUX_OSS__ *********************// +#endif + + +// *************************************************** // +// +// Protected common (OS-independent) RtAudio methods. +// +// *************************************************** // + +// This method can be modified to control the behavior of error +// message printing. +void RtApi :: error( RtAudioError::Type type ) +{ + errorStream_.str(""); // clear the ostringstream + + RtAudioErrorCallback errorCallback = (RtAudioErrorCallback) stream_.callbackInfo.errorCallback; + if ( errorCallback ) { + // abortStream() can generate new error messages. Ignore them. Just keep original one. 
+ + if ( firstErrorOccurred_ ) + return; + + firstErrorOccurred_ = true; + const std::string errorMessage = errorText_; + + if ( type != RtAudioError::WARNING && stream_.state != STREAM_STOPPED) { + stream_.callbackInfo.isRunning = false; // exit from the thread + abortStream(); + } + + errorCallback( type, errorMessage ); + firstErrorOccurred_ = false; + return; + } + + // No error callback installed: print warnings (when enabled) and throw everything else. + if ( type == RtAudioError::WARNING && showWarnings_ == true ) + std::cerr << '\n' << errorText_ << "\n\n"; + else if ( type != RtAudioError::WARNING ) + throw( RtAudioError( errorText_, type ) ); +} + +// Reports an INVALID_USE error when the stream state is STREAM_CLOSED. +void RtApi :: verifyStream() +{ + if ( stream_.state == STREAM_CLOSED ) { + errorText_ = "RtApi:: a stream is not open!"; + error( RtAudioError::INVALID_USE ); + } +} + +// Resets the stream_ bookkeeping to its closed defaults. +void RtApi :: clearStreamInfo() +{ + stream_.mode = UNINITIALIZED; + stream_.state = STREAM_CLOSED; + stream_.sampleRate = 0; + stream_.bufferSize = 0; + stream_.nBuffers = 0; + stream_.userFormat = 0; + stream_.userInterleaved = true; + stream_.streamTime = 0.0; + stream_.apiHandle = 0; + stream_.deviceBuffer = 0; + stream_.callbackInfo.callback = 0; + stream_.callbackInfo.userData = 0; + stream_.callbackInfo.isRunning = false; + stream_.callbackInfo.errorCallback = 0; + // Index 0 and 1 hold the per-direction state. + for ( int i=0; i<2; i++ ) { + stream_.device[i] = 11111; // NOTE(review): presumably a "no device" placeholder -- confirm + stream_.doConvertBuffer[i] = false; + stream_.deviceInterleaved[i] = true; + stream_.doByteSwap[i] = false; + stream_.nUserChannels[i] = 0; + stream_.nDeviceChannels[i] = 0; + stream_.channelOffset[i] = 0; + stream_.deviceFormat[i] = 0; + stream_.latency[i] = 0; + stream_.userBuffer[i] = 0; + stream_.convertInfo[i].channels = 0; + stream_.convertInfo[i].inJump = 0; + stream_.convertInfo[i].outJump = 0; + stream_.convertInfo[i].inFormat = 0; + stream_.convertInfo[i].outFormat = 0; + stream_.convertInfo[i].inOffset.clear(); + stream_.convertInfo[i].outOffset.clear(); + } +} + +// Returns the size in bytes of one sample of the given format; an +// unrecognized format produces a warning and a return value of 0. +unsigned int RtApi :: formatBytes( RtAudioFormat format ) +{ + if ( format == RTAUDIO_SINT16 ) + return 2; + else if ( format == 
RTAUDIO_SINT32 || format == RTAUDIO_FLOAT32 ) + return 4; + else if ( format == RTAUDIO_FLOAT64 ) + return 8; + else if ( format == RTAUDIO_SINT24 ) + return 3; + else if ( format == RTAUDIO_SINT8 ) + return 1; + + errorText_ = "RtApi::formatBytes: undefined format."; + error( RtAudioError::WARNING ); + + return 0; +} + +void RtApi :: setConvertInfo( StreamMode mode, unsigned int firstChannel ) +{ + if ( mode == INPUT ) { // convert device to user buffer + stream_.convertInfo[mode].inJump = stream_.nDeviceChannels[1]; + stream_.convertInfo[mode].outJump = stream_.nUserChannels[1]; + stream_.convertInfo[mode].inFormat = stream_.deviceFormat[1]; + stream_.convertInfo[mode].outFormat = stream_.userFormat; + } + else { // convert user to device buffer + stream_.convertInfo[mode].inJump = stream_.nUserChannels[0]; + stream_.convertInfo[mode].outJump = stream_.nDeviceChannels[0]; + stream_.convertInfo[mode].inFormat = stream_.userFormat; + stream_.convertInfo[mode].outFormat = stream_.deviceFormat[0]; + } + + if ( stream_.convertInfo[mode].inJump < stream_.convertInfo[mode].outJump ) + stream_.convertInfo[mode].channels = stream_.convertInfo[mode].inJump; + else + stream_.convertInfo[mode].channels = stream_.convertInfo[mode].outJump; + + // Set up the interleave/deinterleave offsets. 
+ if ( stream_.deviceInterleaved[mode] != stream_.userInterleaved ) { + if ( ( mode == OUTPUT && stream_.deviceInterleaved[mode] ) || + ( mode == INPUT && stream_.userInterleaved ) ) { + for ( int k=0; k<stream_.convertInfo[mode].channels; k++ ) { + stream_.convertInfo[mode].inOffset.push_back( k * stream_.bufferSize ); + stream_.convertInfo[mode].outOffset.push_back( k ); + stream_.convertInfo[mode].inJump = 1; + } + } + else { + for ( int k=0; k<stream_.convertInfo[mode].channels; k++ ) { + stream_.convertInfo[mode].inOffset.push_back( k ); + stream_.convertInfo[mode].outOffset.push_back( k * stream_.bufferSize ); + stream_.convertInfo[mode].outJump = 1; + } + } + } + else { // no (de)interleaving + if ( stream_.userInterleaved ) { + for ( int k=0; k<stream_.convertInfo[mode].channels; k++ ) { + stream_.convertInfo[mode].inOffset.push_back( k ); + stream_.convertInfo[mode].outOffset.push_back( k ); + } + } + else { + for ( int k=0; k<stream_.convertInfo[mode].channels; k++ ) { + stream_.convertInfo[mode].inOffset.push_back( k * stream_.bufferSize ); + stream_.convertInfo[mode].outOffset.push_back( k * stream_.bufferSize ); + stream_.convertInfo[mode].inJump = 1; + stream_.convertInfo[mode].outJump = 1; + } + } + } + + // Add channel offset. 
+ if ( firstChannel > 0 ) { + if ( stream_.deviceInterleaved[mode] ) { + if ( mode == OUTPUT ) { + for ( int k=0; k<stream_.convertInfo[mode].channels; k++ ) + stream_.convertInfo[mode].outOffset[k] += firstChannel; + } + else { + for ( int k=0; k<stream_.convertInfo[mode].channels; k++ ) + stream_.convertInfo[mode].inOffset[k] += firstChannel; + } + } + else { + if ( mode == OUTPUT ) { + for ( int k=0; k<stream_.convertInfo[mode].channels; k++ ) + stream_.convertInfo[mode].outOffset[k] += ( firstChannel * stream_.bufferSize ); + } + else { + for ( int k=0; k<stream_.convertInfo[mode].channels; k++ ) + stream_.convertInfo[mode].inOffset[k] += ( firstChannel * stream_.bufferSize ); + } + } + } +}
+
+// Copy one buffer of audio frames from inBuffer to outBuffer, converting the
+// sample format and remapping channels as described by `info`.
+//
+// For each of stream_.bufferSize frames, sample in[info.inOffset[j]] is
+// converted from info.inFormat to info.outFormat and stored at
+// out[info.outOffset[j]] for every j in [0, info.channels).  The input and
+// output cursors then advance by info.inJump and info.outJump samples.
+//
+// Conversion rules implemented below:
+//   - integer -> float:   ( x + 0.5 ) * ( 1 / ( MAX + 0.5 ) ), with MAX being
+//     127, 32767, 8388607 or 2147483647 for 8/16/24/32-bit input.
+//   - float -> integer:   x * ( MAX + 0.5 ) - 0.5, truncated by the cast.
+//     No clamping is performed here.
+//   - integer -> wider integer:    left shift by the difference in bit width.
+//   - integer -> narrower integer: right shift, keeping the high-order bits.
+//   - identical formats:  plain copy (channel compensation and/or
+//     (de)interleaving only).
+//
+// An outFormat/inFormat pair that matches none of the branches copies nothing.
+void RtApi :: convertBuffer( char *outBuffer, char *inBuffer, ConvertInfo &info )
+{
+  // This function does format conversion, input/output channel compensation, and
+  // data interleaving/deinterleaving.  24-bit integers are assumed to occupy
+  // the lower three bytes of a 32-bit integer.
+  // NOTE(review): the 24-bit branches below go through the Int24 type and its
+  // asInt() accessor rather than a raw 32-bit integer -- confirm the remark
+  // above against the Int24 definition.
+
+  // Clear our device buffer when in/out duplex device channels are different
+  if ( outBuffer == stream_.deviceBuffer && stream_.mode == DUPLEX &&
+       ( stream_.nDeviceChannels[0] < stream_.nDeviceChannels[1] ) )
+    memset( outBuffer, 0, stream_.bufferSize * info.outJump * formatBytes( info.outFormat ) );
+
+  int j;
+  if (info.outFormat == RTAUDIO_FLOAT64) {
+    Float64 scale;
+    Float64 *out = (Float64 *)outBuffer;
+
+    if (info.inFormat == RTAUDIO_SINT8) {
+      signed char *in = (signed char *)inBuffer;
+      scale = 1.0 / 127.5;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (Float64) in[info.inOffset[j]];
+          out[info.outOffset[j]] += 0.5;
+          out[info.outOffset[j]] *= scale;
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_SINT16) {
+      Int16 *in = (Int16 *)inBuffer;
+      scale = 1.0 / 32767.5;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (Float64) in[info.inOffset[j]];
+          out[info.outOffset[j]] += 0.5;
+          out[info.outOffset[j]] *= scale;
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_SINT24) {
+      Int24 *in = (Int24 *)inBuffer;
+      scale = 1.0 / 8388607.5;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (Float64) (in[info.inOffset[j]].asInt());
+          out[info.outOffset[j]] += 0.5;
+          out[info.outOffset[j]] *= scale;
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_SINT32) {
+      Int32 *in = (Int32 *)inBuffer;
+      scale = 1.0 / 2147483647.5;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (Float64) in[info.inOffset[j]];
+          out[info.outOffset[j]] += 0.5;
+          out[info.outOffset[j]] *= scale;
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_FLOAT32) {
+      Float32 *in = (Float32 *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (Float64) in[info.inOffset[j]];
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_FLOAT64) {
+      // Channel compensation and/or (de)interleaving only.
+      Float64 *in = (Float64 *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = in[info.inOffset[j]];
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+  }
+  else if (info.outFormat == RTAUDIO_FLOAT32) {
+    Float32 scale;
+    Float32 *out = (Float32 *)outBuffer;
+
+    if (info.inFormat == RTAUDIO_SINT8) {
+      signed char *in = (signed char *)inBuffer;
+      scale = (Float32) ( 1.0 / 127.5 );
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (Float32) in[info.inOffset[j]];
+          out[info.outOffset[j]] += 0.5;
+          out[info.outOffset[j]] *= scale;
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_SINT16) {
+      Int16 *in = (Int16 *)inBuffer;
+      scale = (Float32) ( 1.0 / 32767.5 );
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (Float32) in[info.inOffset[j]];
+          out[info.outOffset[j]] += 0.5;
+          out[info.outOffset[j]] *= scale;
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_SINT24) {
+      Int24 *in = (Int24 *)inBuffer;
+      scale = (Float32) ( 1.0 / 8388607.5 );
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (Float32) (in[info.inOffset[j]].asInt());
+          out[info.outOffset[j]] += 0.5;
+          out[info.outOffset[j]] *= scale;
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_SINT32) {
+      Int32 *in = (Int32 *)inBuffer;
+      scale = (Float32) ( 1.0 / 2147483647.5 );
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (Float32) in[info.inOffset[j]];
+          out[info.outOffset[j]] += 0.5;
+          out[info.outOffset[j]] *= scale;
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_FLOAT32) {
+      // Channel compensation and/or (de)interleaving only.
+      Float32 *in = (Float32 *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = in[info.inOffset[j]];
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_FLOAT64) {
+      Float64 *in = (Float64 *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (Float32) in[info.inOffset[j]];
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+  }
+  else if (info.outFormat == RTAUDIO_SINT32) {
+    Int32 *out = (Int32 *)outBuffer;
+    if (info.inFormat == RTAUDIO_SINT8) {
+      signed char *in = (signed char *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (Int32) in[info.inOffset[j]];
+          out[info.outOffset[j]] <<= 24;
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_SINT16) {
+      Int16 *in = (Int16 *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (Int32) in[info.inOffset[j]];
+          out[info.outOffset[j]] <<= 16;
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_SINT24) {
+      Int24 *in = (Int24 *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (Int32) in[info.inOffset[j]].asInt();
+          out[info.outOffset[j]] <<= 8;
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_SINT32) {
+      // Channel compensation and/or (de)interleaving only.
+      Int32 *in = (Int32 *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = in[info.inOffset[j]];
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_FLOAT32) {
+      Float32 *in = (Float32 *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (Int32) (in[info.inOffset[j]] * 2147483647.5 - 0.5);
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_FLOAT64) {
+      Float64 *in = (Float64 *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (Int32) (in[info.inOffset[j]] * 2147483647.5 - 0.5);
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+  }
+  else if (info.outFormat == RTAUDIO_SINT24) {
+    Int24 *out = (Int24 *)outBuffer;
+    if (info.inFormat == RTAUDIO_SINT8) {
+      signed char *in = (signed char *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (Int32) (in[info.inOffset[j]] << 16);
+          //out[info.outOffset[j]] <<= 16;
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_SINT16) {
+      Int16 *in = (Int16 *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (Int32) (in[info.inOffset[j]] << 8);
+          //out[info.outOffset[j]] <<= 8;
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_SINT24) {
+      // Channel compensation and/or (de)interleaving only.
+      Int24 *in = (Int24 *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = in[info.inOffset[j]];
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_SINT32) {
+      Int32 *in = (Int32 *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (Int32) (in[info.inOffset[j]] >> 8);
+          //out[info.outOffset[j]] >>= 8;
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_FLOAT32) {
+      Float32 *in = (Float32 *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (Int32) (in[info.inOffset[j]] * 8388607.5 - 0.5);
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_FLOAT64) {
+      Float64 *in = (Float64 *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (Int32) (in[info.inOffset[j]] * 8388607.5 - 0.5);
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+  }
+  else if (info.outFormat == RTAUDIO_SINT16) {
+    Int16 *out = (Int16 *)outBuffer;
+    if (info.inFormat == RTAUDIO_SINT8) {
+      signed char *in = (signed char *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (Int16) in[info.inOffset[j]];
+          out[info.outOffset[j]] <<= 8;
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_SINT16) {
+      // Channel compensation and/or (de)interleaving only.
+      Int16 *in = (Int16 *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = in[info.inOffset[j]];
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_SINT24) {
+      Int24 *in = (Int24 *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (Int16) (in[info.inOffset[j]].asInt() >> 8);
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_SINT32) {
+      Int32 *in = (Int32 *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (Int16) ((in[info.inOffset[j]] >> 16) & 0x0000ffff);
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_FLOAT32) {
+      Float32 *in = (Float32 *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (Int16) (in[info.inOffset[j]] * 32767.5 - 0.5);
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_FLOAT64) {
+      Float64 *in = (Float64 *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (Int16) (in[info.inOffset[j]] * 32767.5 - 0.5);
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+  }
+  else if (info.outFormat == RTAUDIO_SINT8) {
+    signed char *out = (signed char *)outBuffer;
+    if (info.inFormat == RTAUDIO_SINT8) {
+      // Channel compensation and/or (de)interleaving only.
+      signed char *in = (signed char *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = in[info.inOffset[j]];
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    // Chained with "else" like every other format ladder in this function, so
+    // a completed SINT8 copy does not go on to re-test the remaining formats.
+    else if (info.inFormat == RTAUDIO_SINT16) {
+      Int16 *in = (Int16 *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (signed char) ((in[info.inOffset[j]] >> 8) & 0x00ff);
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_SINT24) {
+      Int24 *in = (Int24 *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (signed char) (in[info.inOffset[j]].asInt() >> 16);
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_SINT32) {
+      Int32 *in = (Int32 *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (signed char) ((in[info.inOffset[j]] >> 24) & 0x000000ff);
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_FLOAT32) {
+      Float32 *in = (Float32 *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (signed char) (in[info.inOffset[j]] * 127.5 - 0.5);
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+    else if (info.inFormat == RTAUDIO_FLOAT64) {
+      Float64 *in = (Float64 *)inBuffer;
+      for (unsigned int i=0; i<stream_.bufferSize; i++) {
+        for (j=0; j<info.channels; j++) {
+          out[info.outOffset[j]] = (signed char) (in[info.inOffset[j]] * 127.5 - 0.5);
+        }
+        in += info.inJump;
+        out += info.outJump;
+      }
+    }
+  }
+}
+
+//static inline uint16_t bswap_16(uint16_t x) { return (x>>8) | (x<<8); }
+//static inline uint32_t bswap_32(uint32_t x) { return (bswap_16(x&0xffff)<<16) | (bswap_16(x>>16)); }
+//static inline uint64_t 
bswap_64(uint64_t x) { return (((unsigned long long)bswap_32(x&0xffffffffull))<<32) | (bswap_32(x>>32)); }
+
+// Reverse the byte order of every sample in `buffer`, in place.
+//
+//   buffer   start of the sample data to modify
+//   samples  number of samples to swap (not bytes)
+//   format   sample format; selects the width of each swapped group:
+//            RTAUDIO_SINT16 -> 2 bytes, RTAUDIO_SINT24 -> 3 bytes,
+//            RTAUDIO_SINT32 / RTAUDIO_FLOAT32 -> 4 bytes,
+//            RTAUDIO_FLOAT64 -> 8 bytes.
+//
+// Any other format value leaves the buffer untouched.
+void RtApi :: byteSwapBuffer( char *buffer, unsigned int samples, RtAudioFormat format )
+{
+  char val;
+  char *ptr;
+
+  ptr = buffer;
+  if ( format == RTAUDIO_SINT16 ) {
+    for ( unsigned int i=0; i<samples; i++ ) {
+      // Swap 1st and 2nd bytes.
+      val = *(ptr);
+      *(ptr) = *(ptr+1);
+      *(ptr+1) = val;
+
+      // Increment 2 bytes.
+      ptr += 2;
+    }
+  }
+  else if ( format == RTAUDIO_SINT32 ||
+            format == RTAUDIO_FLOAT32 ) {
+    for ( unsigned int i=0; i<samples; i++ ) {
+      // Swap 1st and 4th bytes.
+      val = *(ptr);
+      *(ptr) = *(ptr+3);
+      *(ptr+3) = val;
+
+      // Swap 2nd and 3rd bytes.
+      ptr += 1;
+      val = *(ptr);
+      *(ptr) = *(ptr+1);
+      *(ptr+1) = val;
+
+      // Increment 3 more bytes.
+      ptr += 3;
+    }
+  }
+  else if ( format == RTAUDIO_SINT24 ) {
+    for ( unsigned int i=0; i<samples; i++ ) {
+      // Swap 1st and 3rd bytes; the middle byte stays where it is.
+      val = *(ptr);
+      *(ptr) = *(ptr+2);
+      *(ptr+2) = val;
+
+      // Step over the whole 3-byte sample.  Advancing by only 2 would start
+      // the next swap on the last byte of this sample and corrupt every
+      // sample after the first.
+      // NOTE(review): assumes 24-bit samples are packed into 3 bytes, which
+      // is what a swap of bytes 0 and 2 implies -- confirm against Int24.
+      ptr += 3;
+    }
+  }
+  else if ( format == RTAUDIO_FLOAT64 ) {
+    for ( unsigned int i=0; i<samples; i++ ) {
+      // Swap 1st and 8th bytes
+      val = *(ptr);
+      *(ptr) = *(ptr+7);
+      *(ptr+7) = val;
+
+      // Swap 2nd and 7th bytes
+      ptr += 1;
+      val = *(ptr);
+      *(ptr) = *(ptr+5);
+      *(ptr+5) = val;
+
+      // Swap 3rd and 6th bytes
+      ptr += 1;
+      val = *(ptr);
+      *(ptr) = *(ptr+3);
+      *(ptr+3) = val;
+
+      // Swap 4th and 5th bytes
+      ptr += 1;
+      val = *(ptr);
+      *(ptr) = *(ptr+1);
+      *(ptr+1) = val;
+
+      // Increment 5 more bytes.
+      ptr += 5;
+    }
+  }
+}
+
+  // Indentation settings for Vim and Emacs
+  //
+  // Local Variables:
+  // c-basic-offset: 2
+  // indent-tabs-mode: nil
+  // End:
+  //
+  // vim: et sts=2 sw=2
+
+#endif
diff --git a/drivers/speex/audio_stream_speex.cpp b/drivers/speex/audio_stream_speex.cpp index 2cffb17049..1bb4952cc8 100644 --- a/drivers/speex/audio_stream_speex.cpp +++ b/drivers/speex/audio_stream_speex.cpp @@ -15,14 +15,15 @@ static _FORCE_INLINE_ uint16_t le_short(uint16_t s) } -void AudioStreamSpeex::update() { +int AudioStreamPlaybackSpeex::mix(int16_t* p_buffer,int p_frames) { + + - _THREAD_SAFE_METHOD_; //printf("update, loops %i, read ofs %i\n", (int)loops, read_ofs); //printf("playing %i, paused %i\n", (int)playing, (int)paused); - if (!active || !playing || paused || !data.size()) - return; + if (!active || !playing || !data.size()) + return 0; /* if (read_ofs >= data.size()) { @@ -35,12 +36,13 @@ void AudioStreamSpeex::update() { }; */ - int todo = get_todo(); + int todo = p_frames; if (todo < page_size) { - return; + return 0; }; - int eos = 0; + int eos = 0; + bool reloaded=false; while (todo > page_size) { @@ -92,7 +94,7 @@ void AudioStreamSpeex::update() { for (int j=0;j!=nframes;j++) { - int16_t* out = get_write_buffer(); + int16_t* out = p_buffer; int ret; /*Decode frame*/ @@ -120,7 +122,7 @@ void AudioStreamSpeex::update() { /*Convert to short and save to output file*/ - for (int i=0;i<frame_size*get_channel_count();i++) { + for (int i=0;i<frame_size*stream_channels;i++) { out[i]=le_short(out[i]); } @@ -149,7 +151,7 @@ void AudioStreamSpeex::update() { } - write(new_frame_size); + p_buffer+=new_frame_size*stream_channels; todo-=new_frame_size; } } @@ -175,6 +177,7 @@ void AudioStreamSpeex::update() { if (loops) { reload(); ++loop_count; + //break; } else { playing=false; unload(); @@ -183,18 +186,22 @@ void AudioStreamSpeex::update() { } }; }; + + return p_frames-todo; }; -void AudioStreamSpeex::unload() { +void AudioStreamPlaybackSpeex::unload() { + + -
_THREAD_SAFE_METHOD_ if (!active) return; speex_bits_destroy(&bits); if (st) speex_decoder_destroy(st); + + ogg_sync_clear(&oy); active = false; //data.resize(0); st = NULL; @@ -204,7 +211,7 @@ void AudioStreamSpeex::unload() { loop_count = 0; } -void *AudioStreamSpeex::process_header(ogg_packet *op, int *frame_size, int *rate, int *nframes, int *channels, int *extra_headers) { +void *AudioStreamPlaybackSpeex::process_header(ogg_packet *op, int *frame_size, int *rate, int *nframes, int *channels, int *extra_headers) { void *st; SpeexHeader *header; @@ -276,9 +283,9 @@ void *AudioStreamSpeex::process_header(ogg_packet *op, int *frame_size, int *rat -void AudioStreamSpeex::reload() { +void AudioStreamPlaybackSpeex::reload() { + - _THREAD_SAFE_METHOD_ if (active) unload(); @@ -359,8 +366,10 @@ void AudioStreamSpeex::reload() { }; page_size = nframes * frame_size; + stream_srate=rate; + stream_channels=channels; + stream_minbuff_size=page_size; - _setup(channels, rate,page_size); } else if (packet_count==1) { @@ -374,23 +383,23 @@ void AudioStreamSpeex::reload() { } while (packet_count <= extra_headers); - active = true; + active=true; } -void AudioStreamSpeex::_bind_methods() { +void AudioStreamPlaybackSpeex::_bind_methods() { - ObjectTypeDB::bind_method(_MD("set_file","file"),&AudioStreamSpeex::set_file); - ObjectTypeDB::bind_method(_MD("get_file"),&AudioStreamSpeex::get_file); + //ObjectTypeDB::bind_method(_MD("set_file","file"),&AudioStreamPlaybackSpeex::set_file); +// ObjectTypeDB::bind_method(_MD("get_file"),&AudioStreamPlaybackSpeex::get_file); - ObjectTypeDB::bind_method(_MD("_set_bundled"),&AudioStreamSpeex::_set_bundled); - ObjectTypeDB::bind_method(_MD("_get_bundled"),&AudioStreamSpeex::_get_bundled); + ObjectTypeDB::bind_method(_MD("_set_bundled"),&AudioStreamPlaybackSpeex::_set_bundled); + ObjectTypeDB::bind_method(_MD("_get_bundled"),&AudioStreamPlaybackSpeex::_get_bundled); ADD_PROPERTY( 
PropertyInfo(Variant::DICTIONARY,"_bundled",PROPERTY_HINT_NONE,"",PROPERTY_USAGE_BUNDLE),_SCS("_set_bundled"),_SCS("_get_bundled")); - ADD_PROPERTY( PropertyInfo(Variant::STRING,"file",PROPERTY_HINT_FILE,"*.spx"),_SCS("set_file"),_SCS("get_file")); + //ADD_PROPERTY( PropertyInfo(Variant::STRING,"file",PROPERTY_HINT_FILE,"*.spx"),_SCS("set_file"),_SCS("get_file")); }; -void AudioStreamSpeex::_set_bundled(const Dictionary& dict) { +void AudioStreamPlaybackSpeex::_set_bundled(const Dictionary& dict) { ERR_FAIL_COND( !dict.has("filename")); ERR_FAIL_COND( !dict.has("data")); @@ -399,7 +408,7 @@ void AudioStreamSpeex::_set_bundled(const Dictionary& dict) { data = dict["data"]; }; -Dictionary AudioStreamSpeex::_get_bundled() const { +Dictionary AudioStreamPlaybackSpeex::_get_bundled() const { Dictionary d; d["filename"] = filename; @@ -408,43 +417,17 @@ Dictionary AudioStreamSpeex::_get_bundled() const { }; -String AudioStreamSpeex::get_file() const { - - return filename; -}; - -void AudioStreamSpeex::set_file(const String& p_file){ - - if (filename == p_file) - return; - - if (active) { - unload(); - } - - if (p_file == "") { - data.resize(0); - return; - }; - - Error err; - FileAccess* file = FileAccess::open(p_file, FileAccess::READ,&err); - if (err != OK) { - data.resize(0); - }; - ERR_FAIL_COND(err != OK); - filename = p_file; - data.resize(file->get_len()); - int read = file->get_buffer(&data[0], data.size()); - memdelete(file); +void AudioStreamPlaybackSpeex::set_data(const Vector<uint8_t>& p_data) { + data=p_data; reload(); } -void AudioStreamSpeex::play() { - _THREAD_SAFE_METHOD_ +void AudioStreamPlaybackSpeex::play(float p_from_pos) { + + reload(); if (!active) @@ -452,82 +435,103 @@ void AudioStreamSpeex::play() { playing = true; } -void AudioStreamSpeex::stop(){ +void AudioStreamPlaybackSpeex::stop(){ + - _THREAD_SAFE_METHOD_ unload(); playing = false; - _clear(); -} -bool AudioStreamSpeex::is_playing() const{ - return _is_ready() && (playing || (get_total() 
- get_todo() -1 > 0)); } +bool AudioStreamPlaybackSpeex::is_playing() const{ -void AudioStreamSpeex::set_paused(bool p_paused){ - - playing = !p_paused; - paused = p_paused; + return playing; } -bool AudioStreamSpeex::is_paused(bool p_paused) const{ - return paused; -} -void AudioStreamSpeex::set_loop(bool p_enable){ +void AudioStreamPlaybackSpeex::set_loop(bool p_enable){ loops = p_enable; } -bool AudioStreamSpeex::has_loop() const{ +bool AudioStreamPlaybackSpeex::has_loop() const{ return loops; } -float AudioStreamSpeex::get_length() const{ +float AudioStreamPlaybackSpeex::get_length() const{ return 0; } -String AudioStreamSpeex::get_stream_name() const{ +String AudioStreamPlaybackSpeex::get_stream_name() const{ return ""; } -int AudioStreamSpeex::get_loop_count() const{ +int AudioStreamPlaybackSpeex::get_loop_count() const{ return 0; } -float AudioStreamSpeex::get_pos() const{ +float AudioStreamPlaybackSpeex::get_pos() const{ return 0; } -void AudioStreamSpeex::seek_pos(float p_time){ +void AudioStreamPlaybackSpeex::seek_pos(float p_time){ }; -bool AudioStreamSpeex::_can_mix() const { - //return playing; - return data.size() != 0; -}; +AudioStreamPlaybackSpeex::AudioStreamPlaybackSpeex() { + + active=false; + st = NULL; + stream_channels=1; + stream_srate=1; + stream_minbuff_size=1; + playing=false; -AudioStream::UpdateMode AudioStreamSpeex::get_update_mode() const { - return UPDATE_THREAD; } -AudioStreamSpeex::AudioStreamSpeex() { +AudioStreamPlaybackSpeex::~AudioStreamPlaybackSpeex() { - active=false; - st = NULL; + unload(); } -AudioStreamSpeex::~AudioStreamSpeex() { - unload(); + + + +//////////////////////////////////////// + + + +void AudioStreamSpeex::set_file(const String& p_file) { + + if (this->file == p_file) + return; + + this->file=p_file; + + if (p_file == "") { + data.resize(0); + return; + }; + + Error err; + FileAccess* file = FileAccess::open(p_file, FileAccess::READ,&err); + if (err != OK) { + data.resize(0); + }; + ERR_FAIL_COND(err != OK); + 
+ this->file = p_file; + data.resize(file->get_len()); + int read = file->get_buffer(&data[0], data.size()); + memdelete(file); + } RES ResourceFormatLoaderAudioStreamSpeex::load(const String &p_path, const String& p_original_path, Error *r_error) { diff --git a/drivers/speex/audio_stream_speex.h b/drivers/speex/audio_stream_speex.h index f9e0fce666..f0617b302f 100644 --- a/drivers/speex/audio_stream_speex.h +++ b/drivers/speex/audio_stream_speex.h @@ -1,7 +1,7 @@ #ifndef AUDIO_STREAM_SPEEX_H #define AUDIO_STREAM_SPEEX_H -#include "scene/resources/audio_stream_resampled.h" +#include "scene/resources/audio_stream.h" #include "speex/speex.h" #include "os/file_access.h" #include "io/resource_loader.h" @@ -14,10 +14,10 @@ #include <ogg/ogg.h> -class AudioStreamSpeex : public AudioStreamResampled { +class AudioStreamPlaybackSpeex : public AudioStreamPlayback { + + OBJ_TYPE(AudioStreamPlaybackSpeex, AudioStreamPlayback); - OBJ_TYPE(AudioStreamSpeex, AudioStreamResampled); - _THREAD_SAFE_CLASS_ void *st; SpeexBits bits; @@ -29,7 +29,6 @@ class AudioStreamSpeex : public AudioStreamResampled { bool loops; int page_size; bool playing; - bool paused; bool packets_available; void unload(); @@ -45,6 +44,9 @@ class AudioStreamSpeex : public AudioStreamResampled { ogg_int64_t page_granule, last_granule; int skip_samples, page_nb_packets; + int stream_channels; + int stream_srate; + int stream_minbuff_size; void* process_header(ogg_packet *op, int *frame_size, int *rate, int *nframes, int *channels, int *extra_headers); @@ -52,7 +54,7 @@ class AudioStreamSpeex : public AudioStreamResampled { protected: - virtual bool _can_mix() const; + //virtual bool _can_mix() const; Dictionary _get_bundled() const; void _set_bundled(const Dictionary& dict); @@ -60,16 +62,12 @@ protected: public: - void set_file(const String& p_file); - String get_file() const; + void set_data(const Vector<uint8_t>& p_data); - virtual void play(); + virtual void play(float p_from_pos=0); virtual void stop(); 
virtual bool is_playing() const; - virtual void set_paused(bool p_paused); - virtual bool is_paused(bool p_paused) const; - virtual void set_loop(bool p_enable); virtual bool has_loop() const; @@ -82,13 +80,39 @@ public: virtual float get_pos() const; virtual void seek_pos(float p_time); - virtual UpdateMode get_update_mode() const; - virtual void update(); + virtual int get_channels() const { return stream_channels; } + virtual int get_mix_rate() const { return stream_srate; } + + virtual int get_minimum_buffer_size() const { return stream_minbuff_size; } + virtual int mix(int16_t* p_bufer,int p_frames); + + virtual void set_loop_restart_time(float p_time) { } //no loop restart, ignore + + AudioStreamPlaybackSpeex(); + ~AudioStreamPlaybackSpeex(); +}; + + + +class AudioStreamSpeex : public AudioStream { + + OBJ_TYPE(AudioStreamSpeex,AudioStream); + + Vector<uint8_t> data; + String file; +public: + + Ref<AudioStreamPlayback> instance_playback() { + Ref<AudioStreamPlaybackSpeex> pb = memnew( AudioStreamPlaybackSpeex ); + pb->set_data(data); + return pb; + } + + void set_file(const String& p_file); - AudioStreamSpeex(); - ~AudioStreamSpeex(); }; + class ResourceFormatLoaderAudioStreamSpeex : public ResourceFormatLoader { public: virtual RES load(const String &p_path,const String& p_original_path="",Error *r_error=NULL); diff --git a/drivers/speex/config.h b/drivers/speex/config.h index d31382702c..8c48e3b99d 100644 --- a/drivers/speex/config.h +++ b/drivers/speex/config.h @@ -1,52 +1,52 @@ -/*
- Copyright (C) 2003 Commonwealth Scientific and Industrial Research
- Organisation (CSIRO) Australia
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions
- are met:
-
- - Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
-
- - Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
-
- - Neither the name of CSIRO Australia nor the names of its
- contributors may be used to endorse or promote products derived from
- this software without specific prior written permission.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
- PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ORGANISATION OR
- CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef CONFIG_H
-#define CONFIG_H
-
-/* An inline macro is required for use of the inline keyword as not all C compilers support */
-/* inline. It is officially C99 and C++ only */
-
-
-/* Use only fixed point arithmetic */
-
-//#ifdef _MSC_VER
-//#define inline _inline
-//#endif
-
-#define FIXED_POINT 1
-
-#ifdef _MSC_VER
-#define inline __inline
-#endif
-
-#endif /* ! CONFIG_H */
+/* + Copyright (C) 2003 Commonwealth Scientific and Industrial Research + Organisation (CSIRO) Australia + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + - Neither the name of CSIRO Australia nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ORGANISATION OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef CONFIG_H +#define CONFIG_H + +/* An inline macro is required for use of the inline keyword as not all C compilers support */ +/* inline. It is officially C99 and C++ only */ + + +/* Use only fixed point arithmetic */ + +//#ifdef _MSC_VER +//#define inline _inline +//#endif + +#define FIXED_POINT 1 + +#ifdef _MSC_VER +#define inline __inline +#endif + +#endif /* ! 
CONFIG_H */ diff --git a/drivers/speex/lsp.h b/drivers/speex/lsp.h index 648652fb9e..b55bd42f2c 100644 --- a/drivers/speex/lsp.h +++ b/drivers/speex/lsp.h @@ -1,64 +1,64 @@ -/*---------------------------------------------------------------------------*\
-Original Copyright
- FILE........: AK2LSPD.H
- TYPE........: Turbo C header file
- COMPANY.....: Voicetronix
- AUTHOR......: James Whitehall
- DATE CREATED: 21/11/95
-
-Modified by Jean-Marc Valin
-
- This file contains functions for converting Linear Prediction
- Coefficients (LPC) to Line Spectral Pair (LSP) and back. Note that the
- LSP coefficients are not in radians format but in the x domain of the
- unit circle.
-
-\*---------------------------------------------------------------------------*/
-/**
- @file lsp.h
- @brief Line Spectral Pair (LSP) functions.
-*/
-/* Speex License:
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions
- are met:
-
- - Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
-
- - Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
-
- - Neither the name of the Xiph.org Foundation nor the names of its
- contributors may be used to endorse or promote products derived from
- this software without specific prior written permission.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
- CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef __AK2LSPD__
-#define __AK2LSPD__
-
-#include "arch.h"
-
-int lpc_to_lsp (spx_coef_t *a, int lpcrdr, spx_lsp_t *freq, int nb, spx_word16_t delta, char *stack);
-void lsp_to_lpc(spx_lsp_t *freq, spx_coef_t *ak, int lpcrdr, char *stack);
-
-/*Added by JMV*/
-void lsp_enforce_margin(spx_lsp_t *lsp, int len, spx_word16_t margin);
-
-void lsp_interpolate(spx_lsp_t *old_lsp, spx_lsp_t *new_lsp, spx_lsp_t *interp_lsp, int len, int subframe, int nb_subframes);
-
-#endif /* __AK2LSPD__ */
+/*---------------------------------------------------------------------------*\ +Original Copyright + FILE........: AK2LSPD.H + TYPE........: Turbo C header file + COMPANY.....: Voicetronix + AUTHOR......: James Whitehall + DATE CREATED: 21/11/95 + +Modified by Jean-Marc Valin + + This file contains functions for converting Linear Prediction + Coefficients (LPC) to Line Spectral Pair (LSP) and back. Note that the + LSP coefficients are not in radians format but in the x domain of the + unit circle. + +\*---------------------------------------------------------------------------*/ +/** + @file lsp.h + @brief Line Spectral Pair (LSP) functions. +*/ +/* Speex License: + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + - Neither the name of the Xiph.org Foundation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef __AK2LSPD__ +#define __AK2LSPD__ + +#include "arch.h" + +int lpc_to_lsp (spx_coef_t *a, int lpcrdr, spx_lsp_t *freq, int nb, spx_word16_t delta, char *stack); +void lsp_to_lpc(spx_lsp_t *freq, spx_coef_t *ak, int lpcrdr, char *stack); + +/*Added by JMV*/ +void lsp_enforce_margin(spx_lsp_t *lsp, int len, spx_word16_t margin); + +void lsp_interpolate(spx_lsp_t *old_lsp, spx_lsp_t *new_lsp, spx_lsp_t *interp_lsp, int len, int subframe, int nb_subframes); + +#endif /* __AK2LSPD__ */ diff --git a/drivers/speex/speex_bind.cpp b/drivers/speex/speex_bind.cpp index 6e9eb638a2..d15bb3da8c 100644 --- a/drivers/speex/speex_bind.cpp +++ b/drivers/speex/speex_bind.cpp @@ -1,64 +1,64 @@ -
-#include "memory.h"
-#include "speex_bind.h"
-#include
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-void *speex_alloc (int size) {
-
- uint8_t * mem = (uint8_t*)memalloc(size);
- for(int i=0;i<size;i++)
- mem[i]=0;
- return mem;
-}
-
-void *speex_alloc_scratch (int size) {
-
- return memalloc(size);
-}
-
-void *speex_realloc (void *ptr, int size) {
-
- return memrealloc(ptr,size);
-}
-
-void speex_free (void *ptr) {
-
- memfree(ptr);
-}
-
-void speex_free_scratch (void *ptr) {
-
- memfree(ptr);
-}
-
-void _speex_fatal(const char *str, const char *file, int line) {
-
- _err_print_error("SPEEX ERROR",p_file,p_line,str);
-}
-
-void speex_warning(const char *str) {
-
- _err_print_error("SPEEX WARNING","",0,str);
-}
-
-void speex_warning_int(const char *str, int val) {
-
- _err_print_error("SPEEX WARNING INT","Value",val,str);
-}
-
-void speex_notify(const char *str) {
-
- print_line(str);
-}
-
-void _speex_putc(int ch, void *file) {
-
- // will not putc, no.
-}
-
-#ifdef __cplusplus
-}
-#endif
+ +#include "memory.h" +#include "speex_bind.h" +#include +#ifdef __cplusplus +extern "C" { +#endif + +void *speex_alloc (int size) { + + uint8_t * mem = (uint8_t*)memalloc(size); + for(int i=0;i<size;i++) + mem[i]=0; + return mem; +} + +void *speex_alloc_scratch (int size) { + + return memalloc(size); +} + +void *speex_realloc (void *ptr, int size) { + + return memrealloc(ptr,size); +} + +void speex_free (void *ptr) { + + memfree(ptr); +} + +void speex_free_scratch (void *ptr) { + + memfree(ptr); +} + +void _speex_fatal(const char *str, const char *file, int line) { + + _err_print_error("SPEEX ERROR",p_file,p_line,str); +} + +void speex_warning(const char *str) { + + _err_print_error("SPEEX WARNING","",0,str); +} + +void speex_warning_int(const char *str, int val) { + + _err_print_error("SPEEX WARNING INT","Value",val,str); +} + +void speex_notify(const char *str) { + + print_line(str); +} + +void _speex_putc(int ch, void *file) { + + // will not putc, no. +} + +#ifdef __cplusplus +} +#endif diff --git a/drivers/speex/speex_bind.h b/drivers/speex/speex_bind.h index e842960d3c..c928430a33 100644 --- a/drivers/speex/speex_bind.h +++ b/drivers/speex/speex_bind.h @@ -1,48 +1,48 @@ -#ifndef SPEEX_BIND_H
-#define SPEEX_BIND_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*
-#define OVERRIDE_SPEEX_ALLOC
-#define OVERRIDE_SPEEX_ALLOC_SCRATCH
-#define OVERRIDE_SPEEX_REALLOC
-#define OVERRIDE_SPEEX_FREE
-#define OVERRIDE_SPEEX_FREE_SCRATCH
-#define OVERRIDE_SPEEX_FATAL
-#define OVERRIDE_SPEEX_WARNING
-#define OVERRIDE_SPEEX_WARNING_INT
-#define OVERRIDE_SPEEX_NOTIFY
-#define OVERRIDE_SPEEX_PUTC
-
-void *speex_alloc (int size);
-void *speex_alloc_scratch (int size);
-void *speex_realloc (void *ptr, int size);
-void speex_free (void *ptr);
-void speex_free_scratch (void *ptr);
-void _speex_fatal(const char *str, const char *file, int line);
-void speex_warning(const char *str);
-void speex_warning_int(const char *str, int val);
-void speex_notify(const char *str);
-void _speex_putc(int ch, void *file);
-
-
-*/
-#define RELEASE
-#define SPEEX_PI 3.14159265358979323846
-
-#ifdef _MSC_VER
-#define SPEEX_INLINE __inline
-#else
-#define SPEEX_INLINE inline
-#endif
-
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif // SPEEX_BIND_H
+#ifndef SPEEX_BIND_H +#define SPEEX_BIND_H + +#ifdef __cplusplus +extern "C" { +#endif + +/* +#define OVERRIDE_SPEEX_ALLOC +#define OVERRIDE_SPEEX_ALLOC_SCRATCH +#define OVERRIDE_SPEEX_REALLOC +#define OVERRIDE_SPEEX_FREE +#define OVERRIDE_SPEEX_FREE_SCRATCH +#define OVERRIDE_SPEEX_FATAL +#define OVERRIDE_SPEEX_WARNING +#define OVERRIDE_SPEEX_WARNING_INT +#define OVERRIDE_SPEEX_NOTIFY +#define OVERRIDE_SPEEX_PUTC + +void *speex_alloc (int size); +void *speex_alloc_scratch (int size); +void *speex_realloc (void *ptr, int size); +void speex_free (void *ptr); +void speex_free_scratch (void *ptr); +void _speex_fatal(const char *str, const char *file, int line); +void speex_warning(const char *str); +void speex_warning_int(const char *str, int val); +void speex_notify(const char *str); +void _speex_putc(int ch, void *file); + + +*/ +#define RELEASE +#define SPEEX_PI 3.14159265358979323846 + +#ifdef _MSC_VER +#define SPEEX_INLINE __inline +#else +#define SPEEX_INLINE inline +#endif + + + +#ifdef __cplusplus +} +#endif + +#endif // SPEEX_BIND_H diff --git a/drivers/theora/video_stream_theora.cpp b/drivers/theora/video_stream_theora.cpp index 214185cf88..bea49e34b7 100644 --- a/drivers/theora/video_stream_theora.cpp +++ b/drivers/theora/video_stream_theora.cpp @@ -1,16 +1,12 @@ #ifdef THEORA_ENABLED -#if 0 + #include "video_stream_theora.h" #include "os/os.h" #include "yuv2rgb.h" +#include "globals.h" -AudioStream::UpdateMode VideoStreamTheora::get_update_mode() const { - - return UPDATE_IDLE; -}; - -int VideoStreamTheora:: buffer_data() { +int VideoStreamPlaybackTheora:: buffer_data() { char *buffer=ogg_sync_buffer(&oy,4096); int bytes=file->get_buffer((uint8_t*)buffer, 4096); @@ -18,33 +14,13 @@ int VideoStreamTheora:: buffer_data() { return(bytes); } -int VideoStreamTheora::queue_page(ogg_page *page){ +int VideoStreamPlaybackTheora::queue_page(ogg_page *page){ if(theora_p)ogg_stream_pagein(&to,page); if(vorbis_p)ogg_stream_pagein(&vo,page); return 0; } -Image 
VideoStreamTheora::peek_frame() const { - - if (frames_pending == 0) - return Image(); - return Image(size.x, size.y, 0, format, frame_data); -}; - -Image VideoStreamTheora::pop_frame() { - - Image ret = peek_frame(); - frames_pending = 0; - - return ret; -}; - -int VideoStreamTheora::get_pending_frame_count() const { - - return frames_pending; -}; - -void VideoStreamTheora::video_write(void){ +void VideoStreamPlaybackTheora::video_write(void){ th_ycbcr_buffer yuv; int y_offset, uv_offset; th_decode_ycbcr_out(td,yuv); @@ -78,25 +54,31 @@ void VideoStreamTheora::video_write(void){ int pitch = 4; frame_data.resize(size.x * size.y * pitch); - DVector<uint8_t>::Write w = frame_data.write(); - char* dst = (char*)w.ptr(); + { + DVector<uint8_t>::Write w = frame_data.write(); + char* dst = (char*)w.ptr(); - uv_offset=(ti.pic_x/2)+(yuv[1].stride)*(ti.pic_y/2); + uv_offset=(ti.pic_x/2)+(yuv[1].stride)*(ti.pic_y/2); - if (px_fmt == TH_PF_444) { + if (px_fmt == TH_PF_444) { - yuv444_2_rgb8888((uint8_t*)dst, (uint8_t*)yuv[0].data, (uint8_t*)yuv[1].data, (uint8_t*)yuv[2].data, size.x, size.y, yuv[0].stride, yuv[1].stride, size.x<<2, 0); + yuv444_2_rgb8888((uint8_t*)dst, (uint8_t*)yuv[0].data, (uint8_t*)yuv[1].data, (uint8_t*)yuv[2].data, size.x, size.y, yuv[0].stride, yuv[1].stride, size.x<<2, 0); - } else if (px_fmt == TH_PF_422) { + } else if (px_fmt == TH_PF_422) { - yuv422_2_rgb8888((uint8_t*)dst, (uint8_t*)yuv[0].data, (uint8_t*)yuv[1].data, (uint8_t*)yuv[2].data, size.x, size.y, yuv[0].stride, yuv[1].stride, size.x<<2, 0); + yuv422_2_rgb8888((uint8_t*)dst, (uint8_t*)yuv[0].data, (uint8_t*)yuv[1].data, (uint8_t*)yuv[2].data, size.x, size.y, yuv[0].stride, yuv[1].stride, size.x<<2, 0); - } else if (px_fmt == TH_PF_420) { + } else if (px_fmt == TH_PF_420) { - yuv420_2_rgb8888((uint8_t*)dst, (uint8_t*)yuv[0].data, (uint8_t*)yuv[2].data, (uint8_t*)yuv[1].data, size.x, size.y, yuv[0].stride, yuv[1].stride, size.x<<2, 0); - }; + yuv420_2_rgb8888((uint8_t*)dst, 
(uint8_t*)yuv[0].data, (uint8_t*)yuv[2].data, (uint8_t*)yuv[1].data, size.x, size.y, yuv[0].stride, yuv[1].stride, size.x<<2, 0); + }; - format = Image::FORMAT_RGBA; + format = Image::FORMAT_RGBA; + } + + Image img(size.x,size.y,0,Image::FORMAT_RGBA,frame_data); //zero copy image creation + + texture->set_data(img); //zero copy send to visual server /* @@ -194,7 +176,7 @@ void VideoStreamTheora::video_write(void){ frames_pending = 1; } -void VideoStreamTheora::clear() { +void VideoStreamPlaybackTheora::clear() { if (file_name == "") return; @@ -218,7 +200,7 @@ void VideoStreamTheora::clear() { } ogg_sync_clear(&oy); - file_name = ""; + //file_name = ""; theora_p = 0; vorbis_p = 0; @@ -229,7 +211,7 @@ void VideoStreamTheora::clear() { playing = false; }; -void VideoStreamTheora::set_file(const String& p_file) { +void VideoStreamPlaybackTheora::set_file(const String& p_file) { ogg_packet op; th_setup_info *ts = NULL; @@ -241,7 +223,7 @@ void VideoStreamTheora::set_file(const String& p_file) { file = FileAccess::open(p_file, FileAccess::READ); ERR_FAIL_COND(!file); - audio_frames_wrote = 0; + ogg_sync_init(&oy); @@ -256,6 +238,8 @@ void VideoStreamTheora::set_file(const String& p_file) { /* Ogg file open; parse the headers */ /* Only interested in Vorbis/Theora streams */ int stateflag = 0; + + int audio_track_skip=audio_track; while(!stateflag){ int ret=buffer_data(); if(ret==0)break; @@ -282,8 +266,14 @@ void VideoStreamTheora::set_file(const String& p_file) { theora_p=1; }else if(!vorbis_p && vorbis_synthesis_headerin(&vi,&vc,&op)>=0){ /* it is vorbis */ - copymem(&vo,&test,sizeof(test)); - vorbis_p=1; + if (audio_track_skip) { + vorbis_info_clear(&vi); + vorbis_comment_clear(&vc); + audio_track_skip--; + } else { + copymem(&vo,&test,sizeof(test)); + vorbis_p=1; + } }else{ /* whatever it is, we don't care about it */ ogg_stream_clear(&test); @@ -386,6 +376,8 @@ void VideoStreamTheora::set_file(const String& p_file) { size.x = w; size.y = h; + 
texture->create(w,h,Image::FORMAT_RGBA,Texture::FLAG_FILTER|Texture::FLAG_VIDEO_SURFACE); + }else{ /* tear down the partial theora setup */ th_info_clear(&ti); @@ -399,7 +391,7 @@ void VideoStreamTheora::set_file(const String& p_file) { vorbis_block_init(&vd,&vb); fprintf(stderr,"Ogg logical stream %lx is Vorbis %d channel %ld Hz audio.\n", vo.serialno,vi.channels,vi.rate); - _setup(vi.channels, vi.rate); + //_setup(vi.channels, vi.rate); }else{ /* tear down the partial vorbis setup */ vorbis_info_clear(&vi); @@ -411,227 +403,299 @@ void VideoStreamTheora::set_file(const String& p_file) { time=0; }; -float VideoStreamTheora::get_time() const { +float VideoStreamPlaybackTheora::get_time() const { //print_line("total: "+itos(get_total())+" todo: "+itos(get_todo())); //return MAX(0,time-((get_total())/(float)vi.rate)); - return time-((get_total())/(float)vi.rate); + return time-AudioServer::get_singleton()->get_output_delay()-delay_compensation;//-((get_total())/(float)vi.rate); }; -void VideoStreamTheora::update() { +Ref<Texture> VideoStreamPlaybackTheora::get_texture() { + + return texture; +} + +void VideoStreamPlaybackTheora::update(float p_delta) { if (!playing) { //printf("not playing\n"); return; }; - double ctime =AudioServer::get_singleton()->get_mix_time(); + //double ctime =AudioServer::get_singleton()->get_mix_time(); - if (last_update_time) { - double delta = (ctime-last_update_time); - time+=delta; - //print_line("delta: "+rtos(delta)); - } - last_update_time=ctime; + //print_line("play "+rtos(p_delta)); + time+=p_delta; + if (videobuf_time>get_time()) + return; //no new frames need to be produced - int audio_todo = get_todo(); - ogg_packet op; - int audio_pending = 0; + bool frame_done=false; + while (!frame_done) { + //a frame needs to be produced - while (vorbis_p && audio_todo) { - int ret; - float **pcm; - - /* if there's pending, decoded audio, grab it */ - if ((ret=vorbis_synthesis_pcmout(&vd,&pcm))>0) { - - audio_pending = ret; - int16_t* out = 
get_write_buffer(); - int count = 0; - int to_read = MIN(ret, audio_todo); - for (int i=0; i<to_read; i++) { - - for(int j=0;j<vi.channels;j++){ - int val=Math::fast_ftoi(pcm[j][i]*32767.f); - if(val>32767)val=32767; - if(val<-32768)val=-32768; - out[count++] = val; - }; - }; - int tr = vorbis_synthesis_read(&vd, to_read); - audio_todo -= to_read; - audio_frames_wrote += to_read; - write(to_read); - audio_pending -= to_read; - if (audio_todo==0) - buffering=false; + ogg_packet op; + bool audio_pending = false; - } else { + while (vorbis_p) { + int ret; + float **pcm; + + bool buffer_full=false; + + /* if there's pending, decoded audio, grab it */ + if ((ret=vorbis_synthesis_pcmout(&vd,&pcm))>0) { + + + + const int AUXBUF_LEN=4096; + int to_read = ret; + int16_t aux_buffer[AUXBUF_LEN]; + + while(to_read) { + + int m = MIN(AUXBUF_LEN/vi.channels,to_read); + + int count = 0; + + for(int j=0;j<m;j++){ + for(int i=0;i<vi.channels;i++){ + + int val=Math::fast_ftoi(pcm[i][j]*32767.f); + if(val>32767)val=32767; + if(val<-32768)val=-32768; + aux_buffer[count++] = val; + } + } + + if (mix_callback) { + int mixed = mix_callback(mix_udata,aux_buffer,m); + to_read-=mixed; + if (mixed!=m) { //could mix no more + buffer_full=true; + break; + } + } else { + to_read-=m; //just pretend we sent the audio + } + - /* no pending audio; is there a pending packet to decode? */ - if (ogg_stream_packetout(&vo,&op)>0){ - if(vorbis_synthesis(&vb,&op)==0) { /* test for success! */ - vorbis_synthesis_blockin(&vd,&vb); } - } else { /* we need more data; break out to suck in another page */ - //printf("need moar data\n"); + + + int tr = vorbis_synthesis_read(&vd, ret-to_read); + + audio_pending=true; + + + } else { + + /* no pending audio; is there a pending packet to decode? */ + if (ogg_stream_packetout(&vo,&op)>0){ + if(vorbis_synthesis(&vb,&op)==0) { /* test for success! 
*/ + vorbis_synthesis_blockin(&vd,&vb); + } + } else { /* we need more data; break out to suck in another page */ + //printf("need moar data\n"); + break; + }; + } + + if (buffer_full) break; - }; } - } - while(theora_p && !videobuf_ready){ - /* theora is one in, one out... */ - if(ogg_stream_packetout(&to,&op)>0){ + while(theora_p && !frame_done){ + /* theora is one in, one out... */ + if(ogg_stream_packetout(&to,&op)>0){ - if(pp_inc){ - pp_level+=pp_inc; - th_decode_ctl(td,TH_DECCTL_SET_PPLEVEL,&pp_level, - sizeof(pp_level)); - pp_inc=0; - } - /*HACK: This should be set after a seek or a gap, but we might not have - a granulepos for the first packet (we only have them for the last - packet on a page), so we just set it as often as we get it. - To do this right, we should back-track from the last packet on the - page and compute the correct granulepos for the first packet after - a seek or a gap.*/ - if(op.granulepos>=0){ - th_decode_ctl(td,TH_DECCTL_SET_GRANPOS,&op.granulepos, - sizeof(op.granulepos)); - } - ogg_int64_t videobuf_granulepos; - if(th_decode_packetin(td,&op,&videobuf_granulepos)==0){ - videobuf_time=th_granule_time(td,videobuf_granulepos); - //printf("frame time %f, play time %f, ready %i\n", (float)videobuf_time, get_time(), videobuf_ready); - - /* is it already too old to be useful? This is only actually - useful cosmetically after a SIGSTOP. Note that we have to - decode the frame even if we don't show it (for now) due to - keyframing. Soon enough libtheora will be able to deal - with non-keyframe seeks. 
*/ - - if(videobuf_time>=get_time()) - videobuf_ready=1; - else{ - /*If we are too slow, reduce the pp level.*/ - pp_inc=pp_level>0?-1:0; + if(pp_inc){ + pp_level+=pp_inc; + th_decode_ctl(td,TH_DECCTL_SET_PPLEVEL,&pp_level, + sizeof(pp_level)); + pp_inc=0; + } + /*HACK: This should be set after a seek or a gap, but we might not have + a granulepos for the first packet (we only have them for the last + packet on a page), so we just set it as often as we get it. + To do this right, we should back-track from the last packet on the + page and compute the correct granulepos for the first packet after + a seek or a gap.*/ + if(op.granulepos>=0){ + th_decode_ctl(td,TH_DECCTL_SET_GRANPOS,&op.granulepos, + sizeof(op.granulepos)); + } + ogg_int64_t videobuf_granulepos; + if(th_decode_packetin(td,&op,&videobuf_granulepos)==0){ + videobuf_time=th_granule_time(td,videobuf_granulepos); + + //printf("frame time %f, play time %f, ready %i\n", (float)videobuf_time, get_time(), videobuf_ready); + + /* is it already too old to be useful? This is only actually + useful cosmetically after a SIGSTOP. Note that we have to + decode the frame even if we don't show it (for now) due to + keyframing. Soon enough libtheora will be able to deal + with non-keyframe seeks. */ + + if(videobuf_time>=get_time()) + frame_done=true; + else{ + /*If we are too slow, reduce the pp level.*/ + pp_inc=pp_level>0?-1:0; + } } - } - - } else - break; - } - if (/*!videobuf_ready && */ audio_pending == 0 && file->eof_reached()) { - printf("video done, stopping\n"); - stop(); - return; - }; + } else + break; + } - if (!videobuf_ready || audio_todo > 0){ - /* no data yet for somebody. Grab another page */ + if (file && /*!videobuf_ready && */ file->eof_reached()) { + printf("video done, stopping\n"); + stop(); + return; + }; + #if 0 + if (!videobuf_ready || audio_todo > 0){ + /* no data yet for somebody. 
Grab another page */ - buffer_data(); - while(ogg_sync_pageout(&oy,&og)>0){ - queue_page(&og); + buffer_data(); + while(ogg_sync_pageout(&oy,&og)>0){ + queue_page(&og); + } } - } + #else + if (!frame_done){ + //what's the point of waiting for audio to grab a page? - /* If playback has begun, top audio buffer off immediately. */ - //if(stateflag) audio_write_nonblocking(); + buffer_data(); + while(ogg_sync_pageout(&oy,&og)>0){ + queue_page(&og); + } + } + #endif + /* If playback has begun, top audio buffer off immediately. */ + //if(stateflag) audio_write_nonblocking(); - /* are we at or past time for this video frame? */ - if(videobuf_ready && videobuf_time<=get_time()){ + /* are we at or past time for this video frame? */ + if(videobuf_ready && videobuf_time<=get_time()){ - video_write(); - videobuf_ready=0; - } else { - //printf("frame at %f not ready (time %f), ready %i\n", (float)videobuf_time, get_time(), videobuf_ready); - } + //video_write(); + //videobuf_ready=0; + } else { + //printf("frame at %f not ready (time %f), ready %i\n", (float)videobuf_time, get_time(), videobuf_ready); + } - float tdiff=videobuf_time-get_time(); - /*If we have lots of extra time, increase the post-processing level.*/ - if(tdiff>ti.fps_denominator*0.25/ti.fps_numerator){ - pp_inc=pp_level<pp_level_max?1:0; - } - else if(tdiff<ti.fps_denominator*0.05/ti.fps_numerator){ - pp_inc=pp_level>0?-1:0; + float tdiff=videobuf_time-get_time(); + /*If we have lots of extra time, increase the post-processing level.*/ + if(tdiff>ti.fps_denominator*0.25/ti.fps_numerator){ + pp_inc=pp_level<pp_level_max?1:0; + } + else if(tdiff<ti.fps_denominator*0.05/ti.fps_numerator){ + pp_inc=pp_level>0?-1:0; + } } -}; -bool VideoStreamTheora::_can_mix() const { + video_write(); - return !buffering; }; -void VideoStreamTheora::play() { + +void VideoStreamPlaybackTheora::play() { if (!playing) - last_update_time=0; + time=0; playing = true; + 
delay_compensation=Globals::get_singleton()->get("audio/video_delay_compensation_ms"); + delay_compensation/=1000.0; + }; -void VideoStreamTheora::stop() { +void VideoStreamPlaybackTheora::stop() { + if (playing) { + clear(); + set_file(file_name); //reset + } playing = false; - last_update_time=0; + time=0; }; -bool VideoStreamTheora::is_playing() const { +bool VideoStreamPlaybackTheora::is_playing() const { return playing; }; -void VideoStreamTheora::set_paused(bool p_paused) { +void VideoStreamPlaybackTheora::set_paused(bool p_paused) { playing = !p_paused; }; -bool VideoStreamTheora::is_paused(bool p_paused) const { +bool VideoStreamPlaybackTheora::is_paused(bool p_paused) const { return playing; }; -void VideoStreamTheora::set_loop(bool p_enable) { +void VideoStreamPlaybackTheora::set_loop(bool p_enable) { }; -bool VideoStreamTheora::has_loop() const { +bool VideoStreamPlaybackTheora::has_loop() const { return false; }; -float VideoStreamTheora::get_length() const { +float VideoStreamPlaybackTheora::get_length() const { return 0; }; -String VideoStreamTheora::get_stream_name() const { +String VideoStreamPlaybackTheora::get_stream_name() const { return ""; }; -int VideoStreamTheora::get_loop_count() const { +int VideoStreamPlaybackTheora::get_loop_count() const { return 0; }; -float VideoStreamTheora::get_pos() const { +float VideoStreamPlaybackTheora::get_pos() const { return get_time(); }; -void VideoStreamTheora::seek_pos(float p_time) { +void VideoStreamPlaybackTheora::seek_pos(float p_time) { // no }; -VideoStreamTheora::VideoStreamTheora() { +void VideoStreamPlaybackTheora::set_mix_callback(AudioMixCallback p_callback,void *p_userdata) { + + mix_callback=p_callback; + mix_udata=p_userdata; +} + +int VideoStreamPlaybackTheora::get_channels() const{ + + return vi.channels; +} + +void VideoStreamPlaybackTheora::set_audio_track(int p_idx) { + + audio_track=p_idx; +} + +int VideoStreamPlaybackTheora::get_mix_rate() const{ + + return vi.rate; +} + + + 
+VideoStreamPlaybackTheora::VideoStreamPlaybackTheora() { file = NULL; theora_p = 0; @@ -640,11 +704,16 @@ VideoStreamTheora::VideoStreamTheora() { playing = false; frames_pending = 0; videobuf_time = 0; - last_update_time =0; + buffering=false; + texture = Ref<ImageTexture>( memnew(ImageTexture )); + mix_callback=NULL; + mix_udata=NULL; + audio_track=0; + delay_compensation=0; }; -VideoStreamTheora::~VideoStreamTheora() { +VideoStreamPlaybackTheora::~VideoStreamPlaybackTheora() { clear(); @@ -653,10 +722,16 @@ VideoStreamTheora::~VideoStreamTheora() { }; -RES ResourceFormatLoaderVideoStreamTheora::load(const String &p_path,const String& p_original_path) { +RES ResourceFormatLoaderVideoStreamTheora::load(const String &p_path,const String& p_original_path, Error *r_error) { + if (r_error) + *r_error=ERR_FILE_CANT_OPEN; VideoStreamTheora *stream = memnew(VideoStreamTheora); stream->set_file(p_path); + + if (r_error) + *r_error=OK; + return Ref<VideoStreamTheora>(stream); } @@ -666,16 +741,16 @@ void ResourceFormatLoaderVideoStreamTheora::get_recognized_extensions(List<Strin p_extensions->push_back("ogv"); } bool ResourceFormatLoaderVideoStreamTheora::handles_type(const String& p_type) const { - return (p_type=="AudioStream" || p_type=="VideoStreamTheora"); + return (p_type=="VideoStream" || p_type=="VideoStreamTheora"); } String ResourceFormatLoaderVideoStreamTheora::get_resource_type(const String &p_path) const { String exl=p_path.extension().to_lower(); if (exl=="ogm" || exl=="ogv") - return "AudioStreamTheora"; + return "VideoStreamTheora"; return ""; } #endif -#endif + diff --git a/drivers/theora/video_stream_theora.h b/drivers/theora/video_stream_theora.h index 12aac731fc..95c7fe88f6 100644 --- a/drivers/theora/video_stream_theora.h +++ b/drivers/theora/video_stream_theora.h @@ -10,9 +10,9 @@ #include "io/resource_loader.h" #include "scene/resources/video_stream.h" -class VideoStreamTheora : public VideoStream { +class VideoStreamPlaybackTheora : public 
VideoStreamPlayback { - OBJ_TYPE(VideoStreamTheora, VideoStream); + OBJ_TYPE(VideoStreamPlaybackTheora, VideoStreamPlayback); enum { MAX_FRAMES = 4, @@ -58,16 +58,19 @@ class VideoStreamTheora : public VideoStream { double last_update_time; double time; + double delay_compensation; -protected: + Ref<ImageTexture> texture; - virtual UpdateMode get_update_mode() const; - virtual void update(); + AudioMixCallback mix_callback; + void* mix_udata; - void clear(); + int audio_track; - virtual bool _can_mix() const; +protected: + void clear(); + public: virtual void play(); @@ -92,12 +95,43 @@ public: void set_file(const String& p_file); - int get_pending_frame_count() const; - Image pop_frame(); - Image peek_frame() const; + virtual Ref<Texture> get_texture(); + virtual void update(float p_delta); + + virtual void set_mix_callback(AudioMixCallback p_callback,void *p_userdata); + virtual int get_channels() const; + virtual int get_mix_rate() const; + + virtual void set_audio_track(int p_idx); + + VideoStreamPlaybackTheora(); + ~VideoStreamPlaybackTheora(); +}; + + + +class VideoStreamTheora : public VideoStream { + + OBJ_TYPE(VideoStreamTheora,VideoStream); + + String file; + int audio_track; + + +public: + + Ref<VideoStreamPlayback> instance_playback() { + Ref<VideoStreamPlaybackTheora> pb = memnew( VideoStreamPlaybackTheora ); + pb->set_audio_track(audio_track); + pb->set_file(file); + return pb; + } + + void set_file(const String& p_file) { file=p_file; } + void set_audio_track(int p_track) { audio_track=p_track; } + + VideoStreamTheora() { audio_track=0; } - VideoStreamTheora(); - ~VideoStreamTheora(); }; class ResourceFormatLoaderVideoStreamTheora : public ResourceFormatLoader { diff --git a/drivers/theoraplayer/SCsub b/drivers/theoraplayer/SCsub deleted file mode 100644 index 09fb13d8e9..0000000000 --- a/drivers/theoraplayer/SCsub +++ /dev/null @@ -1,106 +0,0 @@ -Import("env") - -import string - -sources = string.split(""" -src/TheoraVideoClip.cpp 
-src/FFmpeg/TheoraVideoClip_FFmpeg.cpp -src/TheoraAsync.cpp -src/TheoraAudioInterface.cpp -src/TheoraException.cpp -src/TheoraWorkerThread.cpp -src/TheoraVideoManager.cpp -src/TheoraTimer.cpp -src/TheoraUtil.cpp -src/TheoraDataSource.cpp -src/TheoraAudioPacketQueue.cpp -src/TheoraFrameQueue.cpp -src/Theora/TheoraVideoClip_Theora.cpp -src/YUV/yuv_util.c -src/YUV/libyuv/src/row_any.cc -src/YUV/libyuv/src/compare_common.cc -src/YUV/libyuv/src/scale_neon.cc -src/YUV/libyuv/src/planar_functions.cc -src/YUV/libyuv/src/compare.cc -src/YUV/libyuv/src/scale_mips.cc -src/YUV/libyuv/src/scale_posix.cc -src/YUV/libyuv/src/row_posix.cc -src/YUV/libyuv/src/row_win.cc -src/YUV/libyuv/src/compare_neon.cc -src/YUV/libyuv/src/convert_from_argb.cc -src/YUV/libyuv/src/mjpeg_validate.cc -src/YUV/libyuv/src/convert_from.cc -src/YUV/libyuv/src/rotate_neon.cc -src/YUV/libyuv/src/row_neon.cc -src/YUV/libyuv/src/rotate_mips.cc -src/YUV/libyuv/src/compare_posix.cc -src/YUV/libyuv/src/row_mips.cc -src/YUV/libyuv/src/scale.cc -src/YUV/libyuv/src/scale_argb.cc -src/YUV/libyuv/src/mjpeg_decoder.cc -src/YUV/libyuv/src/scale_win.cc -src/YUV/libyuv/src/scale_common.cc -src/YUV/libyuv/src/scale_argb_neon.cc -src/YUV/libyuv/src/row_common.cc -src/YUV/libyuv/src/convert.cc -src/YUV/libyuv/src/format_conversion.cc -src/YUV/libyuv/src/rotate_argb.cc -src/YUV/libyuv/src/rotate.cc -src/YUV/libyuv/src/convert_argb.cc -src/YUV/libyuv/src/cpu_id.cc -src/YUV/libyuv/src/video_common.cc -src/YUV/libyuv/src/convert_to_argb.cc -src/YUV/libyuv/src/compare_win.cc -src/YUV/libyuv/src/convert_to_i420.cc -src/YUV/libyuv/src/convert_jpeg.cc -src/YUV/libyuv/yuv_libyuv.c -src/YUV/android/cpu-features.c -src/YUV/C/yuv420_grey_c.c -src/YUV/C/yuv420_yuv_c.c -src/YUV/C/yuv420_rgb_c.c -src/TheoraVideoFrame.cpp -""") - -env_theora = env.Clone() - -if env["platform"] == "iphone": - sources.append("src/AVFoundation/TheoraVideoClip_AVFoundation.mm") - env.Append(LINKFLAGS=['-framework', 'CoreVideo', '-framework', 'CoreMedia', 
'-framework', 'AVFoundation']) - if env["target"] == "release": - env_theora.Append(CPPFLAGS=["-D_IOS", "-D__ARM_NEON__", "-fstrict-aliasing", "-fmessage-length=210", "-fdiagnostics-show-note-include-stack", "-fmacro-backtrace-limit=0", "-fcolor-diagnostics", "-Wno-trigraphs", "-fpascal-strings", "-fvisibility=hidden", "-fvisibility-inlines-hidden"]) - -env_theora.Append(CPPFLAGS=["-D_LIB", "-D__THEORA"]) # removed -D_YUV_C -env_theora.Append(CPPFLAGS=["-D_YUV_LIBYUV"]) -#env_theora.Append(CPPFLAGS=["-D_YUV_C"]) - -if env["platform"] == "iphone": - env_theora.Append(CPPFLAGS=["-D__AVFOUNDATION"]) -else: - pass - #env_theora.Append(CPPFLAGS=["-D__FFMPEG"]) - -if env["platform"] == "android": - env_theora.Append(CPPFLAGS=["-D_ANDROID"]) - -if env["platform"] == "winrt": - env_theora.Append(CPPFLAGS=["-D_WINRT"]) - -env_theora.Append(CPPPATH=["#drivers/theoraplayer/include/theoraplayer", "#drivers/theoraplayer/src/YUV", "#drivers/theoraplayer/src/YUV/libyuv/include", "#drivers/theoraplayer/src/Theora", "#drivers/theoraplayer/src/AVFoundation"]) - -objs = [] - -env_theora.add_source_files(objs, ["video_stream_theoraplayer.cpp"]) - -if env['use_theoraplayer_binary'] == "yes": - if env["platform"] == "iphone": - env.Append(LIBPATH=['#drivers/theoraplayer/lib/ios']) - env.Append(LIBS=['theoraplayer', 'ogg', 'theora', 'tremor']) - if env["platform"] == "windows": - env.Append(LIBPATH=['#drivers/theoraplayer/lib/windows']) - env.Append(LINKFLAGS=['libtheoraplayer_static.lib', 'libogg.lib', 'libtheora.lib', 'libvorbis.lib']) -else: - env_theora.add_source_files(objs, sources) - -env.drivers_sources += objs - - diff --git a/drivers/theoraplayer/include/theoraplayer/TheoraAsync.h b/drivers/theoraplayer/include/theoraplayer/TheoraAsync.h deleted file mode 100644 index 7f1b49b9af..0000000000 --- a/drivers/theoraplayer/include/theoraplayer/TheoraAsync.h +++ /dev/null @@ -1,51 +0,0 @@ -/************************************************************************************ -This 
source file is part of the Theora Video Playback Library -For latest info, see http://libtheoraplayer.googlecode.com -************************************************************************************* -Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com) -This program is free software; you can redistribute it and/or modify it under -the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause -*************************************************************************************/ -#ifndef _TheoraAsync_h -#define _TheoraAsync_h - -#ifndef _WIN32 -#include <pthread.h> -#endif - -/// @note Based on hltypes::Thread -class TheoraMutex -{ -public: - TheoraMutex(); - ~TheoraMutex(); - void lock(); - void unlock(); - -protected: - void* mHandle; - -}; - -/// @note Based on hltypes::Thread -class TheoraThread -{ - TheoraMutex mRunningMutex; -public: - TheoraThread(); - virtual ~TheoraThread(); - void start(); - void stop(); - void resume(); - void pause(); - bool isRunning(); - virtual void execute() = 0; - void join(); - -protected: - void* mId; - volatile bool mRunning; - -}; - -#endif diff --git a/drivers/theoraplayer/include/theoraplayer/TheoraAudioInterface.h b/drivers/theoraplayer/include/theoraplayer/TheoraAudioInterface.h deleted file mode 100644 index aa03293806..0000000000 --- a/drivers/theoraplayer/include/theoraplayer/TheoraAudioInterface.h +++ /dev/null @@ -1,51 +0,0 @@ -/************************************************************************************ -This source file is part of the Theora Video Playback Library -For latest info, see http://libtheoraplayer.googlecode.com -************************************************************************************* -Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com) -This program is free software; you can redistribute it and/or modify it under -the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause 
-*************************************************************************************/ -#ifndef _TheoraAudioInterface_h -#define _TheoraAudioInterface_h - -#include "TheoraExport.h" - -class TheoraVideoClip; - - -/** - This is the class that serves as an interface between the library's audio - output and the audio playback library of your choice. - The class gets mono or stereo PCM data in in floating point data - */ -class TheoraPlayerExport TheoraAudioInterface -{ -public: - //! PCM frequency, usualy 44100 Hz - int mFreq; - //! Mono or stereo - int mNumChannels; - //! Pointer to the parent TheoraVideoClip object - TheoraVideoClip* mClip; - - TheoraAudioInterface(TheoraVideoClip* owner, int nChannels, int freq); - virtual ~TheoraAudioInterface(); - //! A function that the TheoraVideoClip object calls once more audio packets are decoded - /*! - \param data contains one or two channels of float PCM data in the range [-1,1] - \param nSamples contains the number of samples that the data parameter contains in each channel - */ - virtual void insertData(float* data, int nSamples)=0; -}; - -class TheoraPlayerExport TheoraAudioInterfaceFactory -{ -public: - //! 
VideoManager calls this when creating a new TheoraVideoClip object - virtual TheoraAudioInterface* createInstance(TheoraVideoClip* owner, int nChannels, int freq) = 0; -}; - - -#endif - diff --git a/drivers/theoraplayer/include/theoraplayer/TheoraAudioPacketQueue.h b/drivers/theoraplayer/include/theoraplayer/TheoraAudioPacketQueue.h deleted file mode 100644 index e0d17516e6..0000000000 --- a/drivers/theoraplayer/include/theoraplayer/TheoraAudioPacketQueue.h +++ /dev/null @@ -1,48 +0,0 @@ -/************************************************************************************ -This source file is part of the Theora Video Playback Library -For latest info, see http://libtheoraplayer.googlecode.com -************************************************************************************* -Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com) -This program is free software; you can redistribute it and/or modify it under -the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause -*************************************************************************************/ -#ifndef _TheoraAudioPacketQueue_h -#define _TheoraAudioPacketQueue_h - -#include "TheoraExport.h" - -class TheoraAudioInterface; -/** - This is an internal structure which TheoraVideoClip_Theora uses to store audio packets - */ -struct TheoraAudioPacket -{ - float* pcm; - int numSamples; //! size in number of float samples (stereo has twice the number of samples) - TheoraAudioPacket* next; // pointer to the next audio packet, to implement a linked list -}; - -/** - This is a Mutex object, used in thread syncronization. 
- */ -class TheoraPlayerExport TheoraAudioPacketQueue -{ -protected: - unsigned int mAudioFrequency, mNumAudioChannels; - TheoraAudioPacket* mTheoraAudioPacketQueue; - void _addAudioPacket(float* data, int numSamples); -public: - TheoraAudioPacketQueue(); - ~TheoraAudioPacketQueue(); - - float getAudioPacketQueueLength(); - void addAudioPacket(float** buffer, int numSamples, float gain); - void addAudioPacket(float* buffer, int numSamples, float gain); - TheoraAudioPacket* popAudioPacket(); - void destroyAudioPacket(TheoraAudioPacket* p); - void destroyAllAudioPackets(); - - void flushAudioPackets(TheoraAudioInterface* audioInterface); -}; - -#endif diff --git a/drivers/theoraplayer/include/theoraplayer/TheoraDataSource.h b/drivers/theoraplayer/include/theoraplayer/TheoraDataSource.h deleted file mode 100644 index b7427e97d3..0000000000 --- a/drivers/theoraplayer/include/theoraplayer/TheoraDataSource.h +++ /dev/null @@ -1,89 +0,0 @@ -/************************************************************************************ -This source file is part of the Theora Video Playback Library -For latest info, see http://libtheoraplayer.googlecode.com -************************************************************************************* -Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com) -This program is free software; you can redistribute it and/or modify it under -the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause -*************************************************************************************/ -#ifndef _TheoraDataSource_h -#define _TheoraDataSource_h - -#include <stdio.h> -#include <string> -#include "TheoraExport.h" - -/** - This is a simple class that provides abstracted data feeding. You can use the - TheoraFileDataSource for regular file playback or you can implement your own - internet streaming solution, or a class that uses encrypted datafiles etc. 
- The sky is the limit -*/ -class TheoraPlayerExport TheoraDataSource -{ -public: - - virtual ~TheoraDataSource(); - /** - Reads nBytes bytes from data source and returns number of read bytes. - if function returns less bytes then nBytes, the system assumes EOF is reached. - */ - virtual int read(void* output,int nBytes)=0; - //! returns a string representation of the DataSource, eg 'File: source.ogg' - virtual std::string repr()=0; - //! position the source pointer to byte_index from the start of the source - virtual void seek(unsigned long byte_index)=0; - //! return the size of the stream in bytes - virtual unsigned long size()=0; - //! return the current position of the source pointer - virtual unsigned long tell()=0; -}; - - -/** - provides standard file IO -*/ -class TheoraPlayerExport TheoraFileDataSource : public TheoraDataSource -{ - FILE* mFilePtr; - std::string mFilename; - unsigned long mSize; - - void openFile(); -public: - TheoraFileDataSource(std::string filename); - ~TheoraFileDataSource(); - - int read(void* output,int nBytes); - void seek(unsigned long byte_index); - std::string repr() { return mFilename; } - unsigned long size(); - unsigned long tell(); - - std::string getFilename() { return mFilename; } -}; - -/** - Pre-loads the entire file and streams from memory. - Very useful if you're continuously displaying a video and want to avoid disk reads. - Not very practical for large files. 
-*/ -class TheoraPlayerExport TheoraMemoryFileDataSource : public TheoraDataSource -{ - std::string mFilename; - unsigned long mSize, mReadPointer; - unsigned char* mData; -public: - TheoraMemoryFileDataSource(unsigned char* data, long size, const std::string& filename = "memory"); - TheoraMemoryFileDataSource(std::string filename); - ~TheoraMemoryFileDataSource(); - - int read(void* output,int nBytes); - void seek(unsigned long byte_index); - std::string repr() { return "MEM:"+mFilename; } - unsigned long size(); - unsigned long tell(); - std::string getFilename() { return mFilename; } -}; - -#endif diff --git a/drivers/theoraplayer/include/theoraplayer/TheoraException.h b/drivers/theoraplayer/include/theoraplayer/TheoraException.h deleted file mode 100644 index f79368fa1e..0000000000 --- a/drivers/theoraplayer/include/theoraplayer/TheoraException.h +++ /dev/null @@ -1,46 +0,0 @@ -/************************************************************************************ -This source file is part of the Theora Video Playback Library -For latest info, see http://libtheoraplayer.googlecode.com -************************************************************************************* -Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com) -This program is free software; you can redistribute it and/or modify it under -the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause -*************************************************************************************/ -#ifndef EXCEPTION_H -#define EXCEPTION_H - -#include <string> -#include "TheoraExport.h" - -class TheoraPlayerExport _TheoraGenericException -{ -public: - std::string mErrText,mFile,mType; - int mLineNumber; - - _TheoraGenericException(const std::string& errorText, std::string type = "",std::string file = "", int line = 0); - virtual ~_TheoraGenericException() {} - - virtual std::string repr(); - - void writeOutput(); - - virtual const std::string& getErrorText() { return mErrText; } - - const 
std::string getType(){ return mType; } -}; - -#define TheoraGenericException(msg) _TheoraGenericException(msg, "TheoraGenericException", __FILE__, __LINE__) - - -#define exception_cls(name) class name : public _TheoraGenericException \ -{ \ -public: \ - name(const std::string& errorText,std::string type = "",std::string file = "",int line = 0) : \ - _TheoraGenericException(errorText, type, file, line){} \ -} - -exception_cls(_KeyException); - - -#endif diff --git a/drivers/theoraplayer/include/theoraplayer/TheoraExport.h b/drivers/theoraplayer/include/theoraplayer/TheoraExport.h deleted file mode 100644 index cf16d1004c..0000000000 --- a/drivers/theoraplayer/include/theoraplayer/TheoraExport.h +++ /dev/null @@ -1,38 +0,0 @@ -/************************************************************************************ -This source file is part of the Theora Video Playback Library -For latest info, see http://libtheoraplayer.googlecode.com -************************************************************************************* -Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com) -This program is free software; you can redistribute it and/or modify it under -the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause -*************************************************************************************/ -#ifndef _theoraVideoExport_h -#define _theoraVideoExport_h - - #ifdef _LIB - #define TheoraPlayerExport - #define TheoraPlayerFnExport - #else - #ifdef _WIN32 - #ifdef THEORAVIDEO_EXPORTS - #define TheoraPlayerExport __declspec(dllexport) - #define TheoraPlayerFnExport __declspec(dllexport) - #else - #define TheoraPlayerExport __declspec(dllimport) - #define TheoraPlayerFnExport __declspec(dllimport) - #endif - #else - #define TheoraPlayerExport __attribute__ ((visibility("default"))) - #define TheoraPlayerFnExport __attribute__ ((visibility("default"))) - #endif - #endif - #ifndef DEPRECATED_ATTRIBUTE - #ifdef _MSC_VER - #define DEPRECATED_ATTRIBUTE 
__declspec(deprecated("function is deprecated")) - #else - #define DEPRECATED_ATTRIBUTE __attribute__((deprecated)) - #endif - #endif - -#endif - diff --git a/drivers/theoraplayer/include/theoraplayer/TheoraFrameQueue.h b/drivers/theoraplayer/include/theoraplayer/TheoraFrameQueue.h deleted file mode 100644 index fd985bb65a..0000000000 --- a/drivers/theoraplayer/include/theoraplayer/TheoraFrameQueue.h +++ /dev/null @@ -1,95 +0,0 @@ -/************************************************************************************ -This source file is part of the Theora Video Playback Library -For latest info, see http://libtheoraplayer.googlecode.com -************************************************************************************* -Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com) -This program is free software; you can redistribute it and/or modify it under -the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause -*************************************************************************************/ - -#ifndef _TheoraFrameQueue_h -#define _TheoraFrameQueue_h - -#include "TheoraAsync.h" -#include <list> -#include "TheoraExport.h" - -class TheoraVideoFrame; -class TheoraVideoClip; - -/** - This class handles the frame queue. contains frames and handles their alloctation/deallocation - it is designed to be thread-safe -*/ -class TheoraPlayerExport TheoraFrameQueue -{ -protected: - std::list<TheoraVideoFrame*> mQueue; - TheoraVideoClip* mParent; - TheoraMutex mMutex; - - //! implementation function that returns a TheoraVideoFrame instance - TheoraVideoFrame* createFrameInstance(TheoraVideoClip* clip); -public: - TheoraFrameQueue(TheoraVideoClip* parent); - ~TheoraFrameQueue(); - - /** - \brief Returns the first available frame in the queue or NULL if no frames are available. - - This function DOES NOT remove the frame from the queue, you have to do it manually - when you want to mark the frame as used by calling the pop() function. 
- */ - TheoraVideoFrame* getFirstAvailableFrame(); - //! non-mutex version - TheoraVideoFrame* _getFirstAvailableFrame(); - - //! return the number of used (not ready) frames - int getUsedCount(); - - //! return the number of ready frames - int getReadyCount(); - //! non-mutex version - int _getReadyCount(); - - /** - \brief remove the first N available frame from the queue. - - Use this every time you display a frame so you can get the next one when the time comes. - This function marks the frame on the front of the queue as unused and it's memory then - get's used again in the decoding process. - If you don't call this, the frame queue will fill up with precached frames up to the - specified amount in the TheoraVideoManager class and you won't be able to advance the video. - */ - void pop(int n = 1); - - //! This is an internal _pop function. use externally only in combination with lock() / unlock() calls - void _pop(int n); - - //! frees all decoded frames for reuse (does not destroy memory, just marks them as free) - void clear(); - //! Called by WorkerThreads when they need to unload frame data, do not call directly! - TheoraVideoFrame* requestEmptyFrame(); - - /** - \brief set's the size of the frame queue. - - Beware, currently stored ready frames will be lost upon this call - */ - void setSize(int n); - //! return the size of the queue - int getSize(); - - //! return whether all frames in the queue are ready for display - bool isFull(); - - //! lock the queue's mutex manually - void lock(); - //! unlock the queue's mutex manually - void unlock(); - - //! returns the internal frame queue. Warning: Always lock / unlock queue's mutex before accessing frames directly! 
- std::list<TheoraVideoFrame*>& _getFrameQueue(); -}; - -#endif diff --git a/drivers/theoraplayer/include/theoraplayer/TheoraPixelTransform.h b/drivers/theoraplayer/include/theoraplayer/TheoraPixelTransform.h deleted file mode 100644 index 73d853cd03..0000000000 --- a/drivers/theoraplayer/include/theoraplayer/TheoraPixelTransform.h +++ /dev/null @@ -1,18 +0,0 @@ -/************************************************************************************ -This source file is part of the Theora Video Playback Library -For latest info, see http://libtheoraplayer.googlecode.com -************************************************************************************* -Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com) -This program is free software; you can redistribute it and/or modify it under -the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause -*************************************************************************************/ -#ifndef _TheoraPixelTransform_h -#define _TheoraPixelTransform_h - -struct TheoraPixelTransform -{ - unsigned char *raw, *y, *u, *v, *out; - unsigned int w, h, rawStride, yStride, uStride, vStride; -}; - -#endif diff --git a/drivers/theoraplayer/include/theoraplayer/TheoraPlayer.h b/drivers/theoraplayer/include/theoraplayer/TheoraPlayer.h deleted file mode 100644 index 8c5f2c735c..0000000000 --- a/drivers/theoraplayer/include/theoraplayer/TheoraPlayer.h +++ /dev/null @@ -1,17 +0,0 @@ -/************************************************************************************ -This source file is part of the Theora Video Playback Library -For latest info, see http://libtheoraplayer.googlecode.com -************************************************************************************* -Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com) -This program is free software; you can redistribute it and/or modify it under -the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause 
-*************************************************************************************/ -#ifndef _TheoraPlayer_h -#define _TheoraPlayer_h - -#include "TheoraVideoManager.h" -#include "TheoraVideoClip.h" -#include "TheoraVideoFrame.h" - -#endif - diff --git a/drivers/theoraplayer/include/theoraplayer/TheoraTimer.h b/drivers/theoraplayer/include/theoraplayer/TheoraTimer.h deleted file mode 100644 index 14fdbf47fc..0000000000 --- a/drivers/theoraplayer/include/theoraplayer/TheoraTimer.h +++ /dev/null @@ -1,69 +0,0 @@ -/************************************************************************************ -This source file is part of the Theora Video Playback Library -For latest info, see http://libtheoraplayer.googlecode.com -************************************************************************************* -Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com) -This program is free software; you can redistribute it and/or modify it under -the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause -*************************************************************************************/ - -#ifndef _TheoraTimer_h -#define _TheoraTimer_h - -#include "TheoraExport.h" - -/** - This is a Timer object, it is used to control the playback of a TheoraVideoClip. - - You can inherit this class and make a timer that eg. plays twice as fast, - or playbacks an audio track and uses it's time offset for syncronizing Video etc. - */ -class TheoraPlayerExport TheoraTimer -{ -protected: - //! Current time in seconds - float mTime,mSpeed; - //! Is the timer paused or not - bool mPaused; -public: - TheoraTimer(); - virtual ~TheoraTimer(); - - virtual float getTime(); - /** - \brief advance the time. - - If you're using another synronization system, eg. an audio track, - then you can ignore this call or use it to perform other updates. 
- - NOTE: this is called by TheoraVideoManager from the main thread - */ - virtual void update(float timeDelta); - - virtual void pause(); - virtual void play(); - virtual bool isPaused(); - virtual void stop(); - /** - \brief set's playback speed - - 1.0 is the default. The speed factor multiplies time advance, thus - setting the value higher will increase playback speed etc. - - NOTE: depending on Timer implementation, it may not support setting the speed - - */ - virtual void setSpeed(float speed); - //! return the update speed 1.0 is the default - virtual float getSpeed(); - - /** - \brief change the current time. - - if you're using another syncronization mechanism, make sure to adjust - the time offset there - */ - virtual void seek(float time); -}; -#endif - diff --git a/drivers/theoraplayer/include/theoraplayer/TheoraUtil.h b/drivers/theoraplayer/include/theoraplayer/TheoraUtil.h deleted file mode 100644 index f168971ac7..0000000000 --- a/drivers/theoraplayer/include/theoraplayer/TheoraUtil.h +++ /dev/null @@ -1,32 +0,0 @@ -/************************************************************************************ -This source file is part of the Theora Video Playback Library -For latest info, see http://libtheoraplayer.googlecode.com -************************************************************************************* -Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com) -This program is free software; you can redistribute it and/or modify it under -the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause -*************************************************************************************/ -#ifndef _TheoraUtil_h -#define _TheoraUtil_h - -#include <string> -#include <vector> - -#ifndef THEORAUTIL_NOMACROS - -#define foreach(type,lst) for (std::vector<type>::iterator it=lst.begin();it != lst.end(); ++it) -#define foreach_l(type,lst) for (std::list<type>::iterator it=lst.begin();it != lst.end(); ++it) -#define foreach_r(type,lst) for 
(std::vector<type>::reverse_iterator it=lst.rbegin();it != lst.rend(); ++it) -#define foreach_in_map(type,lst) for (std::map<std::string,type>::iterator it=lst.begin();it != lst.end(); ++it) - -#endif - -#define th_writelog(x) TheoraVideoManager::getSingleton().logMessage(x) - - -std::string str(int i); -std::string strf(float i); -void _psleep(int milliseconds); -int _nextPow2(int x); - -#endif
\ No newline at end of file diff --git a/drivers/theoraplayer/include/theoraplayer/TheoraVideoClip.h b/drivers/theoraplayer/include/theoraplayer/TheoraVideoClip.h deleted file mode 100644 index fe71cf8566..0000000000 --- a/drivers/theoraplayer/include/theoraplayer/TheoraVideoClip.h +++ /dev/null @@ -1,282 +0,0 @@ -/************************************************************************************ -This source file is part of the Theora Video Playback Library -For latest info, see http://libtheoraplayer.googlecode.com -************************************************************************************* -Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com) -This program is free software; you can redistribute it and/or modify it under -the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause -*************************************************************************************/ - -#ifndef _TheoraVideoClip_h -#define _TheoraVideoClip_h - -#include <string> -#include "TheoraExport.h" - -// forward class declarations -class TheoraMutex; -class TheoraFrameQueue; -class TheoraTimer; -class TheoraAudioInterface; -class TheoraWorkerThread; -class TheoraDataSource; -class TheoraVideoFrame; - -/** - format of the TheoraVideoFrame pixels. Affects decoding time - */ -enum TheoraOutputMode -{ - // A = full alpha (255), order of letters represents the byte order for a pixel - // A means the image is treated as if it contains an alpha channel, while X formats - // just mean that RGB frame is transformed to a 4 byte format - TH_UNDEFINED = 0, - TH_RGB = 1, - TH_RGBA = 2, - TH_RGBX = 3, - TH_ARGB = 4, - TH_XRGB = 5, - TH_BGR = 6, - TH_BGRA = 7, - TH_BGRX = 8, - TH_ABGR = 9, - TH_XBGR = 10, - TH_GREY = 11, - TH_GREY3 = 12, - TH_GREY3A = 13, - TH_GREY3X = 14, - TH_AGREY3 = 15, - TH_XGREY3 = 16, - TH_YUV = 17, - TH_YUVA = 18, - TH_YUVX = 19, - TH_AYUV = 20, - TH_XYUV = 21 -}; - -/** - This object contains all data related to video playback, eg. 
the open source file, - the frame queue etc. -*/ -class TheoraPlayerExport TheoraVideoClip -{ - friend class TheoraWorkerThread; - friend class TheoraVideoFrame; - friend class TheoraVideoManager; -protected: - TheoraFrameQueue* mFrameQueue; - TheoraAudioInterface* mAudioInterface; - TheoraDataSource* mStream; - - TheoraTimer *mTimer, *mDefaultTimer; - - TheoraWorkerThread* mAssignedWorkerThread; - - bool mUseAlpha; - - bool mWaitingForCache; - - // benchmark vars - int mNumDroppedFrames, mNumDisplayedFrames, mNumPrecachedFrames; - - int mThreadAccessCount; //! counter used by TheoraVideoManager to schedule workload - - int mSeekFrame; //! stores desired seek position as a frame number. next worker thread will do the seeking and reset this var to -1 - float mDuration, mFrameDuration, mFPS; - float mPriority; //! User assigned priority. Default value is 1 - std::string mName; - int mWidth, mHeight, mStride; - int mNumFrames; - int audio_track; - - int mSubFrameWidth, mSubFrameHeight, mSubFrameOffsetX, mSubFrameOffsetY; - float mAudioGain; //! multiplier for audio samples. between 0 and 1 - - TheoraOutputMode mOutputMode, mRequestedOutputMode; - bool mFirstFrameDisplayed; - bool mAutoRestart; - bool mEndOfFile, mRestarted; - int mIteration, mPlaybackIteration; //! used to ensure smooth playback of looping videos - - TheoraMutex* mAudioMutex; //! syncs audio decoding and extraction - TheoraMutex* mThreadAccessMutex; - - /** - * Get the priority of a video clip. based on a forumula that includes user - * priority factor, whether the video is paused or not, how many precached - * frames it has etc. - * This function is used in TheoraVideoManager to efficiently distribute job - * assignments among worker threads - * @return priority number of this video clip - */ - int calculatePriority(); - void readTheoraVorbisHeaders(); - virtual void doSeek() = 0; //! 
called by WorkerThread to seek to mSeekFrame - virtual bool _readData() = 0; - bool isBusy(); - - /** - * decodes audio from the vorbis stream and stores it in audio packets - * This is an internal function of TheoraVideoClip, called regularly if playing an - * audio enabled video clip. - * @return last decoded timestamp (if found in decoded packet's granule position) - */ - virtual float decodeAudio() = 0; - - int _getNumReadyFrames(); - void resetFrameQueue(); - int discardOutdatedFrames(float absTime); - float getAbsPlaybackTime(); - virtual void load(TheoraDataSource* source) = 0; - - virtual void _restart() = 0; // resets the decoder and stream but leaves the frame queue intact -public: - TheoraVideoClip(TheoraDataSource* data_source, - TheoraOutputMode output_mode, - int nPrecachedFrames, - bool usePower2Stride); - virtual ~TheoraVideoClip(); - - std::string getName(); - //! Returns the string name of the decoder backend (eg. Theora, AVFoundation) - virtual std::string getDecoderName() = 0; - - //! benchmark function - int getNumDisplayedFrames() { return mNumDisplayedFrames; } - //! benchmark function - int getNumDroppedFrames() { return mNumDroppedFrames; } - - //! return width in pixels of the video clip - int getWidth(); - //! return height in pixels of the video clip - int getHeight(); - - //! Width of the actual picture inside a video frame (depending on implementation, this may be equal to mWidth or differ within a codec block size (usually 16)) - int getSubFrameWidth(); - //! Height of the actual picture inside a video frame (depending on implementation, this may be equal to mHeight or differ within a codec block size (usually 16)) - int getSubFrameHeight(); - //! X Offset of the actual picture inside a video frame (depending on implementation, this may be 0 or within a codec block size (usually 16)) - int getSubFrameOffsetX(); - //! 
Y Offset of the actual picture inside a video frame (depending on implementation, this may be 0 or differ within a codec block size (usually 16)) - int getSubFrameOffsetY(); - /** - \brief return stride in pixels - - If you've specified usePower2Stride when creating the TheoraVideoClip object - then this value will be the next power of two size compared to width, - eg: w=376, stride=512. - - Otherwise, stride will be equal to width - */ - int getStride() { return mStride; } - - //! retur the timer objet associated with this object - TheoraTimer* getTimer(); - //! replace the timer object with a new one - void setTimer(TheoraTimer* timer); - - //! used by TheoraWorkerThread, do not call directly - virtual bool decodeNextFrame() = 0; - - //! advance time. TheoraVideoManager calls this - void update(float timeDelta); - /** - \brief update timer to the display time of the next frame - - useful if you want to grab frames instead of regular display - \return time advanced. 0 if no frames are ready - */ - float updateToNextFrame(); - - - TheoraFrameQueue* getFrameQueue(); - - /** - \brief pop the frame from the front of the FrameQueue - - see TheoraFrameQueue::pop() for more details - */ - void popFrame(); - - /** - \brief Returns the first available frame in the queue or NULL if no frames are available. - - see TheoraFrameQueue::getFirstAvailableFrame() for more details - */ - TheoraVideoFrame* getNextFrame(); - /** - check if there is enough audio data decoded to submit to the audio interface - - TheoraWorkerThread calls this - */ - virtual void decodedAudioCheck() = 0; - - void setAudioInterface(TheoraAudioInterface* iface); - TheoraAudioInterface* getAudioInterface(); - - /** - \brief resize the frame queues - - Warning: this call discards ready frames in the frame queue - */ - void setNumPrecachedFrames(int n); - //! returns the size of the frame queue - int getNumPrecachedFrames(); - //! 
returns the number of ready frames in the frame queue - int getNumReadyFrames(); - - //! if you want to adjust the audio gain. range [0,1] - void setAudioGain(float gain); - float getAudioGain(); - - //! if you want the video to automatically and smoothly restart when the last frame is reached - void setAutoRestart(bool value); - bool getAutoRestart() { return mAutoRestart; } - - - void set_audio_track(int p_track) { audio_track=p_track; } - - /** - TODO: user priority. Useful only when more than one video is being decoded - */ - void setPriority(float priority); - float getPriority(); - - //! Used by TheoraVideoManager to schedule work - float getPriorityIndex(); - - //! get the current time index from the timer object - float getTimePosition(); - //! get the duration of the movie in seconds - float getDuration(); - //! return the clips' frame rate, warning, fps can be a non integer number! - float getFPS(); - //! get the number of frames in this movie - int getNumFrames() { return mNumFrames; } - - //! return the current output mode for this video object - TheoraOutputMode getOutputMode(); - /** - set a new output mode - - Warning: this discards the frame queue. ready frames will be lost. - */ - void setOutputMode(TheoraOutputMode mode); - - bool isDone(); - void play(); - void pause(); - void restart(); - bool isPaused(); - void stop(); - void setPlaybackSpeed(float speed); - float getPlaybackSpeed(); - //! seek to a given time position - void seek(float time); - //! seek to a given frame number - void seekToFrame(int frame); - //! 
wait max_time for the clip to cache a given percentage of frames, factor in range [0,1] - void waitForCache(float desired_cache_factor = 0.5f, float max_wait_time = 1.0f); -}; - -#endif diff --git a/drivers/theoraplayer/include/theoraplayer/TheoraVideoFrame.h b/drivers/theoraplayer/include/theoraplayer/TheoraVideoFrame.h deleted file mode 100644 index 5d27f54d1c..0000000000 --- a/drivers/theoraplayer/include/theoraplayer/TheoraVideoFrame.h +++ /dev/null @@ -1,56 +0,0 @@ -/************************************************************************************ -This source file is part of the Theora Video Playback Library -For latest info, see http://libtheoraplayer.googlecode.com -************************************************************************************* -Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com) -This program is free software; you can redistribute it and/or modify it under -the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause -*************************************************************************************/ -#ifndef _TheoraVideoFrame_h -#define _TheoraVideoFrame_h - -#include "TheoraExport.h" -#include "TheoraVideoClip.h" - -struct TheoraPixelTransform; -/** - -*/ -class TheoraPlayerExport TheoraVideoFrame -{ -protected: - TheoraVideoClip* mParent; - unsigned char* mBuffer; - unsigned long mFrameNumber; -public: - //! global time in seconds this frame should be displayed on - float mTimeToDisplay; - //! whether the frame is ready for display or not - bool mReady; - //! indicates the frame is being used by TheoraWorkerThread instance - bool mInUse; - //! used to keep track of linear time in looping videos - int mIteration; - - int mBpp; - - TheoraVideoFrame(TheoraVideoClip* parent); - virtual ~TheoraVideoFrame(); - - //! internal function, do not use directly - void _setFrameNumber(unsigned long number) { mFrameNumber = number; } - //! 
returns the frame number of this frame in the theora stream - unsigned long getFrameNumber() { return mFrameNumber; } - - void clear(); - - int getWidth(); - int getStride(); - int getHeight(); - - unsigned char* getBuffer(); - - //! Called by TheoraVideoClip to decode a source buffer onto itself - virtual void decode(struct TheoraPixelTransform* t); -}; -#endif diff --git a/drivers/theoraplayer/include/theoraplayer/TheoraVideoManager.h b/drivers/theoraplayer/include/theoraplayer/TheoraVideoManager.h deleted file mode 100644 index d94c51b4d4..0000000000 --- a/drivers/theoraplayer/include/theoraplayer/TheoraVideoManager.h +++ /dev/null @@ -1,110 +0,0 @@ -/************************************************************************************ -This source file is part of the Theora Video Playback Library -For latest info, see http://libtheoraplayer.googlecode.com -************************************************************************************* -Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com) -This program is free software; you can redistribute it and/or modify it under -the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause -*************************************************************************************/ - -#ifndef _TheoraVideoManager_h -#define _TheoraVideoManager_h - -#include <vector> -#include <list> -#include <string> -#include "TheoraExport.h" -#include "TheoraVideoClip.h" -#ifdef _WIN32 -#pragma warning( disable: 4251 ) // MSVC++ -#endif -// forward class declarations -class TheoraWorkerThread; -class TheoraMutex; -class TheoraDataSource; -class TheoraAudioInterfaceFactory; -/** - This is the main singleton class that handles all playback/sync operations -*/ -class TheoraPlayerExport TheoraVideoManager -{ -protected: - friend class TheoraWorkerThread; - typedef std::vector<TheoraVideoClip*> ClipList; - typedef std::vector<TheoraWorkerThread*> ThreadList; - - //! 
stores pointers to worker threads which are decoding video and audio - ThreadList mWorkerThreads; - //! stores pointers to created video clips - ClipList mClips; - - //! stores pointer to clips that were docoded in the past in order to achieve fair scheduling - std::list<TheoraVideoClip*> mWorkLog; - - int mDefaultNumPrecachedFrames; - - TheoraMutex* mWorkMutex; - TheoraAudioInterfaceFactory* mAudioFactory; - - void createWorkerThreads(int n); - void destroyWorkerThreads(); - - float calcClipWorkTime(TheoraVideoClip* clip); - - /** - * Called by TheoraWorkerThread to request a TheoraVideoClip instance to work on decoding - */ - TheoraVideoClip* requestWork(TheoraWorkerThread* caller); -public: - TheoraVideoManager(int num_worker_threads=1); - virtual ~TheoraVideoManager(); - - //! get the global reference to the manager instance - static TheoraVideoManager& getSingleton(); - //! get the global pointer to the manager instance - static TheoraVideoManager* getSingletonPtr(); - - //! search registered clips by name - TheoraVideoClip* getVideoClipByName(std::string name); - - TheoraVideoClip* createVideoClip(std::string filename,TheoraOutputMode output_mode=TH_RGB,int numPrecachedOverride=0,bool usePower2Stride=0, int p_track=0); - TheoraVideoClip* createVideoClip(TheoraDataSource* data_source,TheoraOutputMode output_mode=TH_RGB,int numPrecachedOverride=0,bool usePower2Stride=0, int p_audio_track=0); - - void update(float timeDelta); - - void destroyVideoClip(TheoraVideoClip* clip); - - void setAudioInterfaceFactory(TheoraAudioInterfaceFactory* factory); - TheoraAudioInterfaceFactory* getAudioInterfaceFactory(); - - int getNumWorkerThreads(); - void setNumWorkerThreads(int n); - - void setDefaultNumPrecachedFrames(int n) { mDefaultNumPrecachedFrames=n; } - int getDefaultNumPrecachedFrames() { return mDefaultNumPrecachedFrames; } - - //! 
used by libtheoraplayer functions - void logMessage(std::string msg); - - /** - \brief you can set your own log function to recieve theora's log calls - - This way you can integrate libtheoraplayer's log messages in your own - logging system, prefix them, mute them or whatever you want - */ - static void setLogFunction(void (*fn)(std::string)); - - //! get nicely formated version string - std::string getVersionString(); - /** - \brief get version numbers - - if c is negative, it means it's a release candidate -c - */ - void getVersion(int* a,int* b,int* c); - - //! returns the supported decoders (eg. Theora, AVFoundation...) - std::vector<std::string> getSupportedDecoders(); -}; -#endif - diff --git a/drivers/theoraplayer/include/theoraplayer/TheoraWorkerThread.h b/drivers/theoraplayer/include/theoraplayer/TheoraWorkerThread.h deleted file mode 100644 index 2299acedbd..0000000000 --- a/drivers/theoraplayer/include/theoraplayer/TheoraWorkerThread.h +++ /dev/null @@ -1,32 +0,0 @@ -/************************************************************************************ -This source file is part of the Theora Video Playback Library -For latest info, see http://libtheoraplayer.googlecode.com -************************************************************************************* -Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com) -This program is free software; you can redistribute it and/or modify it under -the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause -*************************************************************************************/ -#ifndef _TheoraWorkerThread_h -#define _TheoraWorkerThread_h - -#include "TheoraAsync.h" - -class TheoraVideoClip; - -/** - This is the worker thread, requests work from TheoraVideoManager - and decodes assigned TheoraVideoClip objects -*/ -class TheoraWorkerThread : public TheoraThread -{ - TheoraVideoClip* mClip; -public: - TheoraWorkerThread(); - ~TheoraWorkerThread(); - - TheoraVideoClip* 
getAssignedClip() { return mClip; } - - //! Main Thread Body - do not call directly! - void execute(); -}; -#endif diff --git a/drivers/theoraplayer/src/AVFoundation/TheoraVideoClip_AVFoundation.h b/drivers/theoraplayer/src/AVFoundation/TheoraVideoClip_AVFoundation.h deleted file mode 100644 index abd898aa01..0000000000 --- a/drivers/theoraplayer/src/AVFoundation/TheoraVideoClip_AVFoundation.h +++ /dev/null @@ -1,47 +0,0 @@ -/************************************************************************************ -This source file is part of the Theora Video Playback Library -For latest info, see http://libtheoraplayer.googlecode.com -************************************************************************************* -Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com) -This program is free software; you can redistribute it and/or modify it under -the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause -*************************************************************************************/ -#if defined(__AVFOUNDATION) && !defined(_TheoraVideoClip_AVFoundation_h) -#define _TheoraVideoClip_AVFoundation_h - -#include "TheoraAudioPacketQueue.h" -#include "TheoraVideoClip.h" - -#ifndef AVFOUNDATION_CLASSES_DEFINED -class AVAssetReader; -class AVAssetReaderTrackOutput; -#endif - -class TheoraVideoClip_AVFoundation : public TheoraVideoClip, public TheoraAudioPacketQueue -{ -protected: - bool mLoaded; - int mFrameNumber; - AVAssetReader* mReader; - AVAssetReaderTrackOutput *mOutput, *mAudioOutput; - unsigned int mReadAudioSamples; - - void unload(); - void doSeek(); -public: - TheoraVideoClip_AVFoundation(TheoraDataSource* data_source, - TheoraOutputMode output_mode, - int nPrecachedFrames, - bool usePower2Stride); - ~TheoraVideoClip_AVFoundation(); - - bool _readData(); - bool decodeNextFrame(); - void _restart(); - void load(TheoraDataSource* source); - float decodeAudio(); - void decodedAudioCheck(); - std::string getDecoderName() { return 
"AVFoundation"; } -}; - -#endif diff --git a/drivers/theoraplayer/src/AVFoundation/TheoraVideoClip_AVFoundation.mm b/drivers/theoraplayer/src/AVFoundation/TheoraVideoClip_AVFoundation.mm deleted file mode 100644 index 1b5cf0ab13..0000000000 --- a/drivers/theoraplayer/src/AVFoundation/TheoraVideoClip_AVFoundation.mm +++ /dev/null @@ -1,457 +0,0 @@ -/************************************************************************************ -This source file is part of the Theora Video Playback Library -For latest info, see http://libtheoraplayer.googlecode.com -************************************************************************************* -Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com) -This program is free software; you can redistribute it and/or modify it under -the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause -*************************************************************************************/ -#ifdef __AVFOUNDATION -#define AVFOUNDATION_CLASSES_DEFINED -#import <AVFoundation/AVFoundation.h> -#include "TheoraAudioInterface.h" -#include "TheoraDataSource.h" -#include "TheoraException.h" -#include "TheoraTimer.h" -#include "TheoraUtil.h" -#include "TheoraFrameQueue.h" -#include "TheoraVideoFrame.h" -#include "TheoraVideoManager.h" -#include "TheoraVideoClip_AVFoundation.h" -#include "TheoraPixelTransform.h" - -#ifdef _AVFOUNDATION_BGRX -// a fast function developed to use kernel byte swapping calls to optimize alpha decoding. -// In AVFoundation, BGRX mode conversion is prefered to YUV conversion because apple's YUV -// conversion on iOS seems to run faster than libtheoraplayer's implementation -// This may change in the future with more optimizations to libtheoraplayers's YUV conversion -// code, making this function obsolete. 
-static void bgrx2rgba(unsigned char* dest, int w, int h, struct TheoraPixelTransform* t) -{ - unsigned register int a; - unsigned int *dst = (unsigned int*) dest, *dstEnd; - unsigned char* src = t->raw; - int y, x, ax; - - for (y = 0; y < h; ++y, src += t->rawStride) - { - for (x = 0, ax = w * 4, dstEnd = dst + w; dst != dstEnd; x += 4, ax += 4, ++dst) - { - // use the full alpha range here because the Y channel has already been converted - // to RGB and that's in [0, 255] range. - a = src[ax]; - *dst = (OSReadSwapInt32(src, x) >> 8) | (a << 24); - } - } -} -#endif - -static CVPlanarPixelBufferInfo_YCbCrPlanar getYUVStruct(void* src) -{ - CVPlanarPixelBufferInfo_YCbCrPlanar* bigEndianYuv = (CVPlanarPixelBufferInfo_YCbCrPlanar*) src; - CVPlanarPixelBufferInfo_YCbCrPlanar yuv; - yuv.componentInfoY.offset = OSSwapInt32(bigEndianYuv->componentInfoY.offset); - yuv.componentInfoY.rowBytes = OSSwapInt32(bigEndianYuv->componentInfoY.rowBytes); - yuv.componentInfoCb.offset = OSSwapInt32(bigEndianYuv->componentInfoCb.offset); - yuv.componentInfoCb.rowBytes = OSSwapInt32(bigEndianYuv->componentInfoCb.rowBytes); - yuv.componentInfoCr.offset = OSSwapInt32(bigEndianYuv->componentInfoCr.offset); - yuv.componentInfoCr.rowBytes = OSSwapInt32(bigEndianYuv->componentInfoCr.rowBytes); - return yuv; -} - -TheoraVideoClip_AVFoundation::TheoraVideoClip_AVFoundation(TheoraDataSource* data_source, - TheoraOutputMode output_mode, - int nPrecachedFrames, - bool usePower2Stride): - TheoraVideoClip(data_source, output_mode, nPrecachedFrames, usePower2Stride), - TheoraAudioPacketQueue() -{ - mLoaded = 0; - mReader = NULL; - mOutput = mAudioOutput = NULL; - mReadAudioSamples = mAudioFrequency = mNumAudioChannels = 0; -} - -TheoraVideoClip_AVFoundation::~TheoraVideoClip_AVFoundation() -{ - unload(); -} - -void TheoraVideoClip_AVFoundation::unload() -{ - if (mOutput != NULL || mAudioOutput != NULL || mReader != NULL) - { - NSAutoreleasePool* pool = [[NSAutoreleasePool alloc] init]; - - if 
(mOutput != NULL) - { - [mOutput release]; - mOutput = NULL; - } - - if (mAudioOutput) - { - [mAudioOutput release]; - mAudioOutput = NULL; - } - - if (mReader != NULL) - { - [mReader release]; - mReader = NULL; - } - - [pool release]; - } -} - -bool TheoraVideoClip_AVFoundation::_readData() -{ - return 1; -} - -bool TheoraVideoClip_AVFoundation::decodeNextFrame() -{ - if (mReader == NULL || mEndOfFile) return 0; - AVAssetReaderStatus status = [mReader status]; - if (status == AVAssetReaderStatusFailed) - { - // This can happen on iOS when you suspend the app... Only happens on the device, iOS simulator seems to work fine. - th_writelog("AVAssetReader reading failed, restarting..."); - - mSeekFrame = mTimer->getTime() * mFPS; - // just in case - if (mSeekFrame < 0) mSeekFrame = 0; - if (mSeekFrame > mDuration * mFPS - 1) mSeekFrame = mDuration * mFPS - 1; - _restart(); - status = [mReader status]; - if (status == AVAssetReaderStatusFailed) - { - th_writelog("AVAssetReader restart failed!"); - return 0; - } - th_writelog("AVAssetReader restart succeeded!"); - } - - TheoraVideoFrame* frame = mFrameQueue->requestEmptyFrame(); - if (!frame) return 0; - - CMSampleBufferRef sampleBuffer = NULL; - NSAutoreleasePool* pool = NULL; - CMTime presentationTime; - - if (mAudioInterface) decodeAudio(); - - if (status == AVAssetReaderStatusReading) - { - pool = [[NSAutoreleasePool alloc] init]; - - while ((sampleBuffer = [mOutput copyNextSampleBuffer])) - { - presentationTime = CMSampleBufferGetOutputPresentationTimeStamp(sampleBuffer); - frame->mTimeToDisplay = (float) CMTimeGetSeconds(presentationTime); - frame->mIteration = mIteration; - frame->_setFrameNumber(mFrameNumber); - ++mFrameNumber; - if (frame->mTimeToDisplay < mTimer->getTime() && !mRestarted && mFrameNumber % 16 != 0) - { - // %16 operation is here to prevent a playback halt during video playback if the decoder can't keep up with demand. 
-#ifdef _DEBUG - th_writelog(mName + ": pre-dropped frame " + str(mFrameNumber - 1)); -#endif - ++mNumDisplayedFrames; - ++mNumDroppedFrames; - CMSampleBufferInvalidate(sampleBuffer); - CFRelease(sampleBuffer); - sampleBuffer = NULL; - continue; // drop frame - } - - CVImageBufferRef imageBuffer = CMSampleBufferGetImageBuffer(sampleBuffer); - CVPixelBufferLockBaseAddress(imageBuffer, 0); - void *baseAddress = CVPixelBufferGetBaseAddress(imageBuffer); - - mStride = CVPixelBufferGetBytesPerRow(imageBuffer); - size_t width = CVPixelBufferGetWidth(imageBuffer); - size_t height = CVPixelBufferGetHeight(imageBuffer); - - TheoraPixelTransform t; - memset(&t, 0, sizeof(TheoraPixelTransform)); -#ifdef _AVFOUNDATION_BGRX - if (mOutputMode == TH_BGRX || mOutputMode == TH_RGBA) - { - t.raw = (unsigned char*) baseAddress; - t.rawStride = mStride; - } - else -#endif - { - CVPlanarPixelBufferInfo_YCbCrPlanar yuv = getYUVStruct(baseAddress); - - t.y = (unsigned char*) baseAddress + yuv.componentInfoY.offset; t.yStride = yuv.componentInfoY.rowBytes; - t.u = (unsigned char*) baseAddress + yuv.componentInfoCb.offset; t.uStride = yuv.componentInfoCb.rowBytes; - t.v = (unsigned char*) baseAddress + yuv.componentInfoCr.offset; t.vStride = yuv.componentInfoCr.rowBytes; - } -#ifdef _AVFOUNDATION_BGRX - if (mOutputMode == TH_RGBA) - { - for (int i = 0; i < 1000; ++i) - bgrx2rgba(frame->getBuffer(), mWidth / 2, mHeight, &t); - frame->mReady = true; - } - else -#endif - frame->decode(&t); - - CVPixelBufferUnlockBaseAddress(imageBuffer, 0); - CMSampleBufferInvalidate(sampleBuffer); - CFRelease(sampleBuffer); - - break; // TODO - should this really be a while loop instead of an if block? 
- } - } - if (pool) [pool release]; - - if (!frame->mReady) // in case the frame wasn't used - { - frame->mInUse = 0; - } - - if (sampleBuffer == NULL && mReader.status == AVAssetReaderStatusCompleted) // other cases could be app suspended - { - if (mAutoRestart) - { - ++mIteration; - _restart(); - } - else - { - unload(); - mEndOfFile = true; - } - return 0; - } - - - return 1; -} - -void TheoraVideoClip_AVFoundation::_restart() -{ - mEndOfFile = false; - unload(); - load(mStream); - mRestarted = true; -} - -void TheoraVideoClip_AVFoundation::load(TheoraDataSource* source) -{ - mStream = source; - mFrameNumber = 0; - mEndOfFile = false; - TheoraFileDataSource* fileDataSource = dynamic_cast<TheoraFileDataSource*>(source); - std::string filename; - if (fileDataSource != NULL) filename = fileDataSource->getFilename(); - else - { - TheoraMemoryFileDataSource* memoryDataSource = dynamic_cast<TheoraMemoryFileDataSource*>(source); - if (memoryDataSource != NULL) filename = memoryDataSource->getFilename(); - else throw TheoraGenericException("Unable to load MP4 file"); - } - - NSAutoreleasePool* pool = [[NSAutoreleasePool alloc] init]; - NSString* path = [NSString stringWithUTF8String:filename.c_str()]; - NSError* err; - NSURL *url = [NSURL fileURLWithPath:path]; - AVAsset* asset = [[AVURLAsset alloc] initWithURL:url options:nil]; - mReader = [[AVAssetReader alloc] initWithAsset:asset error:&err]; - NSArray* tracks = [asset tracksWithMediaType:AVMediaTypeVideo]; - if ([tracks count] == 0) - throw TheoraGenericException("Unable to open video file: " + filename); - AVAssetTrack *videoTrack = [tracks objectAtIndex:0]; - - NSArray* audioTracks = [asset tracksWithMediaType:AVMediaTypeAudio]; - if (audio_track >= audioTracks.count) - audio_track = 0; - AVAssetTrack *audioTrack = audioTracks.count > 0 ? 
[audioTracks objectAtIndex:audio_track] : NULL; - printf("*********** using audio track %i\n", audio_track); - -#ifdef _AVFOUNDATION_BGRX - bool yuv_output = (mOutputMode != TH_BGRX && mOutputMode != TH_RGBA); -#else - bool yuv_output = true; -#endif - - NSDictionary *videoOptions = [NSDictionary dictionaryWithObjectsAndKeys:[NSNumber numberWithInt:(yuv_output) ? kCVPixelFormatType_420YpCbCr8Planar : kCVPixelFormatType_32BGRA], kCVPixelBufferPixelFormatTypeKey, nil]; - - mOutput = [[AVAssetReaderTrackOutput alloc] initWithTrack:videoTrack outputSettings:videoOptions]; - [mReader addOutput:mOutput]; - if ([mOutput respondsToSelector:@selector(setAlwaysCopiesSampleData:)]) // Not supported on iOS versions older than 5.0 - mOutput.alwaysCopiesSampleData = NO; - - mFPS = videoTrack.nominalFrameRate; - mWidth = mSubFrameWidth = mStride = videoTrack.naturalSize.width; - mHeight = mSubFrameHeight = videoTrack.naturalSize.height; - mFrameDuration = 1.0f / mFPS; - mDuration = (float) CMTimeGetSeconds(asset.duration); - if (mFrameQueue == NULL) - { - mFrameQueue = new TheoraFrameQueue(this); - mFrameQueue->setSize(mNumPrecachedFrames); - } - - if (mSeekFrame != -1) - { - mFrameNumber = mSeekFrame; - [mReader setTimeRange: CMTimeRangeMake(CMTimeMakeWithSeconds(mSeekFrame / mFPS, 1), kCMTimePositiveInfinity)]; - } - if (audioTrack) - { - TheoraAudioInterfaceFactory* audio_factory = TheoraVideoManager::getSingleton().getAudioInterfaceFactory(); - if (audio_factory) - { - NSDictionary *audioOptions = [NSDictionary dictionaryWithObjectsAndKeys: - [NSNumber numberWithInt:kAudioFormatLinearPCM], AVFormatIDKey, - [NSNumber numberWithBool:NO], AVLinearPCMIsNonInterleaved, - [NSNumber numberWithBool:NO], AVLinearPCMIsBigEndianKey, - [NSNumber numberWithBool:YES], AVLinearPCMIsFloatKey, - [NSNumber numberWithInt:32], AVLinearPCMBitDepthKey, - nil]; - - mAudioOutput = [[AVAssetReaderTrackOutput alloc] initWithTrack:audioTrack outputSettings:audioOptions]; - [mReader 
addOutput:mAudioOutput]; - if ([mAudioOutput respondsToSelector:@selector(setAlwaysCopiesSampleData:)]) // Not supported on iOS versions older than 5.0 - mAudioOutput.alwaysCopiesSampleData = NO; - - NSArray* desclst = audioTrack.formatDescriptions; - CMAudioFormatDescriptionRef desc = (CMAudioFormatDescriptionRef) [desclst objectAtIndex:0]; - const AudioStreamBasicDescription* audioDesc = CMAudioFormatDescriptionGetStreamBasicDescription(desc); - mAudioFrequency = (unsigned int) audioDesc->mSampleRate; - mNumAudioChannels = audioDesc->mChannelsPerFrame; - - if (mSeekFrame != -1) - { - mReadAudioSamples = mFrameNumber * (mAudioFrequency * mNumAudioChannels) / mFPS; - } - else mReadAudioSamples = 0; - - if (mAudioInterface == NULL) - setAudioInterface(audio_factory->createInstance(this, mNumAudioChannels, mAudioFrequency)); - } - } - -#ifdef _DEBUG - else if (!mLoaded) - { - th_writelog("-----\nwidth: " + str(mWidth) + ", height: " + str(mHeight) + ", fps: " + str((int) getFPS())); - th_writelog("duration: " + strf(mDuration) + " seconds\n-----"); - } -#endif - [mReader startReading]; - [pool release]; - mLoaded = true; -} - -void TheoraVideoClip_AVFoundation::decodedAudioCheck() -{ - if (!mAudioInterface || mTimer->isPaused()) return; - - mAudioMutex->lock(); - flushAudioPackets(mAudioInterface); - mAudioMutex->unlock(); -} - -float TheoraVideoClip_AVFoundation::decodeAudio() -{ - if (mRestarted) return -1; - - if (mReader == NULL || mEndOfFile) return 0; - AVAssetReaderStatus status = [mReader status]; - - if (mAudioOutput) - { - CMSampleBufferRef sampleBuffer = NULL; - NSAutoreleasePool* pool = NULL; - bool mutexLocked = 0; - - float factor = 1.0f / (mAudioFrequency * mNumAudioChannels); - float videoTime = (float) mFrameNumber / mFPS; - float min = mFrameQueue->getSize() / mFPS + 1.0f; - - if (status == AVAssetReaderStatusReading) - { - pool = [[NSAutoreleasePool alloc] init]; - - // always buffer up of audio ahead of the frames - while (mReadAudioSamples * 
factor - videoTime < min) - { - if ((sampleBuffer = [mAudioOutput copyNextSampleBuffer])) - { - AudioBufferList audioBufferList; - - CMBlockBufferRef blockBuffer = NULL; - CMSampleBufferGetAudioBufferListWithRetainedBlockBuffer(sampleBuffer, NULL, &audioBufferList, sizeof(audioBufferList), NULL, NULL, 0, &blockBuffer); - - for (int y = 0; y < audioBufferList.mNumberBuffers; ++y) - { - AudioBuffer audioBuffer = audioBufferList.mBuffers[y]; - float *frame = (float*) audioBuffer.mData; - - if (!mutexLocked) - { - mAudioMutex->lock(); - mutexLocked = 1; - } - addAudioPacket(frame, audioBuffer.mDataByteSize / (mNumAudioChannels * sizeof(float)), mAudioGain); - - mReadAudioSamples += audioBuffer.mDataByteSize / (sizeof(float)); - } - - CFRelease(blockBuffer); - CMSampleBufferInvalidate(sampleBuffer); - CFRelease(sampleBuffer); - } - else - { - [mAudioOutput release]; - mAudioOutput = nil; - break; - } - } - [pool release]; - } - if (mutexLocked) mAudioMutex->unlock(); - } - - return -1; -} - -void TheoraVideoClip_AVFoundation::doSeek() -{ -#if _DEBUG - th_writelog(mName + " [seek]: seeking to frame " + str(mSeekFrame)); -#endif - int frame; - float time = mSeekFrame / getFPS(); - mTimer->seek(time); - bool paused = mTimer->isPaused(); - if (!paused) mTimer->pause(); // pause until seeking is done - - mEndOfFile = false; - mRestarted = false; - - resetFrameQueue(); - unload(); - load(mStream); - - if (mAudioInterface) - { - mAudioMutex->lock(); - destroyAllAudioPackets(); - mAudioMutex->unlock(); - } - - if (!paused) mTimer->play(); - mSeekFrame = -1; -} -#endif diff --git a/drivers/theoraplayer/src/FFmpeg/TheoraVideoClip_FFmpeg.cpp b/drivers/theoraplayer/src/FFmpeg/TheoraVideoClip_FFmpeg.cpp deleted file mode 100644 index fa3fd43a47..0000000000 --- a/drivers/theoraplayer/src/FFmpeg/TheoraVideoClip_FFmpeg.cpp +++ /dev/null @@ -1,439 +0,0 @@ -/************************************************************************************ -This source file is part of the Theora Video 
Playback Library -For latest info, see http://libtheoraplayer.googlecode.com -************************************************************************************* -Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com) -This program is free software; you can redistribute it and/or modify it under -the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause -*************************************************************************************/ -#ifdef __FFMPEG -#include "TheoraAudioInterface.h" -#include "TheoraDataSource.h" -#include "TheoraException.h" -#include "TheoraTimer.h" -#include "TheoraUtil.h" -#include "TheoraFrameQueue.h" -#include "TheoraVideoFrame.h" -#include "TheoraVideoManager.h" -#include "TheoraVideoClip_FFmpeg.h" -#include "TheoraPixelTransform.h" - -#define READ_BUFFER_SIZE 4096 - -#ifdef __cplusplus -#define __STDC_CONSTANT_MACROS -#ifdef _STDINT_H -#undef _STDINT_H -#endif -# include <stdint.h> -#endif - -#define _FFMPEG_DEBUG - -extern "C" -{ -#include <libavcodec/avcodec.h> -#include <libavformat/avformat.h> -#include "libavutil/avassert.h" -} - -static bool ffmpegInitialised = 0; - -static int readFunction(void* data, uint8_t* buf, int buf_size) -{ -#ifdef _FFMPEG_DEBUG - th_writelog("reading " + str(buf_size) + " bytes"); -#endif - - TheoraDataSource* src = (TheoraDataSource*) data; - return src->read(buf, buf_size); -} - -static int64_t seekFunction(void* data, int64_t offset, int whence) -{ -#ifdef _FFMPEG_DEBUG - th_writelog("seeking: offset = " + str((long) offset) + ", whence = " + str(whence)); -#endif - - TheoraDataSource* src = (TheoraDataSource*) data; - if (whence == AVSEEK_SIZE) - return src->size(); - else if (whence == SEEK_SET) - src->seek((long) offset); - else if (whence == SEEK_END) - src->seek(src->size() - (long) offset); - return src->tell(); -} - -static void avlog_theoraplayer(void* p, int level, const char* fmt, va_list vargs) -{ - th_writelog(fmt); - static char logstr[2048]; - vsprintf(logstr, 
fmt, vargs); - th_writelog("ffmpeg: " + std::string(logstr)); -} - - -std::string text; - -static void _log(const char* s) -{ - text += s; -// th_writelog(text); -// text = ""; -} - -static void _log(const char c) -{ - char s[2] = {c, 0}; - _log(s); -} - -static const AVCodec *next_codec_for_id(enum AVCodecID id, const AVCodec *prev, - int encoder) -{ - while ((prev = av_codec_next(prev))) { - if (prev->id == id && - (encoder ? av_codec_is_encoder(prev) : av_codec_is_decoder(prev))) - return prev; - } - return NULL; -} - -static int compare_codec_desc(const void *a, const void *b) -{ - const AVCodecDescriptor **da = (const AVCodecDescriptor **) a; - const AVCodecDescriptor **db = (const AVCodecDescriptor **) b; - - return (*da)->type != (*db)->type ? (*da)->type - (*db)->type : - strcmp((*da)->name, (*db)->name); -} - -static unsigned get_codecs_sorted(const AVCodecDescriptor ***rcodecs) -{ - const AVCodecDescriptor *desc = NULL; - const AVCodecDescriptor **codecs; - unsigned nb_codecs = 0, i = 0; - - while ((desc = avcodec_descriptor_next(desc))) - ++nb_codecs; - if (!(codecs = (const AVCodecDescriptor**) av_calloc(nb_codecs, sizeof(*codecs)))) { - av_log(NULL, AV_LOG_ERROR, "Out of memory\n"); - exit(1); - } - desc = NULL; - while ((desc = avcodec_descriptor_next(desc))) - codecs[i++] = desc; - av_assert0(i == nb_codecs); - qsort(codecs, nb_codecs, sizeof(*codecs), compare_codec_desc); - *rcodecs = codecs; - return nb_codecs; -} - -static char get_media_type_char(enum AVMediaType type) -{ - switch (type) { - case AVMEDIA_TYPE_VIDEO: return 'V'; - case AVMEDIA_TYPE_AUDIO: return 'A'; - case AVMEDIA_TYPE_DATA: return 'D'; - case AVMEDIA_TYPE_SUBTITLE: return 'S'; - case AVMEDIA_TYPE_ATTACHMENT:return 'T'; - default: return '?'; - } -} - -static void print_codecs_for_id(enum AVCodecID id, int encoder) -{ - const AVCodec *codec = NULL; - - _log(encoder ? 
"encoders" : "decoders"); - - while ((codec = next_codec_for_id(id, codec, encoder))) - _log(codec->name); - - _log(")"); -} - -int show_codecs(void *optctx, const char *opt, const char *arg) -{ - const AVCodecDescriptor **codecs; - unsigned i, nb_codecs = get_codecs_sorted(&codecs); - - char tmp[1024]; - th_writelog("Codecs:\n" - " D..... = Decoding supported\n" - " .E.... = Encoding supported\n" - " ..V... = Video codec\n" - " ..A... = Audio codec\n" - " ..S... = Subtitle codec\n" - " ...I.. = Intra frame-only codec\n" - " ....L. = Lossy compression\n" - " .....S = Lossless compression\n" - " -------\n"); - for (i = 0; i < nb_codecs; ++i) { - const AVCodecDescriptor *desc = codecs[i]; - const AVCodec *codec = NULL; - - _log(" "); - _log(avcodec_find_decoder(desc->id) ? "D" : "."); - _log(avcodec_find_encoder(desc->id) ? "E" : "."); - - _log(get_media_type_char(desc->type)); - _log((desc->props & AV_CODEC_PROP_INTRA_ONLY) ? "I" : "."); - _log((desc->props & AV_CODEC_PROP_LOSSY) ? "L" : "."); - _log((desc->props & AV_CODEC_PROP_LOSSLESS) ? "S" : "."); - - - sprintf(tmp, " %-20s %s", desc->name, desc->long_name ? 
desc->long_name : ""); - - _log(tmp); - /* print decoders/encoders when there's more than one or their - * names are different from codec name */ - while ((codec = next_codec_for_id(desc->id, codec, 0))) { - if (strcmp(codec->name, desc->name)) { - print_codecs_for_id(desc->id, 0); - break; - } - } - codec = NULL; - while ((codec = next_codec_for_id(desc->id, codec, 1))) { - if (strcmp(codec->name, desc->name)) { - print_codecs_for_id(desc->id, 1); - break; - } - } - _log("\n"); - } - av_free(codecs); - - av_log(0, 0, "%s", text.c_str()); - return 0; -} - -TheoraVideoClip_FFmpeg::TheoraVideoClip_FFmpeg(TheoraDataSource* data_source, - TheoraOutputMode output_mode, - int nPrecachedFrames, - bool usePower2Stride): - TheoraVideoClip(data_source, output_mode, nPrecachedFrames, usePower2Stride), - TheoraAudioPacketQueue() -{ - mFormatContext = NULL; - mCodecContext = NULL; - mCodec = NULL; - mFrame = NULL; - mVideoStreamIndex = -1; -} - -TheoraVideoClip_FFmpeg::~TheoraVideoClip_FFmpeg() -{ - unload(); -} - -void TheoraVideoClip_FFmpeg::load(TheoraDataSource* source) -{ - mVideoStreamIndex = -1; - mFrameNumber = 0; - AVDictionary* optionsDict = NULL; - - if (!ffmpegInitialised) - { -#ifdef _FFMPEG_DEBUG - th_writelog("Initializing ffmpeg"); -#endif - th_writelog("avcodec version: " + str(avcodec_version())); - av_register_all(); - av_log_set_level(AV_LOG_DEBUG); - av_log_set_callback(avlog_theoraplayer); - ffmpegInitialised = 1; - //show_codecs(0, 0, 0); - } - - mInputBuffer = (unsigned char*) av_malloc(READ_BUFFER_SIZE); - mAvioContext = avio_alloc_context(mInputBuffer, READ_BUFFER_SIZE, 0, source, &readFunction, NULL, &seekFunction); - -#ifdef _FFMPEG_DEBUG - th_writelog(mName + ": avio context created"); -#endif - - mFormatContext = avformat_alloc_context(); -#ifdef _FFMPEG_DEBUG - th_writelog(mName + ": avformat context created"); -#endif - mFormatContext->pb = mAvioContext; - - int err; - if ((err = avformat_open_input(&mFormatContext, "", NULL, NULL)) != 0) - { - 
th_writelog(mName + ": avformat input opening failed!"); - th_writelog(mName + ": error_code: " + str(err)); - return; - } - -#ifdef _FFMPEG_DEBUG - th_writelog(mName + ": avformat input opened"); -#endif - - // Retrieve stream information - if (avformat_find_stream_info(mFormatContext, NULL) < 0) - return; // Couldn't find stream information - -#ifdef _FFMPEG_DEBUG - th_writelog(mName + ": got stream info"); -#endif - - // Dump information about file onto standard error - // av_dump_format(mFormatContext, 0, "", 0); - - // Find the first video stream - for (int i = 0; i < mFormatContext->nb_streams; ++i) - { - if(mFormatContext->streams[i]->codec->codec_type == AVMEDIA_TYPE_VIDEO) - { - mVideoStreamIndex = i; - break; - } - } - if (mVideoStreamIndex == -1) - return; // Didn't find a video stream - -#ifdef _FFMPEG_DEBUG - th_writelog(mName + ": Found video stream at index " + str(mVideoStreamIndex)); -#endif - - // Get a pointer to the codec context for the video stream - mCodecContext = mFormatContext->streams[mVideoStreamIndex]->codec; - - // Find the decoder for the video stream - mCodec = avcodec_find_decoder(mCodecContext->codec_id); - if (mCodec == NULL) - { - th_writelog("Unsupported codec!"); - return; // Codec not found - } - // Open codec - if(avcodec_open2(mCodecContext, mCodec, &optionsDict) < 0) - return; // Could not open codec - -#ifdef _FFMPEG_DEBUG - th_writelog(mName + ": Codec opened"); -#endif - - - mFrame = avcodec_alloc_frame(); - -#ifdef _FFMPEG_DEBUG - th_writelog(mName + ": Frame allocated"); -#endif - - //AVRational rational = mCodecContext->time_base; - - mFPS = 25; //TODOOOOOO!!! - - mWidth = mStride = mCodecContext->width; - mHeight = mCodecContext->height; - mFrameDuration = 1.0f / mFPS; - mDuration = mFormatContext->duration / AV_TIME_BASE; - - if (mFrameQueue == NULL) // todo - why is this set in the backend class? 
it should be set in the base class, check other backends as well - { - mFrameQueue = new TheoraFrameQueue(this); - mFrameQueue->setSize(mNumPrecachedFrames); - } -} - -void TheoraVideoClip_FFmpeg::unload() -{ - if (mInputBuffer) - { -// av_free(mInputBuffer); - mInputBuffer = NULL; - } - if (mAvioContext) - { - av_free(mAvioContext); - mAvioContext = NULL; - } - if (mFrame) - { - av_free(mFrame); - mFrame = NULL; - } - if (mCodecContext) - { - avcodec_close(mCodecContext); - mCodecContext = NULL; - } - if (mFormatContext) - { - avformat_close_input(&mFormatContext); - mFormatContext = NULL; - } -} - -bool TheoraVideoClip_FFmpeg::_readData() -{ - return 1; -} - -bool TheoraVideoClip_FFmpeg::decodeNextFrame() -{ - TheoraVideoFrame* frame = mFrameQueue->requestEmptyFrame(); - if (!frame) return 0; - - AVPacket packet; - int frameFinished; - - while (av_read_frame(mFormatContext, &packet) >= 0) - { - if (packet.stream_index == mVideoStreamIndex) - { - avcodec_decode_video2(mCodecContext, mFrame, &frameFinished, &packet); - - if (frameFinished) - { - TheoraPixelTransform t; - memset(&t, 0, sizeof(TheoraPixelTransform)); - - t.y = mFrame->data[0]; t.yStride = mFrame->linesize[0]; - t.u = mFrame->data[1]; t.uStride = mFrame->linesize[1]; - t.v = mFrame->data[2]; t.vStride = mFrame->linesize[2]; - - frame->decode(&t); - frame->mTimeToDisplay = mFrameNumber / mFPS; - frame->mIteration = mIteration; - frame->_setFrameNumber(mFrameNumber++); - - av_free_packet(&packet); - break; - } - } - av_free_packet(&packet); - } - return 1; -} - -void TheoraVideoClip_FFmpeg::decodedAudioCheck() -{ - if (!mAudioInterface || mTimer->isPaused()) return; - - mAudioMutex->lock(); - flushAudioPackets(mAudioInterface); - mAudioMutex->unlock(); -} - -float TheoraVideoClip_FFmpeg::decodeAudio() -{ - return -1; -} - -void TheoraVideoClip_FFmpeg::doSeek() -{ - -} - -void TheoraVideoClip_FFmpeg::_restart() -{ - -} - -#endif diff --git a/drivers/theoraplayer/src/FFmpeg/TheoraVideoClip_FFmpeg.h 
b/drivers/theoraplayer/src/FFmpeg/TheoraVideoClip_FFmpeg.h deleted file mode 100644 index 03f9a3d964..0000000000 --- a/drivers/theoraplayer/src/FFmpeg/TheoraVideoClip_FFmpeg.h +++ /dev/null @@ -1,53 +0,0 @@ -/************************************************************************************ -This source file is part of the Theora Video Playback Library -For latest info, see http://libtheoraplayer.googlecode.com -************************************************************************************* -Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com) -This program is free software; you can redistribute it and/or modify it under -the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause -*************************************************************************************/ -#if defined(__FFMPEG) && !defined(_TheoraVideoClip_FFmpeg_h) -#define _TheoraVideoClip_FFmpeg_h - -#include "TheoraAudioPacketQueue.h" -#include "TheoraVideoClip.h" - -struct AVFormatContext; -struct AVCodecContext; -struct AVCodec; -struct AVFrame; -struct AVIOContext; - -class TheoraVideoClip_FFmpeg : public TheoraVideoClip, public TheoraAudioPacketQueue -{ -protected: - bool mLoaded; - - AVFormatContext* mFormatContext; - AVCodecContext* mCodecContext; - AVIOContext* mAvioContext; - AVCodec* mCodec; - AVFrame* mFrame; - unsigned char* mInputBuffer; - int mVideoStreamIndex; - int mFrameNumber; - - void unload(); - void doSeek(); -public: - TheoraVideoClip_FFmpeg(TheoraDataSource* data_source, - TheoraOutputMode output_mode, - int nPrecachedFrames, - bool usePower2Stride); - ~TheoraVideoClip_FFmpeg(); - - bool _readData(); - bool decodeNextFrame(); - void _restart(); - void load(TheoraDataSource* source); - float decodeAudio(); - void decodedAudioCheck(); - std::string getDecoderName() { return "FFmpeg"; } -}; - -#endif diff --git a/drivers/theoraplayer/src/Theora/TheoraVideoClip_Theora.cpp b/drivers/theoraplayer/src/Theora/TheoraVideoClip_Theora.cpp deleted file mode 100644 
index c4f070ec50..0000000000 --- a/drivers/theoraplayer/src/Theora/TheoraVideoClip_Theora.cpp +++ /dev/null @@ -1,703 +0,0 @@ -/************************************************************************************ -This source file is part of the Theora Video Playback Library -For latest info, see http://libtheoraplayer.googlecode.com -************************************************************************************* -Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com) -This program is free software; you can redistribute it and/or modify it under -the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause -*************************************************************************************/ -#ifdef __THEORA -#include <memory.h> -#include <algorithm> -#include "TheoraVideoManager.h" -#include "TheoraFrameQueue.h" -#include "TheoraVideoFrame.h" -#include "TheoraAudioInterface.h" -#include "TheoraTimer.h" -#include "TheoraDataSource.h" -#include "TheoraUtil.h" -#include "TheoraException.h" -#include "TheoraVideoClip_Theora.h" -#include "TheoraPixelTransform.h" - -TheoraVideoClip_Theora::TheoraVideoClip_Theora(TheoraDataSource* data_source, - TheoraOutputMode output_mode, - int nPrecachedFrames, - bool usePower2Stride): - TheoraVideoClip(data_source, output_mode, nPrecachedFrames, usePower2Stride), - TheoraAudioPacketQueue() -{ - mInfo.TheoraDecoder = NULL; - mInfo.TheoraSetup = NULL; - mVorbisStreams = mTheoraStreams = 0; - mReadAudioSamples = 0; - mLastDecodedFrameNumber = 0; -} - -TheoraVideoClip_Theora::~TheoraVideoClip_Theora() -{ - if (mInfo.TheoraDecoder) - { - th_decode_free(mInfo.TheoraDecoder); - th_setup_free(mInfo.TheoraSetup); - - if (mAudioInterface) - { - vorbis_dsp_clear(&mInfo.VorbisDSPState); - vorbis_block_clear(&mInfo.VorbisBlock); - } - - ogg_stream_clear(&mInfo.TheoraStreamState); - th_comment_clear(&mInfo.TheoraComment); - th_info_clear(&mInfo.TheoraInfo); - - ogg_stream_clear(&mInfo.VorbisStreamState); - 
vorbis_comment_clear(&mInfo.VorbisComment); - vorbis_info_clear(&mInfo.VorbisInfo); - - ogg_sync_clear(&mInfo.OggSyncState); - } -} - -bool TheoraVideoClip_Theora::_readData() -{ - int audio_eos = 0, serno; - float audio_time = 0; - float time = mTimer->getTime(); - if (mRestarted) time = 0; - - for (;;) - { - char *buffer = ogg_sync_buffer(&mInfo.OggSyncState, 4096); - int bytes_read = mStream->read(buffer, 4096); - ogg_sync_wrote(&mInfo.OggSyncState, bytes_read); - - if (bytes_read < 4096) - { - if (bytes_read == 0) - { - if (!mAutoRestart) mEndOfFile = true; - return 0; - } - } - // when we fill the stream with enough pages, it'll start spitting out packets - // which contain keyframes, delta frames or audio data - while (ogg_sync_pageout(&mInfo.OggSyncState, &mInfo.OggPage) > 0) - { - serno = ogg_page_serialno(&mInfo.OggPage); - if (serno == mInfo.TheoraStreamState.serialno) ogg_stream_pagein(&mInfo.TheoraStreamState, &mInfo.OggPage); - if (mAudioInterface && serno == mInfo.VorbisStreamState.serialno) - { - ogg_int64_t g = ogg_page_granulepos(&mInfo.OggPage); - audio_time = (float) vorbis_granule_time(&mInfo.VorbisDSPState, g); - audio_eos = ogg_page_eos(&mInfo.OggPage); - ogg_stream_pagein(&mInfo.VorbisStreamState, &mInfo.OggPage); - } - } - if (!(mAudioInterface && !audio_eos && audio_time < time + 1.0f)) - break; - } - return 1; -} - -bool TheoraVideoClip_Theora::decodeNextFrame() -{ - if (mEndOfFile) return 0; - - TheoraVideoFrame* frame = mFrameQueue->requestEmptyFrame(); - if (!frame) return 0; // max number of precached frames reached - bool should_restart = 0; - ogg_packet opTheora; - ogg_int64_t granulePos; - th_ycbcr_buffer buff; - int ret, nAttempts; - for (;;) - { - // ogg_stream_packetout can return -1 and the official docs suggest to do subsequent calls until it succeeds - // because the data is out of sync. 
still will limit the number of attempts just in case - for (ret = -1, nAttempts = 0; ret < 0 && nAttempts < 100; nAttempts++) - { - ret = ogg_stream_packetout(&mInfo.TheoraStreamState, &opTheora); - } - - if (ret > 0) - { - int status = th_decode_packetin(mInfo.TheoraDecoder, &opTheora, &granulePos); - if (status != 0 && status != TH_DUPFRAME) continue; // 0 means success - - float time = (float) th_granule_time(mInfo.TheoraDecoder, granulePos); - unsigned long frame_number = (unsigned long) th_granule_frame(mInfo.TheoraDecoder, granulePos); - - if (time < mTimer->getTime() && !mRestarted && frame_number % 16 != 0) - { - // %16 operation is here to prevent a playback halt during video playback if the decoder can't keep up with demand. -#ifdef _DEBUG - th_writelog(mName + ": pre-dropped frame " + str((int) frame_number)); -#endif - ++mNumDroppedFrames; - continue; // drop frame - } - frame->mTimeToDisplay = time - mFrameDuration; - frame->mIteration = mIteration; - frame->_setFrameNumber(frame_number); - mLastDecodedFrameNumber = frame_number; - th_decode_ycbcr_out(mInfo.TheoraDecoder, buff); - TheoraPixelTransform t; - memset(&t, 0, sizeof(TheoraPixelTransform)); - - t.y = buff[0].data; t.yStride = buff[0].stride; - t.u = buff[1].data; t.uStride = buff[1].stride; - t.v = buff[2].data; t.vStride = buff[2].stride; - frame->decode(&t); - break; - } - else - { - if (!_readData()) - { - frame->mInUse = 0; - should_restart = mAutoRestart; - break; - } - } - } - - if (mAudioInterface != NULL) - { - mAudioMutex->lock(); - decodeAudio(); - mAudioMutex->unlock(); - } - if (should_restart) - { - ++mIteration; - _restart(); - } - return 1; -} - -void TheoraVideoClip_Theora::_restart() -{ - bool paused = mTimer->isPaused(); - if (!paused) mTimer->pause(); - long granule=0; - th_decode_ctl(mInfo.TheoraDecoder,TH_DECCTL_SET_GRANPOS,&granule,sizeof(granule)); - th_decode_free(mInfo.TheoraDecoder); - mInfo.TheoraDecoder=th_decode_alloc(&mInfo.TheoraInfo,mInfo.TheoraSetup); - 
ogg_stream_reset(&mInfo.TheoraStreamState); - if (mAudioInterface) - { - // empty the DSP buffer - //float **pcm; - //int len = vorbis_synthesis_pcmout(&mInfo.VorbisDSPState,&pcm); - //if (len) vorbis_synthesis_read(&mInfo.VorbisDSPState,len); - ogg_packet opVorbis; - mReadAudioSamples = 0; - while (ogg_stream_packetout(&mInfo.VorbisStreamState,&opVorbis) > 0) - { - if (vorbis_synthesis(&mInfo.VorbisBlock,&opVorbis) == 0) - vorbis_synthesis_blockin(&mInfo.VorbisDSPState,&mInfo.VorbisBlock); - } - ogg_stream_reset(&mInfo.VorbisStreamState); - } - - ogg_sync_reset(&mInfo.OggSyncState); - mStream->seek(0); - ogg_int64_t granulePos = 0; - th_decode_ctl(mInfo.TheoraDecoder, TH_DECCTL_SET_GRANPOS, &granulePos, sizeof(granule)); - - mEndOfFile = false; - - mRestarted = 1; - - if (!paused) mTimer->play(); -} - -void TheoraVideoClip_Theora::load(TheoraDataSource* source) -{ -#ifdef _DEBUG - th_writelog("-----"); -#endif - mStream = source; - readTheoraVorbisHeaders(); - - mInfo.TheoraDecoder = th_decode_alloc(&mInfo.TheoraInfo,mInfo.TheoraSetup); - - mWidth = mInfo.TheoraInfo.frame_width; - mHeight = mInfo.TheoraInfo.frame_height; - mSubFrameWidth = mInfo.TheoraInfo.pic_width; - mSubFrameHeight = mInfo.TheoraInfo.pic_height; - mSubFrameOffsetX = mInfo.TheoraInfo.pic_x; - mSubFrameOffsetY = mInfo.TheoraInfo.pic_y; - mStride = (mStride == 1) ? 
mStride = _nextPow2(getWidth()) : getWidth(); - mFPS = mInfo.TheoraInfo.fps_numerator / (float) mInfo.TheoraInfo.fps_denominator; - -#ifdef _DEBUG - th_writelog("width: " + str(mWidth) + ", height: " + str(mHeight) + ", fps: " + str((int) getFPS())); -#endif - mFrameQueue = new TheoraFrameQueue(this); - mFrameQueue->setSize(mNumPrecachedFrames); - // find out the duration of the file by seeking to the end - // having ogg decode pages, extract the granule pos from - // the last theora page and seek back to beginning of the file - long streamSize = mStream->size(), seekPos; - for (int i = 1; i <= 50; ++i) - { - ogg_sync_reset(&mInfo.OggSyncState); - seekPos = streamSize - 4096 * i; - if (seekPos < 0) seekPos = 0; - mStream->seek(seekPos); - - char *buffer = ogg_sync_buffer(&mInfo.OggSyncState, 4096 * i); - int bytes_read = mStream->read(buffer, 4096 * i); - ogg_sync_wrote(&mInfo.OggSyncState, bytes_read); - ogg_sync_pageseek(&mInfo.OggSyncState, &mInfo.OggPage); - - for (;;) - { - int ret = ogg_sync_pageout(&mInfo.OggSyncState, &mInfo.OggPage); - if (ret == 0) break; - // if page is not a theora page, skip it - if (ogg_page_serialno(&mInfo.OggPage) != mInfo.TheoraStreamState.serialno) continue; - - ogg_int64_t granule = ogg_page_granulepos(&mInfo.OggPage); - if (granule >= 0) - { - mNumFrames = (int) th_granule_frame(mInfo.TheoraDecoder, granule) + 1; - } - else if (mNumFrames > 0) - ++mNumFrames; // append delta frames at the end to get the exact numbe - } - if (mNumFrames > 0 || streamSize - 4096 * i < 0) break; - - } - if (mNumFrames < 0) - th_writelog("unable to determine file duration!"); - else - { - mDuration = mNumFrames / mFPS; -#ifdef _DEBUG - th_writelog("duration: " + strf(mDuration) + " seconds"); -#endif - } - // restore to beginning of stream. 
- ogg_sync_reset(&mInfo.OggSyncState); - mStream->seek(0); - - if (mVorbisStreams) // if there is no audio interface factory defined, even though the video - // clip might have audio, it will be ignored - { - vorbis_synthesis_init(&mInfo.VorbisDSPState, &mInfo.VorbisInfo); - vorbis_block_init(&mInfo.VorbisDSPState, &mInfo.VorbisBlock); - mNumAudioChannels = mInfo.VorbisInfo.channels; - mAudioFrequency = (int) mInfo.VorbisInfo.rate; - - // create an audio interface instance if available - TheoraAudioInterfaceFactory* audio_factory = TheoraVideoManager::getSingleton().getAudioInterfaceFactory(); - printf("**** audio factory is %p\n", audio_factory); - if (audio_factory) setAudioInterface(audio_factory->createInstance(this, mNumAudioChannels, mAudioFrequency)); - } - - mFrameDuration = 1.0f / getFPS(); -#ifdef _DEBUG - th_writelog("-----"); -#endif -} - -void TheoraVideoClip_Theora::readTheoraVorbisHeaders() -{ - ogg_packet tempOggPacket; - bool done = false; - bool decode_audio=TheoraVideoManager::getSingleton().getAudioInterfaceFactory() != NULL; - //init Vorbis/Theora Layer - //Ensure all structures get cleared out. 
- memset(&mInfo.OggSyncState, 0, sizeof(ogg_sync_state)); - memset(&mInfo.OggPage, 0, sizeof(ogg_page)); - memset(&mInfo.VorbisStreamState, 0, sizeof(ogg_stream_state)); - memset(&mInfo.TheoraStreamState, 0, sizeof(ogg_stream_state)); - memset(&mInfo.TheoraInfo, 0, sizeof(th_info)); - memset(&mInfo.TheoraComment, 0, sizeof(th_comment)); - memset(&mInfo.VorbisInfo, 0, sizeof(vorbis_info)); - memset(&mInfo.VorbisDSPState, 0, sizeof(vorbis_dsp_state)); - memset(&mInfo.VorbisBlock, 0, sizeof(vorbis_block)); - memset(&mInfo.VorbisComment, 0, sizeof(vorbis_comment)); - - ogg_sync_init(&mInfo.OggSyncState); - th_comment_init(&mInfo.TheoraComment); - th_info_init(&mInfo.TheoraInfo); - vorbis_info_init(&mInfo.VorbisInfo); - vorbis_comment_init(&mInfo.VorbisComment); - - while (!done) - { - char *buffer = ogg_sync_buffer(&mInfo.OggSyncState, 4096); - int bytes_read = mStream->read(buffer, 4096); - ogg_sync_wrote(&mInfo.OggSyncState, bytes_read); - - if (bytes_read == 0) - break; - - while (ogg_sync_pageout(&mInfo.OggSyncState, &mInfo.OggPage) > 0) - { - ogg_stream_state OggStateTest; - - //is this an initial header? 
If not, stop - if (!ogg_page_bos(&mInfo.OggPage)) - { - //This is done blindly, because stream only accept themselves - if (mTheoraStreams) ogg_stream_pagein(&mInfo.TheoraStreamState, &mInfo.OggPage); - if (mVorbisStreams) ogg_stream_pagein(&mInfo.VorbisStreamState, &mInfo.OggPage); - - done=true; - break; - } - - ogg_stream_init(&OggStateTest, ogg_page_serialno(&mInfo.OggPage)); - ogg_stream_pagein(&OggStateTest, &mInfo.OggPage); - ogg_stream_packetout(&OggStateTest, &tempOggPacket); - - //identify the codec - int ret; - if (!mTheoraStreams) - { - ret = th_decode_headerin(&mInfo.TheoraInfo, &mInfo.TheoraComment, &mInfo.TheoraSetup, &tempOggPacket); - - if (ret > 0) - { - //This is the Theora Header - memcpy(&mInfo.TheoraStreamState, &OggStateTest, sizeof(OggStateTest)); - mTheoraStreams = 1; - continue; - } - } - if (decode_audio && !mVorbisStreams && - vorbis_synthesis_headerin(&mInfo.VorbisInfo, &mInfo.VorbisComment, &tempOggPacket) >=0) - { - //This is vorbis header - memcpy(&mInfo.VorbisStreamState, &OggStateTest, sizeof(OggStateTest)); - mVorbisStreams = 1; - continue; - } - //Hmm. I guess it's not a header we support, so erase it - ogg_stream_clear(&OggStateTest); - } - } - - while ((mTheoraStreams && (mTheoraStreams < 3)) || - (mVorbisStreams && (mVorbisStreams < 3))) - { - //Check 2nd'dary headers... 
Theora First - int iSuccess; - while (mTheoraStreams && mTheoraStreams < 3 && - (iSuccess = ogg_stream_packetout(&mInfo.TheoraStreamState, &tempOggPacket))) - { - if (iSuccess < 0) - throw TheoraGenericException("Error parsing Theora stream headers."); - if (!th_decode_headerin(&mInfo.TheoraInfo, &mInfo.TheoraComment, &mInfo.TheoraSetup, &tempOggPacket)) - throw TheoraGenericException("invalid theora stream"); - - ++mTheoraStreams; - } //end while looking for more theora headers - - //look 2nd vorbis header packets - while (mVorbisStreams < 3 && (iSuccess = ogg_stream_packetout(&mInfo.VorbisStreamState, &tempOggPacket))) - { - if (iSuccess < 0) - throw TheoraGenericException("Error parsing vorbis stream headers"); - - if (vorbis_synthesis_headerin(&mInfo.VorbisInfo, &mInfo.VorbisComment,&tempOggPacket)) - throw TheoraGenericException("invalid stream"); - - ++mVorbisStreams; - } //end while looking for more vorbis headers - - //Not finished with Headers, get some more file data - if (ogg_sync_pageout(&mInfo.OggSyncState, &mInfo.OggPage) > 0) - { - if (mTheoraStreams) ogg_stream_pagein(&mInfo.TheoraStreamState, &mInfo.OggPage); - if (mVorbisStreams) ogg_stream_pagein(&mInfo.VorbisStreamState, &mInfo.OggPage); - } - else - { - char *buffer = ogg_sync_buffer(&mInfo.OggSyncState, 4096); - int bytes_read = mStream->read(buffer, 4096); - ogg_sync_wrote(&mInfo.OggSyncState, bytes_read); - - if (bytes_read == 0) - throw TheoraGenericException("End of file found prematurely"); - } - } //end while looking for all headers - // writelog("Vorbis Headers: " + str(mVorbisHeaders) + " Theora Headers : " + str(mTheoraHeaders)); -} - -void TheoraVideoClip_Theora::decodedAudioCheck() -{ - if (!mAudioInterface || mTimer->isPaused()) return; - - mAudioMutex->lock(); - flushAudioPackets(mAudioInterface); - mAudioMutex->unlock(); -} - -float TheoraVideoClip_Theora::decodeAudio() -{ - if (mRestarted) return -1; - - ogg_packet opVorbis; - float **pcm; - int len = 0; - float timestamp = -1; 
- bool read_past_timestamp = 0; - - float factor = 1.0f / mAudioFrequency; - float videoTime = (float) mLastDecodedFrameNumber / mFPS; - float min = mFrameQueue->getSize() / mFPS + 1.0f; - - for (;;) - { - len = vorbis_synthesis_pcmout(&mInfo.VorbisDSPState, &pcm); - if (len == 0) - { - if (ogg_stream_packetout(&mInfo.VorbisStreamState, &opVorbis) > 0) - { - if (vorbis_synthesis(&mInfo.VorbisBlock, &opVorbis) == 0) - { - if (timestamp < 0 && opVorbis.granulepos >= 0) - { - timestamp = (float) vorbis_granule_time(&mInfo.VorbisDSPState, opVorbis.granulepos); - } - else if (timestamp >= 0) read_past_timestamp = 1; - vorbis_synthesis_blockin(&mInfo.VorbisDSPState, &mInfo.VorbisBlock); - } - continue; - } - else - { - float audioTime = mReadAudioSamples * factor; - // always buffer up of audio ahead of the frames - if (audioTime - videoTime < min) - { - if (!_readData()) break; - } - else - break; - } - } - addAudioPacket(pcm, len, mAudioGain); - mReadAudioSamples += len; - if (read_past_timestamp) timestamp += (float) len / mInfo.VorbisInfo.rate; - vorbis_synthesis_read(&mInfo.VorbisDSPState, len); // tell vorbis we read a number of samples - } - return timestamp; -} - -long TheoraVideoClip_Theora::seekPage(long targetFrame, bool return_keyframe) -{ - int i,seek_min = 0, seek_max = (int) mStream->size(); - long frame; - ogg_int64_t granule = 0; - - if (targetFrame == 0) mStream->seek(0); - for (i = (targetFrame == 0) ? 
100 : 0; i < 100; ++i) - { - ogg_sync_reset(&mInfo.OggSyncState); - mStream->seek((seek_min + seek_max) / 2); // do a binary search - memset(&mInfo.OggPage, 0, sizeof(ogg_page)); - ogg_sync_pageseek(&mInfo.OggSyncState, &mInfo.OggPage); - - for (;i < 1000;) - { - int ret = ogg_sync_pageout(&mInfo.OggSyncState, &mInfo.OggPage); - if (ret == 1) - { - int serno = ogg_page_serialno(&mInfo.OggPage); - if (serno == mInfo.TheoraStreamState.serialno) - { - granule = ogg_page_granulepos(&mInfo.OggPage); - if (granule >= 0) - { - frame = (long) th_granule_frame(mInfo.TheoraDecoder, granule); - if (frame < targetFrame && targetFrame - frame < 10) - { - // we're close enough, let's break this. - i = 1000; - break; - } - // we're not close enough, let's shorten the borders of the binary search - if (targetFrame - 1 > frame) seek_min = (seek_min + seek_max) / 2; - else seek_max = (seek_min + seek_max) / 2; - break; - } - } - } - else - { - char *buffer = ogg_sync_buffer(&mInfo.OggSyncState, 4096); - int bytes_read = mStream->read(buffer, 4096); - if (bytes_read == 0) break; - ogg_sync_wrote(&mInfo.OggSyncState, bytes_read); - } - } - } - if (return_keyframe) return (long) (granule >> mInfo.TheoraInfo.keyframe_granule_shift); - - ogg_sync_reset(&mInfo.OggSyncState); - memset(&mInfo.OggPage, 0, sizeof(ogg_page)); - ogg_sync_pageseek(&mInfo.OggSyncState, &mInfo.OggPage); - if (targetFrame == 0) return -1; - mStream->seek((seek_min + seek_max) / 2); // do a binary search - return -1; -} - -void TheoraVideoClip_Theora::doSeek() -{ -#if _DEBUG - th_writelog(mName + " [seek]: seeking to frame " + str(mSeekFrame)); -#endif - int frame; - float time = mSeekFrame / getFPS(); - mTimer->seek(time); - bool paused = mTimer->isPaused(); - if (!paused) mTimer->pause(); // pause until seeking is done - - mEndOfFile = false; - mRestarted = false; - - resetFrameQueue(); - // reset the video decoder. 
- ogg_stream_reset(&mInfo.TheoraStreamState); - th_decode_free(mInfo.TheoraDecoder); - mInfo.TheoraDecoder = th_decode_alloc(&mInfo.TheoraInfo, mInfo.TheoraSetup); - - if (mAudioInterface) - { - mAudioMutex->lock(); - ogg_stream_reset(&mInfo.VorbisStreamState); - vorbis_synthesis_restart(&mInfo.VorbisDSPState); - destroyAllAudioPackets(); - } - // first seek to desired frame, then figure out the location of the - // previous keyframe and seek to it. - // then by setting the correct time, the decoder will skip N frames untill - // we get the frame we want. - frame = (int) seekPage(mSeekFrame, 1); // find the keyframe nearest to the target frame -#ifdef _DEBUG - // th_writelog(mName + " [seek]: nearest keyframe for frame " + str(mSeekFrame) + " is frame: " + str(frame)); -#endif - seekPage(std::max(0, frame - 1), 0); - - ogg_packet opTheora; - ogg_int64_t granulePos; - bool granule_set = 0; - if (frame <= 1) - { - if (mInfo.TheoraInfo.version_major == 3 && mInfo.TheoraInfo.version_minor == 2 && mInfo.TheoraInfo.version_subminor == 0) - granulePos = 0; - else - granulePos = 1; // because of difference in granule interpretation in theora streams 3.2.0 and newer ones - th_decode_ctl(mInfo.TheoraDecoder, TH_DECCTL_SET_GRANPOS, &granulePos, sizeof(granulePos)); - granule_set = 1; - } - - // now that we've found the keyframe that preceeds our desired frame, lets keep on decoding frames until we - // reach our target frame. 
- - int status, ret; - for (;mSeekFrame != 0;) - { - ret = ogg_stream_packetout(&mInfo.TheoraStreamState, &opTheora); - if (ret > 0) - { - if (!granule_set) - { - // theora decoder requires to set the granule pos after seek to be able to determine the current frame - if (opTheora.granulepos >= 0) - { - th_decode_ctl(mInfo.TheoraDecoder, TH_DECCTL_SET_GRANPOS, &opTheora.granulepos, sizeof(opTheora.granulepos)); - granule_set = 1; - } - else continue; // ignore prev delta frames until we hit a keyframe - } - status = th_decode_packetin(mInfo.TheoraDecoder, &opTheora, &granulePos); - if (status != 0 && status != TH_DUPFRAME) continue; - frame = (int) th_granule_frame(mInfo.TheoraDecoder, granulePos); - if (frame >= mSeekFrame - 1) break; - } - else - { - if (!_readData()) - { - th_writelog(mName + " [seek]: fineseeking failed, _readData failed!"); - if (mAudioInterface) mAudioMutex->unlock(); - return; - } - } - } -#ifdef _DEBUG - // th_writelog(mName + " [seek]: fineseeked to frame " + str(frame + 1) + ", requested: " + str(mSeekFrame)); -#endif - if (mAudioInterface) - { - // read audio data until we reach a timestamp. this usually takes only one iteration, but just in case let's - // wrap it in a loop - float timestamp; - for (;;) - { - timestamp = decodeAudio(); - if (timestamp >= 0) break; - else _readData(); - } - float rate = (float) mAudioFrequency * mNumAudioChannels; - float queued_time = getAudioPacketQueueLength(); - // at this point there are only 2 possibilities: either we have too much packets and we have to delete - // the first N ones, or we don't have enough, so let's fill the gap with silence. 
- if (time > timestamp - queued_time) - { - while (mTheoraAudioPacketQueue != NULL) - { - if (time > timestamp - queued_time + mTheoraAudioPacketQueue->numSamples / rate) - { - queued_time -= mTheoraAudioPacketQueue->numSamples / rate; - destroyAudioPacket(popAudioPacket()); - } - else - { - int n_trim = (int) ((timestamp - queued_time + mTheoraAudioPacketQueue->numSamples / rate - time) * rate); - if (mTheoraAudioPacketQueue->numSamples - n_trim <= 0) - destroyAudioPacket(popAudioPacket()); // if there's no data to be left, just destroy it - else - { - for (int i = n_trim, j = 0; i < mTheoraAudioPacketQueue->numSamples; ++i, ++j) - mTheoraAudioPacketQueue->pcm[j] = mTheoraAudioPacketQueue->pcm[i]; - mTheoraAudioPacketQueue->numSamples -= n_trim; - } - break; - } - } - } - else - { - // expand the first packet with silence. - if (mTheoraAudioPacketQueue) // just in case! - { - int i, j, nmissing = (int) ((timestamp - queued_time - time) * rate); - if (nmissing > 0) - { - float* samples = new float[nmissing + mTheoraAudioPacketQueue->numSamples]; - for (i = 0; i < nmissing; ++i) samples[i] = 0; - for (j = 0; i < nmissing + mTheoraAudioPacketQueue->numSamples; ++i, ++j) - samples[i] = mTheoraAudioPacketQueue->pcm[j]; - delete [] mTheoraAudioPacketQueue->pcm; - mTheoraAudioPacketQueue->pcm = samples; - } - } - } - mLastDecodedFrameNumber = mSeekFrame; - mReadAudioSamples = (unsigned int) (timestamp * mAudioFrequency); - - mAudioMutex->unlock(); - } - if (!paused) mTimer->play(); - mSeekFrame = -1; -} -#endif diff --git a/drivers/theoraplayer/src/Theora/TheoraVideoClip_Theora.h b/drivers/theoraplayer/src/Theora/TheoraVideoClip_Theora.h deleted file mode 100644 index c64c183029..0000000000 --- a/drivers/theoraplayer/src/Theora/TheoraVideoClip_Theora.h +++ /dev/null @@ -1,64 +0,0 @@ -/************************************************************************************ -This source file is part of the Theora Video Playback Library -For latest info, see 
http://libtheoraplayer.googlecode.com -************************************************************************************* -Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com) -This program is free software; you can redistribute it and/or modify it under -the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause -*************************************************************************************/ -#if defined(__THEORA) && !defined(_TheoraVideoClip_Theora_h) -#define _TheoraVideoClip_Theora_h - -#include <ogg/ogg.h> -#include <vorbis/vorbisfile.h> -#include <theora/theoradec.h> -#include "TheoraAudioPacketQueue.h" -#include "TheoraVideoClip.h" - -struct TheoraInfoStruct -{ - // ogg/vorbis/theora variables - ogg_sync_state OggSyncState; - ogg_page OggPage; - ogg_stream_state VorbisStreamState; - ogg_stream_state TheoraStreamState; - //Theora State - th_info TheoraInfo; - th_comment TheoraComment; - th_setup_info* TheoraSetup; - th_dec_ctx* TheoraDecoder; - //Vorbis State - vorbis_info VorbisInfo; - vorbis_dsp_state VorbisDSPState; - vorbis_block VorbisBlock; - vorbis_comment VorbisComment; -}; - -class TheoraVideoClip_Theora : public TheoraVideoClip, public TheoraAudioPacketQueue -{ -protected: - TheoraInfoStruct mInfo; // a pointer is used to avoid having to include theora & vorbis headers - int mTheoraStreams, mVorbisStreams; // Keeps track of Theora and Vorbis Streams - - long seekPage(long targetFrame, bool return_keyframe); - void doSeek(); - void readTheoraVorbisHeaders(); - unsigned int mReadAudioSamples; - unsigned long mLastDecodedFrameNumber; -public: - TheoraVideoClip_Theora(TheoraDataSource* data_source, - TheoraOutputMode output_mode, - int nPrecachedFrames, - bool usePower2Stride); - ~TheoraVideoClip_Theora(); - - bool _readData(); - bool decodeNextFrame(); - void _restart(); - void load(TheoraDataSource* source); - float decodeAudio(); - void decodedAudioCheck(); - std::string getDecoderName() { return "Theora"; } -}; - -#endif 
diff --git a/drivers/theoraplayer/src/TheoraAsync.cpp b/drivers/theoraplayer/src/TheoraAsync.cpp deleted file mode 100644 index cc3b7a4bf5..0000000000 --- a/drivers/theoraplayer/src/TheoraAsync.cpp +++ /dev/null @@ -1,253 +0,0 @@ -/************************************************************************************ -This source file is part of the Theora Video Playback Library -For latest info, see http://libtheoraplayer.googlecode.com -************************************************************************************* -Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com) -This program is free software; you can redistribute it and/or modify it under -the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause -*************************************************************************************/ - -#include <stdio.h> -#include <stdlib.h> - -#ifdef _WIN32 -#include <windows.h> -#else -#include <unistd.h> -#include <pthread.h> -#endif - -#include "TheoraAsync.h" -#include "TheoraUtil.h" - -#ifdef _WINRT -#include <wrl.h> -#endif - -/////////////////////////////////////////////////////////////////////////////////////////////////// -// Mutex -/////////////////////////////////////////////////////////////////////////////////////////////////// - -TheoraMutex::TheoraMutex() -{ -#ifdef _WIN32 -#ifndef _WINRT // WinXP does not have CreateTheoraMutexEx() - mHandle = CreateMutex(0, 0, 0); -#else - mHandle = CreateMutexEx(NULL, NULL, 0, SYNCHRONIZE); -#endif -#else - mHandle = (pthread_mutex_t*)malloc(sizeof(pthread_mutex_t)); - pthread_mutex_init((pthread_mutex_t*)mHandle, 0); -#endif -} - -TheoraMutex::~TheoraMutex() -{ -#ifdef _WIN32 - CloseHandle(mHandle); -#else - pthread_mutex_destroy((pthread_mutex_t*)mHandle); - free((pthread_mutex_t*)mHandle); - mHandle = NULL; -#endif -} - -void TheoraMutex::lock() -{ -#ifdef _WIN32 - WaitForSingleObjectEx(mHandle, INFINITE, FALSE); -#else - pthread_mutex_lock((pthread_mutex_t*)mHandle); -#endif -} - -void 
TheoraMutex::unlock() -{ -#ifdef _WIN32 - ReleaseMutex(mHandle); -#else - pthread_mutex_unlock((pthread_mutex_t*)mHandle); -#endif -} - -/////////////////////////////////////////////////////////////////////////////////////////////////// -// Thread -/////////////////////////////////////////////////////////////////////////////////////////////////// - -#ifdef _WINRT -using namespace Windows::Foundation; -using namespace Windows::System::Threading; -#endif - -#ifdef _WIN32 -unsigned long WINAPI theoraAsyncCall(void* param) -#else -void* theoraAsyncCall(void* param) -#endif -{ - TheoraThread* t = (TheoraThread*)param; - t->execute(); -#ifdef _WIN32 - return 0; -#else - pthread_exit(NULL); - return NULL; -#endif -} - -#ifdef _WINRT -struct TheoraAsyncActionWrapper -{ -public: - IAsyncAction^ mAsyncAction; - TheoraAsyncActionWrapper(IAsyncAction^ asyncAction) - { - mAsyncAction = asyncAction; - } -}; -#endif - -TheoraThread::TheoraThread() : mRunning(false), mId(0) -{ -#ifndef _WIN32 - mId = (pthread_t*)malloc(sizeof(pthread_t)); -#endif -} - -TheoraThread::~TheoraThread() -{ - if (mRunning) - { - stop(); - } - if (mId != NULL) - { -#ifdef _WIN32 -#ifndef _WINRT - CloseHandle(mId); -#else - delete mId; -#endif -#else - free((pthread_t*)mId); -#endif - mId = NULL; - } -} - -void TheoraThread::start() -{ - mRunning = true; -#ifdef _WIN32 -#ifndef _WINRT - mId = CreateThread(0, 0, &theoraAsyncCall, this, 0, 0); -#else - mId = new TheoraAsyncActionWrapper(ThreadPool::RunAsync( - ref new WorkItemHandler([&](IAsyncAction^ work_item) - { - execute(); - }), - WorkItemPriority::Normal, WorkItemOptions::TimeSliced)); -#endif -#else - pthread_create((pthread_t*)mId, NULL, &theoraAsyncCall, this); -#endif -} - -bool TheoraThread::isRunning() -{ - bool ret; - mRunningMutex.lock(); - ret = mRunning; - mRunningMutex.unlock(); - - return ret; -} - -void TheoraThread::join() -{ - mRunningMutex.lock(); - mRunning = false; - mRunningMutex.unlock(); -#ifdef _WIN32 -#ifndef _WINRT - 
WaitForSingleObject(mId, INFINITE); - if (mId != NULL) - { - CloseHandle(mId); - mId = NULL; - } -#else - IAsyncAction^ action = ((TheoraAsyncActionWrapper*)mId)->mAsyncAction; - int i = 0; - while (action->Status != AsyncStatus::Completed && - action->Status != AsyncStatus::Canceled && - action->Status != AsyncStatus::Error && - i < 100) - { - _psleep(50); - ++i; - } - if (i >= 100) - { - i = 0; - action->Cancel(); - while (action->Status != AsyncStatus::Completed && - action->Status != AsyncStatus::Canceled && - action->Status != AsyncStatus::Error && - i < 100) - { - _psleep(50); - ++i; - } - } -#endif -#else - pthread_join(*((pthread_t*)mId), 0); -#endif -} - -void TheoraThread::resume() -{ -#ifdef _WIN32 -#ifndef _WINRT - ResumeThread(mId); -#else - // not available in WinRT -#endif -#endif -} - -void TheoraThread::pause() -{ -#ifdef _WIN32 -#ifndef _WINRT - SuspendThread(mId); -#else - // not available in WinRT -#endif -#endif -} - -void TheoraThread::stop() -{ - if (mRunning) - { - mRunningMutex.lock(); - mRunning = false; - mRunningMutex.unlock(); -#ifdef _WIN32 -#ifndef _WINRT - TerminateThread(mId, 0); -#else - ((TheoraAsyncActionWrapper*)mId)->mAsyncAction->Cancel(); -#endif -#elif defined(_ANDROID) - pthread_kill(*((pthread_t*)mId), 0); -#else - pthread_cancel(*((pthread_t*)mId)); -#endif - } -} - diff --git a/drivers/theoraplayer/src/TheoraAudioInterface.cpp b/drivers/theoraplayer/src/TheoraAudioInterface.cpp deleted file mode 100644 index a265cb57b5..0000000000 --- a/drivers/theoraplayer/src/TheoraAudioInterface.cpp +++ /dev/null @@ -1,21 +0,0 @@ -/************************************************************************************ -This source file is part of the Theora Video Playback Library -For latest info, see http://libtheoraplayer.googlecode.com -************************************************************************************* -Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com) -This program is free software; you can redistribute it 
and/or modify it under -the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause -*************************************************************************************/ -#include "TheoraAudioInterface.h" - -TheoraAudioInterface::TheoraAudioInterface(TheoraVideoClip* owner, int nChannels, int freq) -{ - mFreq = freq; - mNumChannels = nChannels; - mClip = owner; -} - -TheoraAudioInterface::~TheoraAudioInterface() -{ - -} diff --git a/drivers/theoraplayer/src/TheoraAudioPacketQueue.cpp b/drivers/theoraplayer/src/TheoraAudioPacketQueue.cpp deleted file mode 100644 index be5e1018f9..0000000000 --- a/drivers/theoraplayer/src/TheoraAudioPacketQueue.cpp +++ /dev/null @@ -1,126 +0,0 @@ -/************************************************************************************ -This source file is part of the Theora Video Playback Library -For latest info, see http://libtheoraplayer.googlecode.com -************************************************************************************* -Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com) -This program is free software; you can redistribute it and/or modify it under -the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause -*************************************************************************************/ -#include <stdlib.h> -#include "TheoraAudioPacketQueue.h" -#include "TheoraAudioInterface.h" - -TheoraAudioPacketQueue::TheoraAudioPacketQueue() -{ - mTheoraAudioPacketQueue = NULL; -} - -TheoraAudioPacketQueue::~TheoraAudioPacketQueue() -{ - destroyAllAudioPackets(); -} - -float TheoraAudioPacketQueue::getAudioPacketQueueLength() -{ - float len = 0; - for (TheoraAudioPacket* p = mTheoraAudioPacketQueue; p != NULL; p = p->next) - len += p->numSamples; - - return len / (mAudioFrequency * mNumAudioChannels); -} - -void TheoraAudioPacketQueue::_addAudioPacket(float* data, int numSamples) -{ - TheoraAudioPacket* packet = new TheoraAudioPacket; - packet->pcm = data; - packet->numSamples = 
numSamples; - packet->next = NULL; - - - if (mTheoraAudioPacketQueue == NULL) mTheoraAudioPacketQueue = packet; - else - { - TheoraAudioPacket* last = mTheoraAudioPacketQueue; - for (TheoraAudioPacket* p = last; p != NULL; p = p->next) - last = p; - last->next = packet; - } -} - -void TheoraAudioPacketQueue::addAudioPacket(float** buffer, int numSamples, float gain) -{ - float* data = new float[numSamples * mNumAudioChannels]; - float* dataptr = data; - int i; - unsigned int j; - - if (gain < 1.0f) - { - // apply gain, let's attenuate the samples - for (i = 0; i < numSamples; ++i) - for (j = 0; j < mNumAudioChannels; j++, ++dataptr) - *dataptr = buffer[i][j] * gain; - } - else - { - // do a simple copy, faster then the above method, when gain is 1.0f - for (i = 0; i < numSamples; ++i) - for (j = 0; j < mNumAudioChannels; j++, ++dataptr) - *dataptr = buffer[j][i]; - } - - _addAudioPacket(data, numSamples * mNumAudioChannels); -} - -void TheoraAudioPacketQueue::addAudioPacket(float* buffer, int numSamples, float gain) -{ - float* data = new float[numSamples * mNumAudioChannels]; - float* dataptr = data; - int i, numFloats = numSamples * mNumAudioChannels; - - if (gain < 1.0f) - { - // apply gain, let's attenuate the samples - for (i = 0; i < numFloats; ++i, dataptr++) - *dataptr = buffer[i] * gain; - } - else - { - // do a simple copy, faster then the above method, when gain is 1.0f - for (i = 0; i < numFloats; ++i, dataptr++) - *dataptr = buffer[i]; - } - - _addAudioPacket(data, numFloats); -} - -TheoraAudioPacket* TheoraAudioPacketQueue::popAudioPacket() -{ - if (mTheoraAudioPacketQueue == NULL) return NULL; - TheoraAudioPacket* p = mTheoraAudioPacketQueue; - mTheoraAudioPacketQueue = mTheoraAudioPacketQueue->next; - return p; -} - -void TheoraAudioPacketQueue::destroyAudioPacket(TheoraAudioPacket* p) -{ - if (p == NULL) return; - delete [] p->pcm; - delete p; -} - -void TheoraAudioPacketQueue::destroyAllAudioPackets() -{ - for (TheoraAudioPacket* p = 
popAudioPacket(); p != NULL; p = popAudioPacket()) - destroyAudioPacket(p); -} - -void TheoraAudioPacketQueue::flushAudioPackets(TheoraAudioInterface* audioInterface) -{ - - for (TheoraAudioPacket* p = popAudioPacket(); p != NULL; p = popAudioPacket()) - { - audioInterface->insertData(p->pcm, p->numSamples); - destroyAudioPacket(p); - } -}
\ No newline at end of file diff --git a/drivers/theoraplayer/src/TheoraDataSource.cpp b/drivers/theoraplayer/src/TheoraDataSource.cpp deleted file mode 100644 index 6011dc6783..0000000000 --- a/drivers/theoraplayer/src/TheoraDataSource.cpp +++ /dev/null @@ -1,128 +0,0 @@ -/************************************************************************************ -This source file is part of the Theora Video Playback Library -For latest info, see http://libtheoraplayer.googlecode.com -************************************************************************************* -Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com) -This program is free software; you can redistribute it and/or modify it under -the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause -*************************************************************************************/ -#include <stdio.h> -#include <memory.h> -#include "TheoraDataSource.h" -#include "TheoraException.h" -#include "TheoraVideoManager.h" -#include "TheoraUtil.h" - -TheoraDataSource::~TheoraDataSource() -{ - -} - -TheoraFileDataSource::TheoraFileDataSource(std::string filename) -{ - mFilename = filename; - mFilePtr = NULL; -} - -TheoraFileDataSource::~TheoraFileDataSource() -{ - if (mFilePtr) - { - fclose(mFilePtr); - mFilePtr = NULL; - } -} - -void TheoraFileDataSource::openFile() -{ - if (mFilePtr == NULL) - { - mFilePtr=fopen(mFilename.c_str(), "rb"); - if (!mFilePtr) - { - std::string msg = "Can't open video file: " + mFilename; - th_writelog(msg); - throw TheoraGenericException(msg); - } - fseek(mFilePtr, 0, SEEK_END); - mSize = ftell(mFilePtr); - fseek(mFilePtr, 0, SEEK_SET); - } -} - -int TheoraFileDataSource::read(void* output, int nBytes) -{ - if (mFilePtr == NULL) openFile(); - size_t n = fread(output, 1, nBytes, mFilePtr); - return (int) n; -} - -void TheoraFileDataSource::seek(unsigned long byte_index) -{ - if (mFilePtr == NULL) openFile(); - fseek(mFilePtr, byte_index, SEEK_SET); -} - -unsigned long 
TheoraFileDataSource::size() -{ - if (mFilePtr == NULL) openFile(); - return mSize; -} - -unsigned long TheoraFileDataSource::tell() -{ - if (mFilePtr == NULL) return 0; - return ftell(mFilePtr); -} - -TheoraMemoryFileDataSource::TheoraMemoryFileDataSource(std::string filename) : - mReadPointer(0), - mData(0) -{ - mFilename=filename; - FILE* f=fopen(filename.c_str(),"rb"); - if (!f) throw TheoraGenericException("Can't open video file: "+filename); - fseek(f,0,SEEK_END); - mSize=ftell(f); - fseek(f,0,SEEK_SET); - mData=new unsigned char[mSize]; - fread(mData,1,mSize,f); - fclose(f); -} - -TheoraMemoryFileDataSource::TheoraMemoryFileDataSource(unsigned char* data, long size, const std::string& filename) -{ - mFilename = filename; - mData = data; - mSize = size; - mReadPointer = 0; -} - -TheoraMemoryFileDataSource::~TheoraMemoryFileDataSource() -{ - if (mData) delete [] mData; -} - -int TheoraMemoryFileDataSource::read(void* output, int nBytes) -{ - int n = (int) ((mReadPointer+nBytes <= mSize) ? 
nBytes : mSize - mReadPointer); - if (!n) return 0; - memcpy(output, mData + mReadPointer, n); - mReadPointer += n; - return n; -} - -void TheoraMemoryFileDataSource::seek(unsigned long byte_index) -{ - mReadPointer=byte_index; -} - -unsigned long TheoraMemoryFileDataSource::size() -{ - return mSize; -} - -unsigned long TheoraMemoryFileDataSource::tell() -{ - return mReadPointer; -} diff --git a/drivers/theoraplayer/src/TheoraException.cpp b/drivers/theoraplayer/src/TheoraException.cpp deleted file mode 100644 index 4588a81397..0000000000 --- a/drivers/theoraplayer/src/TheoraException.cpp +++ /dev/null @@ -1,37 +0,0 @@ -/************************************************************************************ -This source file is part of the Theora Video Playback Library -For latest info, see http://libtheoraplayer.googlecode.com -************************************************************************************* -Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com) -This program is free software; you can redistribute it and/or modify it under -the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause -*************************************************************************************/ -#include "TheoraException.h" -#include "TheoraUtil.h" -#include "TheoraVideoManager.h" -#include <stdio.h> - -_TheoraGenericException::_TheoraGenericException(const std::string& errorText, std::string type, std::string file, int line) -{ - mErrText = errorText; - int src = (int) file.find("src"); - if (src >= 0) file = file.substr(src + 4, 1000); - mLineNumber = line; - mFile = file; -} - - -std::string _TheoraGenericException::repr() -{ - std::string text = getType(); - if (text != "") text += ": "; - - if (mFile != "") text += "[" + mFile + ":" + str(mLineNumber) + "] - "; - - return text + getErrorText(); -} - -void _TheoraGenericException::writeOutput() -{ - th_writelog("----------------\nException Error!\n\n" + repr() + "\n----------------"); -} diff --git 
a/drivers/theoraplayer/src/TheoraFrameQueue.cpp b/drivers/theoraplayer/src/TheoraFrameQueue.cpp deleted file mode 100644 index f402144795..0000000000 --- a/drivers/theoraplayer/src/TheoraFrameQueue.cpp +++ /dev/null @@ -1,174 +0,0 @@ -/************************************************************************************ -This source file is part of the Theora Video Playback Library -For latest info, see http://libtheoraplayer.googlecode.com -************************************************************************************* -Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com) -This program is free software; you can redistribute it and/or modify it under -the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause -*************************************************************************************/ -#include "TheoraFrameQueue.h" -#include "TheoraVideoFrame.h" -#include "TheoraVideoManager.h" -#include "TheoraUtil.h" - - -TheoraFrameQueue::TheoraFrameQueue(TheoraVideoClip* parent) -{ - mParent = parent; -} - -TheoraFrameQueue::~TheoraFrameQueue() -{ - foreach_l(TheoraVideoFrame*, mQueue) - { - delete (*it); - } - mQueue.clear(); -} - -TheoraVideoFrame* TheoraFrameQueue::createFrameInstance(TheoraVideoClip* clip) -{ - TheoraVideoFrame* frame = new TheoraVideoFrame(clip); - if (frame->getBuffer() == NULL) // This can happen if you run out of memory - { - delete frame; - return NULL; - } - return frame; -} - -void TheoraFrameQueue::setSize(int n) -{ - mMutex.lock(); - if (mQueue.size() > 0) - { - foreach_l (TheoraVideoFrame*, mQueue) - { - delete (*it); - } - mQueue.clear(); - } - TheoraVideoFrame* frame; - for (int i = 0;i < n; ++i) - { - frame = createFrameInstance(mParent); - if (frame != NULL) mQueue.push_back(frame); - else - { - TheoraVideoManager::getSingleton().logMessage("TheoraFrameQueue: unable to create " + str(n) + " frames, out of memory. 
Created " + str((int) mQueue.size()) + " frames."); - break; - } - } - mMutex.unlock(); -} - -int TheoraFrameQueue::getSize() -{ - return (int) mQueue.size(); -} - -TheoraVideoFrame* TheoraFrameQueue::_getFirstAvailableFrame() -{ - TheoraVideoFrame* frame = mQueue.front(); - if (frame->mReady) return frame; - else return NULL; -} - -TheoraVideoFrame* TheoraFrameQueue::getFirstAvailableFrame() -{ - mMutex.lock(); - TheoraVideoFrame* frame = _getFirstAvailableFrame(); - mMutex.unlock(); - return frame; -} - -void TheoraFrameQueue::clear() -{ - mMutex.lock(); - foreach_l (TheoraVideoFrame*, mQueue) - (*it)->clear(); - mMutex.unlock(); -} - -void TheoraFrameQueue::_pop(int n) -{ - for (int i = 0; i < n; ++i) - { - TheoraVideoFrame* first = mQueue.front(); - first->clear(); - mQueue.pop_front(); - mQueue.push_back(first); - } -} - -void TheoraFrameQueue::pop(int n) -{ - mMutex.lock(); - _pop(n); - mMutex.unlock(); -} - -TheoraVideoFrame* TheoraFrameQueue::requestEmptyFrame() -{ - TheoraVideoFrame* frame = NULL; - mMutex.lock(); - foreach_l (TheoraVideoFrame*, mQueue) - { - if (!(*it)->mInUse) - { - (*it)->mInUse = 1; - (*it)->mReady = 0; - frame = (*it); - break; - } - } - mMutex.unlock(); - return frame; -} - -int TheoraFrameQueue::getUsedCount() -{ - mMutex.lock(); - int n=0; - foreach_l(TheoraVideoFrame*,mQueue) - if ((*it)->mInUse) ++n; - mMutex.unlock(); - return n; -} - -int TheoraFrameQueue::_getReadyCount() -{ - int n = 0; - foreach_l (TheoraVideoFrame*, mQueue) - if ((*it)->mReady) ++n; - return n; -} - - -int TheoraFrameQueue::getReadyCount() -{ - mMutex.lock(); - int n = _getReadyCount(); - mMutex.unlock(); - return n; -} - -bool TheoraFrameQueue::isFull() -{ - return getReadyCount() == mQueue.size(); -} - -void TheoraFrameQueue::lock() -{ - mMutex.lock(); -} - -void TheoraFrameQueue::unlock() -{ - mMutex.unlock(); -} - -std::list<TheoraVideoFrame*>& TheoraFrameQueue::_getFrameQueue() -{ - return mQueue; -} diff --git 
a/drivers/theoraplayer/src/TheoraTimer.cpp b/drivers/theoraplayer/src/TheoraTimer.cpp deleted file mode 100644 index 644d1c2ab7..0000000000 --- a/drivers/theoraplayer/src/TheoraTimer.cpp +++ /dev/null @@ -1,70 +0,0 @@ -/************************************************************************************ -This source file is part of the Theora Video Playback Library -For latest info, see http://libtheoraplayer.googlecode.com -************************************************************************************* -Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com) -This program is free software; you can redistribute it and/or modify it under -the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause -*************************************************************************************/ -#include "TheoraTimer.h" - -TheoraTimer::TheoraTimer() -{ - mTime = 0; - mPaused = 0; - mSpeed = 1.0f; -} - -TheoraTimer::~TheoraTimer() -{ - -} - -void TheoraTimer::update(float timeDelta) -{ - if (!isPaused()) - { - mTime += timeDelta * mSpeed; - } -} - -float TheoraTimer::getTime() -{ - return mTime; -} - -void TheoraTimer::pause() -{ - mPaused = true; -} - -void TheoraTimer::play() -{ - mPaused = false; -} - - -bool TheoraTimer::isPaused() -{ - return mPaused; -} - -void TheoraTimer::stop() -{ - -} - -void TheoraTimer::seek(float time) -{ - mTime = time; -} - -void TheoraTimer::setSpeed(float speed) -{ - mSpeed = speed; -} - -float TheoraTimer::getSpeed() -{ - return mSpeed; -} diff --git a/drivers/theoraplayer/src/TheoraUtil.cpp b/drivers/theoraplayer/src/TheoraUtil.cpp deleted file mode 100644 index 8f1ad0c9c1..0000000000 --- a/drivers/theoraplayer/src/TheoraUtil.cpp +++ /dev/null @@ -1,59 +0,0 @@ -/************************************************************************************ -This source file is part of the Theora Video Playback Library -For latest info, see http://libtheoraplayer.googlecode.com 
-************************************************************************************* -Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com) -This program is free software; you can redistribute it and/or modify it under -the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause -*************************************************************************************/ -#include <stdio.h> -#include <algorithm> -#include <math.h> -#include <map> -#ifndef _WIN32 -#include <unistd.h> -#include <pthread.h> -#endif - -#include "TheoraUtil.h" -#include "TheoraException.h" - -#ifdef _WIN32 -#include <windows.h> -#pragma warning( disable: 4996 ) // MSVC++ -#endif - -std::string str(int i) -{ - char s[32]; - sprintf(s, "%d", i); - return std::string(s); -} - -std::string strf(float i) -{ - char s[32]; - sprintf(s, "%.3f", i); - return std::string(s); -} - -void _psleep(int miliseconds) -{ -#ifdef _WIN32 -#ifndef _WINRT - Sleep(miliseconds); -#else - WaitForSingleObjectEx(GetCurrentThread(), miliseconds, 0); -#endif -#else - usleep(miliseconds * 1000); -#endif -} - - -int _nextPow2(int x) -{ - int y; - for (y = 1; y < x; y *= 2); - return y; -} diff --git a/drivers/theoraplayer/src/TheoraVideoClip.cpp b/drivers/theoraplayer/src/TheoraVideoClip.cpp deleted file mode 100644 index 16897ee80e..0000000000 --- a/drivers/theoraplayer/src/TheoraVideoClip.cpp +++ /dev/null @@ -1,496 +0,0 @@ -/************************************************************************************ -This source file is part of the Theora Video Playback Library -For latest info, see http://libtheoraplayer.googlecode.com -************************************************************************************* -Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com) -This program is free software; you can redistribute it and/or modify it under -the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause 
-*************************************************************************************/ -#include "TheoraVideoClip.h" -#include "TheoraVideoManager.h" -#include "TheoraVideoFrame.h" -#include "TheoraFrameQueue.h" -#include "TheoraAudioInterface.h" -#include "TheoraTimer.h" -#include "TheoraDataSource.h" -#include "TheoraUtil.h" -#include "TheoraException.h" - -#include "core/os/memory.h" - -TheoraVideoClip::TheoraVideoClip(TheoraDataSource* data_source, - TheoraOutputMode output_mode, - int nPrecachedFrames, - bool usePower2Stride): - mAudioInterface(NULL), - mNumDroppedFrames(0), - mNumDisplayedFrames(0), - mSeekFrame(-1), - mDuration(-1), - mNumFrames(-1), - mFPS(1), - mUseAlpha(0), - mFrameDuration(0), - mName(data_source->repr()), - mStride(usePower2Stride), - mSubFrameWidth(0), - mSubFrameHeight(0), - mSubFrameOffsetX(0), - mSubFrameOffsetY(0), - mAudioGain(1), - mRequestedOutputMode(output_mode), - mAutoRestart(0), - mEndOfFile(0), - mRestarted(0), - mIteration(0), - mPlaybackIteration(0), - mStream(0), - mThreadAccessCount(0), - mPriority(1), - mFirstFrameDisplayed(0), - mWaitingForCache(false), - mOutputMode(TH_UNDEFINED) -{ - - audio_track=0; - mAudioMutex = NULL; - mThreadAccessMutex = new TheoraMutex(); - mTimer = mDefaultTimer = new TheoraTimer(); - - mFrameQueue = NULL; - mAssignedWorkerThread = NULL; - mNumPrecachedFrames = nPrecachedFrames; - setOutputMode(output_mode); -} - -TheoraVideoClip::~TheoraVideoClip() -{ - // wait untill a worker thread is done decoding the frame - mThreadAccessMutex->lock(); - - delete mDefaultTimer; - - if (mStream) memdelete(mStream); - - if (mFrameQueue) delete mFrameQueue; - - if (mAudioInterface) - { - mAudioMutex->lock(); // ensure a thread isn't using this mutex - delete mAudioInterface; // notify audio interface it's time to call it a day - mAudioMutex ->unlock(); - delete mAudioMutex; - } - - mThreadAccessMutex->unlock(); - - delete mThreadAccessMutex; -} - -TheoraTimer* TheoraVideoClip::getTimer() -{ - return 
mTimer; -} - -void TheoraVideoClip::setTimer(TheoraTimer* timer) -{ - if (!timer) mTimer = mDefaultTimer; - else mTimer = timer; -} - -void TheoraVideoClip::resetFrameQueue() -{ - mFrameQueue->clear(); - mPlaybackIteration = mIteration = 0; -} - -void TheoraVideoClip::restart() -{ - mEndOfFile = true; //temp, to prevent threads to decode while restarting - mThreadAccessMutex->lock(); - _restart(); - mTimer->seek(0); - mFirstFrameDisplayed = false; - resetFrameQueue(); - mEndOfFile = false; - mRestarted = false; - mSeekFrame = -1; - mThreadAccessMutex->unlock(); -} - -void TheoraVideoClip::update(float timeDelta) -{ - if (mTimer->isPaused()) - { - mTimer->update(0); // update timer in case there is some code that needs to execute each frame - return; - } - float time = mTimer->getTime(), speed = mTimer->getSpeed(); - if (time + timeDelta * speed >= mDuration) - { - if (mAutoRestart && mRestarted) - { - float seekTime = time + timeDelta * speed; - for (;seekTime >= mDuration;) - { - seekTime -= mDuration; - ++mPlaybackIteration; - } - - mTimer->seek(seekTime); - } - else - { - if (time != mDuration) - { - mTimer->update((mDuration - time) / speed); - } - } - } - else - { - mTimer->update(timeDelta); - } -} - -float TheoraVideoClip::updateToNextFrame() -{ - TheoraVideoFrame* f = mFrameQueue->getFirstAvailableFrame(); - if (!f) return 0; - - float time = f->mTimeToDisplay - mTimer->getTime(); - update(time); - return time; -} - -TheoraFrameQueue* TheoraVideoClip::getFrameQueue() -{ - return mFrameQueue; -} - -void TheoraVideoClip::popFrame() -{ - ++mNumDisplayedFrames; - - // after transfering frame data to the texture, free the frame - // so it can be used again - if (!mFirstFrameDisplayed) - { - mFrameQueue->lock(); - mFrameQueue->_pop(1); - mFirstFrameDisplayed = true; - mFrameQueue->unlock(); - } - else - { - mFrameQueue->pop(); - } -} - -int TheoraVideoClip::getWidth() -{ - return mUseAlpha ? 
mWidth / 2 : mWidth; -} - -int TheoraVideoClip::getHeight() -{ - return mHeight; -} - -int TheoraVideoClip::getSubFrameWidth() -{ - return mUseAlpha ? mWidth / 2 : mSubFrameWidth; -} - -int TheoraVideoClip::getSubFrameHeight() -{ - return mUseAlpha ? mHeight : mSubFrameHeight; -} - -int TheoraVideoClip::getSubFrameOffsetX() -{ - return mUseAlpha ? 0 : mSubFrameOffsetX; -} - -int TheoraVideoClip::getSubFrameOffsetY() -{ - return mUseAlpha ? 0 : mSubFrameOffsetY; -} - -float TheoraVideoClip::getAbsPlaybackTime() -{ - return mTimer->getTime() + mPlaybackIteration * mDuration; -} - -int TheoraVideoClip::discardOutdatedFrames(float absTime) -{ - int nReady = mFrameQueue->_getReadyCount(); - // only drop frames if you have more frames to show. otherwise even the late frame will do.. - if (nReady == 1) return 0; - float time = absTime; - - int nPop = 0; - TheoraVideoFrame* frame; - float timeToDisplay; - - std::list<TheoraVideoFrame*>& queue = mFrameQueue->_getFrameQueue(); - foreach_l (TheoraVideoFrame*, queue) - { - frame = *it; - if (!frame->mReady) break; - timeToDisplay = frame->mTimeToDisplay + frame->mIteration * mDuration; - if (time > timeToDisplay + mFrameDuration) - { - ++nPop; - if (nReady - nPop == 1) break; // always leave at least one in the queue - } - else break; - } - - if (nPop > 0) - { -#define _DEBUG -#ifdef _DEBUG - std::string log = getName() + ": dropped frame "; - - int i = nPop; - foreach_l (TheoraVideoFrame*, queue) - { - log += str((int) (*it)->getFrameNumber()); - if (i-- > 1) - { - log += ", "; - } - else break; - } - th_writelog(log); -#endif - mNumDroppedFrames += nPop; - mFrameQueue->_pop(nPop); - } - - return nPop; -} - -TheoraVideoFrame* TheoraVideoClip::getNextFrame() -{ - TheoraVideoFrame* frame; - // if we are about to seek, then the current frame queue is invalidated - // (will be cleared when a worker thread does the actual seek) - if (mSeekFrame != -1) return NULL; - - mFrameQueue->lock(); - float time = getAbsPlaybackTime(); - 
discardOutdatedFrames(time); - - frame = mFrameQueue->_getFirstAvailableFrame(); - if (frame != NULL) - { - if (frame->mTimeToDisplay + frame->mIteration * mDuration > time && mFirstFrameDisplayed) - { - frame = NULL; // frame is ready but it's not yet time to display it, except when we haven't displayed any frames yet - } - } - - mFrameQueue->unlock(); - return frame; -} - -std::string TheoraVideoClip::getName() -{ - return mName; -} - -bool TheoraVideoClip::isBusy() -{ - return mAssignedWorkerThread || mOutputMode != mRequestedOutputMode; -} - -TheoraOutputMode TheoraVideoClip::getOutputMode() -{ - return mOutputMode; -} - -void TheoraVideoClip::setOutputMode(TheoraOutputMode mode) -{ - if (mode == TH_UNDEFINED) throw TheoraGenericException("Invalid output mode: TH_UNDEFINED for video: " + mName); - if (mOutputMode == mode) return; - mRequestedOutputMode = mode; - mUseAlpha = (mode == TH_RGBA || - mode == TH_ARGB || - mode == TH_BGRA || - mode == TH_ABGR || - mode == TH_GREY3A || - mode == TH_AGREY3 || - mode == TH_YUVA || - mode == TH_AYUV); - if (mAssignedWorkerThread) - { - mThreadAccessMutex->lock(); - // discard current frames and recreate them - mFrameQueue->setSize(mFrameQueue->getSize()); - mThreadAccessMutex->unlock(); - - } - mOutputMode = mRequestedOutputMode; -} - -float TheoraVideoClip::getTimePosition() -{ - return mTimer->getTime(); -} - -int TheoraVideoClip::getNumPrecachedFrames() -{ - return mFrameQueue->getSize(); -} - -void TheoraVideoClip::setNumPrecachedFrames(int n) -{ - if (mFrameQueue->getSize() != n) - mFrameQueue->setSize(n); -} - -int TheoraVideoClip::_getNumReadyFrames() -{ - if (mSeekFrame != -1) return 0; - return mFrameQueue->_getReadyCount(); -} - -int TheoraVideoClip::getNumReadyFrames() -{ - if (mSeekFrame != -1) return 0; // we are about to seek, consider frame queue empty even though it will be emptied upon seek - return mFrameQueue->getReadyCount(); -} - -float TheoraVideoClip::getDuration() -{ - return mDuration; -} - -float 
TheoraVideoClip::getFPS() -{ - return mFPS; -} - -void TheoraVideoClip::play() -{ - mTimer->play(); -} - -void TheoraVideoClip::pause() -{ - mTimer->pause(); -} - -bool TheoraVideoClip::isPaused() -{ - return mTimer->isPaused(); -} - -bool TheoraVideoClip::isDone() -{ - return mEndOfFile && !mFrameQueue->getFirstAvailableFrame(); -} - -void TheoraVideoClip::stop() -{ - pause(); - resetFrameQueue(); - mFirstFrameDisplayed = false; - seek(0); -} - -void TheoraVideoClip::setPlaybackSpeed(float speed) -{ - mTimer->setSpeed(speed); -} - -float TheoraVideoClip::getPlaybackSpeed() -{ - return mTimer->getSpeed(); -} - -void TheoraVideoClip::seek(float time) -{ - seekToFrame((int) (time * getFPS())); -} - -void TheoraVideoClip::seekToFrame(int frame) -{ - if (frame < 0) mSeekFrame = 0; - else if (frame > mNumFrames) mSeekFrame = mNumFrames; - else mSeekFrame = frame; - - mFirstFrameDisplayed = false; - mEndOfFile = false; -} - -void TheoraVideoClip::waitForCache(float desired_cache_factor, float max_wait_time) -{ - mWaitingForCache = true; - bool paused = mTimer->isPaused(); - if (!paused) mTimer->pause(); - int elapsed = 0; - int desired_num_precached_frames = (int) (desired_cache_factor * getNumPrecachedFrames()); - while (getNumReadyFrames() < desired_num_precached_frames) - { - _psleep(10); - elapsed += 10; - if (elapsed >= max_wait_time * 1000) break; - } - if (!paused) mTimer->play(); - mWaitingForCache = false; -} - -float TheoraVideoClip::getPriority() -{ - return mPriority; -} - -void TheoraVideoClip::setPriority(float priority) -{ - mPriority = priority; -} - -float TheoraVideoClip::getPriorityIndex() -{ - float priority = (float) getNumReadyFrames(); - if (mTimer->isPaused()) priority += getNumPrecachedFrames() / 2; - - return priority; -} - -void TheoraVideoClip::setAudioInterface(TheoraAudioInterface* iface) -{ - mAudioInterface = iface; - if (iface && !mAudioMutex) mAudioMutex = new TheoraMutex; - if (!iface && mAudioMutex) - { - delete mAudioMutex; - 
mAudioMutex = NULL; - } -} - -TheoraAudioInterface* TheoraVideoClip::getAudioInterface() -{ - return mAudioInterface; -} - -void TheoraVideoClip::setAudioGain(float gain) -{ - if (gain > 1) mAudioGain=1; - if (gain < 0) mAudioGain=0; - else mAudioGain=gain; -} - -float TheoraVideoClip::getAudioGain() -{ - return mAudioGain; -} - -void TheoraVideoClip::setAutoRestart(bool value) -{ - mAutoRestart = value; - if (value) mEndOfFile = false; -} diff --git a/drivers/theoraplayer/src/TheoraVideoFrame.cpp b/drivers/theoraplayer/src/TheoraVideoFrame.cpp deleted file mode 100644 index b70253dabf..0000000000 --- a/drivers/theoraplayer/src/TheoraVideoFrame.cpp +++ /dev/null @@ -1,159 +0,0 @@ -/************************************************************************************ -This source file is part of the Theora Video Playback Library -For latest info, see http://libtheoraplayer.googlecode.com -************************************************************************************* -Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com) -This program is free software; you can redistribute it and/or modify it under -the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause -*************************************************************************************/ -#include <memory.h> -#include "TheoraPixelTransform.h" -#include "TheoraVideoClip.h" -#include "TheoraVideoFrame.h" -#include "TheoraVideoManager.h" - -//#define YUV_TEST // uncomment this if you want to benchmark YUV decoding functions - -extern "C" -{ -void decodeRGB (struct TheoraPixelTransform* t); -void decodeRGBA (struct TheoraPixelTransform* t); -void decodeRGBX (struct TheoraPixelTransform* t); -void decodeARGB (struct TheoraPixelTransform* t); -void decodeXRGB (struct TheoraPixelTransform* t); -void decodeBGR (struct TheoraPixelTransform* t); -void decodeBGRA (struct TheoraPixelTransform* t); -void decodeBGRX (struct TheoraPixelTransform* t); -void decodeABGR (struct TheoraPixelTransform* t); 
-void decodeXBGR (struct TheoraPixelTransform* t); -void decodeGrey (struct TheoraPixelTransform* t); -void decodeGrey3(struct TheoraPixelTransform* t); -void decodeGreyA(struct TheoraPixelTransform* t); -void decodeGreyX(struct TheoraPixelTransform* t); -void decodeAGrey(struct TheoraPixelTransform* t); -void decodeXGrey(struct TheoraPixelTransform* t); -void decodeYUV (struct TheoraPixelTransform* t); -void decodeYUVA (struct TheoraPixelTransform* t); -void decodeYUVX (struct TheoraPixelTransform* t); -void decodeAYUV (struct TheoraPixelTransform* t); -void decodeXYUV (struct TheoraPixelTransform* t); -} - -static void (*conversion_functions[])(struct TheoraPixelTransform*) = {0, - decodeRGB, - decodeRGBA, - decodeRGBX, - decodeARGB, - decodeXRGB, - decodeBGR, - decodeBGRA, - decodeBGRX, - decodeABGR, - decodeXBGR, - decodeGrey, - decodeGrey3, - decodeGreyA, - decodeGreyX, - decodeAGrey, - decodeXGrey, - decodeYUV, - decodeYUVA, - decodeYUVX, - decodeAYUV, - decodeXYUV -}; - -TheoraVideoFrame::TheoraVideoFrame(TheoraVideoClip* parent) -{ - mReady = mInUse = false; - mParent = parent; - mIteration = 0; - // number of bytes based on output mode - int bytemap[]={0, 3, 4, 4, 4, 4, 3, 4, 4, 4, 4, 1, 3, 4, 4, 4, 4, 3, 4, 4, 4, 4}; - mBpp = bytemap[mParent->getOutputMode()]; - unsigned int size = mParent->getStride() * mParent->mHeight * mBpp; - try - { - mBuffer = new unsigned char[size]; - } - catch (std::bad_alloc) - { - mBuffer = NULL; - return; - } - memset(mBuffer, 255, size); -} - -TheoraVideoFrame::~TheoraVideoFrame() -{ - if (mBuffer) delete [] mBuffer; -} - -int TheoraVideoFrame::getWidth() -{ - return mParent->getWidth(); -} - -int TheoraVideoFrame::getStride() -{ - return mParent->mStride; -} - -int TheoraVideoFrame::getHeight() -{ - return mParent->getHeight(); -} - -unsigned char* TheoraVideoFrame::getBuffer() -{ - return mBuffer; -} - -void TheoraVideoFrame::decode(struct TheoraPixelTransform* t) -{ - if (t->raw != NULL) - { - int bufferStride = 
mParent->getWidth() * mBpp; - if (bufferStride == t->rawStride) - { - memcpy(mBuffer, t->raw, t->rawStride * mParent->getHeight()); - } - else - { - unsigned char *buff = mBuffer, *src = t->raw; - int i, h = mParent->getHeight(); - for (i = 0; i < h; ++i, buff += bufferStride, src += t->rawStride) - { - memcpy(buff, src, bufferStride); - } - } - } - else - { - t->out = mBuffer; - t->w = mParent->getWidth(); - t->h = mParent->getHeight(); - -#ifdef YUV_TEST // when benchmarking yuv conversion functions during development, do a timed average - #define N 1000 - clock_t time = clock(); - for (int i = 0; i < N; ++i) - { - conversion_functions[mParent->getOutputMode()](t); - } - float diff = (clock() - time) * 1000.0f / CLOCKS_PER_SEC; - - char s[128]; - sprintf(s, "%.2f", diff / N); - TheoraVideoManager::getSingleton().logMessage("YUV Decoding time: " + std::string(s) + " ms\n"); -#else - conversion_functions[mParent->getOutputMode()](t); -#endif - } - mReady = true; -} - -void TheoraVideoFrame::clear() -{ - mInUse = mReady = false; -} diff --git a/drivers/theoraplayer/src/TheoraVideoManager.cpp b/drivers/theoraplayer/src/TheoraVideoManager.cpp deleted file mode 100644 index 53b211374a..0000000000 --- a/drivers/theoraplayer/src/TheoraVideoManager.cpp +++ /dev/null @@ -1,485 +0,0 @@ -/************************************************************************************ -This source file is part of the Theora Video Playback Library -For latest info, see http://libtheoraplayer.googlecode.com -************************************************************************************* -Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com) -This program is free software; you can redistribute it and/or modify it under -the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause -*************************************************************************************/ -#include "TheoraVideoManager.h" -#include "TheoraWorkerThread.h" -#include "TheoraVideoClip.h" 
-#include "TheoraFrameQueue.h" -#include "TheoraAudioInterface.h" -#include "TheoraUtil.h" -#include "TheoraDataSource.h" -#include "TheoraException.h" -#ifdef __THEORA - #include <theora/codec.h> - #include <vorbis/codec.h> - #include "TheoraVideoClip_Theora.h" -#endif -#ifdef __AVFOUNDATION - #include "TheoraVideoClip_AVFoundation.h" -#endif -#ifdef __FFMPEG - #include "TheoraVideoClip_FFmpeg.h" -#endif -#ifdef _ANDROID //libtheoraplayer addition for cpu feature detection - #include "cpu-features.h" -#endif -// declaring function prototype here so I don't have to put it in a header file -// it only needs to be used by this plugin and called once -extern "C" -{ - void initYUVConversionModule(); -} - -#include "core/os/memory.h" - -//#define _DECODING_BENCHMARK //uncomment to test average decoding time on a given device - - -// -------------------------- -//#define _SCHEDULING_DEBUG -#ifdef _SCHEDULING_DEBUG -float gThreadDiagnosticTimer = 0; -#endif -// -------------------------- - -#ifdef _DECODING_BENCHMARK -void benchmark(TheoraVideoClip* clip) -{ - int nPrecached = 256; - int n = nPrecached; - char msg[1024]; - clock_t t = clock(); - while (n > 0) - { - clip->waitForCache(1.0f, 1000000); - n -= 32; - clip->getFrameQueue()->clear(); - } - float diff = ((float) (clock() - t) * 1000.0f) / CLOCKS_PER_SEC; - sprintf(msg, "BENCHMARK: %s: Decoding %d frames took %.1fms (%.2fms average per frame)\n",clip->getName().c_str(), nPrecached, diff, diff / nPrecached); - TheoraVideoManager::getSingleton().logMessage(msg); - clip->seek(0); -} -#endif - -struct TheoraWorkCandidate -{ - TheoraVideoClip* clip; - float priority, queuedTime, workTime, entitledTime; -}; - -TheoraVideoManager* g_ManagerSingleton = NULL; - -void theora_writelog(std::string output) -{ - printf("%s\n", output.c_str()); -} - -void (*g_LogFuction)(std::string) = theora_writelog; - -void TheoraVideoManager::setLogFunction(void (*fn)(std::string)) -{ - g_LogFuction = fn; -} - -TheoraVideoManager* 
TheoraVideoManager::getSingletonPtr() -{ - return g_ManagerSingleton; -} - -TheoraVideoManager& TheoraVideoManager::getSingleton() -{ - return *g_ManagerSingleton; -} - -TheoraVideoManager::TheoraVideoManager(int num_worker_threads) : - mDefaultNumPrecachedFrames(8) -{ - if (num_worker_threads < 1) throw TheoraGenericException("Unable to create TheoraVideoManager, at least one worker thread is reqired"); - - g_ManagerSingleton = this; - - std::string msg = "Initializing Theora Playback Library (" + getVersionString() + ")\n"; -#ifdef __THEORA - msg += " - libtheora version: " + std::string(th_version_string()) + "\n" + - " - libvorbis version: " + std::string(vorbis_version_string()) + "\n"; -#endif -#ifdef _ANDROID - uint64_t features = android_getCpuFeaturesExt(); - char s[128]; - sprintf(s, " - Android: CPU Features: %u\n", (unsigned int) features); - msg += s; - if ((features & ANDROID_CPU_ARM_FEATURE_NEON) == 0) - msg += " - Android: NEON features NOT SUPPORTED by CPU\n"; - else - msg += " - Android: Detected NEON CPU features\n"; -#endif - -#ifdef __AVFOUNDATION - msg += " - using Apple AVFoundation classes.\n"; -#endif -#ifdef __FFMPEG - msg += " - using FFmpeg library.\n"; -#endif - - logMessage(msg + "------------------------------------"); - mAudioFactory = NULL; - mWorkMutex = new TheoraMutex(); - - // for CPU based yuv2rgb decoding - initYUVConversionModule(); - - createWorkerThreads(num_worker_threads); -} - -TheoraVideoManager::~TheoraVideoManager() -{ - destroyWorkerThreads(); - - mWorkMutex->lock(); - ClipList::iterator ci; - for (ci = mClips.begin(); ci != mClips.end(); ++ci) - delete (*ci); - mClips.clear(); - mWorkMutex->unlock(); - delete mWorkMutex; -} - -void TheoraVideoManager::logMessage(std::string msg) -{ - g_LogFuction(msg); -} - -TheoraVideoClip* TheoraVideoManager::getVideoClipByName(std::string name) -{ - TheoraVideoClip* clip = NULL; - mWorkMutex->lock(); - - foreach(TheoraVideoClip*, mClips) - { - if ((*it)->getName() == name) - { - 
clip = *it; - break; - } - } - mWorkMutex->unlock(); - - return clip; -} - -void TheoraVideoManager::setAudioInterfaceFactory(TheoraAudioInterfaceFactory* factory) -{ - mAudioFactory = factory; -} - -TheoraAudioInterfaceFactory* TheoraVideoManager::getAudioInterfaceFactory() -{ - return mAudioFactory; -} - -TheoraVideoClip* TheoraVideoManager::createVideoClip(std::string filename, - TheoraOutputMode output_mode, - int numPrecachedOverride, - bool usePower2Stride, - int p_track) -{ - TheoraDataSource* src=memnew(TheoraFileDataSource(filename)); - return createVideoClip(src,output_mode,numPrecachedOverride,usePower2Stride, p_track); -} - -TheoraVideoClip* TheoraVideoManager::createVideoClip(TheoraDataSource* data_source, - TheoraOutputMode output_mode, - int numPrecachedOverride, - bool usePower2Stride, - int p_audio_track) -{ - mWorkMutex->lock(); - - TheoraVideoClip* clip = NULL; - int nPrecached = numPrecachedOverride ? numPrecachedOverride : mDefaultNumPrecachedFrames; - logMessage("Creating video from data source: " + data_source->repr() + " [" + str(nPrecached) + " precached frames]."); - -#ifdef __AVFOUNDATION - TheoraFileDataSource* fileDataSource = dynamic_cast<TheoraFileDataSource*>(data_source); - std::string filename; - if (fileDataSource == NULL) - { - TheoraMemoryFileDataSource* memoryDataSource = dynamic_cast<TheoraMemoryFileDataSource*>(data_source); - if (memoryDataSource != NULL) filename = memoryDataSource->getFilename(); - // if the user has his own data source, it's going to be a problem for AVAssetReader since it only supports reading from files... 
- } - else filename = fileDataSource->getFilename(); - - if (filename.size() > 4 && filename.substr(filename.size() - 4, filename.size()) == ".mp4") - { - clip = new TheoraVideoClip_AVFoundation(data_source, output_mode, nPrecached, usePower2Stride); - } -#endif -#if defined(__AVFOUNDATION) && defined(__THEORA) - else -#endif -#ifdef __THEORA - clip = new TheoraVideoClip_Theora(data_source, output_mode, nPrecached, usePower2Stride); -#endif -#ifdef __FFMPEG - clip = new TheoraVideoClip_FFmpeg(data_source, output_mode, nPrecached, usePower2Stride); -#endif - - clip->set_audio_track(p_audio_track); - clip->load(data_source); - clip->decodeNextFrame(); // ensure the first frame is always preloaded and have the main thread do it to prevent potential thread starvatio - - mClips.push_back(clip); - mWorkMutex->unlock(); - -#ifdef _DECODING_BENCHMARK - benchmark(clip); -#endif - return clip; -} - -void TheoraVideoManager::destroyVideoClip(TheoraVideoClip* clip) -{ - if (clip) - { - th_writelog("Destroying video clip: " + clip->getName()); - mWorkMutex->lock(); - bool reported = 0; - while (clip->mAssignedWorkerThread) - { - if (!reported) - { - th_writelog(" - Waiting for WorkerThread to finish decoding in order to destroy"); - reported = 1; - } - _psleep(1); - } - if (reported) th_writelog(" - WorkerThread done, destroying..."); - - // erase the clip from the clip list - foreach (TheoraVideoClip*, mClips) - { - if ((*it) == clip) - { - mClips.erase(it); - break; - } - } - // remove all it's references from the work log - mWorkLog.remove(clip); - - // delete the actual clip - delete clip; -#ifdef _DEBUG - th_writelog("Destroyed video."); -#endif - mWorkMutex->unlock(); - } -} - -TheoraVideoClip* TheoraVideoManager::requestWork(TheoraWorkerThread* caller) -{ - if (!mWorkMutex) return NULL; - mWorkMutex->lock(); - - TheoraVideoClip* selectedClip = NULL; - float maxQueuedTime = 0, totalAccessCount = 0, prioritySum = 0, diff, maxDiff = -1; - int nReadyFrames; - 
std::vector<TheoraWorkCandidate> candidates; - TheoraVideoClip* clip; - TheoraWorkCandidate candidate; - - // first pass is for playing videos, but if no such videos are available for decoding - // paused videos are selected in the second pass. - // Note that paused videos that are waiting for cache are considered equal to playing - // videos in the scheduling context - - for (int i = 0; i < 2 && candidates.size() == 0; ++i) - { - foreach (TheoraVideoClip*, mClips) - { - clip = *it; - if (clip->isBusy() || (i == 0 && clip->isPaused() && !clip->mWaitingForCache)) continue; - nReadyFrames = clip->getNumReadyFrames(); - if (nReadyFrames == clip->getFrameQueue()->getSize()) continue; - - candidate.clip = clip; - candidate.priority = clip->getPriority(); - candidate.queuedTime = (float) nReadyFrames / (clip->getFPS() * clip->getPlaybackSpeed()); - candidate.workTime = (float) clip->mThreadAccessCount; - - totalAccessCount += candidate.workTime; - if (maxQueuedTime < candidate.queuedTime) maxQueuedTime = candidate.queuedTime; - - candidates.push_back(candidate); - } - } - - // prevent division by zero - if (totalAccessCount == 0) totalAccessCount = 1; - if (maxQueuedTime == 0) maxQueuedTime = 1; - - // normalize candidate values - foreach (TheoraWorkCandidate, candidates) - { - it->workTime /= totalAccessCount; - // adjust user priorities to favor clips that have fewer frames queued - it->priority *= 1.0f - (it->queuedTime / maxQueuedTime) * 0.5f; - prioritySum += it->priority; - } - foreach (TheoraWorkCandidate, candidates) - { - it->entitledTime = it->priority / prioritySum; - } - - // now, based on how much access time has been given to each clip in the work log - // and how much time should be given to each clip based on calculated priorities, - // we choose a best suited clip for this worker thread to decode next - foreach (TheoraWorkCandidate, candidates) - { - diff = it->entitledTime - it->workTime; - - if (maxDiff < diff) - { - maxDiff = diff; - selectedClip = 
it->clip; - } - } - - if (selectedClip) - { - selectedClip->mAssignedWorkerThread = caller; - - int nClips = (int) mClips.size(); - unsigned int maxWorkLogSize = (nClips - 1) * 50; - - if (nClips > 1) - { - mWorkLog.push_front(selectedClip); - ++selectedClip->mThreadAccessCount; - } - - TheoraVideoClip* c; - while (mWorkLog.size() > maxWorkLogSize) - { - c = mWorkLog.back(); - mWorkLog.pop_back(); - c->mThreadAccessCount--; - } -#ifdef _SCHEDULING_DEBUG - if (mClips.size() > 1) - { - int accessCount = mWorkLog.size(); - if (gThreadDiagnosticTimer > 2.0f) - { - gThreadDiagnosticTimer = 0; - std::string logstr = "-----\nTheora Playback Library debug CPU time analysis (" + str(accessCount) + "):\n"; - int percent; - foreach (TheoraVideoClip*, mClips) - { - percent = ((float) (*it)->mThreadAccessCount / mWorkLog.size()) * 100.0f; - logstr += (*it)->getName() + " (" + str((*it)->getPriority()) + "): " + str((*it)->mThreadAccessCount) + ", " + str(percent) + "%\n"; - } - logstr += "-----"; - th_writelog(logstr); - } - } -#endif - } - - mWorkMutex->unlock(); - return selectedClip; -} - -void TheoraVideoManager::update(float timeDelta) -{ - mWorkMutex->lock(); - foreach (TheoraVideoClip*, mClips) - { - (*it)->update(timeDelta); - (*it)->decodedAudioCheck(); - } - mWorkMutex->unlock(); -#ifdef _SCHEDULING_DEBUG - gThreadDiagnosticTimer += timeDelta; -#endif -} - -int TheoraVideoManager::getNumWorkerThreads() -{ - return (int) mWorkerThreads.size(); -} - -void TheoraVideoManager::createWorkerThreads(int n) -{ - TheoraWorkerThread* t; - for (int i=0;i<n;++i) - { - t=new TheoraWorkerThread(); - t->start(); - mWorkerThreads.push_back(t); - } -} - -void TheoraVideoManager::destroyWorkerThreads() -{ - foreach(TheoraWorkerThread*,mWorkerThreads) - { - (*it)->join(); - delete (*it); - } - mWorkerThreads.clear(); -} - -void TheoraVideoManager::setNumWorkerThreads(int n) -{ - if (n == getNumWorkerThreads()) return; - if (n < 1) throw TheoraGenericException("Unable to change the 
number of worker threads in TheoraVideoManager, at least one worker thread is reqired"); - - th_writelog("changing number of worker threats to: "+str(n)); - - destroyWorkerThreads(); - createWorkerThreads(n); -} - -std::string TheoraVideoManager::getVersionString() -{ - int a, b, c; - getVersion(&a, &b, &c); - std::string out = str(a) + "." + str(b); - if (c != 0) - { - if (c < 0) out += " RC" + str(-c); - else out += "." + str(c); - } - return out; -} - -void TheoraVideoManager::getVersion(int* a, int* b, int* c) // TODO, return a struct instead of the current solution. -{ - *a = 1; - *b = 1; - *c = 0; -} - -std::vector<std::string> TheoraVideoManager::getSupportedDecoders() -{ - std::vector<std::string> lst; -#ifdef __THEORA - lst.push_back("Theora"); -#endif -#ifdef __AVFOUNDATION - lst.push_back("AVFoundation"); -#endif -#ifdef __FFMPEG - lst.push_back("FFmpeg"); -#endif - - return lst; -} diff --git a/drivers/theoraplayer/src/TheoraWorkerThread.cpp b/drivers/theoraplayer/src/TheoraWorkerThread.cpp deleted file mode 100644 index cef8545b8d..0000000000 --- a/drivers/theoraplayer/src/TheoraWorkerThread.cpp +++ /dev/null @@ -1,49 +0,0 @@ -/************************************************************************************ -This source file is part of the Theora Video Playback Library -For latest info, see http://libtheoraplayer.googlecode.com -************************************************************************************* -Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com) -This program is free software; you can redistribute it and/or modify it under -the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause -*************************************************************************************/ -#ifdef _WIN32 -#pragma warning( disable: 4251 ) // MSVC++ -#endif -#include "TheoraWorkerThread.h" -#include "TheoraVideoManager.h" -#include "TheoraVideoClip.h" -#include "TheoraUtil.h" - -TheoraWorkerThread::TheoraWorkerThread() : 
TheoraThread() -{ - mClip = NULL; -} - -TheoraWorkerThread::~TheoraWorkerThread() -{ - -} - -void TheoraWorkerThread::execute() -{ - while (isRunning()) - { - mClip = TheoraVideoManager::getSingleton().requestWork(this); - if (!mClip) - { - _psleep(100); - continue; - } - - mClip->mThreadAccessMutex->lock(); - // if user requested seeking, do that then. - if (mClip->mSeekFrame >= 0) mClip->doSeek(); - - if (!mClip->decodeNextFrame()) - _psleep(1); // this happens when the video frame queue is full. - - mClip->mAssignedWorkerThread = NULL; - mClip->mThreadAccessMutex->unlock(); - mClip = NULL; - } -} diff --git a/drivers/theoraplayer/src/YUV/C/yuv420_grey_c.c b/drivers/theoraplayer/src/YUV/C/yuv420_grey_c.c deleted file mode 100644 index 8af5dd1f58..0000000000 --- a/drivers/theoraplayer/src/YUV/C/yuv420_grey_c.c +++ /dev/null @@ -1,56 +0,0 @@ -/************************************************************************************ -This source file is part of the Theora Video Playback Library -For latest info, see http://libtheoraplayer.googlecode.com -************************************************************************************* -Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com) -This program is free software; you can redistribute it and/or modify it under -the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause -*************************************************************************************/ -#include "yuv_util.h" - -static void _decodeGrey3(struct TheoraPixelTransform* t, int stride, int nBytes) -{ - unsigned char *ySrc = t->y, *yLineEnd, *out = t->out; - unsigned int y; - for (y = 0; y < t->h; ++y, ySrc += t->yStride - t->w, out += stride-t->w * nBytes) - for (yLineEnd = ySrc + t->w; ySrc != yLineEnd; ++ySrc, out += nBytes) - out[0] = out[1] = out[2] = *ySrc; -} - -void decodeGrey(struct TheoraPixelTransform* t) -{ - unsigned char *ySrc = t->y, *yLineEnd, *out = t->out; - unsigned int y; - for (y = 0; y < t->h; ++y, ySrc += 
t->yStride - t->w) - for (yLineEnd = ySrc + t->w; ySrc != yLineEnd; ++ySrc, ++out) - *out = *ySrc; - -} - -void decodeGrey3(struct TheoraPixelTransform* t) -{ - _decodeGrey3(t, t->w * 3, 3); -} - -void decodeGreyA(struct TheoraPixelTransform* t) -{ - _decodeGrey3(t, t->w * 4, 4); - _decodeAlpha(incOut(t, 3), t->w * 4); -} - -void decodeGreyX(struct TheoraPixelTransform* t) -{ - _decodeGrey3(t, t->w * 4, 4); -} - -void decodeAGrey(struct TheoraPixelTransform* t) -{ - _decodeGrey3(incOut(t, 1), t->w * 4, 4); - _decodeAlpha(t, t->w * 4); -} - -void decodeXGrey(struct TheoraPixelTransform* t) -{ - _decodeGrey3(incOut(t, 1), t->w * 4, 4); -} - diff --git a/drivers/theoraplayer/src/YUV/C/yuv420_rgb_c.c b/drivers/theoraplayer/src/YUV/C/yuv420_rgb_c.c deleted file mode 100644 index e981e75ead..0000000000 --- a/drivers/theoraplayer/src/YUV/C/yuv420_rgb_c.c +++ /dev/null @@ -1,358 +0,0 @@ -/************************************************************************************ -This source file is part of the Theora Video Playback Library -For latest info, see http://libtheoraplayer.googlecode.com -************************************************************************************* -Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com) -This program is free software; you can redistribute it and/or modify it under -the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause -*************************************************************************************/ -#ifdef _YUV_C -#include "yuv_util.h" - -int YTable [256]; -int BUTable[256]; -int GUTable[256]; -int GVTable[256]; -int RVTable[256]; - -#define CLIP_RGB_COLOR(dst, x) \ - tmp = (x) >> 13;\ - if ((tmp & ~0xFF) == 0) dst = tmp;\ - else dst = (-tmp) >> 31; - -#define _decodeRGB(t, stride, nBytes, maxWidth, i1, i2, i3, j1, j2, j3)\ - register int tmp;\ - int nBytes2 = nBytes * 2, cv, cu, rgbY1, rgbY2, rgbY3, rgbY4, rV, gUV, bU, width = maxWidth == 0 ? 
t->w : maxWidth;\ - unsigned int y;\ - unsigned char *ySrcEven, *ySrcOdd, *yLineEnd, *uSrc, *vSrc, *out1, *out2;\ - \ - for (y = 0; y < t->h; y += 2)\ - {\ - ySrcEven = t->y + y * t->yStride;\ - ySrcOdd = t->y + (y + 1) * t->yStride;\ - uSrc = t->u + y * t->uStride / 2;\ - vSrc = t->v + y * t->vStride / 2;\ - out1 = t->out + y * stride;\ - out2 = t->out + (y + 1) * stride;\ - \ - for (yLineEnd = ySrcEven + width; ySrcEven != yLineEnd;)\ - {\ - cu = *uSrc; ++uSrc;\ - cv = *vSrc; ++vSrc;\ - rV = RVTable[cv];\ - gUV = GUTable[cu] + GVTable[cv];\ - bU = BUTable[cu];\ - \ - rgbY1 = YTable[*ySrcEven]; ++ySrcEven;\ - rgbY2 = YTable[*ySrcOdd]; ++ySrcOdd;\ - rgbY3 = YTable[*ySrcEven]; ++ySrcEven;\ - rgbY4 = YTable[*ySrcOdd]; ++ySrcOdd;\ - \ - CLIP_RGB_COLOR(out1[i1], rgbY1 + rV );\ - CLIP_RGB_COLOR(out1[i2], rgbY1 - gUV);\ - CLIP_RGB_COLOR(out1[i3], rgbY1 + bU );\ - \ - CLIP_RGB_COLOR(out2[i1], rgbY2 + rV );\ - CLIP_RGB_COLOR(out2[i2], rgbY2 - gUV);\ - CLIP_RGB_COLOR(out2[i3], rgbY2 + bU );\ - \ - CLIP_RGB_COLOR(out1[j1], rgbY3 + rV );\ - CLIP_RGB_COLOR(out1[j2], rgbY3 - gUV);\ - CLIP_RGB_COLOR(out1[j3], rgbY3 + bU );\ - \ - CLIP_RGB_COLOR(out2[j1], rgbY4 + rV );\ - CLIP_RGB_COLOR(out2[j2], rgbY4 - gUV);\ - CLIP_RGB_COLOR(out2[j3], rgbY4 + bU );\ - \ - out1 += nBytes2; out2 += nBytes2;\ - }\ - } - -// The 'trick' with this function is that it skips decoding YUV pixels if the alpha value is 0, thus improving the decoding speed of a frame -#define _decodeRGBA(t, stride, nBytes, maxWidth, i1, i2, i3, j1, j2, j3, aindex1, aindex2)\ -\ - register int tmp;\ - int nBytes2 = nBytes * 2, cv, cu, rgbY1, rgbY2, rgbY3, rgbY4, a1, a2, a3, a4, rV, gUV, bU, width = maxWidth == 0 ? 
t->w : maxWidth;\ - int alphaStride = t->w;\ - unsigned int y;\ - unsigned char *ySrcEven, *ySrcOdd, *yLineEnd, *uSrc, *vSrc, *out1, *out2;\ - \ - for (y = 0; y < t->h; y += 2)\ - {\ - ySrcEven = t->y + y * t->yStride;\ - ySrcOdd = t->y + (y + 1) * t->yStride;\ - uSrc = t->u + y * t->uStride / 2;\ - vSrc = t->v + y * t->vStride / 2;\ - out1 = t->out + y * stride;\ - out2 = t->out + (y + 1) * stride;\ - \ - for (yLineEnd = ySrcEven + width; ySrcEven != yLineEnd;)\ - {\ - cu = *uSrc; ++uSrc;\ - cv = *vSrc; ++vSrc;\ - rV = RVTable[cv];\ - gUV = GUTable[cu] + GVTable[cv];\ - bU = BUTable[cu];\ - \ - rgbY1 = YTable[*ySrcEven]; a1 = ySrcEven[alphaStride]; ++ySrcEven;\ - rgbY2 = YTable[*ySrcOdd]; a2 = ySrcOdd [alphaStride]; ++ySrcOdd;\ - rgbY3 = YTable[*ySrcEven]; a3 = ySrcEven[alphaStride]; ++ySrcEven;\ - rgbY4 = YTable[*ySrcOdd]; a4 = ySrcOdd [alphaStride]; ++ySrcOdd;\ - \ - if (a1 > 16)\ - {\ - CLIP_RGB_COLOR(out1[i1], rgbY1 + rV );\ - CLIP_RGB_COLOR(out1[i2], rgbY1 - gUV);\ - CLIP_RGB_COLOR(out1[i3], rgbY1 + bU );\ - out1[aindex1] = a1 >= 235 ? 255 : (unsigned char) (((a1 - 16) * 255) / 219);\ - }\ - else *((unsigned int*) out1) = 0;\ - \ - if (a2 > 16)\ - {\ - CLIP_RGB_COLOR(out2[i1], rgbY2 + rV );\ - CLIP_RGB_COLOR(out2[i2], rgbY2 - gUV);\ - CLIP_RGB_COLOR(out2[i3], rgbY2 + bU );\ - out2[aindex1] = a2 >= 235 ? 255 : (unsigned char) (((a2 - 16) * 255) / 219);\ - }\ - else *((unsigned int*) out2) = 0;\ - \ - if (a3 > 16)\ - {\ - CLIP_RGB_COLOR(out1[j1], rgbY3 + rV );\ - CLIP_RGB_COLOR(out1[j2], rgbY3 - gUV);\ - CLIP_RGB_COLOR(out1[j3], rgbY3 + bU );\ - out1[aindex2] = a3 >= 235 ? 255 : (unsigned char) (((a3 - 16) * 255) / 219);\ - }\ - else *((unsigned int*) &out1[4]) = 0;\ - \ - if (a4 > 16)\ - {\ - CLIP_RGB_COLOR(out2[j1], rgbY4 + rV );\ - CLIP_RGB_COLOR(out2[j2], rgbY4 - gUV);\ - CLIP_RGB_COLOR(out2[j3], rgbY4 + bU );\ - out2[aindex2] = a4 >= 235 ? 
255 : (unsigned char) (((a4 - 16) * 255) / 219);\ - }\ - else *((unsigned int*) &out2[4]) = 0;\ - \ - out1 += nBytes2; out2 += nBytes2;\ - }\ - }\ - -void decodeRGB(struct TheoraPixelTransform* t) -{ - _decodeRGB(t, t->w * 3, 3, 0, 0, 1, 2, 3, 4, 5); -} - -void decodeRGBA(struct TheoraPixelTransform* t) -{ - _decodeRGBA(t, t->w * 4, 4, 0, 0, 1, 2, 4, 5, 6, 3, 7); -// This is the old 2-phase version, leaving it here in case more debugging is needed -// _decodeRGB(t, t->w * 4, 4, 0, 0, 1, 2, 4, 5, 6); -// _decodeAlpha(incOut(t, 3), t->w * 4); -} - -void decodeRGBX(struct TheoraPixelTransform* t) -{ - _decodeRGB(t, t->w * 4, 4, 0, 0, 1, 2, 4, 5, 6); -} - -void decodeARGB(struct TheoraPixelTransform* t) -{ - _decodeRGBA(t, t->w * 4, 4, 0, 1, 2, 3, 5, 6, 7, 0, 4); -// This is the old 2-phase version, leaving it here in case more debugging is needed -// _decodeRGB(t, t->w * 4, 4, 0, 1, 2, 3, 5, 6, 7); -// _decodeAlpha(t, t->w * 4); -} - -void decodeXRGB(struct TheoraPixelTransform* t) -{ - _decodeRGB(t, t->w * 4, 4, 0, 1, 2, 3, 5, 6, 7); -} - -void decodeBGR(struct TheoraPixelTransform* t) -{ - _decodeRGB(t, t->w * 3, 3, 0, 2, 1, 0, 5, 4, 3); -} - -void decodeBGRA(struct TheoraPixelTransform* t) -{ - _decodeRGBA(t, t->w * 4, 4, 0, 2, 1, 0, 6, 5, 4, 3, 7); -// This is the old 2-phase version, leaving it here in case more debugging is needed -// _decodeRGB(t, t->w * 4, 4, 0, 2, 1, 0, 6, 5, 4); -// _decodeAlpha(incOut(t, 3), t->w * 4); -} - -void decodeBGRX(struct TheoraPixelTransform* t) -{ - _decodeRGB(t, t->w * 4, 4, 0, 2, 1, 0, 6, 5, 4); -} - -void decodeABGR(struct TheoraPixelTransform* t) -{ - _decodeRGBA(t, t->w * 4, 4, 0, 3, 2, 1, 7, 6, 5, 0, 4); -// This is the old 2-phase version, leaving it here in case more debugging is needed -// _decodeRGB(t, t->w * 4, 4, 0, 3, 2, 1, 7, 6, 5); -// _decodeAlpha(t, t->w * 4); -} - -void decodeXBGR(struct TheoraPixelTransform* t) -{ - _decodeRGB(t, t->w * 4, 4, 0, 3, 2, 1, 7, 6, 5); -} - -void initYUVConversionModule() -{ - 
//used to bring the table into the high side (scale up) so we - //can maintain high precision and not use floats (FIXED POINT) - - // this is the pseudocode for yuv->rgb conversion - // r = 1.164*(*ySrc - 16) + 1.596*(cv - 128); - // b = 1.164*(*ySrc - 16) + 2.018*(cu - 128); - // g = 1.164*(*ySrc - 16) - 0.813*(cv - 128) - 0.391*(cu - 128); - - double scale = 1L << 13, temp; - - int i; - for (i = 0; i < 256; ++i) - { - temp = i - 128; - - YTable[i] = (int)((1.164 * scale + 0.5) * (i - 16)); //Calc Y component - RVTable[i] = (int)((1.596 * scale + 0.5) * temp); //Calc R component - GUTable[i] = (int)((0.391 * scale + 0.5) * temp); //Calc G u & v components - GVTable[i] = (int)((0.813 * scale + 0.5) * temp); - BUTable[i] = (int)((2.018 * scale + 0.5) * temp); //Calc B component - } -} - -/* - * Below are the function versions of the above macros, use those for debugging, but leave the macros for maximum CPU execution speed - * - * - * - * - -void _decodeRGB(struct TheoraPixelTransform* t, int stride, int nBytes, int maxWidth, int i1, int i2, int i3, int j1, int j2, int j3) -{ - register int tmp; - int nBytes2 = nBytes * 2, cv, cu, rgbY1, rgbY2, rgbY3, rgbY4, rV, gUV, bU, width = maxWidth == 0 ? 
t->w : maxWidth; - unsigned int y; - unsigned char *ySrcEven, *ySrcOdd, *yLineEnd, *uSrc, *vSrc, *out1, *out2; - - for (y = 0; y < t->h; y += 2) - { - ySrcEven = t->y + y * t->yStride; - ySrcOdd = t->y + (y + 1) * t->yStride; - uSrc = t->u + y * t->uStride / 2; - vSrc = t->v + y * t->vStride / 2; - out1 = t->out + y * stride; - out2 = t->out + (y + 1) * stride; - - for (yLineEnd = ySrcEven + width; ySrcEven != yLineEnd;) - { - cu = *uSrc; ++uSrc; - cv = *vSrc; ++vSrc; - rV = RVTable[cv]; - gUV = GUTable[cu] + GVTable[cv]; - bU = BUTable[cu]; - - rgbY1 = YTable[*ySrcEven]; ++ySrcEven; - rgbY2 = YTable[*ySrcOdd]; ++ySrcOdd; - rgbY3 = YTable[*ySrcEven]; ++ySrcEven; - rgbY4 = YTable[*ySrcOdd]; ++ySrcOdd; - - CLIP_RGB_COLOR(out1[i1], rgbY1 + rV ); - CLIP_RGB_COLOR(out1[i2], rgbY1 - gUV); - CLIP_RGB_COLOR(out1[i3], rgbY1 + bU ); - - CLIP_RGB_COLOR(out2[i1], rgbY2 + rV ); - CLIP_RGB_COLOR(out2[i2], rgbY2 - gUV); - CLIP_RGB_COLOR(out2[i3], rgbY2 + bU ); - - CLIP_RGB_COLOR(out1[j1], rgbY3 + rV ); - CLIP_RGB_COLOR(out1[j2], rgbY3 - gUV); - CLIP_RGB_COLOR(out1[j3], rgbY3 + bU ); - - CLIP_RGB_COLOR(out2[j1], rgbY4 + rV ); - CLIP_RGB_COLOR(out2[j2], rgbY4 - gUV); - CLIP_RGB_COLOR(out2[j3], rgbY4 + bU ); - - out1 += nBytes2; out2 += nBytes2; - } - } -} - -void _decodeRGBA(struct TheoraPixelTransform* t, int stride, int nBytes, int maxWidth, int i1, int i2, int i3, int j1, int j2, int j3, int aindex1, int aindex2) -{ - register int tmp; - int nBytes2 = nBytes * 2, cv, cu, rgbY1, rgbY2, rgbY3, rgbY4, a1, a2, a3, a4, rV, gUV, bU, width = maxWidth == 0 ? 
t->w : maxWidth; - int alphaStride = t->w; - unsigned int y; - unsigned char *ySrcEven, *ySrcOdd, *yLineEnd, *uSrc, *vSrc, *out1, *out2; - - for (y = 0; y < t->h; y += 2) - { - ySrcEven = t->y + y * t->yStride; - ySrcOdd = t->y + (y + 1) * t->yStride; - uSrc = t->u + y * t->uStride / 2; - vSrc = t->v + y * t->vStride / 2; - out1 = t->out + y * stride; - out2 = t->out + (y + 1) * stride; - - for (yLineEnd = ySrcEven + width; ySrcEven != yLineEnd;) - { - cu = *uSrc; ++uSrc; - cv = *vSrc; ++vSrc; - rV = RVTable[cv]; - gUV = GUTable[cu] + GVTable[cv]; - bU = BUTable[cu]; - - rgbY1 = YTable[*ySrcEven]; a1 = ySrcEven[alphaStride]; ++ySrcEven; - rgbY2 = YTable[*ySrcOdd]; a2 = ySrcOdd [alphaStride]; ++ySrcOdd; - rgbY3 = YTable[*ySrcEven]; a3 = ySrcEven[alphaStride]; ++ySrcEven; - rgbY4 = YTable[*ySrcOdd]; a4 = ySrcOdd [alphaStride]; ++ySrcOdd; - - if (a1 >= 32) - { - CLIP_RGB_COLOR(out1[i1], rgbY1 + rV ); - CLIP_RGB_COLOR(out1[i2], rgbY1 - gUV); - CLIP_RGB_COLOR(out1[i3], rgbY1 + bU ); - out1[aindex1] = a1 > 224 ? 255 : a1; - } - else *((unsigned int*) out1) = 0; - - if (a2 >= 32) - { - CLIP_RGB_COLOR(out2[i1], rgbY2 + rV ); - CLIP_RGB_COLOR(out2[i2], rgbY2 - gUV); - CLIP_RGB_COLOR(out2[i3], rgbY2 + bU ); - out2[aindex1] = a2 > 224 ? 255 : a2; - } - else *((unsigned int*) out2) = 0; - - - if (a3 >= 32) - { - CLIP_RGB_COLOR(out1[j1], rgbY3 + rV ); - CLIP_RGB_COLOR(out1[j2], rgbY3 - gUV); - CLIP_RGB_COLOR(out1[j3], rgbY3 + bU ); - out1[aindex2] = a3 > 224 ? 255 : a3; - } - else *((unsigned int*) &out1[4]) = 0; - - if (a4 >= 32) - { - CLIP_RGB_COLOR(out2[j1], rgbY4 + rV ); - CLIP_RGB_COLOR(out2[j2], rgbY4 - gUV); - CLIP_RGB_COLOR(out2[j3], rgbY4 + bU ); - out2[aindex2] = a4 > 224 ? 
255 : a4; - } - else *((unsigned int*) &out2[4]) = 0; - - out1 += nBytes2; out2 += nBytes2; - } - } -} -*/ -#endif diff --git a/drivers/theoraplayer/src/YUV/C/yuv420_yuv_c.c b/drivers/theoraplayer/src/YUV/C/yuv420_yuv_c.c deleted file mode 100644 index fea74eca71..0000000000 --- a/drivers/theoraplayer/src/YUV/C/yuv420_yuv_c.c +++ /dev/null @@ -1,86 +0,0 @@ -/************************************************************************************ -This source file is part of the Theora Video Playback Library -For latest info, see http://libtheoraplayer.googlecode.com -************************************************************************************* -Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com) -This program is free software; you can redistribute it and/or modify it under -the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause -*************************************************************************************/ -#include "yuv_util.h" - -static void _decodeYUV(struct TheoraPixelTransform* t, int stride, int nBytes, int maxWidth) -{ - int cv, cu, y1, y2, y3, y4, width = maxWidth == 0 ? 
t->w : maxWidth; - unsigned char *ySrcEven, *ySrcOdd, *yLineEnd, *uSrc, *vSrc, *out1, *out2; - unsigned int y; - - for (y=0; y < t->h; y += 2) - { - ySrcEven = t->y + y * t->yStride; - ySrcOdd = t->y + (y + 1) * t->yStride; - uSrc = t->u + y * t->uStride / 2; - vSrc = t->v + y * t->vStride / 2; - out1 = t->out + y * stride; - out2 = t->out + (y + 1) * stride; - - for (yLineEnd = ySrcEven + width; ySrcEven != yLineEnd;) - { - // EVEN columns - cu = *uSrc; ++uSrc; - cv = *vSrc; ++vSrc; - - y1 = *ySrcEven; ++ySrcEven; - y2 = *ySrcOdd; ++ySrcOdd; - y3 = *ySrcEven; ++ySrcEven; - y4 = *ySrcOdd; ++ySrcOdd; - - // EVEN columns - out1[0] = y1; - out1[1] = cu; - out1[2] = cv; - - out2[0] = y2; - out2[1] = cu; - out2[2] = cv; - - out1 += nBytes; out2 += nBytes; - // ODD columns - out1[0] = y3; - out1[1] = cu; - out1[2] = cv; - - out2[0] = y4; - out2[1] = cu; - out2[2] = cv; - out1 += nBytes; out2 += nBytes; - } - } -} - -void decodeYUV(struct TheoraPixelTransform* t) -{ - _decodeYUV(t, t->w * 3, 3, 0); -} - -void decodeYUVA(struct TheoraPixelTransform* t) -{ - _decodeYUV(t, t->w * 4, 4, 0); - _decodeAlpha(incOut(t, 3), t->w * 4); -} - -void decodeYUVX(struct TheoraPixelTransform* t) -{ - _decodeYUV(t, t->w * 4, 4, 0); -} - -void decodeAYUV(struct TheoraPixelTransform* t) -{ - _decodeYUV(incOut(t, 1), t->w * 4, 4, 0); - _decodeAlpha(t, t->w * 4); -} - -void decodeXYUV(struct TheoraPixelTransform* t) -{ - _decodeYUV(incOut(t, 1), t->w * 4, 4, 0); -} - diff --git a/drivers/theoraplayer/src/YUV/android/cpu-features.c b/drivers/theoraplayer/src/YUV/android/cpu-features.c deleted file mode 100644 index 623dc94e0e..0000000000 --- a/drivers/theoraplayer/src/YUV/android/cpu-features.c +++ /dev/null @@ -1,1095 +0,0 @@ -/* - * Copyright (C) 2010 The Android Open Source Project - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS - * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -/* ChangeLog for this library: - * - * NDK r8d: Add android_setCpu(). - * - * NDK r8c: Add new ARM CPU features: VFPv2, VFP_D32, VFP_FP16, - * VFP_FMA, NEON_FMA, IDIV_ARM, IDIV_THUMB2 and iWMMXt. - * - * Rewrite the code to parse /proc/self/auxv instead of - * the "Features" field in /proc/cpuinfo. - * - * Dynamically allocate the buffer that hold the content - * of /proc/cpuinfo to deal with newer hardware. - * - * NDK r7c: Fix CPU count computation. The old method only reported the - * number of _active_ CPUs when the library was initialized, - * which could be less than the real total. 
- * - * NDK r5: Handle buggy kernels which report a CPU Architecture number of 7 - * for an ARMv6 CPU (see below). - * - * Handle kernels that only report 'neon', and not 'vfpv3' - * (VFPv3 is mandated by the ARM architecture is Neon is implemented) - * - * Handle kernels that only report 'vfpv3d16', and not 'vfpv3' - * - * Fix x86 compilation. Report ANDROID_CPU_FAMILY_X86 in - * android_getCpuFamily(). - * - * NDK r4: Initial release - */ - -#if 0 - -#ifdef _ANDROID -#if defined(__le32__) - -// When users enter this, we should only provide interface and -// libportable will give the implementations. - -#else // !__le32__ - -#include <sys/system_properties.h> -#include <pthread.h> -#include "cpu-features.h" -#include <stdio.h> -#include <stdlib.h> -#include <fcntl.h> -#include <errno.h> - -static pthread_once_t g_once; -static int g_inited; -static AndroidCpuFamily g_cpuFamily; -static uint64_t g_cpuFeatures; -static int g_cpuCount; - -#ifdef __arm__ -static uint32_t g_cpuIdArm; -#endif - -static const int android_cpufeatures_debug = 0; - -#ifdef __arm__ -# define DEFAULT_CPU_FAMILY ANDROID_CPU_FAMILY_ARM -#elif defined __i386__ -# define DEFAULT_CPU_FAMILY ANDROID_CPU_FAMILY_X86 -#else -# define DEFAULT_CPU_FAMILY ANDROID_CPU_FAMILY_UNKNOWN -#endif - -#define D(...) \ - do { \ - if (android_cpufeatures_debug) { \ - printf(__VA_ARGS__); fflush(stdout); \ - } \ - } while (0) - -#ifdef __i386__ -static __inline__ void x86_cpuid(int func, int values[4]) -{ - int a, b, c, d; - /* We need to preserve ebx since we're compiling PIC code */ - /* this means we can't use "=b" for the second output register */ - __asm__ __volatile__ ( \ - "push %%ebx\n" - "cpuid\n" \ - "mov %%ebx, %1\n" - "pop %%ebx\n" - : "=a" (a), "=r" (b), "=c" (c), "=d" (d) \ - : "a" (func) \ - ); - values[0] = a; - values[1] = b; - values[2] = c; - values[3] = d; -} -#endif - -/* Get the size of a file by reading it until the end. 
This is needed - * because files under /proc do not always return a valid size when - * using fseek(0, SEEK_END) + ftell(). Nor can they be mmap()-ed. - */ -static int -get_file_size(const char* pathname) -{ - int fd, ret, result = 0; - char buffer[256]; - - fd = open(pathname, O_RDONLY); - if (fd < 0) { - D("Can't open %s: %s\n", pathname, strerror(errno)); - return -1; - } - - for (;;) { - int ret = read(fd, buffer, sizeof buffer); - if (ret < 0) { - if (errno == EINTR) - continue; - D("Error while reading %s: %s\n", pathname, strerror(errno)); - break; - } - if (ret == 0) - break; - - result += ret; - } - close(fd); - return result; -} - -/* Read the content of /proc/cpuinfo into a user-provided buffer. - * Return the length of the data, or -1 on error. Does *not* - * zero-terminate the content. Will not read more - * than 'buffsize' bytes. - */ -static int -read_file(const char* pathname, char* buffer, size_t buffsize) -{ - int fd, count; - - fd = open(pathname, O_RDONLY); - if (fd < 0) { - D("Could not open %s: %s\n", pathname, strerror(errno)); - return -1; - } - count = 0; - while (count < (int)buffsize) { - int ret = read(fd, buffer + count, buffsize - count); - if (ret < 0) { - if (errno == EINTR) - continue; - D("Error while reading from %s: %s\n", pathname, strerror(errno)); - if (count == 0) - count = -1; - break; - } - if (ret == 0) - break; - count += ret; - } - close(fd); - return count; -} - -/* Extract the content of a the first occurence of a given field in - * the content of /proc/cpuinfo and return it as a heap-allocated - * string that must be freed by the caller. - * - * Return NULL if not found - */ -static char* -extract_cpuinfo_field(const char* buffer, int buflen, const char* field) -{ - int fieldlen = strlen(field); - const char* bufend = buffer + buflen; - char* result = NULL; - int len, ignore; - const char *p, *q; - - /* Look for first field occurence, and ensures it starts the line. 
*/ - p = buffer; - for (;;) { - p = memmem(p, bufend-p, field, fieldlen); - if (p == NULL) - goto EXIT; - - if (p == buffer || p[-1] == '\n') - break; - - p += fieldlen; - } - - /* Skip to the first column followed by a space */ - p += fieldlen; - p = memchr(p, ':', bufend-p); - if (p == NULL || p[1] != ' ') - goto EXIT; - - /* Find the end of the line */ - p += 2; - q = memchr(p, '\n', bufend-p); - if (q == NULL) - q = bufend; - - /* Copy the line into a heap-allocated buffer */ - len = q-p; - result = malloc(len+1); - if (result == NULL) - goto EXIT; - - memcpy(result, p, len); - result[len] = '\0'; - -EXIT: - return result; -} - -/* Checks that a space-separated list of items contains one given 'item'. - * Returns 1 if found, 0 otherwise. - */ -static int -has_list_item(const char* list, const char* item) -{ - const char* p = list; - int itemlen = strlen(item); - - if (list == NULL) - return 0; - - while (*p) { - const char* q; - - /* skip spaces */ - while (*p == ' ' || *p == '\t') - p++; - - /* find end of current list item */ - q = p; - while (*q && *q != ' ' && *q != '\t') - q++; - - if (itemlen == q-p && !memcmp(p, item, itemlen)) - return 1; - - /* skip to next item */ - p = q; - } - return 0; -} - -/* Parse a number starting from 'input', but not going further - * than 'limit'. Return the value into '*result'. - * - * NOTE: Does not skip over leading spaces, or deal with sign characters. - * NOTE: Ignores overflows. - * - * The function returns NULL in case of error (bad format), or the new - * position after the decimal number in case of success (which will always - * be <= 'limit'). 
- */ -static const char* -parse_number(const char* input, const char* limit, int base, int* result) -{ - const char* p = input; - int val = 0; - while (p < limit) { - int d = (*p - '0'); - if ((unsigned)d >= 10U) { - d = (*p - 'a'); - if ((unsigned)d >= 6U) - d = (*p - 'A'); - if ((unsigned)d >= 6U) - break; - d += 10; - } - if (d >= base) - break; - val = val*base + d; - p++; - } - if (p == input) - return NULL; - - *result = val; - return p; -} - -static const char* -parse_decimal(const char* input, const char* limit, int* result) -{ - return parse_number(input, limit, 10, result); -} - -static const char* -parse_hexadecimal(const char* input, const char* limit, int* result) -{ - return parse_number(input, limit, 16, result); -} - -/* This small data type is used to represent a CPU list / mask, as read - * from sysfs on Linux. See http://www.kernel.org/doc/Documentation/cputopology.txt - * - * For now, we don't expect more than 32 cores on mobile devices, so keep - * everything simple. - */ -typedef struct { - uint32_t mask; -} CpuList; - -static __inline__ void -cpulist_init(CpuList* list) { - list->mask = 0; -} - -static __inline__ void -cpulist_and(CpuList* list1, CpuList* list2) { - list1->mask &= list2->mask; -} - -static __inline__ void -cpulist_set(CpuList* list, int index) { - if ((unsigned)index < 32) { - list->mask |= (uint32_t)(1U << index); - } -} - -static __inline__ int -cpulist_count(CpuList* list) { - return __builtin_popcount(list->mask); -} - -/* Parse a textual list of cpus and store the result inside a CpuList object. - * Input format is the following: - * - comma-separated list of items (no spaces) - * - each item is either a single decimal number (cpu index), or a range made - * of two numbers separated by a single dash (-). Ranges are inclusive. 
- * - * Examples: 0 - * 2,4-127,128-143 - * 0-1 - */ -static void -cpulist_parse(CpuList* list, const char* line, int line_len) -{ - const char* p = line; - const char* end = p + line_len; - const char* q; - - /* NOTE: the input line coming from sysfs typically contains a - * trailing newline, so take care of it in the code below - */ - while (p < end && *p != '\n') - { - int val, start_value, end_value; - - /* Find the end of current item, and put it into 'q' */ - q = memchr(p, ',', end-p); - if (q == NULL) { - q = end; - } - - /* Get first value */ - p = parse_decimal(p, q, &start_value); - if (p == NULL) - goto BAD_FORMAT; - - end_value = start_value; - - /* If we're not at the end of the item, expect a dash and - * and integer; extract end value. - */ - if (p < q && *p == '-') { - p = parse_decimal(p+1, q, &end_value); - if (p == NULL) - goto BAD_FORMAT; - } - - /* Set bits CPU list bits */ - for (val = start_value; val <= end_value; val++) { - cpulist_set(list, val); - } - - /* Jump to next item */ - p = q; - if (p < end) - p++; - } - -BAD_FORMAT: - ; -} - -/* Read a CPU list from one sysfs file */ -static void -cpulist_read_from(CpuList* list, const char* filename) -{ - char file[64]; - int filelen; - - cpulist_init(list); - - filelen = read_file(filename, file, sizeof file); - if (filelen < 0) { - D("Could not read %s: %s\n", filename, strerror(errno)); - return; - } - - cpulist_parse(list, file, filelen); -} - -// See <asm/hwcap.h> kernel header. -#define HWCAP_VFP (1 << 6) -#define HWCAP_IWMMXT (1 << 9) -#define HWCAP_NEON (1 << 12) -#define HWCAP_VFPv3 (1 << 13) -#define HWCAP_VFPv3D16 (1 << 14) -#define HWCAP_VFPv4 (1 << 16) -#define HWCAP_IDIVA (1 << 17) -#define HWCAP_IDIVT (1 << 18) - -#define AT_HWCAP 16 - -#if defined(__arm__) -/* Compute the ELF HWCAP flags. - */ -static uint32_t -get_elf_hwcap(const char* cpuinfo, int cpuinfo_len) -{ - /* IMPORTANT: - * Accessing /proc/self/auxv doesn't work anymore on all - * platform versions. 
More specifically, when running inside - * a regular application process, most of /proc/self/ will be - * non-readable, including /proc/self/auxv. This doesn't - * happen however if the application is debuggable, or when - * running under the "shell" UID, which is why this was not - * detected appropriately. - */ -#if 0 - uint32_t result = 0; - const char filepath[] = "/proc/self/auxv"; - int fd = open(filepath, O_RDONLY); - if (fd < 0) { - D("Could not open %s: %s\n", filepath, strerror(errno)); - return 0; - } - - struct { uint32_t tag; uint32_t value; } entry; - - for (;;) { - int ret = read(fd, (char*)&entry, sizeof entry); - if (ret < 0) { - if (errno == EINTR) - continue; - D("Error while reading %s: %s\n", filepath, strerror(errno)); - break; - } - // Detect end of list. - if (ret == 0 || (entry.tag == 0 && entry.value == 0)) - break; - if (entry.tag == AT_HWCAP) { - result = entry.value; - break; - } - } - close(fd); - return result; -#else - // Recreate ELF hwcaps by parsing /proc/cpuinfo Features tag. - uint32_t hwcaps = 0; - - char* cpuFeatures = extract_cpuinfo_field(cpuinfo, cpuinfo_len, "Features"); - - if (cpuFeatures != NULL) { - D("Found cpuFeatures = '%s'\n", cpuFeatures); - - if (has_list_item(cpuFeatures, "vfp")) - hwcaps |= HWCAP_VFP; - if (has_list_item(cpuFeatures, "vfpv3")) - hwcaps |= HWCAP_VFPv3; - if (has_list_item(cpuFeatures, "vfpv3d16")) - hwcaps |= HWCAP_VFPv3D16; - if (has_list_item(cpuFeatures, "vfpv4")) - hwcaps |= HWCAP_VFPv4; - if (has_list_item(cpuFeatures, "neon")) - hwcaps |= HWCAP_NEON; - if (has_list_item(cpuFeatures, "idiva")) - hwcaps |= HWCAP_IDIVA; - if (has_list_item(cpuFeatures, "idivt")) - hwcaps |= HWCAP_IDIVT; - if (has_list_item(cpuFeatures, "idiv")) - hwcaps |= HWCAP_IDIVA | HWCAP_IDIVT; - if (has_list_item(cpuFeatures, "iwmmxt")) - hwcaps |= HWCAP_IWMMXT; - - free(cpuFeatures); - } - return hwcaps; -#endif -} -#endif /* __arm__ */ - -/* Return the number of cpus present on a given device. 
- * - * To handle all weird kernel configurations, we need to compute the - * intersection of the 'present' and 'possible' CPU lists and count - * the result. - */ -static int -get_cpu_count(void) -{ - CpuList cpus_present[1]; - CpuList cpus_possible[1]; - - cpulist_read_from(cpus_present, "/sys/devices/system/cpu/present"); - cpulist_read_from(cpus_possible, "/sys/devices/system/cpu/possible"); - - /* Compute the intersection of both sets to get the actual number of - * CPU cores that can be used on this device by the kernel. - */ - cpulist_and(cpus_present, cpus_possible); - - return cpulist_count(cpus_present); -} - -static void -android_cpuInitFamily(void) -{ -#if defined(__arm__) - g_cpuFamily = ANDROID_CPU_FAMILY_ARM; -#elif defined(__i386__) - g_cpuFamily = ANDROID_CPU_FAMILY_X86; -#elif defined(__mips64) -/* Needs to be before __mips__ since the compiler defines both */ - g_cpuFamily = ANDROID_CPU_FAMILY_MIPS64; -#elif defined(__mips__) - g_cpuFamily = ANDROID_CPU_FAMILY_MIPS; -#elif defined(__aarch64__) - g_cpuFamily = ANDROID_CPU_FAMILY_ARM64; -#elif defined(__x86_64__) - g_cpuFamily = ANDROID_CPU_FAMILY_X86_64; -#else - g_cpuFamily = ANDROID_CPU_FAMILY_UNKNOWN; -#endif -} - -static void -android_cpuInit(void) -{ - char* cpuinfo = NULL; - int cpuinfo_len; - - android_cpuInitFamily(); - - g_cpuFeatures = 0; - g_cpuCount = 1; - g_inited = 1; - - cpuinfo_len = get_file_size("/proc/cpuinfo"); - if (cpuinfo_len < 0) { - D("cpuinfo_len cannot be computed!"); - return; - } - cpuinfo = malloc(cpuinfo_len); - if (cpuinfo == NULL) { - D("cpuinfo buffer could not be allocated"); - return; - } - cpuinfo_len = read_file("/proc/cpuinfo", cpuinfo, cpuinfo_len); - D("cpuinfo_len is (%d):\n%.*s\n", cpuinfo_len, - cpuinfo_len >= 0 ? 
cpuinfo_len : 0, cpuinfo); - - if (cpuinfo_len < 0) /* should not happen */ { - free(cpuinfo); - return; - } - - /* Count the CPU cores, the value may be 0 for single-core CPUs */ - g_cpuCount = get_cpu_count(); - if (g_cpuCount == 0) { - g_cpuCount = 1; - } - - D("found cpuCount = %d\n", g_cpuCount); - -#ifdef __arm__ - { - char* features = NULL; - char* architecture = NULL; - - /* Extract architecture from the "CPU Architecture" field. - * The list is well-known, unlike the the output of - * the 'Processor' field which can vary greatly. - * - * See the definition of the 'proc_arch' array in - * $KERNEL/arch/arm/kernel/setup.c and the 'c_show' function in - * same file. - */ - char* cpuArch = extract_cpuinfo_field(cpuinfo, cpuinfo_len, "CPU architecture"); - - if (cpuArch != NULL) { - char* end; - long archNumber; - int hasARMv7 = 0; - - D("found cpuArch = '%s'\n", cpuArch); - - /* read the initial decimal number, ignore the rest */ - archNumber = strtol(cpuArch, &end, 10); - - /* Here we assume that ARMv8 will be upwards compatible with v7 - * in the future. Unfortunately, there is no 'Features' field to - * indicate that Thumb-2 is supported. - */ - if (end > cpuArch && archNumber >= 7) { - hasARMv7 = 1; - } - - /* Unfortunately, it seems that certain ARMv6-based CPUs - * report an incorrect architecture number of 7! - * - * See http://code.google.com/p/android/issues/detail?id=10812 - * - * We try to correct this by looking at the 'elf_format' - * field reported by the 'Processor' field, which is of the - * form of "(v7l)" for an ARMv7-based CPU, and "(v6l)" for - * an ARMv6-one. 
- */ - if (hasARMv7) { - char* cpuProc = extract_cpuinfo_field(cpuinfo, cpuinfo_len, - "Processor"); - if (cpuProc != NULL) { - D("found cpuProc = '%s'\n", cpuProc); - if (has_list_item(cpuProc, "(v6l)")) { - D("CPU processor and architecture mismatch!!\n"); - hasARMv7 = 0; - } - free(cpuProc); - } - } - - if (hasARMv7) { - g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_ARMv7; - } - - /* The LDREX / STREX instructions are available from ARMv6 */ - if (archNumber >= 6) { - g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_LDREX_STREX; - } - - free(cpuArch); - } - - /* Extract the list of CPU features from ELF hwcaps */ - uint32_t hwcaps = get_elf_hwcap(cpuinfo, cpuinfo_len); - - if (hwcaps != 0) { - int has_vfp = (hwcaps & HWCAP_VFP); - int has_vfpv3 = (hwcaps & HWCAP_VFPv3); - int has_vfpv3d16 = (hwcaps & HWCAP_VFPv3D16); - int has_vfpv4 = (hwcaps & HWCAP_VFPv4); - int has_neon = (hwcaps & HWCAP_NEON); - int has_idiva = (hwcaps & HWCAP_IDIVA); - int has_idivt = (hwcaps & HWCAP_IDIVT); - int has_iwmmxt = (hwcaps & HWCAP_IWMMXT); - - // The kernel does a poor job at ensuring consistency when - // describing CPU features. So lots of guessing is needed. - - // 'vfpv4' implies VFPv3|VFP_FMA|FP16 - if (has_vfpv4) - g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv3 | - ANDROID_CPU_ARM_FEATURE_VFP_FP16 | - ANDROID_CPU_ARM_FEATURE_VFP_FMA; - - // 'vfpv3' or 'vfpv3d16' imply VFPv3. Note that unlike GCC, - // a value of 'vfpv3' doesn't necessarily mean that the D32 - // feature is present, so be conservative. All CPUs in the - // field that support D32 also support NEON, so this should - // not be a problem in practice. - if (has_vfpv3 || has_vfpv3d16) - g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv3; - - // 'vfp' is super ambiguous. Depending on the kernel, it can - // either mean VFPv2 or VFPv3. Make it depend on ARMv7. 
- if (has_vfp) { - if (g_cpuFeatures & ANDROID_CPU_ARM_FEATURE_ARMv7) - g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv3; - else - g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv2; - } - - // Neon implies VFPv3|D32, and if vfpv4 is detected, NEON_FMA - if (has_neon) { - g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv3 | - ANDROID_CPU_ARM_FEATURE_NEON | - ANDROID_CPU_ARM_FEATURE_VFP_D32; - if (has_vfpv4) - g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_NEON_FMA; - } - - // VFPv3 implies VFPv2 and ARMv7 - if (g_cpuFeatures & ANDROID_CPU_ARM_FEATURE_VFPv3) - g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv2 | - ANDROID_CPU_ARM_FEATURE_ARMv7; - - if (has_idiva) - g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_IDIV_ARM; - if (has_idivt) - g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_IDIV_THUMB2; - - if (has_iwmmxt) - g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_iWMMXt; - } - - /* Extract the cpuid value from various fields */ - // The CPUID value is broken up in several entries in /proc/cpuinfo. - // This table is used to rebuild it from the entries. 
- static const struct CpuIdEntry { - const char* field; - char format; - char bit_lshift; - char bit_length; - } cpu_id_entries[] = { - { "CPU implementer", 'x', 24, 8 }, - { "CPU variant", 'x', 20, 4 }, - { "CPU part", 'x', 4, 12 }, - { "CPU revision", 'd', 0, 4 }, - }; - size_t i; - D("Parsing /proc/cpuinfo to recover CPUID\n"); - for (i = 0; - i < sizeof(cpu_id_entries)/sizeof(cpu_id_entries[0]); - ++i) { - const struct CpuIdEntry* entry = &cpu_id_entries[i]; - char* value = extract_cpuinfo_field(cpuinfo, - cpuinfo_len, - entry->field); - if (value == NULL) - continue; - - D("field=%s value='%s'\n", entry->field, value); - char* value_end = value + strlen(value); - int val = 0; - const char* start = value; - const char* p; - if (value[0] == '0' && (value[1] == 'x' || value[1] == 'X')) { - start += 2; - p = parse_hexadecimal(start, value_end, &val); - } else if (entry->format == 'x') - p = parse_hexadecimal(value, value_end, &val); - else - p = parse_decimal(value, value_end, &val); - - if (p > (const char*)start) { - val &= ((1 << entry->bit_length)-1); - val <<= entry->bit_lshift; - g_cpuIdArm |= (uint32_t) val; - } - - free(value); - } - - // Handle kernel configuration bugs that prevent the correct - // reporting of CPU features. - static const struct CpuFix { - uint32_t cpuid; - uint64_t or_flags; - } cpu_fixes[] = { - /* The Nexus 4 (Qualcomm Krait) kernel configuration - * forgets to report IDIV support. 
*/ - { 0x510006f2, ANDROID_CPU_ARM_FEATURE_IDIV_ARM | - ANDROID_CPU_ARM_FEATURE_IDIV_THUMB2 }, - { 0x510006f3, ANDROID_CPU_ARM_FEATURE_IDIV_ARM | - ANDROID_CPU_ARM_FEATURE_IDIV_THUMB2 }, - }; - size_t n; - for (n = 0; n < sizeof(cpu_fixes)/sizeof(cpu_fixes[0]); ++n) { - const struct CpuFix* entry = &cpu_fixes[n]; - - if (g_cpuIdArm == entry->cpuid) - g_cpuFeatures |= entry->or_flags; - } - - } -#endif /* __arm__ */ - -#ifdef __i386__ - int regs[4]; - -/* According to http://en.wikipedia.org/wiki/CPUID */ -#define VENDOR_INTEL_b 0x756e6547 -#define VENDOR_INTEL_c 0x6c65746e -#define VENDOR_INTEL_d 0x49656e69 - - x86_cpuid(0, regs); - int vendorIsIntel = (regs[1] == VENDOR_INTEL_b && - regs[2] == VENDOR_INTEL_c && - regs[3] == VENDOR_INTEL_d); - - x86_cpuid(1, regs); - if ((regs[2] & (1 << 9)) != 0) { - g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_SSSE3; - } - if ((regs[2] & (1 << 23)) != 0) { - g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_POPCNT; - } - if (vendorIsIntel && (regs[2] & (1 << 22)) != 0) { - g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_MOVBE; - } -#endif - - free(cpuinfo); -} - - -AndroidCpuFamily -android_getCpuFamily(void) -{ - pthread_once(&g_once, android_cpuInit); - return g_cpuFamily; -} - - -uint64_t -android_getCpuFeaturesExt(void) -{ - pthread_once(&g_once, android_cpuInit); - return g_cpuFeatures; -} - - -int -android_getCpuCount(void) -{ - pthread_once(&g_once, android_cpuInit); - return g_cpuCount; -} - -static void -android_cpuInitDummy(void) -{ - g_inited = 1; -} - -int -android_setCpu(int cpu_count, uint64_t cpu_features) -{ - /* Fail if the library was already initialized. */ - if (g_inited) - return 0; - - android_cpuInitFamily(); - g_cpuCount = (cpu_count <= 0 ? 
1 : cpu_count); - g_cpuFeatures = cpu_features; - pthread_once(&g_once, android_cpuInitDummy); - - return 1; -} - -#ifdef __arm__ -uint32_t -android_getCpuIdArm(void) -{ - pthread_once(&g_once, android_cpuInit); - return g_cpuIdArm; -} - -int -android_setCpuArm(int cpu_count, uint64_t cpu_features, uint32_t cpu_id) -{ - if (!android_setCpu(cpu_count, cpu_features)) - return 0; - - g_cpuIdArm = cpu_id; - return 1; -} -#endif /* __arm__ */ - -/* - * Technical note: Making sense of ARM's FPU architecture versions. - * - * FPA was ARM's first attempt at an FPU architecture. There is no Android - * device that actually uses it since this technology was already obsolete - * when the project started. If you see references to FPA instructions - * somewhere, you can be sure that this doesn't apply to Android at all. - * - * FPA was followed by "VFP", soon renamed "VFPv1" due to the emergence of - * new versions / additions to it. ARM considers this obsolete right now, - * and no known Android device implements it either. - * - * VFPv2 added a few instructions to VFPv1, and is an *optional* extension - * supported by some ARMv5TE, ARMv6 and ARMv6T2 CPUs. Note that a device - * supporting the 'armeabi' ABI doesn't necessarily support these. - * - * VFPv3-D16 adds a few instructions on top of VFPv2 and is typically used - * on ARMv7-A CPUs which implement a FPU. Note that it is also mandated - * by the Android 'armeabi-v7a' ABI. The -D16 suffix in its name means - * that it provides 16 double-precision FPU registers (d0-d15) and 32 - * single-precision ones (s0-s31) which happen to be mapped to the same - * register banks. - * - * VFPv3-D32 is the name of an extension to VFPv3-D16 that provides 16 - * additional double precision registers (d16-d31). Note that there are - * still only 32 single precision registers. - * - * VFPv3xD is a *subset* of VFPv3-D16 that only provides single-precision - * registers. It is only used on ARMv7-M (i.e. 
on micro-controllers) which - * are not supported by Android. Note that it is not compatible with VFPv2. - * - * NOTE: The term 'VFPv3' usually designate either VFPv3-D16 or VFPv3-D32 - * depending on context. For example GCC uses it for VFPv3-D32, but - * the Linux kernel code uses it for VFPv3-D16 (especially in - * /proc/cpuinfo). Always try to use the full designation when - * possible. - * - * NEON, a.k.a. "ARM Advanced SIMD" is an extension that provides - * instructions to perform parallel computations on vectors of 8, 16, - * 32, 64 and 128 bit quantities. NEON requires VFPv32-D32 since all - * NEON registers are also mapped to the same register banks. - * - * VFPv4-D16, adds a few instructions on top of VFPv3-D16 in order to - * perform fused multiply-accumulate on VFP registers, as well as - * half-precision (16-bit) conversion operations. - * - * VFPv4-D32 is VFPv4-D16 with 32, instead of 16, FPU double precision - * registers. - * - * VPFv4-NEON is VFPv4-D32 with NEON instructions. It also adds fused - * multiply-accumulate instructions that work on the NEON registers. - * - * NOTE: Similarly, "VFPv4" might either reference VFPv4-D16 or VFPv4-D32 - * depending on context. - * - * The following information was determined by scanning the binutils-2.22 - * sources: - * - * Basic VFP instruction subsets: - * - * #define FPU_VFP_EXT_V1xD 0x08000000 // Base VFP instruction set. - * #define FPU_VFP_EXT_V1 0x04000000 // Double-precision insns. - * #define FPU_VFP_EXT_V2 0x02000000 // ARM10E VFPr1. - * #define FPU_VFP_EXT_V3xD 0x01000000 // VFPv3 single-precision. - * #define FPU_VFP_EXT_V3 0x00800000 // VFPv3 double-precision. - * #define FPU_NEON_EXT_V1 0x00400000 // Neon (SIMD) insns. - * #define FPU_VFP_EXT_D32 0x00200000 // Registers D16-D31. - * #define FPU_VFP_EXT_FP16 0x00100000 // Half-precision extensions. 
- * #define FPU_NEON_EXT_FMA 0x00080000 // Neon fused multiply-add - * #define FPU_VFP_EXT_FMA 0x00040000 // VFP fused multiply-add - * - * FPU types (excluding NEON) - * - * FPU_VFP_V1xD (EXT_V1xD) - * | - * +--------------------------+ - * | | - * FPU_VFP_V1 (+EXT_V1) FPU_VFP_V3xD (+EXT_V2+EXT_V3xD) - * | | - * | | - * FPU_VFP_V2 (+EXT_V2) FPU_VFP_V4_SP_D16 (+EXT_FP16+EXT_FMA) - * | - * FPU_VFP_V3D16 (+EXT_Vx3D+EXT_V3) - * | - * +--------------------------+ - * | | - * FPU_VFP_V3 (+EXT_D32) FPU_VFP_V4D16 (+EXT_FP16+EXT_FMA) - * | | - * | FPU_VFP_V4 (+EXT_D32) - * | - * FPU_VFP_HARD (+EXT_FMA+NEON_EXT_FMA) - * - * VFP architectures: - * - * ARCH_VFP_V1xD (EXT_V1xD) - * | - * +------------------+ - * | | - * | ARCH_VFP_V3xD (+EXT_V2+EXT_V3xD) - * | | - * | ARCH_VFP_V3xD_FP16 (+EXT_FP16) - * | | - * | ARCH_VFP_V4_SP_D16 (+EXT_FMA) - * | - * ARCH_VFP_V1 (+EXT_V1) - * | - * ARCH_VFP_V2 (+EXT_V2) - * | - * ARCH_VFP_V3D16 (+EXT_V3xD+EXT_V3) - * | - * +-------------------+ - * | | - * | ARCH_VFP_V3D16_FP16 (+EXT_FP16) - * | - * +-------------------+ - * | | - * | ARCH_VFP_V4_D16 (+EXT_FP16+EXT_FMA) - * | | - * | ARCH_VFP_V4 (+EXT_D32) - * | | - * | ARCH_NEON_VFP_V4 (+EXT_NEON+EXT_NEON_FMA) - * | - * ARCH_VFP_V3 (+EXT_D32) - * | - * +-------------------+ - * | | - * | ARCH_VFP_V3_FP16 (+EXT_FP16) - * | - * ARCH_VFP_V3_PLUS_NEON_V1 (+EXT_NEON) - * | - * ARCH_NEON_FP16 (+EXT_FP16) - * - * -fpu=<name> values and their correspondance with FPU architectures above: - * - * {"vfp", FPU_ARCH_VFP_V2}, - * {"vfp9", FPU_ARCH_VFP_V2}, - * {"vfp3", FPU_ARCH_VFP_V3}, // For backwards compatbility. 
- * {"vfp10", FPU_ARCH_VFP_V2}, - * {"vfp10-r0", FPU_ARCH_VFP_V1}, - * {"vfpxd", FPU_ARCH_VFP_V1xD}, - * {"vfpv2", FPU_ARCH_VFP_V2}, - * {"vfpv3", FPU_ARCH_VFP_V3}, - * {"vfpv3-fp16", FPU_ARCH_VFP_V3_FP16}, - * {"vfpv3-d16", FPU_ARCH_VFP_V3D16}, - * {"vfpv3-d16-fp16", FPU_ARCH_VFP_V3D16_FP16}, - * {"vfpv3xd", FPU_ARCH_VFP_V3xD}, - * {"vfpv3xd-fp16", FPU_ARCH_VFP_V3xD_FP16}, - * {"neon", FPU_ARCH_VFP_V3_PLUS_NEON_V1}, - * {"neon-fp16", FPU_ARCH_NEON_FP16}, - * {"vfpv4", FPU_ARCH_VFP_V4}, - * {"vfpv4-d16", FPU_ARCH_VFP_V4D16}, - * {"fpv4-sp-d16", FPU_ARCH_VFP_V4_SP_D16}, - * {"neon-vfpv4", FPU_ARCH_NEON_VFP_V4}, - * - * - * Simplified diagram that only includes FPUs supported by Android: - * Only ARCH_VFP_V3D16 is actually mandated by the armeabi-v7a ABI, - * all others are optional and must be probed at runtime. - * - * ARCH_VFP_V3D16 (EXT_V1xD+EXT_V1+EXT_V2+EXT_V3xD+EXT_V3) - * | - * +-------------------+ - * | | - * | ARCH_VFP_V3D16_FP16 (+EXT_FP16) - * | - * +-------------------+ - * | | - * | ARCH_VFP_V4_D16 (+EXT_FP16+EXT_FMA) - * | | - * | ARCH_VFP_V4 (+EXT_D32) - * | | - * | ARCH_NEON_VFP_V4 (+EXT_NEON+EXT_NEON_FMA) - * | - * ARCH_VFP_V3 (+EXT_D32) - * | - * +-------------------+ - * | | - * | ARCH_VFP_V3_FP16 (+EXT_FP16) - * | - * ARCH_VFP_V3_PLUS_NEON_V1 (+EXT_NEON) - * | - * ARCH_NEON_FP16 (+EXT_FP16) - * - */ - -#endif // defined(__le32__) -#endif - -#endif diff --git a/drivers/theoraplayer/src/YUV/android/cpu-features.h b/drivers/theoraplayer/src/YUV/android/cpu-features.h deleted file mode 100644 index 12d3ad5645..0000000000 --- a/drivers/theoraplayer/src/YUV/android/cpu-features.h +++ /dev/null @@ -1,212 +0,0 @@ -/* - * Copyright (C) 2010 The Android Open Source Project - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS - * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ -#ifndef CPU_FEATURES_H -#define CPU_FEATURES_H - -#include <sys/cdefs.h> -#include <stdint.h> - -__BEGIN_DECLS - -typedef enum { - ANDROID_CPU_FAMILY_UNKNOWN = 0, - ANDROID_CPU_FAMILY_ARM, - ANDROID_CPU_FAMILY_X86, - ANDROID_CPU_FAMILY_MIPS, - - ANDROID_CPU_FAMILY_MAX /* do not remove */ - -} AndroidCpuFamily; - -/* Return family of the device's CPU */ -extern AndroidCpuFamily android_getCpuFamily(void); - -/* The list of feature flags for ARM CPUs that can be recognized by the - * library. Value details are: - * - * VFPv2: - * CPU supports the VFPv2 instruction set. Many, but not all, ARMv6 CPUs - * support these instructions. 
VFPv2 is a subset of VFPv3 so this will - * be set whenever VFPv3 is set too. - * - * ARMv7: - * CPU supports the ARMv7-A basic instruction set. - * This feature is mandated by the 'armeabi-v7a' ABI. - * - * VFPv3: - * CPU supports the VFPv3-D16 instruction set, providing hardware FPU - * support for single and double precision floating point registers. - * Note that only 16 FPU registers are available by default, unless - * the D32 bit is set too. This feature is also mandated by the - * 'armeabi-v7a' ABI. - * - * VFP_D32: - * CPU VFP optional extension that provides 32 FPU registers, - * instead of 16. Note that ARM mandates this feature is the 'NEON' - * feature is implemented by the CPU. - * - * NEON: - * CPU FPU supports "ARM Advanced SIMD" instructions, also known as - * NEON. Note that this mandates the VFP_D32 feature as well, per the - * ARM Architecture specification. - * - * VFP_FP16: - * Half-width floating precision VFP extension. If set, the CPU - * supports instructions to perform floating-point operations on - * 16-bit registers. This is part of the VFPv4 specification, but - * not mandated by any Android ABI. - * - * VFP_FMA: - * Fused multiply-accumulate VFP instructions extension. Also part of - * the VFPv4 specification, but not mandated by any Android ABI. - * - * NEON_FMA: - * Fused multiply-accumulate NEON instructions extension. Optional - * extension from the VFPv4 specification, but not mandated by any - * Android ABI. - * - * IDIV_ARM: - * Integer division available in ARM mode. Only available - * on recent CPUs (e.g. Cortex-A15). - * - * IDIV_THUMB2: - * Integer division available in Thumb-2 mode. Only available - * on recent CPUs (e.g. Cortex-A15). - * - * iWMMXt: - * Optional extension that adds MMX registers and operations to an - * ARM CPU. This is only available on a few XScale-based CPU designs - * sold by Marvell. Pretty rare in practice. 
- * - * If you want to tell the compiler to generate code that targets one of - * the feature set above, you should probably use one of the following - * flags (for more details, see technical note at the end of this file): - * - * -mfpu=vfp - * -mfpu=vfpv2 - * These are equivalent and tell GCC to use VFPv2 instructions for - * floating-point operations. Use this if you want your code to - * run on *some* ARMv6 devices, and any ARMv7-A device supported - * by Android. - * - * Generated code requires VFPv2 feature. - * - * -mfpu=vfpv3-d16 - * Tell GCC to use VFPv3 instructions (using only 16 FPU registers). - * This should be generic code that runs on any CPU that supports the - * 'armeabi-v7a' Android ABI. Note that no ARMv6 CPU supports this. - * - * Generated code requires VFPv3 feature. - * - * -mfpu=vfpv3 - * Tell GCC to use VFPv3 instructions with 32 FPU registers. - * Generated code requires VFPv3|VFP_D32 features. - * - * -mfpu=neon - * Tell GCC to use VFPv3 instructions with 32 FPU registers, and - * also support NEON intrinsics (see <arm_neon.h>). - * Generated code requires VFPv3|VFP_D32|NEON features. - * - * -mfpu=vfpv4-d16 - * Generated code requires VFPv3|VFP_FP16|VFP_FMA features. - * - * -mfpu=vfpv4 - * Generated code requires VFPv3|VFP_FP16|VFP_FMA|VFP_D32 features. - * - * -mfpu=neon-vfpv4 - * Generated code requires VFPv3|VFP_FP16|VFP_FMA|VFP_D32|NEON|NEON_FMA - * features. - * - * -mcpu=cortex-a7 - * -mcpu=cortex-a15 - * Generated code requires VFPv3|VFP_FP16|VFP_FMA|VFP_D32| - * NEON|NEON_FMA|IDIV_ARM|IDIV_THUMB2 - * This flag implies -mfpu=neon-vfpv4. - * - * -mcpu=iwmmxt - * Allows the use of iWMMXt instrinsics with GCC. 
- */ -enum { - ANDROID_CPU_ARM_FEATURE_ARMv7 = (1 << 0), - ANDROID_CPU_ARM_FEATURE_VFPv3 = (1 << 1), - ANDROID_CPU_ARM_FEATURE_NEON = (1 << 2), - ANDROID_CPU_ARM_FEATURE_LDREX_STREX = (1 << 3), - ANDROID_CPU_ARM_FEATURE_VFPv2 = (1 << 4), - ANDROID_CPU_ARM_FEATURE_VFP_D32 = (1 << 5), - ANDROID_CPU_ARM_FEATURE_VFP_FP16 = (1 << 6), - ANDROID_CPU_ARM_FEATURE_VFP_FMA = (1 << 7), - ANDROID_CPU_ARM_FEATURE_NEON_FMA = (1 << 8), - ANDROID_CPU_ARM_FEATURE_IDIV_ARM = (1 << 9), - ANDROID_CPU_ARM_FEATURE_IDIV_THUMB2 = (1 << 10), - ANDROID_CPU_ARM_FEATURE_iWMMXt = (1 << 11), -}; - -enum { - ANDROID_CPU_X86_FEATURE_SSSE3 = (1 << 0), - ANDROID_CPU_X86_FEATURE_POPCNT = (1 << 1), - ANDROID_CPU_X86_FEATURE_MOVBE = (1 << 2), -}; - -// libtheoraplayer addition, renamed this to "Ext" as not to conflict with your own project if you've included cpu-features.c in it -//extern uint64_t android_getCpuFeaturesExt(void); -#define android_getCpuFeaturesExt android_getCpuFeatures - -/* Return the number of CPU cores detected on this device. */ -extern int android_getCpuCount(void); - -/* The following is used to force the CPU count and features - * mask in sandboxed processes. Under 4.1 and higher, these processes - * cannot access /proc, which is the only way to get information from - * the kernel about the current hardware (at least on ARM). - * - * It _must_ be called only once, and before any android_getCpuXXX - * function, any other case will fail. - * - * This function return 1 on success, and 0 on failure. - */ -extern int android_setCpu(int cpu_count, - uint64_t cpu_features); - -#ifdef __arm__ -/* Retrieve the ARM 32-bit CPUID value from the kernel. - * Note that this cannot work on sandboxed processes under 4.1 and - * higher, unless you called android_setCpuArm() before. - */ -extern uint32_t android_getCpuIdArm(void); - -/* An ARM-specific variant of android_setCpu() that also allows you - * to set the ARM CPUID field. 
- */ -extern int android_setCpuArm(int cpu_count, - uint64_t cpu_features, - uint32_t cpu_id); -#endif - -__END_DECLS - -#endif /* CPU_FEATURES_H */ diff --git a/drivers/theoraplayer/src/YUV/libyuv/LICENSE b/drivers/theoraplayer/src/YUV/libyuv/LICENSE deleted file mode 100755 index c911747a6b..0000000000 --- a/drivers/theoraplayer/src/YUV/libyuv/LICENSE +++ /dev/null @@ -1,29 +0,0 @@ -Copyright 2011 The LibYuv Project Authors. All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in - the documentation and/or other materials provided with the - distribution. - - * Neither the name of Google nor the names of its contributors may - be used to endorse or promote products derived from this software - without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/drivers/theoraplayer/src/YUV/libyuv/LICENSE_THIRD_PARTY b/drivers/theoraplayer/src/YUV/libyuv/LICENSE_THIRD_PARTY deleted file mode 100755 index a71591e771..0000000000 --- a/drivers/theoraplayer/src/YUV/libyuv/LICENSE_THIRD_PARTY +++ /dev/null @@ -1,8 +0,0 @@ -This source tree contains third party source code which is governed by third -party licenses. This file contains references to files which are under other -licenses than the one provided in the LICENSE file in the root of the source -tree. - -Files governed by third party licenses: -source/x86inc.asm - diff --git a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv.h b/drivers/theoraplayer/src/YUV/libyuv/include/libyuv.h deleted file mode 100755 index 3bebe642cc..0000000000 --- a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright 2011 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#ifndef INCLUDE_LIBYUV_H_ // NOLINT -#define INCLUDE_LIBYUV_H_ - -#include "libyuv/basic_types.h" -#include "libyuv/compare.h" -#include "libyuv/convert.h" -#include "libyuv/convert_argb.h" -#include "libyuv/convert_from.h" -#include "libyuv/convert_from_argb.h" -#include "libyuv/cpu_id.h" -#include "libyuv/format_conversion.h" -#include "libyuv/mjpeg_decoder.h" -#include "libyuv/planar_functions.h" -#include "libyuv/rotate.h" -#include "libyuv/rotate_argb.h" -#include "libyuv/row.h" -#include "libyuv/scale.h" -#include "libyuv/scale_argb.h" -#include "libyuv/scale_row.h" -#include "libyuv/version.h" -#include "libyuv/video_common.h" - -#endif // INCLUDE_LIBYUV_H_ NOLINT diff --git a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/basic_types.h b/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/basic_types.h deleted file mode 100755 index beb750ba65..0000000000 --- a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/basic_types.h +++ /dev/null @@ -1,118 +0,0 @@ -/* - * Copyright 2011 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#ifndef INCLUDE_LIBYUV_BASIC_TYPES_H_ // NOLINT -#define INCLUDE_LIBYUV_BASIC_TYPES_H_ - -#include <stddef.h> // for NULL, size_t - -#if defined(__ANDROID__) || (defined(_MSC_VER) && (_MSC_VER < 1600)) -#include <sys/types.h> // for uintptr_t on x86 -#else -#include <stdint.h> // for uintptr_t -#endif - -#ifndef GG_LONGLONG -#ifndef INT_TYPES_DEFINED -#define INT_TYPES_DEFINED -#ifdef COMPILER_MSVC -typedef unsigned __int64 uint64; -typedef __int64 int64; -#ifndef INT64_C -#define INT64_C(x) x ## I64 -#endif -#ifndef UINT64_C -#define UINT64_C(x) x ## UI64 -#endif -#define INT64_F "I64" -#else // COMPILER_MSVC -#if defined(__LP64__) && !defined(__OpenBSD__) && !defined(__APPLE__) -typedef unsigned long uint64; // NOLINT -typedef long int64; // NOLINT -#ifndef INT64_C -#define INT64_C(x) x ## L -#endif -#ifndef UINT64_C -#define UINT64_C(x) x ## UL -#endif -#define INT64_F "l" -#else // defined(__LP64__) && !defined(__OpenBSD__) && !defined(__APPLE__) -typedef unsigned long long uint64; // NOLINT -typedef long long int64; // NOLINT -#ifndef INT64_C -#define INT64_C(x) x ## LL -#endif -#ifndef UINT64_C -#define UINT64_C(x) x ## ULL -#endif -#define INT64_F "ll" -#endif // __LP64__ -#endif // COMPILER_MSVC -typedef unsigned int uint32; -typedef int int32; -typedef unsigned short uint16; // NOLINT -typedef short int16; // NOLINT -typedef unsigned char uint8; -typedef signed char int8; -#endif // INT_TYPES_DEFINED -#endif // GG_LONGLONG - -// Detect compiler is for x86 or x64. -#if defined(__x86_64__) || defined(_M_X64) || \ - defined(__i386__) || defined(_M_IX86) -#define CPU_X86 1 -#endif -// Detect compiler is for ARM. 
-#if defined(__arm__) || defined(_M_ARM) -#define CPU_ARM 1 -#endif - -#ifndef ALIGNP -#ifdef __cplusplus -#define ALIGNP(p, t) \ - (reinterpret_cast<uint8*>(((reinterpret_cast<uintptr_t>(p) + \ - ((t) - 1)) & ~((t) - 1)))) -#else -#define ALIGNP(p, t) \ - ((uint8*)((((uintptr_t)(p) + ((t) - 1)) & ~((t) - 1)))) /* NOLINT */ -#endif -#endif - -#if !defined(LIBYUV_API) -#if defined(_WIN32) || defined(__CYGWIN__) -#if defined(LIBYUV_BUILDING_SHARED_LIBRARY) -#define LIBYUV_API __declspec(dllexport) -#elif defined(LIBYUV_USING_SHARED_LIBRARY) -#define LIBYUV_API __declspec(dllimport) -#else -#define LIBYUV_API -#endif // LIBYUV_BUILDING_SHARED_LIBRARY -#elif defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__APPLE__) && \ - (defined(LIBYUV_BUILDING_SHARED_LIBRARY) || \ - defined(LIBYUV_USING_SHARED_LIBRARY)) -#define LIBYUV_API __attribute__ ((visibility ("default"))) -#else -#define LIBYUV_API -#endif // __GNUC__ -#endif // LIBYUV_API - -#define LIBYUV_BOOL int -#define LIBYUV_FALSE 0 -#define LIBYUV_TRUE 1 - -// Visual C x86 or GCC little endian. -#if defined(__x86_64__) || defined(_M_X64) || \ - defined(__i386__) || defined(_M_IX86) || \ - defined(__arm__) || defined(_M_ARM) || \ - (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) -#define LIBYUV_LITTLE_ENDIAN -#endif - -#endif // INCLUDE_LIBYUV_BASIC_TYPES_H_ NOLINT diff --git a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/compare.h b/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/compare.h deleted file mode 100755 index 5dfac7c86a..0000000000 --- a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/compare.h +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright 2011 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. 
All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef INCLUDE_LIBYUV_COMPARE_H_ // NOLINT -#define INCLUDE_LIBYUV_COMPARE_H_ - -#include "libyuv/basic_types.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -// Compute a hash for specified memory. Seed of 5381 recommended. -LIBYUV_API -uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed); - -// Sum Square Error - used to compute Mean Square Error or PSNR. -LIBYUV_API -uint64 ComputeSumSquareError(const uint8* src_a, - const uint8* src_b, int count); - -LIBYUV_API -uint64 ComputeSumSquareErrorPlane(const uint8* src_a, int stride_a, - const uint8* src_b, int stride_b, - int width, int height); - -static const int kMaxPsnr = 128; - -LIBYUV_API -double SumSquareErrorToPsnr(uint64 sse, uint64 count); - -LIBYUV_API -double CalcFramePsnr(const uint8* src_a, int stride_a, - const uint8* src_b, int stride_b, - int width, int height); - -LIBYUV_API -double I420Psnr(const uint8* src_y_a, int stride_y_a, - const uint8* src_u_a, int stride_u_a, - const uint8* src_v_a, int stride_v_a, - const uint8* src_y_b, int stride_y_b, - const uint8* src_u_b, int stride_u_b, - const uint8* src_v_b, int stride_v_b, - int width, int height); - -LIBYUV_API -double CalcFrameSsim(const uint8* src_a, int stride_a, - const uint8* src_b, int stride_b, - int width, int height); - -LIBYUV_API -double I420Ssim(const uint8* src_y_a, int stride_y_a, - const uint8* src_u_a, int stride_u_a, - const uint8* src_v_a, int stride_v_a, - const uint8* src_y_b, int stride_y_b, - const uint8* src_u_b, int stride_u_b, - const uint8* src_v_b, int stride_v_b, - int width, int height); - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif - -#endif // INCLUDE_LIBYUV_COMPARE_H_ NOLINT diff --git a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/convert.h b/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/convert.h deleted file mode 100755 index 
1bd45c837f..0000000000 --- a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/convert.h +++ /dev/null @@ -1,254 +0,0 @@ -/* - * Copyright 2011 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef INCLUDE_LIBYUV_CONVERT_H_ // NOLINT -#define INCLUDE_LIBYUV_CONVERT_H_ - -#include "libyuv/basic_types.h" -// TODO(fbarchard): Remove the following headers includes. -#include "libyuv/convert_from.h" -#include "libyuv/planar_functions.h" -#include "libyuv/rotate.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -// Convert I444 to I420. -LIBYUV_API -int I444ToI420(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -// Convert I422 to I420. -LIBYUV_API -int I422ToI420(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -// Convert I411 to I420. -LIBYUV_API -int I411ToI420(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -// Copy I420 to I420. 
-#define I420ToI420 I420Copy -LIBYUV_API -int I420Copy(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -// Convert I400 (grey) to I420. -LIBYUV_API -int I400ToI420(const uint8* src_y, int src_stride_y, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -// Convert NV12 to I420. -LIBYUV_API -int NV12ToI420(const uint8* src_y, int src_stride_y, - const uint8* src_uv, int src_stride_uv, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -// Convert NV21 to I420. -LIBYUV_API -int NV21ToI420(const uint8* src_y, int src_stride_y, - const uint8* src_vu, int src_stride_vu, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -// Convert YUY2 to I420. -LIBYUV_API -int YUY2ToI420(const uint8* src_yuy2, int src_stride_yuy2, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -// Convert UYVY to I420. -LIBYUV_API -int UYVYToI420(const uint8* src_uyvy, int src_stride_uyvy, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -// Convert M420 to I420. -LIBYUV_API -int M420ToI420(const uint8* src_m420, int src_stride_m420, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -// Convert Q420 to I420. 
-LIBYUV_API -int Q420ToI420(const uint8* src_y, int src_stride_y, - const uint8* src_yuy2, int src_stride_yuy2, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -// ARGB little endian (bgra in memory) to I420. -LIBYUV_API -int ARGBToI420(const uint8* src_frame, int src_stride_frame, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -// BGRA little endian (argb in memory) to I420. -LIBYUV_API -int BGRAToI420(const uint8* src_frame, int src_stride_frame, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -// ABGR little endian (rgba in memory) to I420. -LIBYUV_API -int ABGRToI420(const uint8* src_frame, int src_stride_frame, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -// RGBA little endian (abgr in memory) to I420. -LIBYUV_API -int RGBAToI420(const uint8* src_frame, int src_stride_frame, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -// RGB little endian (bgr in memory) to I420. -LIBYUV_API -int RGB24ToI420(const uint8* src_frame, int src_stride_frame, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -// RGB big endian (rgb in memory) to I420. -LIBYUV_API -int RAWToI420(const uint8* src_frame, int src_stride_frame, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -// RGB16 (RGBP fourcc) little endian to I420. 
-LIBYUV_API -int RGB565ToI420(const uint8* src_frame, int src_stride_frame, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -// RGB15 (RGBO fourcc) little endian to I420. -LIBYUV_API -int ARGB1555ToI420(const uint8* src_frame, int src_stride_frame, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -// RGB12 (R444 fourcc) little endian to I420. -LIBYUV_API -int ARGB4444ToI420(const uint8* src_frame, int src_stride_frame, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -#ifdef HAVE_JPEG -// src_width/height provided by capture. -// dst_width/height for clipping determine final size. -LIBYUV_API -int MJPGToI420(const uint8* sample, size_t sample_size, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int src_width, int src_height, - int dst_width, int dst_height); - -// Query size of MJPG in pixels. -LIBYUV_API -int MJPGSize(const uint8* sample, size_t sample_size, - int* width, int* height); -#endif - -// Note Bayer formats (BGGR) To I420 are in format_conversion.h - -// Convert camera sample to I420 with cropping, rotation and vertical flip. -// "src_size" is needed to parse MJPG. -// "dst_stride_y" number of bytes in a row of the dst_y plane. -// Normally this would be the same as dst_width, with recommended alignment -// to 16 bytes for better efficiency. -// If rotation of 90 or 270 is used, stride is affected. The caller should -// allocate the I420 buffer according to rotation. -// "dst_stride_u" number of bytes in a row of the dst_u plane. -// Normally this would be the same as (dst_width + 1) / 2, with -// recommended alignment to 16 bytes for better efficiency. -// If rotation of 90 or 270 is used, stride is affected. 
-// "crop_x" and "crop_y" are starting position for cropping. -// To center, crop_x = (src_width - dst_width) / 2 -// crop_y = (src_height - dst_height) / 2 -// "src_width" / "src_height" is size of src_frame in pixels. -// "src_height" can be negative indicating a vertically flipped image source. -// "crop_width" / "crop_height" is the size to crop the src to. -// Must be less than or equal to src_width/src_height -// Cropping parameters are pre-rotation. -// "rotation" can be 0, 90, 180 or 270. -// "format" is a fourcc. ie 'I420', 'YUY2' -// Returns 0 for successful; -1 for invalid parameter. Non-zero for failure. -LIBYUV_API -int ConvertToI420(const uint8* src_frame, size_t src_size, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int crop_x, int crop_y, - int src_width, int src_height, - int crop_width, int crop_height, - enum RotationMode rotation, - uint32 format); - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif - -#endif // INCLUDE_LIBYUV_CONVERT_H_ NOLINT diff --git a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/convert_argb.h b/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/convert_argb.h deleted file mode 100755 index a18014ca2c..0000000000 --- a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/convert_argb.h +++ /dev/null @@ -1,225 +0,0 @@ -/* - * Copyright 2012 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#ifndef INCLUDE_LIBYUV_CONVERT_ARGB_H_ // NOLINT -#define INCLUDE_LIBYUV_CONVERT_ARGB_H_ - -#include "libyuv/basic_types.h" -// TODO(fbarchard): Remove the following headers includes -#include "libyuv/convert_from.h" -#include "libyuv/planar_functions.h" -#include "libyuv/rotate.h" - -// TODO(fbarchard): This set of functions should exactly match convert.h -// Add missing Q420. -// TODO(fbarchard): Add tests. Create random content of right size and convert -// with C vs Opt and or to I420 and compare. -// TODO(fbarchard): Some of these functions lack parameter setting. - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -// Alias. -#define ARGBToARGB ARGBCopy - -// Copy ARGB to ARGB. -LIBYUV_API -int ARGBCopy(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - -// Convert I420 to ARGB. -LIBYUV_API -int I420ToARGB(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - -// Convert I422 to ARGB. -LIBYUV_API -int I422ToARGB(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - -// Convert I444 to ARGB. -LIBYUV_API -int I444ToARGB(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - -// Convert I411 to ARGB. -LIBYUV_API -int I411ToARGB(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - -// Convert I400 (grey) to ARGB. -LIBYUV_API -int I400ToARGB(const uint8* src_y, int src_stride_y, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - -// Alias. 
-#define YToARGB I400ToARGB_Reference - -// Convert I400 to ARGB. Reverse of ARGBToI400. -LIBYUV_API -int I400ToARGB_Reference(const uint8* src_y, int src_stride_y, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - -// Convert NV12 to ARGB. -LIBYUV_API -int NV12ToARGB(const uint8* src_y, int src_stride_y, - const uint8* src_uv, int src_stride_uv, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - -// Convert NV21 to ARGB. -LIBYUV_API -int NV21ToARGB(const uint8* src_y, int src_stride_y, - const uint8* src_vu, int src_stride_vu, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - -// Convert M420 to ARGB. -LIBYUV_API -int M420ToARGB(const uint8* src_m420, int src_stride_m420, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - -// TODO(fbarchard): Convert Q420 to ARGB. -// LIBYUV_API -// int Q420ToARGB(const uint8* src_y, int src_stride_y, -// const uint8* src_yuy2, int src_stride_yuy2, -// uint8* dst_argb, int dst_stride_argb, -// int width, int height); - -// Convert YUY2 to ARGB. -LIBYUV_API -int YUY2ToARGB(const uint8* src_yuy2, int src_stride_yuy2, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - -// Convert UYVY to ARGB. -LIBYUV_API -int UYVYToARGB(const uint8* src_uyvy, int src_stride_uyvy, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - -// BGRA little endian (argb in memory) to ARGB. -LIBYUV_API -int BGRAToARGB(const uint8* src_frame, int src_stride_frame, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - -// ABGR little endian (rgba in memory) to ARGB. -LIBYUV_API -int ABGRToARGB(const uint8* src_frame, int src_stride_frame, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - -// RGBA little endian (abgr in memory) to ARGB. -LIBYUV_API -int RGBAToARGB(const uint8* src_frame, int src_stride_frame, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - -// Deprecated function name. 
-#define BG24ToARGB RGB24ToARGB - -// RGB little endian (bgr in memory) to ARGB. -LIBYUV_API -int RGB24ToARGB(const uint8* src_frame, int src_stride_frame, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - -// RGB big endian (rgb in memory) to ARGB. -LIBYUV_API -int RAWToARGB(const uint8* src_frame, int src_stride_frame, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - -// RGB16 (RGBP fourcc) little endian to ARGB. -LIBYUV_API -int RGB565ToARGB(const uint8* src_frame, int src_stride_frame, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - -// RGB15 (RGBO fourcc) little endian to ARGB. -LIBYUV_API -int ARGB1555ToARGB(const uint8* src_frame, int src_stride_frame, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - -// RGB12 (R444 fourcc) little endian to ARGB. -LIBYUV_API -int ARGB4444ToARGB(const uint8* src_frame, int src_stride_frame, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - -#ifdef HAVE_JPEG -// src_width/height provided by capture -// dst_width/height for clipping determine final size. -LIBYUV_API -int MJPGToARGB(const uint8* sample, size_t sample_size, - uint8* dst_argb, int dst_stride_argb, - int src_width, int src_height, - int dst_width, int dst_height); -#endif - -// Note Bayer formats (BGGR) to ARGB are in format_conversion.h. - -// Convert camera sample to ARGB with cropping, rotation and vertical flip. -// "src_size" is needed to parse MJPG. -// "dst_stride_argb" number of bytes in a row of the dst_argb plane. -// Normally this would be the same as dst_width, with recommended alignment -// to 16 bytes for better efficiency. -// If rotation of 90 or 270 is used, stride is affected. The caller should -// allocate the I420 buffer according to rotation. -// "dst_stride_u" number of bytes in a row of the dst_u plane. -// Normally this would be the same as (dst_width + 1) / 2, with -// recommended alignment to 16 bytes for better efficiency. 
-// If rotation of 90 or 270 is used, stride is affected. -// "crop_x" and "crop_y" are starting position for cropping. -// To center, crop_x = (src_width - dst_width) / 2 -// crop_y = (src_height - dst_height) / 2 -// "src_width" / "src_height" is size of src_frame in pixels. -// "src_height" can be negative indicating a vertically flipped image source. -// "crop_width" / "crop_height" is the size to crop the src to. -// Must be less than or equal to src_width/src_height -// Cropping parameters are pre-rotation. -// "rotation" can be 0, 90, 180 or 270. -// "format" is a fourcc. ie 'I420', 'YUY2' -// Returns 0 for successful; -1 for invalid parameter. Non-zero for failure. -LIBYUV_API -int ConvertToARGB(const uint8* src_frame, size_t src_size, - uint8* dst_argb, int dst_stride_argb, - int crop_x, int crop_y, - int src_width, int src_height, - int crop_width, int crop_height, - enum RotationMode rotation, - uint32 format); - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif - -#endif // INCLUDE_LIBYUV_CONVERT_ARGB_H_ NOLINT diff --git a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/convert_from.h b/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/convert_from.h deleted file mode 100755 index b1cf57f7dc..0000000000 --- a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/convert_from.h +++ /dev/null @@ -1,173 +0,0 @@ -/* - * Copyright 2011 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#ifndef INCLUDE_LIBYUV_CONVERT_FROM_H_ // NOLINT -#define INCLUDE_LIBYUV_CONVERT_FROM_H_ - -#include "libyuv/basic_types.h" -#include "libyuv/rotate.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -// See Also convert.h for conversions from formats to I420. - -// I420Copy in convert to I420ToI420. - -LIBYUV_API -int I420ToI422(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -LIBYUV_API -int I420ToI444(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -LIBYUV_API -int I420ToI411(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -// Copy to I400. Source can be I420, I422, I444, I400, NV12 or NV21. 
-LIBYUV_API -int I400Copy(const uint8* src_y, int src_stride_y, - uint8* dst_y, int dst_stride_y, - int width, int height); - -// TODO(fbarchard): I420ToM420 -// TODO(fbarchard): I420ToQ420 - -LIBYUV_API -int I420ToNV12(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_uv, int dst_stride_uv, - int width, int height); - -LIBYUV_API -int I420ToNV21(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_vu, int dst_stride_vu, - int width, int height); - -LIBYUV_API -int I420ToYUY2(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_frame, int dst_stride_frame, - int width, int height); - -LIBYUV_API -int I420ToUYVY(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_frame, int dst_stride_frame, - int width, int height); - -LIBYUV_API -int I420ToARGB(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - -LIBYUV_API -int I420ToBGRA(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - -LIBYUV_API -int I420ToABGR(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - -LIBYUV_API -int I420ToRGBA(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_rgba, int dst_stride_rgba, - int width, int height); - -LIBYUV_API -int 
I420ToRGB24(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_frame, int dst_stride_frame, - int width, int height); - -LIBYUV_API -int I420ToRAW(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_frame, int dst_stride_frame, - int width, int height); - -LIBYUV_API -int I420ToRGB565(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_frame, int dst_stride_frame, - int width, int height); - -LIBYUV_API -int I420ToARGB1555(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_frame, int dst_stride_frame, - int width, int height); - -LIBYUV_API -int I420ToARGB4444(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_frame, int dst_stride_frame, - int width, int height); - -// Note Bayer formats (BGGR) To I420 are in format_conversion.h. - -// Convert I420 to specified format. -// "dst_sample_stride" is bytes in a row for the destination. Pass 0 if the -// buffer has contiguous rows. Can be negative. A multiple of 16 is optimal. 
-LIBYUV_API -int ConvertFromI420(const uint8* y, int y_stride, - const uint8* u, int u_stride, - const uint8* v, int v_stride, - uint8* dst_sample, int dst_sample_stride, - int width, int height, - uint32 format); - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif - -#endif // INCLUDE_LIBYUV_CONVERT_FROM_H_ NOLINT diff --git a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/convert_from_argb.h b/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/convert_from_argb.h deleted file mode 100755 index f0343a77d3..0000000000 --- a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/convert_from_argb.h +++ /dev/null @@ -1,168 +0,0 @@ -/* - * Copyright 2012 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef INCLUDE_LIBYUV_CONVERT_FROM_ARGB_H_ // NOLINT -#define INCLUDE_LIBYUV_CONVERT_FROM_ARGB_H_ - -#include "libyuv/basic_types.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -// Copy ARGB to ARGB. -#define ARGBToARGB ARGBCopy -LIBYUV_API -int ARGBCopy(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - -// Convert ARGB To BGRA. (alias) -#define ARGBToBGRA BGRAToARGB -LIBYUV_API -int BGRAToARGB(const uint8* src_frame, int src_stride_frame, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - -// Convert ARGB To ABGR. (alias) -#define ARGBToABGR ABGRToARGB -LIBYUV_API -int ABGRToARGB(const uint8* src_frame, int src_stride_frame, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - -// Convert ARGB To RGBA. 
-LIBYUV_API -int ARGBToRGBA(const uint8* src_frame, int src_stride_frame, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - -// Convert ARGB To RGB24. -LIBYUV_API -int ARGBToRGB24(const uint8* src_argb, int src_stride_argb, - uint8* dst_rgb24, int dst_stride_rgb24, - int width, int height); - -// Convert ARGB To RAW. -LIBYUV_API -int ARGBToRAW(const uint8* src_argb, int src_stride_argb, - uint8* dst_rgb, int dst_stride_rgb, - int width, int height); - -// Convert ARGB To RGB565. -LIBYUV_API -int ARGBToRGB565(const uint8* src_argb, int src_stride_argb, - uint8* dst_rgb565, int dst_stride_rgb565, - int width, int height); - -// Convert ARGB To ARGB1555. -LIBYUV_API -int ARGBToARGB1555(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb1555, int dst_stride_argb1555, - int width, int height); - -// Convert ARGB To ARGB4444. -LIBYUV_API -int ARGBToARGB4444(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb4444, int dst_stride_argb4444, - int width, int height); - -// Convert ARGB To I444. -LIBYUV_API -int ARGBToI444(const uint8* src_argb, int src_stride_argb, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -// Convert ARGB To I422. -LIBYUV_API -int ARGBToI422(const uint8* src_argb, int src_stride_argb, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -// Convert ARGB To I420. (also in convert.h) -LIBYUV_API -int ARGBToI420(const uint8* src_argb, int src_stride_argb, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -// Convert ARGB to J420. (JPeg full range I420). -LIBYUV_API -int ARGBToJ420(const uint8* src_argb, int src_stride_argb, - uint8* dst_yj, int dst_stride_yj, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -// Convert ARGB To I411. 
-LIBYUV_API -int ARGBToI411(const uint8* src_argb, int src_stride_argb, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -// Convert ARGB to J400. (JPeg full range). -LIBYUV_API -int ARGBToJ400(const uint8* src_argb, int src_stride_argb, - uint8* dst_yj, int dst_stride_yj, - int width, int height); - -// Convert ARGB to I400. -LIBYUV_API -int ARGBToI400(const uint8* src_argb, int src_stride_argb, - uint8* dst_y, int dst_stride_y, - int width, int height); - -// Convert ARGB To NV12. -LIBYUV_API -int ARGBToNV12(const uint8* src_argb, int src_stride_argb, - uint8* dst_y, int dst_stride_y, - uint8* dst_uv, int dst_stride_uv, - int width, int height); - -// Convert ARGB To NV21. -LIBYUV_API -int ARGBToNV21(const uint8* src_argb, int src_stride_argb, - uint8* dst_y, int dst_stride_y, - uint8* dst_vu, int dst_stride_vu, - int width, int height); - -// Convert ARGB To NV21. -LIBYUV_API -int ARGBToNV21(const uint8* src_argb, int src_stride_argb, - uint8* dst_y, int dst_stride_y, - uint8* dst_vu, int dst_stride_vu, - int width, int height); - -// Convert ARGB To YUY2. -LIBYUV_API -int ARGBToYUY2(const uint8* src_argb, int src_stride_argb, - uint8* dst_yuy2, int dst_stride_yuy2, - int width, int height); - -// Convert ARGB To UYVY. -LIBYUV_API -int ARGBToUYVY(const uint8* src_argb, int src_stride_argb, - uint8* dst_uyvy, int dst_stride_uyvy, - int width, int height); - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif - -#endif // INCLUDE_LIBYUV_CONVERT_FROM_ARGB_H_ NOLINT diff --git a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/cpu_id.h b/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/cpu_id.h deleted file mode 100755 index dc858a814a..0000000000 --- a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/cpu_id.h +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Copyright 2011 The LibYuv Project Authors. All rights reserved. 
- * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef INCLUDE_LIBYUV_CPU_ID_H_ // NOLINT -#define INCLUDE_LIBYUV_CPU_ID_H_ - -#include "libyuv/basic_types.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -// TODO(fbarchard): Consider overlapping bits for different architectures. -// Internal flag to indicate cpuid requires initialization. -#define kCpuInit 0x1 - -// These flags are only valid on ARM processors. -static const int kCpuHasARM = 0x2; -static const int kCpuHasNEON = 0x4; -// 0x8 reserved for future ARM flag. - -// These flags are only valid on x86 processors. -static const int kCpuHasX86 = 0x10; -static const int kCpuHasSSE2 = 0x20; -static const int kCpuHasSSSE3 = 0x40; -static const int kCpuHasSSE41 = 0x80; -static const int kCpuHasSSE42 = 0x100; -static const int kCpuHasAVX = 0x200; -static const int kCpuHasAVX2 = 0x400; -static const int kCpuHasERMS = 0x800; -static const int kCpuHasFMA3 = 0x1000; -// 0x2000, 0x4000, 0x8000 reserved for future X86 flags. - -// These flags are only valid on MIPS processors. -static const int kCpuHasMIPS = 0x10000; -static const int kCpuHasMIPS_DSP = 0x20000; -static const int kCpuHasMIPS_DSPR2 = 0x40000; - -// Internal function used to auto-init. -LIBYUV_API -int InitCpuFlags(void); - -// Internal function for parsing /proc/cpuinfo. -LIBYUV_API -int ArmCpuCaps(const char* cpuinfo_name); - -// Detect CPU has SSE2 etc. -// Test_flag parameter should be one of kCpuHas constants above. -// returns non-zero if instruction set is detected -static __inline int TestCpuFlag(int test_flag) { - LIBYUV_API extern int cpu_info_; - return (cpu_info_ == kCpuInit ? 
InitCpuFlags() : cpu_info_) & test_flag; -} - -// For testing, allow CPU flags to be disabled. -// ie MaskCpuFlags(~kCpuHasSSSE3) to disable SSSE3. -// MaskCpuFlags(-1) to enable all cpu specific optimizations. -// MaskCpuFlags(0) to disable all cpu specific optimizations. -LIBYUV_API -void MaskCpuFlags(int enable_flags); - -// Low level cpuid for X86. Returns zeros on other CPUs. -// eax is the info type that you want. -// ecx is typically the cpu number, and should normally be zero. -LIBYUV_API -void CpuId(uint32 eax, uint32 ecx, uint32* cpu_info); - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif - -#endif // INCLUDE_LIBYUV_CPU_ID_H_ NOLINT diff --git a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/format_conversion.h b/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/format_conversion.h deleted file mode 100755 index b18bf05343..0000000000 --- a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/format_conversion.h +++ /dev/null @@ -1,168 +0,0 @@ -/* - * Copyright 2011 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef INCLUDE_LIBYUV_FORMATCONVERSION_H_ // NOLINT -#define INCLUDE_LIBYUV_FORMATCONVERSION_H_ - -#include "libyuv/basic_types.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -// Convert Bayer RGB formats to I420. 
-LIBYUV_API -int BayerBGGRToI420(const uint8* src_bayer, int src_stride_bayer, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -LIBYUV_API -int BayerGBRGToI420(const uint8* src_bayer, int src_stride_bayer, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -LIBYUV_API -int BayerGRBGToI420(const uint8* src_bayer, int src_stride_bayer, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -LIBYUV_API -int BayerRGGBToI420(const uint8* src_bayer, int src_stride_bayer, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -// Temporary API mapper. -#define BayerRGBToI420(b, bs, f, y, ys, u, us, v, vs, w, h) \ - BayerToI420(b, bs, y, ys, u, us, v, vs, w, h, f) - -LIBYUV_API -int BayerToI420(const uint8* src_bayer, int src_stride_bayer, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height, - uint32 src_fourcc_bayer); - -// Convert I420 to Bayer RGB formats. 
-LIBYUV_API -int I420ToBayerBGGR(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_frame, int dst_stride_frame, - int width, int height); - -LIBYUV_API -int I420ToBayerGBRG(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_frame, int dst_stride_frame, - int width, int height); - -LIBYUV_API -int I420ToBayerGRBG(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_frame, int dst_stride_frame, - int width, int height); - -LIBYUV_API -int I420ToBayerRGGB(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_frame, int dst_stride_frame, - int width, int height); - -// Temporary API mapper. -#define I420ToBayerRGB(y, ys, u, us, v, vs, b, bs, f, w, h) \ - I420ToBayer(y, ys, u, us, v, vs, b, bs, w, h, f) - -LIBYUV_API -int I420ToBayer(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_frame, int dst_stride_frame, - int width, int height, - uint32 dst_fourcc_bayer); - -// Convert Bayer RGB formats to ARGB. -LIBYUV_API -int BayerBGGRToARGB(const uint8* src_bayer, int src_stride_bayer, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - -LIBYUV_API -int BayerGBRGToARGB(const uint8* src_bayer, int src_stride_bayer, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - -LIBYUV_API -int BayerGRBGToARGB(const uint8* src_bayer, int src_stride_bayer, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - -LIBYUV_API -int BayerRGGBToARGB(const uint8* src_bayer, int src_stride_bayer, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - -// Temporary API mapper. 
-#define BayerRGBToARGB(b, bs, f, a, as, w, h) BayerToARGB(b, bs, a, as, w, h, f) - -LIBYUV_API -int BayerToARGB(const uint8* src_bayer, int src_stride_bayer, - uint8* dst_argb, int dst_stride_argb, - int width, int height, - uint32 src_fourcc_bayer); - -// Converts ARGB to Bayer RGB formats. -LIBYUV_API -int ARGBToBayerBGGR(const uint8* src_argb, int src_stride_argb, - uint8* dst_bayer, int dst_stride_bayer, - int width, int height); - -LIBYUV_API -int ARGBToBayerGBRG(const uint8* src_argb, int src_stride_argb, - uint8* dst_bayer, int dst_stride_bayer, - int width, int height); - -LIBYUV_API -int ARGBToBayerGRBG(const uint8* src_argb, int src_stride_argb, - uint8* dst_bayer, int dst_stride_bayer, - int width, int height); - -LIBYUV_API -int ARGBToBayerRGGB(const uint8* src_argb, int src_stride_argb, - uint8* dst_bayer, int dst_stride_bayer, - int width, int height); - -// Temporary API mapper. -#define ARGBToBayerRGB(a, as, b, bs, f, w, h) ARGBToBayer(b, bs, a, as, w, h, f) - -LIBYUV_API -int ARGBToBayer(const uint8* src_argb, int src_stride_argb, - uint8* dst_bayer, int dst_stride_bayer, - int width, int height, - uint32 dst_fourcc_bayer); - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif - -#endif // INCLUDE_LIBYUV_FORMATCONVERSION_H_ NOLINT diff --git a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/mjpeg_decoder.h b/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/mjpeg_decoder.h deleted file mode 100755 index faffaea8fa..0000000000 --- a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/mjpeg_decoder.h +++ /dev/null @@ -1,201 +0,0 @@ -/* - * Copyright 2012 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#ifndef INCLUDE_LIBYUV_MJPEG_DECODER_H_ // NOLINT -#define INCLUDE_LIBYUV_MJPEG_DECODER_H_ - -#include "libyuv/basic_types.h" - -#ifdef __cplusplus -// NOTE: For a simplified public API use convert.h MJPGToI420(). - -struct jpeg_common_struct; -struct jpeg_decompress_struct; -struct jpeg_source_mgr; - -namespace libyuv { - -#ifdef __cplusplus -extern "C" { -#endif - -LIBYUV_BOOL ValidateJpeg(const uint8* sample, size_t sample_size); - -#ifdef __cplusplus -} // extern "C" -#endif - -static const uint32 kUnknownDataSize = 0xFFFFFFFF; - -enum JpegSubsamplingType { - kJpegYuv420, - kJpegYuv422, - kJpegYuv411, - kJpegYuv444, - kJpegYuv400, - kJpegUnknown -}; - -struct SetJmpErrorMgr; - -// MJPEG ("Motion JPEG") is a pseudo-standard video codec where the frames are -// simply independent JPEG images with a fixed huffman table (which is omitted). -// It is rarely used in video transmission, but is common as a camera capture -// format, especially in Logitech devices. This class implements a decoder for -// MJPEG frames. -// -// See http://tools.ietf.org/html/rfc2435 -class LIBYUV_API MJpegDecoder { - public: - typedef void (*CallbackFunction)(void* opaque, - const uint8* const* data, - const int* strides, - int rows); - - static const int kColorSpaceUnknown; - static const int kColorSpaceGrayscale; - static const int kColorSpaceRgb; - static const int kColorSpaceYCbCr; - static const int kColorSpaceCMYK; - static const int kColorSpaceYCCK; - - MJpegDecoder(); - ~MJpegDecoder(); - - // Loads a new frame, reads its headers, and determines the uncompressed - // image format. - // Returns LIBYUV_TRUE if image looks valid and format is supported. - // If return value is LIBYUV_TRUE, then the values for all the following - // getters are populated. - // src_len is the size of the compressed mjpeg frame in bytes. - LIBYUV_BOOL LoadFrame(const uint8* src, size_t src_len); - - // Returns width of the last loaded frame in pixels. 
- int GetWidth(); - - // Returns height of the last loaded frame in pixels. - int GetHeight(); - - // Returns format of the last loaded frame. The return value is one of the - // kColorSpace* constants. - int GetColorSpace(); - - // Number of color components in the color space. - int GetNumComponents(); - - // Sample factors of the n-th component. - int GetHorizSampFactor(int component); - - int GetVertSampFactor(int component); - - int GetHorizSubSampFactor(int component); - - int GetVertSubSampFactor(int component); - - // Public for testability. - int GetImageScanlinesPerImcuRow(); - - // Public for testability. - int GetComponentScanlinesPerImcuRow(int component); - - // Width of a component in bytes. - int GetComponentWidth(int component); - - // Height of a component. - int GetComponentHeight(int component); - - // Width of a component in bytes with padding for DCTSIZE. Public for testing. - int GetComponentStride(int component); - - // Size of a component in bytes. - int GetComponentSize(int component); - - // Call this after LoadFrame() if you decide you don't want to decode it - // after all. - LIBYUV_BOOL UnloadFrame(); - - // Decodes the entire image into a one-buffer-per-color-component format. - // dst_width must match exactly. dst_height must be <= to image height; if - // less, the image is cropped. "planes" must have size equal to at least - // GetNumComponents() and they must point to non-overlapping buffers of size - // at least GetComponentSize(i). The pointers in planes are incremented - // to point to after the end of the written data. - // TODO(fbarchard): Add dst_x, dst_y to allow specific rect to be decoded. - LIBYUV_BOOL DecodeToBuffers(uint8** planes, int dst_width, int dst_height); - - // Decodes the entire image and passes the data via repeated calls to a - // callback function. Each call will get the data for a whole number of - // image scanlines. - // TODO(fbarchard): Add dst_x, dst_y to allow specific rect to be decoded. 
- LIBYUV_BOOL DecodeToCallback(CallbackFunction fn, void* opaque, - int dst_width, int dst_height); - - // The helper function which recognizes the jpeg sub-sampling type. - static JpegSubsamplingType JpegSubsamplingTypeHelper( - int* subsample_x, int* subsample_y, int number_of_components); - - private: - struct Buffer { - const uint8* data; - int len; - }; - - struct BufferVector { - Buffer* buffers; - int len; - int pos; - }; - - // Methods that are passed to jpeglib. - static int fill_input_buffer(jpeg_decompress_struct* cinfo); - static void init_source(jpeg_decompress_struct* cinfo); - static void skip_input_data(jpeg_decompress_struct* cinfo, - long num_bytes); // NOLINT - static void term_source(jpeg_decompress_struct* cinfo); - - static void ErrorHandler(jpeg_common_struct* cinfo); - - void AllocOutputBuffers(int num_outbufs); - void DestroyOutputBuffers(); - - LIBYUV_BOOL StartDecode(); - LIBYUV_BOOL FinishDecode(); - - void SetScanlinePointers(uint8** data); - LIBYUV_BOOL DecodeImcuRow(); - - int GetComponentScanlinePadding(int component); - - // A buffer holding the input data for a frame. - Buffer buf_; - BufferVector buf_vec_; - - jpeg_decompress_struct* decompress_struct_; - jpeg_source_mgr* source_mgr_; - SetJmpErrorMgr* error_mgr_; - - // LIBYUV_TRUE iff at least one component has scanline padding. (i.e., - // GetComponentScanlinePadding() != 0.) - LIBYUV_BOOL has_scanline_padding_; - - // Temporaries used to point to scanline outputs. - int num_outbufs_; // Outermost size of all arrays below. - uint8*** scanlines_; - int* scanlines_sizes_; - // Temporary buffer used for decoding when we can't decode directly to the - // output buffers. Large enough for just one iMCU row. 
- uint8** databuf_; - int* databuf_strides_; -}; - -} // namespace libyuv - -#endif // __cplusplus -#endif // INCLUDE_LIBYUV_MJPEG_DECODER_H_ NOLINT diff --git a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/planar_functions.h b/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/planar_functions.h deleted file mode 100755 index ac516c5ba5..0000000000 --- a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/planar_functions.h +++ /dev/null @@ -1,434 +0,0 @@ -/* - * Copyright 2011 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef INCLUDE_LIBYUV_PLANAR_FUNCTIONS_H_ // NOLINT -#define INCLUDE_LIBYUV_PLANAR_FUNCTIONS_H_ - -#include "libyuv/basic_types.h" - -// TODO(fbarchard): Remove the following headers includes. -#include "libyuv/convert.h" -#include "libyuv/convert_argb.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -// Copy a plane of data. -LIBYUV_API -void CopyPlane(const uint8* src_y, int src_stride_y, - uint8* dst_y, int dst_stride_y, - int width, int height); - -// Set a plane of data to a 32 bit value. -LIBYUV_API -void SetPlane(uint8* dst_y, int dst_stride_y, - int width, int height, - uint32 value); - -// Copy I400. Supports inverting. -LIBYUV_API -int I400ToI400(const uint8* src_y, int src_stride_y, - uint8* dst_y, int dst_stride_y, - int width, int height); - - -// Copy I422 to I422. 
-#define I422ToI422 I422Copy -LIBYUV_API -int I422Copy(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -// Copy I444 to I444. -#define I444ToI444 I444Copy -LIBYUV_API -int I444Copy(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -// Convert YUY2 to I422. -LIBYUV_API -int YUY2ToI422(const uint8* src_yuy2, int src_stride_yuy2, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -// Convert UYVY to I422. -LIBYUV_API -int UYVYToI422(const uint8* src_uyvy, int src_stride_uyvy, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -// Convert I420 to I400. (calls CopyPlane ignoring u/v). -LIBYUV_API -int I420ToI400(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - int width, int height); - -// Alias -#define I420ToI420Mirror I420Mirror - -// I420 mirror. -LIBYUV_API -int I420Mirror(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height); - -// Alias -#define I400ToI400Mirror I400Mirror - -// I400 mirror. A single plane is mirrored horizontally. -// Pass negative height to achieve 180 degree rotation. 
-LIBYUV_API -int I400Mirror(const uint8* src_y, int src_stride_y, - uint8* dst_y, int dst_stride_y, - int width, int height); - -// Alias -#define ARGBToARGBMirror ARGBMirror - -// ARGB mirror. -LIBYUV_API -int ARGBMirror(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - -// Convert NV12 to RGB565. -LIBYUV_API -int NV12ToRGB565(const uint8* src_y, int src_stride_y, - const uint8* src_uv, int src_stride_uv, - uint8* dst_rgb565, int dst_stride_rgb565, - int width, int height); - -// Convert NV21 to RGB565. -LIBYUV_API -int NV21ToRGB565(const uint8* src_y, int src_stride_y, - const uint8* src_uv, int src_stride_uv, - uint8* dst_rgb565, int dst_stride_rgb565, - int width, int height); - -// I422ToARGB is in convert_argb.h -// Convert I422 to BGRA. -LIBYUV_API -int I422ToBGRA(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_bgra, int dst_stride_bgra, - int width, int height); - -// Convert I422 to ABGR. -LIBYUV_API -int I422ToABGR(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_abgr, int dst_stride_abgr, - int width, int height); - -// Convert I422 to RGBA. -LIBYUV_API -int I422ToRGBA(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_rgba, int dst_stride_rgba, - int width, int height); - -// Draw a rectangle into I420. -LIBYUV_API -int I420Rect(uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int x, int y, int width, int height, - int value_y, int value_u, int value_v); - -// Draw a rectangle into ARGB. -LIBYUV_API -int ARGBRect(uint8* dst_argb, int dst_stride_argb, - int x, int y, int width, int height, uint32 value); - -// Convert ARGB to gray scale ARGB. 
-LIBYUV_API -int ARGBGrayTo(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - -// Make a rectangle of ARGB gray scale. -LIBYUV_API -int ARGBGray(uint8* dst_argb, int dst_stride_argb, - int x, int y, int width, int height); - -// Make a rectangle of ARGB Sepia tone. -LIBYUV_API -int ARGBSepia(uint8* dst_argb, int dst_stride_argb, - int x, int y, int width, int height); - -// Apply a matrix rotation to each ARGB pixel. -// matrix_argb is 4 signed ARGB values. -128 to 127 representing -2 to 2. -// The first 4 coefficients apply to B, G, R, A and produce B of the output. -// The next 4 coefficients apply to B, G, R, A and produce G of the output. -// The next 4 coefficients apply to B, G, R, A and produce R of the output. -// The last 4 coefficients apply to B, G, R, A and produce A of the output. -LIBYUV_API -int ARGBColorMatrix(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, - const int8* matrix_argb, - int width, int height); - -// Deprecated. Use ARGBColorMatrix instead. -// Apply a matrix rotation to each ARGB pixel. -// matrix_argb is 3 signed ARGB values. -128 to 127 representing -1 to 1. -// The first 4 coefficients apply to B, G, R, A and produce B of the output. -// The next 4 coefficients apply to B, G, R, A and produce G of the output. -// The last 4 coefficients apply to B, G, R, A and produce R of the output. -LIBYUV_API -int RGBColorMatrix(uint8* dst_argb, int dst_stride_argb, - const int8* matrix_rgb, - int x, int y, int width, int height); - -// Apply a color table each ARGB pixel. -// Table contains 256 ARGB values. -LIBYUV_API -int ARGBColorTable(uint8* dst_argb, int dst_stride_argb, - const uint8* table_argb, - int x, int y, int width, int height); - -// Apply a color table each ARGB pixel but preserve destination alpha. -// Table contains 256 ARGB values. 
-LIBYUV_API -int RGBColorTable(uint8* dst_argb, int dst_stride_argb, - const uint8* table_argb, - int x, int y, int width, int height); - -// Apply a luma/color table each ARGB pixel but preserve destination alpha. -// Table contains 32768 values indexed by [Y][C] where 7 it 7 bit luma from -// RGB (YJ style) and C is an 8 bit color component (R, G or B). -LIBYUV_API -int ARGBLumaColorTable(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, - const uint8* luma_rgb_table, - int width, int height); - -// Apply a 3 term polynomial to ARGB values. -// poly points to a 4x4 matrix. The first row is constants. The 2nd row is -// coefficients for b, g, r and a. The 3rd row is coefficients for b squared, -// g squared, r squared and a squared. The 4rd row is coefficients for b to -// the 3, g to the 3, r to the 3 and a to the 3. The values are summed and -// result clamped to 0 to 255. -// A polynomial approximation can be dirived using software such as 'R'. - -LIBYUV_API -int ARGBPolynomial(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, - const float* poly, - int width, int height); - -// Quantize a rectangle of ARGB. Alpha unaffected. -// scale is a 16 bit fractional fixed point scaler between 0 and 65535. -// interval_size should be a value between 1 and 255. -// interval_offset should be a value between 0 and 255. -LIBYUV_API -int ARGBQuantize(uint8* dst_argb, int dst_stride_argb, - int scale, int interval_size, int interval_offset, - int x, int y, int width, int height); - -// Copy ARGB to ARGB. -LIBYUV_API -int ARGBCopy(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - -// Copy ARGB to ARGB. -LIBYUV_API -int ARGBCopyAlpha(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - -// Copy ARGB to ARGB. 
-LIBYUV_API -int ARGBCopyYToAlpha(const uint8* src_y, int src_stride_y, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - -typedef void (*ARGBBlendRow)(const uint8* src_argb0, const uint8* src_argb1, - uint8* dst_argb, int width); - -// Get function to Alpha Blend ARGB pixels and store to destination. -LIBYUV_API -ARGBBlendRow GetARGBBlend(); - -// Alpha Blend ARGB images and store to destination. -// Alpha of destination is set to 255. -LIBYUV_API -int ARGBBlend(const uint8* src_argb0, int src_stride_argb0, - const uint8* src_argb1, int src_stride_argb1, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - -// Multiply ARGB image by ARGB image. Shifted down by 8. Saturates to 255. -LIBYUV_API -int ARGBMultiply(const uint8* src_argb0, int src_stride_argb0, - const uint8* src_argb1, int src_stride_argb1, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - -// Add ARGB image with ARGB image. Saturates to 255. -LIBYUV_API -int ARGBAdd(const uint8* src_argb0, int src_stride_argb0, - const uint8* src_argb1, int src_stride_argb1, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - -// Subtract ARGB image (argb1) from ARGB image (argb0). Saturates to 0. -LIBYUV_API -int ARGBSubtract(const uint8* src_argb0, int src_stride_argb0, - const uint8* src_argb1, int src_stride_argb1, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - -// Convert I422 to YUY2. -LIBYUV_API -int I422ToYUY2(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_frame, int dst_stride_frame, - int width, int height); - -// Convert I422 to UYVY. -LIBYUV_API -int I422ToUYVY(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_frame, int dst_stride_frame, - int width, int height); - -// Convert unattentuated ARGB to preattenuated ARGB. 
-LIBYUV_API -int ARGBAttenuate(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - -// Convert preattentuated ARGB to unattenuated ARGB. -LIBYUV_API -int ARGBUnattenuate(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - -// Convert MJPG to ARGB. -LIBYUV_API -int MJPGToARGB(const uint8* sample, size_t sample_size, - uint8* argb, int argb_stride, - int w, int h, int dw, int dh); - -// Internal function - do not call directly. -// Computes table of cumulative sum for image where the value is the sum -// of all values above and to the left of the entry. Used by ARGBBlur. -LIBYUV_API -int ARGBComputeCumulativeSum(const uint8* src_argb, int src_stride_argb, - int32* dst_cumsum, int dst_stride32_cumsum, - int width, int height); - -// Blur ARGB image. -// dst_cumsum table of width * (height + 1) * 16 bytes aligned to -// 16 byte boundary. -// dst_stride32_cumsum is number of ints in a row (width * 4). -// radius is number of pixels around the center. e.g. 1 = 3x3. 2=5x5. -// Blur is optimized for radius of 5 (11x11) or less. -LIBYUV_API -int ARGBBlur(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, - int32* dst_cumsum, int dst_stride32_cumsum, - int width, int height, int radius); - -// Multiply ARGB image by ARGB value. -LIBYUV_API -int ARGBShade(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, - int width, int height, uint32 value); - -// Interpolate between two ARGB images using specified amount of interpolation -// (0 to 255) and store to destination. -// 'interpolation' is specified as 8 bit fraction where 0 means 100% src_argb0 -// and 255 means 1% src_argb0 and 99% src_argb1. -// Internally uses ARGBScale bilinear filtering. -// Caveat: This function will write up to 16 bytes beyond the end of dst_argb. 
-LIBYUV_API -int ARGBInterpolate(const uint8* src_argb0, int src_stride_argb0, - const uint8* src_argb1, int src_stride_argb1, - uint8* dst_argb, int dst_stride_argb, - int width, int height, int interpolation); - -#if defined(__pnacl__) || defined(__CLR_VER) || defined(COVERAGE_ENABLED) || \ - defined(TARGET_IPHONE_SIMULATOR) -#define LIBYUV_DISABLE_X86 -#endif - -// Row functions for copying a pixels from a source with a slope to a row -// of destination. Useful for scaling, rotation, mirror, texture mapping. -LIBYUV_API -void ARGBAffineRow_C(const uint8* src_argb, int src_argb_stride, - uint8* dst_argb, const float* uv_dudv, int width); -// The following are available on all x86 platforms: -#if !defined(LIBYUV_DISABLE_X86) && \ - (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) -LIBYUV_API -void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride, - uint8* dst_argb, const float* uv_dudv, int width); -#define HAS_ARGBAFFINEROW_SSE2 -#endif // LIBYUV_DISABLE_X86 - -// Shuffle ARGB channel order. e.g. BGRA to ARGB. -// shuffler is 16 bytes and must be aligned. -LIBYUV_API -int ARGBShuffle(const uint8* src_bgra, int src_stride_bgra, - uint8* dst_argb, int dst_stride_argb, - const uint8* shuffler, int width, int height); - -// Sobel ARGB effect with planar output. -LIBYUV_API -int ARGBSobelToPlane(const uint8* src_argb, int src_stride_argb, - uint8* dst_y, int dst_stride_y, - int width, int height); - -// Sobel ARGB effect. -LIBYUV_API -int ARGBSobel(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - -// Sobel ARGB effect w/ Sobel X, Sobel, Sobel Y in ARGB. 
-LIBYUV_API -int ARGBSobelXY(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, - int width, int height); - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif - -#endif // INCLUDE_LIBYUV_PLANAR_FUNCTIONS_H_ NOLINT diff --git a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/rotate.h b/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/rotate.h deleted file mode 100755 index 8af60b8955..0000000000 --- a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/rotate.h +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Copyright 2011 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef INCLUDE_LIBYUV_ROTATE_H_ // NOLINT -#define INCLUDE_LIBYUV_ROTATE_H_ - -#include "libyuv/basic_types.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -// Supported rotation. -typedef enum RotationMode { - kRotate0 = 0, // No rotation. - kRotate90 = 90, // Rotate 90 degrees clockwise. - kRotate180 = 180, // Rotate 180 degrees. - kRotate270 = 270, // Rotate 270 degrees clockwise. - - // Deprecated. - kRotateNone = 0, - kRotateClockwise = 90, - kRotateCounterClockwise = 270, -} RotationModeEnum; - -// Rotate I420 frame. -LIBYUV_API -int I420Rotate(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int src_width, int src_height, enum RotationMode mode); - -// Rotate NV12 input and store in I420. 
-LIBYUV_API -int NV12ToI420Rotate(const uint8* src_y, int src_stride_y, - const uint8* src_uv, int src_stride_uv, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int src_width, int src_height, enum RotationMode mode); - -// Rotate a plane by 0, 90, 180, or 270. -LIBYUV_API -int RotatePlane(const uint8* src, int src_stride, - uint8* dst, int dst_stride, - int src_width, int src_height, enum RotationMode mode); - -// Rotate planes by 90, 180, 270. Deprecated. -LIBYUV_API -void RotatePlane90(const uint8* src, int src_stride, - uint8* dst, int dst_stride, - int width, int height); - -LIBYUV_API -void RotatePlane180(const uint8* src, int src_stride, - uint8* dst, int dst_stride, - int width, int height); - -LIBYUV_API -void RotatePlane270(const uint8* src, int src_stride, - uint8* dst, int dst_stride, - int width, int height); - -LIBYUV_API -void RotateUV90(const uint8* src, int src_stride, - uint8* dst_a, int dst_stride_a, - uint8* dst_b, int dst_stride_b, - int width, int height); - -// Rotations for when U and V are interleaved. -// These functions take one input pointer and -// split the data into two buffers while -// rotating them. Deprecated. -LIBYUV_API -void RotateUV180(const uint8* src, int src_stride, - uint8* dst_a, int dst_stride_a, - uint8* dst_b, int dst_stride_b, - int width, int height); - -LIBYUV_API -void RotateUV270(const uint8* src, int src_stride, - uint8* dst_a, int dst_stride_a, - uint8* dst_b, int dst_stride_b, - int width, int height); - -// The 90 and 270 functions are based on transposes. -// Doing a transpose with reversing the read/write -// order will result in a rotation by +- 90 degrees. -// Deprecated. 
-LIBYUV_API -void TransposePlane(const uint8* src, int src_stride, - uint8* dst, int dst_stride, - int width, int height); - -LIBYUV_API -void TransposeUV(const uint8* src, int src_stride, - uint8* dst_a, int dst_stride_a, - uint8* dst_b, int dst_stride_b, - int width, int height); - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif - -#endif // INCLUDE_LIBYUV_ROTATE_H_ NOLINT diff --git a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/rotate_argb.h b/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/rotate_argb.h deleted file mode 100755 index 660ff5573e..0000000000 --- a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/rotate_argb.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright 2012 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef INCLUDE_LIBYUV_ROTATE_ARGB_H_ // NOLINT -#define INCLUDE_LIBYUV_ROTATE_ARGB_H_ - -#include "libyuv/basic_types.h" -#include "libyuv/rotate.h" // For RotationMode. - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -// Rotate ARGB frame -LIBYUV_API -int ARGBRotate(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, - int src_width, int src_height, enum RotationMode mode); - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif - -#endif // INCLUDE_LIBYUV_ROTATE_ARGB_H_ NOLINT diff --git a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/row.h b/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/row.h deleted file mode 100755 index 757020da86..0000000000 --- a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/row.h +++ /dev/null @@ -1,1694 +0,0 @@ -/* - * Copyright 2011 The LibYuv Project Authors. 
All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef INCLUDE_LIBYUV_ROW_H_ // NOLINT -#define INCLUDE_LIBYUV_ROW_H_ - -#include <stdlib.h> // For malloc. - -#include "libyuv/basic_types.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -#define IS_ALIGNED(p, a) (!((uintptr_t)(p) & ((a) - 1))) - -#ifdef __cplusplus -#define align_buffer_64(var, size) \ - uint8* var##_mem = reinterpret_cast<uint8*>(malloc((size) + 63)); \ - uint8* var = reinterpret_cast<uint8*> \ - ((reinterpret_cast<intptr_t>(var##_mem) + 63) & ~63) -#else -#define align_buffer_64(var, size) \ - uint8* var##_mem = (uint8*)(malloc((size) + 63)); /* NOLINT */ \ - uint8* var = (uint8*)(((intptr_t)(var##_mem) + 63) & ~63) /* NOLINT */ -#endif - -#define free_aligned_buffer_64(var) \ - free(var##_mem); \ - var = 0 - -#if defined(__pnacl__) || defined(__CLR_VER) || defined(COVERAGE_ENABLED) || \ - defined(TARGET_IPHONE_SIMULATOR) -#define LIBYUV_DISABLE_X86 -#endif -// True if compiling for SSSE3 as a requirement. -#if defined(__SSSE3__) || (defined(_M_IX86_FP) && (_M_IX86_FP >= 3)) -#define LIBYUV_SSSE3_ONLY -#endif - -// Enable for NaCL pepper 33 for bundle and AVX2 support. 
-// #define NEW_BINUTILS - -// The following are available on all x86 platforms: -#if !defined(LIBYUV_DISABLE_X86) && \ - (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) -// Effects: -#define HAS_ARGBADDROW_SSE2 -#define HAS_ARGBAFFINEROW_SSE2 -#define HAS_ARGBATTENUATEROW_SSSE3 -#define HAS_ARGBBLENDROW_SSSE3 -#define HAS_ARGBCOLORMATRIXROW_SSSE3 -#define HAS_ARGBCOLORTABLEROW_X86 -#define HAS_ARGBCOPYALPHAROW_SSE2 -#define HAS_ARGBCOPYYTOALPHAROW_SSE2 -#define HAS_ARGBGRAYROW_SSSE3 -#define HAS_ARGBLUMACOLORTABLEROW_SSSE3 -#define HAS_ARGBMIRRORROW_SSSE3 -#define HAS_ARGBMULTIPLYROW_SSE2 -#define HAS_ARGBPOLYNOMIALROW_SSE2 -#define HAS_ARGBQUANTIZEROW_SSE2 -#define HAS_ARGBSEPIAROW_SSSE3 -#define HAS_ARGBSHADEROW_SSE2 -#define HAS_ARGBSUBTRACTROW_SSE2 -#define HAS_ARGBTOUVROW_SSSE3 -#define HAS_ARGBUNATTENUATEROW_SSE2 -#define HAS_COMPUTECUMULATIVESUMROW_SSE2 -#define HAS_CUMULATIVESUMTOAVERAGEROW_SSE2 -#define HAS_INTERPOLATEROW_SSE2 -#define HAS_INTERPOLATEROW_SSSE3 -#define HAS_RGBCOLORTABLEROW_X86 -#define HAS_SOBELROW_SSE2 -#define HAS_SOBELTOPLANEROW_SSE2 -#define HAS_SOBELXROW_SSE2 -#define HAS_SOBELXYROW_SSE2 -#define HAS_SOBELYROW_SSE2 - -// Conversions: -#define HAS_ABGRTOUVROW_SSSE3 -#define HAS_ABGRTOYROW_SSSE3 -#define HAS_ARGB1555TOARGBROW_SSE2 -#define HAS_ARGB4444TOARGBROW_SSE2 -#define HAS_ARGBSHUFFLEROW_SSE2 -#define HAS_ARGBSHUFFLEROW_SSSE3 -#define HAS_ARGBTOARGB1555ROW_SSE2 -#define HAS_ARGBTOARGB4444ROW_SSE2 -#define HAS_ARGBTOBAYERGGROW_SSE2 -#define HAS_ARGBTOBAYERROW_SSSE3 -#define HAS_ARGBTORAWROW_SSSE3 -#define HAS_ARGBTORGB24ROW_SSSE3 -#define HAS_ARGBTORGB565ROW_SSE2 -#define HAS_ARGBTOUV422ROW_SSSE3 -#define HAS_ARGBTOUV444ROW_SSSE3 -#define HAS_ARGBTOUVJROW_SSSE3 -#define HAS_ARGBTOYJROW_SSSE3 -#define HAS_ARGBTOYROW_SSSE3 -#define HAS_BGRATOUVROW_SSSE3 -#define HAS_BGRATOYROW_SSSE3 -#define HAS_COPYROW_ERMS -#define HAS_COPYROW_SSE2 -#define HAS_COPYROW_X86 -#define HAS_HALFROW_SSE2 -#define HAS_I400TOARGBROW_SSE2 
-#define HAS_I411TOARGBROW_SSSE3 -#define HAS_I422TOARGB1555ROW_SSSE3 -#define HAS_I422TOABGRROW_SSSE3 -#define HAS_I422TOARGB1555ROW_SSSE3 -#define HAS_I422TOARGB4444ROW_SSSE3 -#define HAS_I422TOARGBROW_SSSE3 -#define HAS_I422TOBGRAROW_SSSE3 -#define HAS_I422TORAWROW_SSSE3 -#define HAS_I422TORGB24ROW_SSSE3 -#define HAS_I422TORGB565ROW_SSSE3 -#define HAS_I422TORGBAROW_SSSE3 -#define HAS_I422TOUYVYROW_SSE2 -#define HAS_I422TOYUY2ROW_SSE2 -#define HAS_I444TOARGBROW_SSSE3 -#define HAS_MERGEUVROW_SSE2 -#define HAS_MIRRORROW_SSE2 -#define HAS_MIRRORROW_SSSE3 -#define HAS_MIRRORROW_UV_SSSE3 -#define HAS_MIRRORUVROW_SSSE3 -#define HAS_NV12TOARGBROW_SSSE3 -#define HAS_NV12TORGB565ROW_SSSE3 -#define HAS_NV21TOARGBROW_SSSE3 -#define HAS_NV21TORGB565ROW_SSSE3 -#define HAS_RAWTOARGBROW_SSSE3 -#define HAS_RAWTOYROW_SSSE3 -#define HAS_RGB24TOARGBROW_SSSE3 -#define HAS_RGB24TOYROW_SSSE3 -#define HAS_RGB565TOARGBROW_SSE2 -#define HAS_RGBATOUVROW_SSSE3 -#define HAS_RGBATOYROW_SSSE3 -#define HAS_SETROW_X86 -#define HAS_SPLITUVROW_SSE2 -#define HAS_UYVYTOARGBROW_SSSE3 -#define HAS_UYVYTOUV422ROW_SSE2 -#define HAS_UYVYTOUVROW_SSE2 -#define HAS_UYVYTOYROW_SSE2 -#define HAS_YTOARGBROW_SSE2 -#define HAS_YUY2TOARGBROW_SSSE3 -#define HAS_YUY2TOUV422ROW_SSE2 -#define HAS_YUY2TOUVROW_SSE2 -#define HAS_YUY2TOYROW_SSE2 -#endif - -// GCC >= 4.7.0 required for AVX2. -#if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__)) -#if (__GNUC__ > 4) || (__GNUC__ == 4 && (__GNUC_MINOR__ >= 7)) -#define GCC_HAS_AVX2 1 -#endif // GNUC >= 4.7 -#endif // __GNUC__ - -// clang >= 3.4.0 required for AVX2. -#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__)) -#if (__clang_major__ > 3) || (__clang_major__ == 3 && (__clang_minor__ >= 4)) -#define CLANG_HAS_AVX2 1 -#endif // clang >= 3.4 -#endif // __clang__ - -// Visual C 2012 required for AVX2. 
-#if defined(_M_IX86) && defined(_MSC_VER) && _MSC_VER >= 1700 -#define VISUALC_HAS_AVX2 1 -#endif // VisualStudio >= 2012 - -// The following are available on all x86 platforms, but -// require VS2012, clang 3.4 or gcc 4.7. -// The code supports NaCL but requires a new compiler and validator. -#if !defined(LIBYUV_DISABLE_X86) && (defined(VISUALC_HAS_AVX2) || \ - defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2)) -// Effects: -#define HAS_ARGBPOLYNOMIALROW_AVX2 -#define HAS_ARGBSHUFFLEROW_AVX2 -#define HAS_ARGBCOPYALPHAROW_AVX2 -#define HAS_ARGBCOPYYTOALPHAROW_AVX2 -#endif - -// The following are require VS2012. -// TODO(fbarchard): Port to gcc. -#if !defined(LIBYUV_DISABLE_X86) && defined(VISUALC_HAS_AVX2) -#define HAS_ARGBTOUVROW_AVX2 -#define HAS_ARGBTOYJROW_AVX2 -#define HAS_ARGBTOYROW_AVX2 -#define HAS_HALFROW_AVX2 -#define HAS_I422TOARGBROW_AVX2 -#define HAS_INTERPOLATEROW_AVX2 -#define HAS_MERGEUVROW_AVX2 -#define HAS_MIRRORROW_AVX2 -#define HAS_SPLITUVROW_AVX2 -#define HAS_UYVYTOUV422ROW_AVX2 -#define HAS_UYVYTOUVROW_AVX2 -#define HAS_UYVYTOYROW_AVX2 -#define HAS_YUY2TOUV422ROW_AVX2 -#define HAS_YUY2TOUVROW_AVX2 -#define HAS_YUY2TOYROW_AVX2 - -// Effects: -#define HAS_ARGBADDROW_AVX2 -#define HAS_ARGBATTENUATEROW_AVX2 -#define HAS_ARGBMIRRORROW_AVX2 -#define HAS_ARGBMULTIPLYROW_AVX2 -#define HAS_ARGBSUBTRACTROW_AVX2 -#define HAS_ARGBUNATTENUATEROW_AVX2 -#endif // defined(VISUALC_HAS_AVX2) - -// The following are Yasm x86 only: -// TODO(fbarchard): Port AVX2 to inline. 
-#if !defined(LIBYUV_DISABLE_X86) && defined(HAVE_YASM) - (defined(_M_IX86) || defined(_M_X64) || \ - defined(__x86_64__) || defined(__i386__)) -#define HAS_MERGEUVROW_AVX2 -#define HAS_MERGEUVROW_MMX -#define HAS_SPLITUVROW_AVX2 -#define HAS_SPLITUVROW_MMX -#define HAS_UYVYTOYROW_AVX2 -#define HAS_UYVYTOYROW_MMX -#define HAS_YUY2TOYROW_AVX2 -#define HAS_YUY2TOYROW_MMX -#endif - -// The following are disabled when SSSE3 is available: -#if !defined(LIBYUV_DISABLE_X86) && \ - (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) && \ - !defined(LIBYUV_SSSE3_ONLY) -#define HAS_ARGBBLENDROW_SSE2 -#define HAS_ARGBATTENUATEROW_SSE2 -#define HAS_MIRRORROW_SSE2 -#endif - -// The following are available on Neon platforms: -#if !defined(LIBYUV_DISABLE_NEON) && \ - (defined(__ARM_NEON__) || defined(LIBYUV_NEON)) -#define HAS_ABGRTOUVROW_NEON -#define HAS_ABGRTOYROW_NEON -#define HAS_ARGB1555TOARGBROW_NEON -#define HAS_ARGB1555TOUVROW_NEON -#define HAS_ARGB1555TOYROW_NEON -#define HAS_ARGB4444TOARGBROW_NEON -#define HAS_ARGB4444TOUVROW_NEON -#define HAS_ARGB4444TOYROW_NEON -#define HAS_ARGBTOARGB1555ROW_NEON -#define HAS_ARGBTOARGB4444ROW_NEON -#define HAS_ARGBTOBAYERROW_NEON -#define HAS_ARGBTOBAYERGGROW_NEON -#define HAS_ARGBTORAWROW_NEON -#define HAS_ARGBTORGB24ROW_NEON -#define HAS_ARGBTORGB565ROW_NEON -#define HAS_ARGBTOUV411ROW_NEON -#define HAS_ARGBTOUV422ROW_NEON -#define HAS_ARGBTOUV444ROW_NEON -#define HAS_ARGBTOUVROW_NEON -#define HAS_ARGBTOUVJROW_NEON -#define HAS_ARGBTOYROW_NEON -#define HAS_ARGBTOYJROW_NEON -#define HAS_BGRATOUVROW_NEON -#define HAS_BGRATOYROW_NEON -#define HAS_COPYROW_NEON -#define HAS_HALFROW_NEON -#define HAS_I400TOARGBROW_NEON -#define HAS_I411TOARGBROW_NEON -#define HAS_I422TOABGRROW_NEON -#define HAS_I422TOARGB1555ROW_NEON -#define HAS_I422TOARGB4444ROW_NEON -#define HAS_I422TOARGBROW_NEON -#define HAS_I422TOBGRAROW_NEON -#define HAS_I422TORAWROW_NEON -#define HAS_I422TORGB24ROW_NEON -#define HAS_I422TORGB565ROW_NEON -#define 
HAS_I422TORGBAROW_NEON -#define HAS_I422TOUYVYROW_NEON -#define HAS_I422TOYUY2ROW_NEON -#define HAS_I444TOARGBROW_NEON -#define HAS_MERGEUVROW_NEON -#define HAS_MIRRORROW_NEON -#define HAS_MIRRORUVROW_NEON -#define HAS_NV12TOARGBROW_NEON -#define HAS_NV12TORGB565ROW_NEON -#define HAS_NV21TOARGBROW_NEON -#define HAS_NV21TORGB565ROW_NEON -#define HAS_RAWTOARGBROW_NEON -#define HAS_RAWTOUVROW_NEON -#define HAS_RAWTOYROW_NEON -#define HAS_RGB24TOARGBROW_NEON -#define HAS_RGB24TOUVROW_NEON -#define HAS_RGB24TOYROW_NEON -#define HAS_RGB565TOARGBROW_NEON -#define HAS_RGB565TOUVROW_NEON -#define HAS_RGB565TOYROW_NEON -#define HAS_RGBATOUVROW_NEON -#define HAS_RGBATOYROW_NEON -#define HAS_SETROW_NEON -#define HAS_SPLITUVROW_NEON -#define HAS_UYVYTOARGBROW_NEON -#define HAS_UYVYTOUV422ROW_NEON -#define HAS_UYVYTOUVROW_NEON -#define HAS_UYVYTOYROW_NEON -#define HAS_YTOARGBROW_NEON -#define HAS_YUY2TOARGBROW_NEON -#define HAS_YUY2TOUV422ROW_NEON -#define HAS_YUY2TOUVROW_NEON -#define HAS_YUY2TOYROW_NEON - -// Effects: -#define HAS_ARGBADDROW_NEON -#define HAS_ARGBATTENUATEROW_NEON -#define HAS_ARGBBLENDROW_NEON -#define HAS_ARGBCOLORMATRIXROW_NEON -#define HAS_ARGBGRAYROW_NEON -#define HAS_ARGBMIRRORROW_NEON -#define HAS_ARGBMULTIPLYROW_NEON -#define HAS_ARGBQUANTIZEROW_NEON -#define HAS_ARGBSEPIAROW_NEON -#define HAS_ARGBSHADEROW_NEON -#define HAS_ARGBSUBTRACTROW_NEON -#define HAS_SOBELROW_NEON -#define HAS_SOBELTOPLANEROW_NEON -#define HAS_SOBELXYROW_NEON -#define HAS_SOBELXROW_NEON -#define HAS_SOBELYROW_NEON -#define HAS_INTERPOLATEROW_NEON -#endif - -// The following are available on Mips platforms: -#if !defined(LIBYUV_DISABLE_MIPS) && defined(__mips__) -#define HAS_COPYROW_MIPS -#if defined(__mips_dsp) && (__mips_dsp_rev >= 2) -#define HAS_I422TOABGRROW_MIPS_DSPR2 -#define HAS_I422TOARGBROW_MIPS_DSPR2 -#define HAS_I422TOBGRAROW_MIPS_DSPR2 -#define HAS_INTERPOLATEROWS_MIPS_DSPR2 -#define HAS_MIRRORROW_MIPS_DSPR2 -#define HAS_MIRRORUVROW_MIPS_DSPR2 -#define 
HAS_SPLITUVROW_MIPS_DSPR2 -#endif -#endif - -#if defined(_MSC_VER) && !defined(__CLR_VER) -#define SIMD_ALIGNED(var) __declspec(align(16)) var -typedef __declspec(align(16)) int16 vec16[8]; -typedef __declspec(align(16)) int32 vec32[4]; -typedef __declspec(align(16)) int8 vec8[16]; -typedef __declspec(align(16)) uint16 uvec16[8]; -typedef __declspec(align(16)) uint32 uvec32[4]; -typedef __declspec(align(16)) uint8 uvec8[16]; -typedef __declspec(align(32)) int16 lvec16[16]; -typedef __declspec(align(32)) int32 lvec32[8]; -typedef __declspec(align(32)) int8 lvec8[32]; -typedef __declspec(align(32)) uint16 ulvec16[16]; -typedef __declspec(align(32)) uint32 ulvec32[8]; -typedef __declspec(align(32)) uint8 ulvec8[32]; - -#elif defined(__GNUC__) -// Caveat GCC 4.2 to 4.7 have a known issue using vectors with const. -#define SIMD_ALIGNED(var) var __attribute__((aligned(16))) -typedef int16 __attribute__((vector_size(16))) vec16; -typedef int32 __attribute__((vector_size(16))) vec32; -typedef int8 __attribute__((vector_size(16))) vec8; -typedef uint16 __attribute__((vector_size(16))) uvec16; -typedef uint32 __attribute__((vector_size(16))) uvec32; -typedef uint8 __attribute__((vector_size(16))) uvec8; -#else -#define SIMD_ALIGNED(var) var -typedef int16 vec16[8]; -typedef int32 vec32[4]; -typedef int8 vec8[16]; -typedef uint16 uvec16[8]; -typedef uint32 uvec32[4]; -typedef uint8 uvec8[16]; -#endif - -#if defined(__APPLE__) || defined(__x86_64__) || defined(__llvm__) -#define OMITFP -#else -#define OMITFP __attribute__((optimize("omit-frame-pointer"))) -#endif - -// NaCL macros for GCC x86 and x64. - -// TODO(nfullagar): When pepper_33 toolchain is distributed, default to -// NEW_BINUTILS and remove all BUNDLEALIGN occurances. 
-#if defined(__native_client__) -#define LABELALIGN ".p2align 5\n" -#else -#define LABELALIGN ".p2align 2\n" -#endif -#if defined(__native_client__) && defined(__x86_64__) -#if defined(NEW_BINUTILS) -#define BUNDLELOCK ".bundle_lock\n" -#define BUNDLEUNLOCK ".bundle_unlock\n" -#define BUNDLEALIGN "\n" -#else -#define BUNDLELOCK "\n" -#define BUNDLEUNLOCK "\n" -#define BUNDLEALIGN ".p2align 5\n" -#endif -#define MEMACCESS(base) "%%nacl:(%%r15,%q" #base ")" -#define MEMACCESS2(offset, base) "%%nacl:" #offset "(%%r15,%q" #base ")" -#define MEMLEA(offset, base) #offset "(%q" #base ")" -#define MEMLEA3(offset, index, scale) \ - #offset "(,%q" #index "," #scale ")" -#define MEMLEA4(offset, base, index, scale) \ - #offset "(%q" #base ",%q" #index "," #scale ")" -#define MEMMOVESTRING(s, d) "%%nacl:(%q" #s "),%%nacl:(%q" #d "), %%r15" -#define MEMSTORESTRING(reg, d) "%%" #reg ",%%nacl:(%q" #d "), %%r15" -#define MEMOPREG(opcode, offset, base, index, scale, reg) \ - BUNDLELOCK \ - "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \ - #opcode " (%%r15,%%r14),%%" #reg "\n" \ - BUNDLEUNLOCK -#define MEMOPMEM(opcode, reg, offset, base, index, scale) \ - BUNDLELOCK \ - "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \ - #opcode " %%" #reg ",(%%r15,%%r14)\n" \ - BUNDLEUNLOCK -#define MEMOPARG(opcode, offset, base, index, scale, arg) \ - BUNDLELOCK \ - "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \ - #opcode " (%%r15,%%r14),%" #arg "\n" \ - BUNDLEUNLOCK -#else -#define BUNDLEALIGN "\n" -#define MEMACCESS(base) "(%" #base ")" -#define MEMACCESS2(offset, base) #offset "(%" #base ")" -#define MEMLEA(offset, base) #offset "(%" #base ")" -#define MEMLEA3(offset, index, scale) \ - #offset "(,%" #index "," #scale ")" -#define MEMLEA4(offset, base, index, scale) \ - #offset "(%" #base ",%" #index "," #scale ")" -#define MEMMOVESTRING(s, d) -#define MEMSTORESTRING(reg, d) -#define MEMOPREG(opcode, offset, base, index, scale, reg) \ - #opcode 
" " #offset "(%" #base ",%" #index "," #scale "),%%" #reg "\n" -#define MEMOPMEM(opcode, reg, offset, base, index, scale) \ - #opcode " %%" #reg ","#offset "(%" #base ",%" #index "," #scale ")\n" -#define MEMOPARG(opcode, offset, base, index, scale, arg) \ - #opcode " " #offset "(%" #base ",%" #index "," #scale "),%" #arg "\n" -#endif - -void I444ToARGBRow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void I422ToARGBRow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void I411ToARGBRow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void I422ToBGRARow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_bgra, - int width); -void I422ToABGRRow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_abgr, - int width); -void I422ToRGBARow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgba, - int width); -void I422ToRGB24Row_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgb24, - int width); -void I422ToRAWRow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_raw, - int width); -void I422ToRGB565Row_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgb565, - int width); -void I422ToARGB1555Row_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb1555, - int width); -void I422ToARGB4444Row_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb4444, - int width); -void NV12ToARGBRow_NEON(const uint8* src_y, - const uint8* src_uv, - uint8* dst_argb, - int width); -void NV21ToARGBRow_NEON(const uint8* src_y, - const uint8* src_vu, - uint8* dst_argb, - int width); -void NV12ToRGB565Row_NEON(const uint8* src_y, - const uint8* src_uv, - uint8* dst_rgb565, - int 
width); -void NV21ToRGB565Row_NEON(const uint8* src_y, - const uint8* src_vu, - uint8* dst_rgb565, - int width); -void YUY2ToARGBRow_NEON(const uint8* src_yuy2, - uint8* dst_argb, - int width); -void UYVYToARGBRow_NEON(const uint8* src_uyvy, - uint8* dst_argb, - int width); - -void ARGBToYRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix); -void ARGBToYRow_Any_AVX2(const uint8* src_argb, uint8* dst_y, int pix); -void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix); -void ARGBToYJRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix); -void ARGBToYJRow_Any_AVX2(const uint8* src_argb, uint8* dst_y, int pix); -void ARGBToYJRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix); -void BGRAToYRow_SSSE3(const uint8* src_bgra, uint8* dst_y, int pix); -void ABGRToYRow_SSSE3(const uint8* src_abgr, uint8* dst_y, int pix); -void RGBAToYRow_SSSE3(const uint8* src_rgba, uint8* dst_y, int pix); -void RGB24ToYRow_SSSE3(const uint8* src_rgb24, uint8* dst_y, int pix); -void RAWToYRow_SSSE3(const uint8* src_raw, uint8* dst_y, int pix); -void ARGBToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix); -void ARGBToYJRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix); -void BGRAToYRow_Unaligned_SSSE3(const uint8* src_bgra, uint8* dst_y, int pix); -void ABGRToYRow_Unaligned_SSSE3(const uint8* src_abgr, uint8* dst_y, int pix); -void RGBAToYRow_Unaligned_SSSE3(const uint8* src_rgba, uint8* dst_y, int pix); -void RGB24ToYRow_Unaligned_SSSE3(const uint8* src_rgb24, uint8* dst_y, int pix); -void RAWToYRow_Unaligned_SSSE3(const uint8* src_raw, uint8* dst_y, int pix); -void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int pix); -void ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int pix); -void ARGBToUV444Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, - int pix); -void ARGBToUV422Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, - int pix); -void ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, 
uint8* dst_v, - int pix); -void ARGBToUVRow_NEON(const uint8* src_argb, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int pix); -void ARGBToUVJRow_NEON(const uint8* src_argb, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int pix); -void BGRAToUVRow_NEON(const uint8* src_bgra, int src_stride_bgra, - uint8* dst_u, uint8* dst_v, int pix); -void ABGRToUVRow_NEON(const uint8* src_abgr, int src_stride_abgr, - uint8* dst_u, uint8* dst_v, int pix); -void RGBAToUVRow_NEON(const uint8* src_rgba, int src_stride_rgba, - uint8* dst_u, uint8* dst_v, int pix); -void RGB24ToUVRow_NEON(const uint8* src_rgb24, int src_stride_rgb24, - uint8* dst_u, uint8* dst_v, int pix); -void RAWToUVRow_NEON(const uint8* src_raw, int src_stride_raw, - uint8* dst_u, uint8* dst_v, int pix); -void RGB565ToUVRow_NEON(const uint8* src_rgb565, int src_stride_rgb565, - uint8* dst_u, uint8* dst_v, int pix); -void ARGB1555ToUVRow_NEON(const uint8* src_argb1555, int src_stride_argb1555, - uint8* dst_u, uint8* dst_v, int pix); -void ARGB4444ToUVRow_NEON(const uint8* src_argb4444, int src_stride_argb4444, - uint8* dst_u, uint8* dst_v, int pix); -void BGRAToYRow_NEON(const uint8* src_bgra, uint8* dst_y, int pix); -void ABGRToYRow_NEON(const uint8* src_abgr, uint8* dst_y, int pix); -void RGBAToYRow_NEON(const uint8* src_rgba, uint8* dst_y, int pix); -void RGB24ToYRow_NEON(const uint8* src_rgb24, uint8* dst_y, int pix); -void RAWToYRow_NEON(const uint8* src_raw, uint8* dst_y, int pix); -void RGB565ToYRow_NEON(const uint8* src_rgb565, uint8* dst_y, int pix); -void ARGB1555ToYRow_NEON(const uint8* src_argb1555, uint8* dst_y, int pix); -void ARGB4444ToYRow_NEON(const uint8* src_argb4444, uint8* dst_y, int pix); -void ARGBToYRow_C(const uint8* src_argb, uint8* dst_y, int pix); -void ARGBToYJRow_C(const uint8* src_argb, uint8* dst_y, int pix); -void BGRAToYRow_C(const uint8* src_bgra, uint8* dst_y, int pix); -void ABGRToYRow_C(const uint8* src_abgr, uint8* dst_y, int pix); -void RGBAToYRow_C(const uint8* 
src_rgba, uint8* dst_y, int pix); -void RGB24ToYRow_C(const uint8* src_rgb24, uint8* dst_y, int pix); -void RAWToYRow_C(const uint8* src_raw, uint8* dst_y, int pix); -void RGB565ToYRow_C(const uint8* src_rgb565, uint8* dst_y, int pix); -void ARGB1555ToYRow_C(const uint8* src_argb1555, uint8* dst_y, int pix); -void ARGB4444ToYRow_C(const uint8* src_argb4444, uint8* dst_y, int pix); -void ARGBToYRow_Any_SSSE3(const uint8* src_argb, uint8* dst_y, int pix); -void ARGBToYJRow_Any_SSSE3(const uint8* src_argb, uint8* dst_y, int pix); -void BGRAToYRow_Any_SSSE3(const uint8* src_bgra, uint8* dst_y, int pix); -void ABGRToYRow_Any_SSSE3(const uint8* src_abgr, uint8* dst_y, int pix); -void RGBAToYRow_Any_SSSE3(const uint8* src_rgba, uint8* dst_y, int pix); -void RGB24ToYRow_Any_SSSE3(const uint8* src_rgb24, uint8* dst_y, int pix); -void RAWToYRow_Any_SSSE3(const uint8* src_raw, uint8* dst_y, int pix); -void ARGBToYRow_Any_NEON(const uint8* src_argb, uint8* dst_y, int pix); -void ARGBToYJRow_Any_NEON(const uint8* src_argb, uint8* dst_y, int pix); -void BGRAToYRow_Any_NEON(const uint8* src_bgra, uint8* dst_y, int pix); -void ABGRToYRow_Any_NEON(const uint8* src_abgr, uint8* dst_y, int pix); -void RGBAToYRow_Any_NEON(const uint8* src_rgba, uint8* dst_y, int pix); -void RGB24ToYRow_Any_NEON(const uint8* src_rgb24, uint8* dst_y, int pix); -void RAWToYRow_Any_NEON(const uint8* src_raw, uint8* dst_y, int pix); -void RGB565ToYRow_Any_NEON(const uint8* src_rgb565, uint8* dst_y, int pix); -void ARGB1555ToYRow_Any_NEON(const uint8* src_argb1555, uint8* dst_y, int pix); -void ARGB4444ToYRow_Any_NEON(const uint8* src_argb4444, uint8* dst_y, int pix); - -void ARGBToUVRow_AVX2(const uint8* src_argb, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); -void ARGBToUVRow_Any_AVX2(const uint8* src_argb, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); -void ARGBToUVRow_SSSE3(const uint8* src_argb, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); -void 
ARGBToUVJRow_SSSE3(const uint8* src_argb, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); -void BGRAToUVRow_SSSE3(const uint8* src_bgra, int src_stride_bgra, - uint8* dst_u, uint8* dst_v, int width); -void ABGRToUVRow_SSSE3(const uint8* src_abgr, int src_stride_abgr, - uint8* dst_u, uint8* dst_v, int width); -void RGBAToUVRow_SSSE3(const uint8* src_rgba, int src_stride_rgba, - uint8* dst_u, uint8* dst_v, int width); -void ARGBToUVRow_Unaligned_SSSE3(const uint8* src_argb, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); -void ARGBToUVJRow_Unaligned_SSSE3(const uint8* src_argb, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); -void BGRAToUVRow_Unaligned_SSSE3(const uint8* src_bgra, int src_stride_bgra, - uint8* dst_u, uint8* dst_v, int width); -void ABGRToUVRow_Unaligned_SSSE3(const uint8* src_abgr, int src_stride_abgr, - uint8* dst_u, uint8* dst_v, int width); -void RGBAToUVRow_Unaligned_SSSE3(const uint8* src_rgba, int src_stride_rgba, - uint8* dst_u, uint8* dst_v, int width); -void ARGBToUVRow_Any_SSSE3(const uint8* src_argb, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); -void ARGBToUVJRow_Any_SSSE3(const uint8* src_argb, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); -void BGRAToUVRow_Any_SSSE3(const uint8* src_bgra, int src_stride_bgra, - uint8* dst_u, uint8* dst_v, int width); -void ABGRToUVRow_Any_SSSE3(const uint8* src_abgr, int src_stride_abgr, - uint8* dst_u, uint8* dst_v, int width); -void RGBAToUVRow_Any_SSSE3(const uint8* src_rgba, int src_stride_rgba, - uint8* dst_u, uint8* dst_v, int width); -void ARGBToUV444Row_Any_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, - int pix); -void ARGBToUV422Row_Any_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, - int pix); -void ARGBToUV411Row_Any_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, - int pix); -void ARGBToUVRow_Any_NEON(const uint8* src_argb, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int pix); 
-void ARGBToUVJRow_Any_NEON(const uint8* src_argb, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int pix); -void BGRAToUVRow_Any_NEON(const uint8* src_bgra, int src_stride_bgra, - uint8* dst_u, uint8* dst_v, int pix); -void ABGRToUVRow_Any_NEON(const uint8* src_abgr, int src_stride_abgr, - uint8* dst_u, uint8* dst_v, int pix); -void RGBAToUVRow_Any_NEON(const uint8* src_rgba, int src_stride_rgba, - uint8* dst_u, uint8* dst_v, int pix); -void RGB24ToUVRow_Any_NEON(const uint8* src_rgb24, int src_stride_rgb24, - uint8* dst_u, uint8* dst_v, int pix); -void RAWToUVRow_Any_NEON(const uint8* src_raw, int src_stride_raw, - uint8* dst_u, uint8* dst_v, int pix); -void RGB565ToUVRow_Any_NEON(const uint8* src_rgb565, int src_stride_rgb565, - uint8* dst_u, uint8* dst_v, int pix); -void ARGB1555ToUVRow_Any_NEON(const uint8* src_argb1555, - int src_stride_argb1555, - uint8* dst_u, uint8* dst_v, int pix); -void ARGB4444ToUVRow_Any_NEON(const uint8* src_argb4444, - int src_stride_argb4444, - uint8* dst_u, uint8* dst_v, int pix); -void ARGBToUVRow_C(const uint8* src_argb, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); -void ARGBToUVJRow_C(const uint8* src_argb, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width); -void BGRAToUVRow_C(const uint8* src_bgra, int src_stride_bgra, - uint8* dst_u, uint8* dst_v, int width); -void ABGRToUVRow_C(const uint8* src_abgr, int src_stride_abgr, - uint8* dst_u, uint8* dst_v, int width); -void RGBAToUVRow_C(const uint8* src_rgba, int src_stride_rgba, - uint8* dst_u, uint8* dst_v, int width); -void RGB24ToUVRow_C(const uint8* src_rgb24, int src_stride_rgb24, - uint8* dst_u, uint8* dst_v, int width); -void RAWToUVRow_C(const uint8* src_raw, int src_stride_raw, - uint8* dst_u, uint8* dst_v, int width); -void RGB565ToUVRow_C(const uint8* src_rgb565, int src_stride_rgb565, - uint8* dst_u, uint8* dst_v, int width); -void ARGB1555ToUVRow_C(const uint8* src_argb1555, int src_stride_argb1555, - uint8* dst_u, uint8* dst_v, int 
width); -void ARGB4444ToUVRow_C(const uint8* src_argb4444, int src_stride_argb4444, - uint8* dst_u, uint8* dst_v, int width); - -void ARGBToUV444Row_SSSE3(const uint8* src_argb, - uint8* dst_u, uint8* dst_v, int width); -void ARGBToUV444Row_Unaligned_SSSE3(const uint8* src_argb, - uint8* dst_u, uint8* dst_v, int width); -void ARGBToUV444Row_Any_SSSE3(const uint8* src_argb, - uint8* dst_u, uint8* dst_v, int width); - -void ARGBToUV422Row_SSSE3(const uint8* src_argb, - uint8* dst_u, uint8* dst_v, int width); -void ARGBToUV422Row_Unaligned_SSSE3(const uint8* src_argb, - uint8* dst_u, uint8* dst_v, int width); -void ARGBToUV422Row_Any_SSSE3(const uint8* src_argb, - uint8* dst_u, uint8* dst_v, int width); - -void ARGBToUV444Row_C(const uint8* src_argb, - uint8* dst_u, uint8* dst_v, int width); -void ARGBToUV422Row_C(const uint8* src_argb, - uint8* dst_u, uint8* dst_v, int width); -void ARGBToUV411Row_C(const uint8* src_argb, - uint8* dst_u, uint8* dst_v, int width); - -void MirrorRow_AVX2(const uint8* src, uint8* dst, int width); -void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width); -void MirrorRow_SSE2(const uint8* src, uint8* dst, int width); -void MirrorRow_NEON(const uint8* src, uint8* dst, int width); -void MirrorRow_MIPS_DSPR2(const uint8* src, uint8* dst, int width); -void MirrorRow_C(const uint8* src, uint8* dst, int width); - -void MirrorUVRow_SSSE3(const uint8* src_uv, uint8* dst_u, uint8* dst_v, - int width); -void MirrorUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, - int width); -void MirrorUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, - int width); -void MirrorUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, - int width); - -void ARGBMirrorRow_AVX2(const uint8* src, uint8* dst, int width); -void ARGBMirrorRow_SSSE3(const uint8* src, uint8* dst, int width); -void ARGBMirrorRow_NEON(const uint8* src, uint8* dst, int width); -void ARGBMirrorRow_C(const uint8* src, uint8* dst, int width); - -void 
SplitUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix); -void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix); -void SplitUVRow_AVX2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix); -void SplitUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix); -void SplitUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, - int pix); -void SplitUVRow_Unaligned_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, - int pix); -void SplitUVRow_Unaligned_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, - uint8* dst_v, int pix); -void SplitUVRow_Any_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, - int pix); -void SplitUVRow_Any_AVX2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, - int pix); -void SplitUVRow_Any_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, - int pix); -void SplitUVRow_Any_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, - int pix); - -void MergeUVRow_C(const uint8* src_u, const uint8* src_v, uint8* dst_uv, - int width); -void MergeUVRow_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv, - int width); -void MergeUVRow_AVX2(const uint8* src_u, const uint8* src_v, uint8* dst_uv, - int width); -void MergeUVRow_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv, - int width); -void MergeUVRow_Unaligned_SSE2(const uint8* src_u, const uint8* src_v, - uint8* dst_uv, int width); -void MergeUVRow_Any_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv, - int width); -void MergeUVRow_Any_AVX2(const uint8* src_u, const uint8* src_v, uint8* dst_uv, - int width); -void MergeUVRow_Any_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv, - int width); - -void CopyRow_SSE2(const uint8* src, uint8* dst, int count); -void CopyRow_ERMS(const uint8* src, uint8* dst, int count); -void CopyRow_X86(const uint8* src, uint8* dst, int count); -void CopyRow_NEON(const uint8* src, uint8* dst, int count); -void CopyRow_MIPS(const uint8* src, uint8* dst, int 
count); -void CopyRow_C(const uint8* src, uint8* dst, int count); - -void ARGBCopyAlphaRow_C(const uint8* src_argb, uint8* dst_argb, int width); -void ARGBCopyAlphaRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width); -void ARGBCopyAlphaRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width); - -void ARGBCopyYToAlphaRow_C(const uint8* src_y, uint8* dst_argb, int width); -void ARGBCopyYToAlphaRow_SSE2(const uint8* src_y, uint8* dst_argb, int width); -void ARGBCopyYToAlphaRow_AVX2(const uint8* src_y, uint8* dst_argb, int width); - -void SetRow_X86(uint8* dst, uint32 v32, int count); -void ARGBSetRows_X86(uint8* dst, uint32 v32, int width, - int dst_stride, int height); -void SetRow_NEON(uint8* dst, uint32 v32, int count); -void ARGBSetRows_NEON(uint8* dst, uint32 v32, int width, - int dst_stride, int height); -void SetRow_C(uint8* dst, uint32 v32, int count); -void ARGBSetRows_C(uint8* dst, uint32 v32, int width, int dst_stride, - int height); - -// ARGBShufflers for BGRAToARGB etc. 
-void ARGBShuffleRow_C(const uint8* src_argb, uint8* dst_argb, - const uint8* shuffler, int pix); -void ARGBShuffleRow_SSE2(const uint8* src_argb, uint8* dst_argb, - const uint8* shuffler, int pix); -void ARGBShuffleRow_SSSE3(const uint8* src_argb, uint8* dst_argb, - const uint8* shuffler, int pix); -void ARGBShuffleRow_AVX2(const uint8* src_argb, uint8* dst_argb, - const uint8* shuffler, int pix); -void ARGBShuffleRow_NEON(const uint8* src_argb, uint8* dst_argb, - const uint8* shuffler, int pix); -void ARGBShuffleRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_argb, - const uint8* shuffler, int pix); -void ARGBShuffleRow_Any_SSE2(const uint8* src_argb, uint8* dst_argb, - const uint8* shuffler, int pix); -void ARGBShuffleRow_Any_SSSE3(const uint8* src_argb, uint8* dst_argb, - const uint8* shuffler, int pix); -void ARGBShuffleRow_Any_AVX2(const uint8* src_argb, uint8* dst_argb, - const uint8* shuffler, int pix); -void ARGBShuffleRow_Any_NEON(const uint8* src_argb, uint8* dst_argb, - const uint8* shuffler, int pix); - -void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix); -void RAWToARGBRow_SSSE3(const uint8* src_raw, uint8* dst_argb, int pix); -void RGB565ToARGBRow_SSE2(const uint8* src_rgb565, uint8* dst_argb, int pix); -void ARGB1555ToARGBRow_SSE2(const uint8* src_argb1555, uint8* dst_argb, - int pix); -void ARGB4444ToARGBRow_SSE2(const uint8* src_argb4444, uint8* dst_argb, - int pix); - -void RGB24ToARGBRow_NEON(const uint8* src_rgb24, uint8* dst_argb, int pix); -void RAWToARGBRow_NEON(const uint8* src_raw, uint8* dst_argb, int pix); -void RGB565ToARGBRow_NEON(const uint8* src_rgb565, uint8* dst_argb, int pix); -void ARGB1555ToARGBRow_NEON(const uint8* src_argb1555, uint8* dst_argb, - int pix); -void ARGB4444ToARGBRow_NEON(const uint8* src_argb4444, uint8* dst_argb, - int pix); -void RGB24ToARGBRow_C(const uint8* src_rgb24, uint8* dst_argb, int pix); -void RAWToARGBRow_C(const uint8* src_raw, uint8* dst_argb, int pix); -void 
RGB565ToARGBRow_C(const uint8* src_rgb, uint8* dst_argb, int pix); -void ARGB1555ToARGBRow_C(const uint8* src_argb, uint8* dst_argb, int pix); -void ARGB4444ToARGBRow_C(const uint8* src_argb, uint8* dst_argb, int pix); -void RGB24ToARGBRow_Any_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix); -void RAWToARGBRow_Any_SSSE3(const uint8* src_raw, uint8* dst_argb, int pix); -void RGB565ToARGBRow_Any_SSE2(const uint8* src_rgb565, uint8* dst_argb, - int pix); -void ARGB1555ToARGBRow_Any_SSE2(const uint8* src_argb1555, uint8* dst_argb, - int pix); -void ARGB4444ToARGBRow_Any_SSE2(const uint8* src_argb4444, uint8* dst_argb, - int pix); -void RGB24ToARGBRow_Any_NEON(const uint8* src_rgb24, uint8* dst_argb, int pix); -void RAWToARGBRow_Any_NEON(const uint8* src_raw, uint8* dst_argb, int pix); -void RGB565ToARGBRow_Any_NEON(const uint8* src_rgb565, uint8* dst_argb, - int pix); -void ARGB1555ToARGBRow_Any_NEON(const uint8* src_argb1555, uint8* dst_argb, - int pix); -void ARGB4444ToARGBRow_Any_NEON(const uint8* src_argb4444, uint8* dst_argb, - int pix); - -void ARGBToRGB24Row_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix); -void ARGBToRAWRow_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix); -void ARGBToRGB565Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix); -void ARGBToARGB1555Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix); -void ARGBToARGB4444Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix); - -void ARGBToRGB24Row_NEON(const uint8* src_argb, uint8* dst_rgb, int pix); -void ARGBToRAWRow_NEON(const uint8* src_argb, uint8* dst_rgb, int pix); -void ARGBToRGB565Row_NEON(const uint8* src_argb, uint8* dst_rgb, int pix); -void ARGBToARGB1555Row_NEON(const uint8* src_argb, uint8* dst_rgb, int pix); -void ARGBToARGB4444Row_NEON(const uint8* src_argb, uint8* dst_rgb, int pix); - -void ARGBToRGBARow_C(const uint8* src_argb, uint8* dst_rgb, int pix); -void ARGBToRGB24Row_C(const uint8* src_argb, uint8* dst_rgb, int pix); -void 
ARGBToRAWRow_C(const uint8* src_argb, uint8* dst_rgb, int pix); -void ARGBToRGB565Row_C(const uint8* src_argb, uint8* dst_rgb, int pix); -void ARGBToARGB1555Row_C(const uint8* src_argb, uint8* dst_rgb, int pix); -void ARGBToARGB4444Row_C(const uint8* src_argb, uint8* dst_rgb, int pix); - -void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix); -void I400ToARGBRow_Unaligned_SSE2(const uint8* src_y, uint8* dst_argb, int pix); -void I400ToARGBRow_NEON(const uint8* src_y, uint8* dst_argb, int pix); -void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int pix); -void I400ToARGBRow_Any_SSE2(const uint8* src_y, uint8* dst_argb, int pix); -void I400ToARGBRow_Any_NEON(const uint8* src_y, uint8* dst_argb, int pix); - -void I444ToARGBRow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void I422ToARGBRow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void I411ToARGBRow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void NV12ToARGBRow_C(const uint8* src_y, - const uint8* src_uv, - uint8* dst_argb, - int width); -void NV21ToRGB565Row_C(const uint8* src_y, - const uint8* src_vu, - uint8* dst_argb, - int width); -void NV12ToRGB565Row_C(const uint8* src_y, - const uint8* src_uv, - uint8* dst_argb, - int width); -void NV21ToARGBRow_C(const uint8* src_y, - const uint8* src_vu, - uint8* dst_argb, - int width); -void YUY2ToARGBRow_C(const uint8* src_yuy2, - uint8* dst_argb, - int width); -void UYVYToARGBRow_C(const uint8* src_uyvy, - uint8* dst_argb, - int width); -void I422ToBGRARow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_bgra, - int width); -void I422ToABGRRow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_abgr, - int width); -void I422ToRGBARow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgba, - int 
width); -void I422ToRGB24Row_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgb24, - int width); -void I422ToRAWRow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_raw, - int width); -void I422ToARGB4444Row_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb4444, - int width); -void I422ToARGB1555Row_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb4444, - int width); -void I422ToRGB565Row_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgb565, - int width); -void YToARGBRow_C(const uint8* src_y, - uint8* dst_argb, - int width); -void I422ToARGBRow_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void I444ToARGBRow_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void I422ToARGBRow_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void I411ToARGBRow_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void NV12ToARGBRow_SSSE3(const uint8* src_y, - const uint8* src_uv, - uint8* dst_argb, - int width); -void NV21ToARGBRow_SSSE3(const uint8* src_y, - const uint8* src_vu, - uint8* dst_argb, - int width); -void NV12ToRGB565Row_SSSE3(const uint8* src_y, - const uint8* src_uv, - uint8* dst_argb, - int width); -void NV21ToRGB565Row_SSSE3(const uint8* src_y, - const uint8* src_vu, - uint8* dst_argb, - int width); -void YUY2ToARGBRow_SSSE3(const uint8* src_yuy2, - uint8* dst_argb, - int width); -void UYVYToARGBRow_SSSE3(const uint8* src_uyvy, - uint8* dst_argb, - int width); -void I422ToBGRARow_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_bgra, - int width); -void I422ToABGRRow_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_abgr, - int 
width); -void I422ToRGBARow_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgba, - int width); -void I422ToARGB4444Row_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void I422ToARGB1555Row_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void I422ToRGB565Row_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -// RGB24/RAW are unaligned. -void I422ToRGB24Row_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgb24, - int width); -void I422ToRAWRow_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_raw, - int width); - -void I444ToARGBRow_Unaligned_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void I422ToARGBRow_Unaligned_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void I411ToARGBRow_Unaligned_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void NV12ToARGBRow_Unaligned_SSSE3(const uint8* src_y, - const uint8* src_uv, - uint8* dst_argb, - int width); -void NV21ToARGBRow_Unaligned_SSSE3(const uint8* src_y, - const uint8* src_vu, - uint8* dst_argb, - int width); -void YUY2ToARGBRow_Unaligned_SSSE3(const uint8* src_yuy2, - uint8* dst_argb, - int width); -void UYVYToARGBRow_Unaligned_SSSE3(const uint8* src_uyvy, - uint8* dst_argb, - int width); -void I422ToBGRARow_Unaligned_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_bgra, - int width); -void I422ToABGRRow_Unaligned_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_abgr, - int width); -void I422ToRGBARow_Unaligned_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgba, - int width); -void 
I422ToARGBRow_Any_AVX2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void I444ToARGBRow_Any_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void I422ToARGBRow_Any_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void I411ToARGBRow_Any_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void NV12ToARGBRow_Any_SSSE3(const uint8* src_y, - const uint8* src_uv, - uint8* dst_argb, - int width); -void NV21ToARGBRow_Any_SSSE3(const uint8* src_y, - const uint8* src_vu, - uint8* dst_argb, - int width); -void NV12ToRGB565Row_Any_SSSE3(const uint8* src_y, - const uint8* src_uv, - uint8* dst_argb, - int width); -void NV21ToRGB565Row_Any_SSSE3(const uint8* src_y, - const uint8* src_vu, - uint8* dst_argb, - int width); -void YUY2ToARGBRow_Any_SSSE3(const uint8* src_yuy2, - uint8* dst_argb, - int width); -void UYVYToARGBRow_Any_SSSE3(const uint8* src_uyvy, - uint8* dst_argb, - int width); -void I422ToBGRARow_Any_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_bgra, - int width); -void I422ToABGRRow_Any_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_abgr, - int width); -void I422ToRGBARow_Any_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgba, - int width); -void I422ToARGB4444Row_Any_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgba, - int width); -void I422ToARGB1555Row_Any_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgba, - int width); -void I422ToRGB565Row_Any_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgba, - int width); -// RGB24/RAW are unaligned. 
-void I422ToRGB24Row_Any_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void I422ToRAWRow_Any_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void YToARGBRow_SSE2(const uint8* src_y, - uint8* dst_argb, - int width); -void YToARGBRow_NEON(const uint8* src_y, - uint8* dst_argb, - int width); -void YToARGBRow_Any_SSE2(const uint8* src_y, - uint8* dst_argb, - int width); -void YToARGBRow_Any_NEON(const uint8* src_y, - uint8* dst_argb, - int width); - -// ARGB preattenuated alpha blend. -void ARGBBlendRow_SSSE3(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); -void ARGBBlendRow_SSE2(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); -void ARGBBlendRow_NEON(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); -void ARGBBlendRow_C(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); - -// ARGB multiply images. Same API as Blend, but these require -// pointer and width alignment for SSE2. -void ARGBMultiplyRow_C(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); -void ARGBMultiplyRow_SSE2(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); -void ARGBMultiplyRow_Any_SSE2(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); -void ARGBMultiplyRow_AVX2(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); -void ARGBMultiplyRow_Any_AVX2(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); -void ARGBMultiplyRow_NEON(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); -void ARGBMultiplyRow_Any_NEON(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); - -// ARGB add images. 
-void ARGBAddRow_C(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); -void ARGBAddRow_SSE2(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); -void ARGBAddRow_Any_SSE2(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); -void ARGBAddRow_AVX2(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); -void ARGBAddRow_Any_AVX2(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); -void ARGBAddRow_NEON(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); -void ARGBAddRow_Any_NEON(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); - -// ARGB subtract images. Same API as Blend, but these require -// pointer and width alignment for SSE2. -void ARGBSubtractRow_C(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); -void ARGBSubtractRow_SSE2(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); -void ARGBSubtractRow_Any_SSE2(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); -void ARGBSubtractRow_AVX2(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); -void ARGBSubtractRow_Any_AVX2(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); -void ARGBSubtractRow_NEON(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); -void ARGBSubtractRow_Any_NEON(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width); - -void ARGBToRGB24Row_Any_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix); -void ARGBToRAWRow_Any_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix); -void ARGBToRGB565Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix); -void ARGBToARGB1555Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix); -void ARGBToARGB4444Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix); - -void 
ARGBToRGB24Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix); -void ARGBToRAWRow_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix); -void ARGBToRGB565Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix); -void ARGBToARGB1555Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix); -void ARGBToARGB4444Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix); - -void I444ToARGBRow_Any_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void I422ToARGBRow_Any_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void I411ToARGBRow_Any_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void I422ToBGRARow_Any_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void I422ToABGRRow_Any_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void I422ToRGBARow_Any_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void I422ToRGB24Row_Any_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void I422ToRAWRow_Any_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void I422ToARGB4444Row_Any_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void I422ToARGB1555Row_Any_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void I422ToRGB565Row_Any_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void NV12ToARGBRow_Any_NEON(const uint8* src_y, - const uint8* src_uv, - uint8* dst_argb, - int width); -void NV21ToARGBRow_Any_NEON(const uint8* src_y, - const uint8* src_uv, - uint8* dst_argb, - int 
width); -void NV12ToRGB565Row_Any_NEON(const uint8* src_y, - const uint8* src_uv, - uint8* dst_argb, - int width); -void NV21ToRGB565Row_Any_NEON(const uint8* src_y, - const uint8* src_uv, - uint8* dst_argb, - int width); -void YUY2ToARGBRow_Any_NEON(const uint8* src_yuy2, - uint8* dst_argb, - int width); -void UYVYToARGBRow_Any_NEON(const uint8* src_uyvy, - uint8* dst_argb, - int width); -void I422ToARGBRow_MIPS_DSPR2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void I422ToBGRARow_MIPS_DSPR2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void I422ToABGRRow_MIPS_DSPR2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void I422ToARGBRow_MIPS_DSPR2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void I422ToBGRARow_MIPS_DSPR2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); -void I422ToABGRRow_MIPS_DSPR2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width); - -void YUY2ToYRow_AVX2(const uint8* src_yuy2, uint8* dst_y, int pix); -void YUY2ToUVRow_AVX2(const uint8* src_yuy2, int stride_yuy2, - uint8* dst_u, uint8* dst_v, int pix); -void YUY2ToUV422Row_AVX2(const uint8* src_yuy2, - uint8* dst_u, uint8* dst_v, int pix); -void YUY2ToYRow_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix); -void YUY2ToUVRow_SSE2(const uint8* src_yuy2, int stride_yuy2, - uint8* dst_u, uint8* dst_v, int pix); -void YUY2ToUV422Row_SSE2(const uint8* src_yuy2, - uint8* dst_u, uint8* dst_v, int pix); -void YUY2ToYRow_Unaligned_SSE2(const uint8* src_yuy2, - uint8* dst_y, int pix); -void YUY2ToUVRow_Unaligned_SSE2(const uint8* src_yuy2, int stride_yuy2, - uint8* dst_u, uint8* dst_v, int pix); -void YUY2ToUV422Row_Unaligned_SSE2(const uint8* src_yuy2, - uint8* dst_u, uint8* dst_v, int pix); -void 
YUY2ToYRow_NEON(const uint8* src_yuy2, uint8* dst_y, int pix); -void YUY2ToUVRow_NEON(const uint8* src_yuy2, int stride_yuy2, - uint8* dst_u, uint8* dst_v, int pix); -void YUY2ToUV422Row_NEON(const uint8* src_yuy2, - uint8* dst_u, uint8* dst_v, int pix); -void YUY2ToYRow_C(const uint8* src_yuy2, uint8* dst_y, int pix); -void YUY2ToUVRow_C(const uint8* src_yuy2, int stride_yuy2, - uint8* dst_u, uint8* dst_v, int pix); -void YUY2ToUV422Row_C(const uint8* src_yuy2, - uint8* dst_u, uint8* dst_v, int pix); -void YUY2ToYRow_Any_AVX2(const uint8* src_yuy2, uint8* dst_y, int pix); -void YUY2ToUVRow_Any_AVX2(const uint8* src_yuy2, int stride_yuy2, - uint8* dst_u, uint8* dst_v, int pix); -void YUY2ToUV422Row_Any_AVX2(const uint8* src_yuy2, - uint8* dst_u, uint8* dst_v, int pix); -void YUY2ToYRow_Any_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix); -void YUY2ToUVRow_Any_SSE2(const uint8* src_yuy2, int stride_yuy2, - uint8* dst_u, uint8* dst_v, int pix); -void YUY2ToUV422Row_Any_SSE2(const uint8* src_yuy2, - uint8* dst_u, uint8* dst_v, int pix); -void YUY2ToYRow_Any_NEON(const uint8* src_yuy2, uint8* dst_y, int pix); -void YUY2ToUVRow_Any_NEON(const uint8* src_yuy2, int stride_yuy2, - uint8* dst_u, uint8* dst_v, int pix); -void YUY2ToUV422Row_Any_NEON(const uint8* src_yuy2, - uint8* dst_u, uint8* dst_v, int pix); -void UYVYToYRow_AVX2(const uint8* src_uyvy, uint8* dst_y, int pix); -void UYVYToUVRow_AVX2(const uint8* src_uyvy, int stride_uyvy, - uint8* dst_u, uint8* dst_v, int pix); -void UYVYToUV422Row_AVX2(const uint8* src_uyvy, - uint8* dst_u, uint8* dst_v, int pix); -void UYVYToYRow_SSE2(const uint8* src_uyvy, uint8* dst_y, int pix); -void UYVYToUVRow_SSE2(const uint8* src_uyvy, int stride_uyvy, - uint8* dst_u, uint8* dst_v, int pix); -void UYVYToUV422Row_SSE2(const uint8* src_uyvy, - uint8* dst_u, uint8* dst_v, int pix); -void UYVYToYRow_Unaligned_SSE2(const uint8* src_uyvy, - uint8* dst_y, int pix); -void UYVYToUVRow_Unaligned_SSE2(const uint8* src_uyvy, int 
stride_uyvy, - uint8* dst_u, uint8* dst_v, int pix); -void UYVYToUV422Row_Unaligned_SSE2(const uint8* src_uyvy, - uint8* dst_u, uint8* dst_v, int pix); -void UYVYToYRow_AVX2(const uint8* src_uyvy, uint8* dst_y, int pix); -void UYVYToUVRow_AVX2(const uint8* src_uyvy, int stride_uyvy, - uint8* dst_u, uint8* dst_v, int pix); -void UYVYToUV422Row_AVX2(const uint8* src_uyvy, - uint8* dst_u, uint8* dst_v, int pix); -void UYVYToYRow_NEON(const uint8* src_uyvy, uint8* dst_y, int pix); -void UYVYToUVRow_NEON(const uint8* src_uyvy, int stride_uyvy, - uint8* dst_u, uint8* dst_v, int pix); -void UYVYToUV422Row_NEON(const uint8* src_uyvy, - uint8* dst_u, uint8* dst_v, int pix); - -void UYVYToYRow_C(const uint8* src_uyvy, uint8* dst_y, int pix); -void UYVYToUVRow_C(const uint8* src_uyvy, int stride_uyvy, - uint8* dst_u, uint8* dst_v, int pix); -void UYVYToUV422Row_C(const uint8* src_uyvy, - uint8* dst_u, uint8* dst_v, int pix); -void UYVYToYRow_Any_AVX2(const uint8* src_uyvy, uint8* dst_y, int pix); -void UYVYToUVRow_Any_AVX2(const uint8* src_uyvy, int stride_uyvy, - uint8* dst_u, uint8* dst_v, int pix); -void UYVYToUV422Row_Any_AVX2(const uint8* src_uyvy, - uint8* dst_u, uint8* dst_v, int pix); -void UYVYToYRow_Any_SSE2(const uint8* src_uyvy, uint8* dst_y, int pix); -void UYVYToUVRow_Any_SSE2(const uint8* src_uyvy, int stride_uyvy, - uint8* dst_u, uint8* dst_v, int pix); -void UYVYToUV422Row_Any_SSE2(const uint8* src_uyvy, - uint8* dst_u, uint8* dst_v, int pix); -void UYVYToYRow_Any_NEON(const uint8* src_uyvy, uint8* dst_y, int pix); -void UYVYToUVRow_Any_NEON(const uint8* src_uyvy, int stride_uyvy, - uint8* dst_u, uint8* dst_v, int pix); -void UYVYToUV422Row_Any_NEON(const uint8* src_uyvy, - uint8* dst_u, uint8* dst_v, int pix); - -void HalfRow_C(const uint8* src_uv, int src_uv_stride, - uint8* dst_uv, int pix); -void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride, - uint8* dst_uv, int pix); -void HalfRow_AVX2(const uint8* src_uv, int src_uv_stride, - uint8* dst_uv, int 
pix); -void HalfRow_NEON(const uint8* src_uv, int src_uv_stride, - uint8* dst_uv, int pix); - -void ARGBToBayerRow_C(const uint8* src_argb, uint8* dst_bayer, - uint32 selector, int pix); -void ARGBToBayerRow_SSSE3(const uint8* src_argb, uint8* dst_bayer, - uint32 selector, int pix); -void ARGBToBayerRow_NEON(const uint8* src_argb, uint8* dst_bayer, - uint32 selector, int pix); -void ARGBToBayerRow_Any_SSSE3(const uint8* src_argb, uint8* dst_bayer, - uint32 selector, int pix); -void ARGBToBayerRow_Any_NEON(const uint8* src_argb, uint8* dst_bayer, - uint32 selector, int pix); -void ARGBToBayerGGRow_C(const uint8* src_argb, uint8* dst_bayer, - uint32 /* selector */, int pix); -void ARGBToBayerGGRow_SSE2(const uint8* src_argb, uint8* dst_bayer, - uint32 /* selector */, int pix); -void ARGBToBayerGGRow_NEON(const uint8* src_argb, uint8* dst_bayer, - uint32 /* selector */, int pix); -void ARGBToBayerGGRow_Any_SSE2(const uint8* src_argb, uint8* dst_bayer, - uint32 /* selector */, int pix); -void ARGBToBayerGGRow_Any_NEON(const uint8* src_argb, uint8* dst_bayer, - uint32 /* selector */, int pix); - -void I422ToYUY2Row_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_yuy2, int width); -void I422ToUYVYRow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_uyvy, int width); -void I422ToYUY2Row_SSE2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_yuy2, int width); -void I422ToUYVYRow_SSE2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_uyvy, int width); -void I422ToYUY2Row_Any_SSE2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_yuy2, int width); -void I422ToUYVYRow_Any_SSE2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_uyvy, int width); -void I422ToYUY2Row_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_yuy2, int width); -void I422ToUYVYRow_NEON(const uint8* src_y, - 
const uint8* src_u, - const uint8* src_v, - uint8* dst_uyvy, int width); -void I422ToYUY2Row_Any_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_yuy2, int width); -void I422ToUYVYRow_Any_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_uyvy, int width); - -// Effects related row functions. -void ARGBAttenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width); -void ARGBAttenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width); -void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width); -void ARGBAttenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width); -void ARGBAttenuateRow_NEON(const uint8* src_argb, uint8* dst_argb, int width); -void ARGBAttenuateRow_Any_SSE2(const uint8* src_argb, uint8* dst_argb, - int width); -void ARGBAttenuateRow_Any_SSSE3(const uint8* src_argb, uint8* dst_argb, - int width); -void ARGBAttenuateRow_Any_AVX2(const uint8* src_argb, uint8* dst_argb, - int width); -void ARGBAttenuateRow_Any_NEON(const uint8* src_argb, uint8* dst_argb, - int width); - -// Inverse table for unattenuate, shared by C and SSE2. 
-extern const uint32 fixed_invtbl8[256]; -void ARGBUnattenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width); -void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width); -void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width); -void ARGBUnattenuateRow_Any_SSE2(const uint8* src_argb, uint8* dst_argb, - int width); -void ARGBUnattenuateRow_Any_AVX2(const uint8* src_argb, uint8* dst_argb, - int width); - -void ARGBGrayRow_C(const uint8* src_argb, uint8* dst_argb, int width); -void ARGBGrayRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width); -void ARGBGrayRow_NEON(const uint8* src_argb, uint8* dst_argb, int width); - -void ARGBSepiaRow_C(uint8* dst_argb, int width); -void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width); -void ARGBSepiaRow_NEON(uint8* dst_argb, int width); - -void ARGBColorMatrixRow_C(const uint8* src_argb, uint8* dst_argb, - const int8* matrix_argb, int width); -void ARGBColorMatrixRow_SSSE3(const uint8* src_argb, uint8* dst_argb, - const int8* matrix_argb, int width); -void ARGBColorMatrixRow_NEON(const uint8* src_argb, uint8* dst_argb, - const int8* matrix_argb, int width); - -void ARGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width); -void ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width); - -void RGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width); -void RGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width); - -void ARGBQuantizeRow_C(uint8* dst_argb, int scale, int interval_size, - int interval_offset, int width); -void ARGBQuantizeRow_SSE2(uint8* dst_argb, int scale, int interval_size, - int interval_offset, int width); -void ARGBQuantizeRow_NEON(uint8* dst_argb, int scale, int interval_size, - int interval_offset, int width); - -void ARGBShadeRow_C(const uint8* src_argb, uint8* dst_argb, int width, - uint32 value); -void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width, - uint32 
value); -void ARGBShadeRow_NEON(const uint8* src_argb, uint8* dst_argb, int width, - uint32 value); - -// Used for blur. -void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft, - int width, int area, uint8* dst, int count); -void ComputeCumulativeSumRow_SSE2(const uint8* row, int32* cumsum, - const int32* previous_cumsum, int width); - -void CumulativeSumToAverageRow_C(const int32* topleft, const int32* botleft, - int width, int area, uint8* dst, int count); -void ComputeCumulativeSumRow_C(const uint8* row, int32* cumsum, - const int32* previous_cumsum, int width); - -LIBYUV_API -void ARGBAffineRow_C(const uint8* src_argb, int src_argb_stride, - uint8* dst_argb, const float* uv_dudv, int width); -LIBYUV_API -void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride, - uint8* dst_argb, const float* uv_dudv, int width); - -// Used for I420Scale, ARGBScale, and ARGBInterpolate. -void InterpolateRow_C(uint8* dst_ptr, const uint8* src_ptr, - ptrdiff_t src_stride_ptr, - int width, int source_y_fraction); -void InterpolateRow_SSE2(uint8* dst_ptr, const uint8* src_ptr, - ptrdiff_t src_stride_ptr, int width, - int source_y_fraction); -void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr, - ptrdiff_t src_stride_ptr, int width, - int source_y_fraction); -void InterpolateRow_AVX2(uint8* dst_ptr, const uint8* src_ptr, - ptrdiff_t src_stride_ptr, int width, - int source_y_fraction); -void InterpolateRow_NEON(uint8* dst_ptr, const uint8* src_ptr, - ptrdiff_t src_stride_ptr, int width, - int source_y_fraction); -void InterpolateRows_MIPS_DSPR2(uint8* dst_ptr, const uint8* src_ptr, - ptrdiff_t src_stride_ptr, int width, - int source_y_fraction); -void InterpolateRow_Unaligned_SSE2(uint8* dst_ptr, const uint8* src_ptr, - ptrdiff_t src_stride_ptr, int width, - int source_y_fraction); -void InterpolateRow_Unaligned_SSSE3(uint8* dst_ptr, const uint8* src_ptr, - ptrdiff_t src_stride_ptr, int width, - int source_y_fraction); -void 
InterpolateRow_Any_NEON(uint8* dst_ptr, const uint8* src_ptr, - ptrdiff_t src_stride_ptr, int width, - int source_y_fraction); -void InterpolateRow_Any_SSE2(uint8* dst_ptr, const uint8* src_ptr, - ptrdiff_t src_stride_ptr, int width, - int source_y_fraction); -void InterpolateRow_Any_SSSE3(uint8* dst_ptr, const uint8* src_ptr, - ptrdiff_t src_stride_ptr, int width, - int source_y_fraction); -void InterpolateRow_Any_AVX2(uint8* dst_ptr, const uint8* src_ptr, - ptrdiff_t src_stride_ptr, int width, - int source_y_fraction); -void InterpolateRows_Any_MIPS_DSPR2(uint8* dst_ptr, const uint8* src_ptr, - ptrdiff_t src_stride_ptr, int width, - int source_y_fraction); - -// Sobel images. -void SobelXRow_C(const uint8* src_y0, const uint8* src_y1, const uint8* src_y2, - uint8* dst_sobelx, int width); -void SobelXRow_SSE2(const uint8* src_y0, const uint8* src_y1, - const uint8* src_y2, uint8* dst_sobelx, int width); -void SobelXRow_NEON(const uint8* src_y0, const uint8* src_y1, - const uint8* src_y2, uint8* dst_sobelx, int width); -void SobelYRow_C(const uint8* src_y0, const uint8* src_y1, - uint8* dst_sobely, int width); -void SobelYRow_SSE2(const uint8* src_y0, const uint8* src_y1, - uint8* dst_sobely, int width); -void SobelYRow_NEON(const uint8* src_y0, const uint8* src_y1, - uint8* dst_sobely, int width); -void SobelRow_C(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_argb, int width); -void SobelRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_argb, int width); -void SobelRow_NEON(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_argb, int width); -void SobelToPlaneRow_C(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_y, int width); -void SobelToPlaneRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_y, int width); -void SobelToPlaneRow_NEON(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_y, int width); -void SobelXYRow_C(const uint8* src_sobelx, const uint8* 
src_sobely, - uint8* dst_argb, int width); -void SobelXYRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_argb, int width); -void SobelXYRow_NEON(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_argb, int width); - -void ARGBPolynomialRow_C(const uint8* src_argb, - uint8* dst_argb, const float* poly, - int width); -void ARGBPolynomialRow_SSE2(const uint8* src_argb, - uint8* dst_argb, const float* poly, - int width); -void ARGBPolynomialRow_AVX2(const uint8* src_argb, - uint8* dst_argb, const float* poly, - int width); - -void ARGBLumaColorTableRow_C(const uint8* src_argb, uint8* dst_argb, int width, - const uint8* luma, uint32 lumacoeff); -void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb, uint8* dst_argb, - int width, - const uint8* luma, uint32 lumacoeff); - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif - -#endif // INCLUDE_LIBYUV_ROW_H_ NOLINT diff --git a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/scale.h b/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/scale.h deleted file mode 100755 index 592b8ed5fa..0000000000 --- a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/scale.h +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright 2011 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef INCLUDE_LIBYUV_SCALE_H_ // NOLINT -#define INCLUDE_LIBYUV_SCALE_H_ - -#include "libyuv/basic_types.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -// Supported filtering. -typedef enum FilterMode { - kFilterNone = 0, // Point sample; Fastest. - kFilterLinear = 1, // Filter horizontally only. 
- kFilterBilinear = 2, // Faster than box, but lower quality scaling down. - kFilterBox = 3 // Highest quality. -} FilterModeEnum; - -// Scale a YUV plane. -LIBYUV_API -void ScalePlane(const uint8* src, int src_stride, - int src_width, int src_height, - uint8* dst, int dst_stride, - int dst_width, int dst_height, - enum FilterMode filtering); - -// Scales a YUV 4:2:0 image from the src width and height to the -// dst width and height. -// If filtering is kFilterNone, a simple nearest-neighbor algorithm is -// used. This produces basic (blocky) quality at the fastest speed. -// If filtering is kFilterBilinear, interpolation is used to produce a better -// quality image, at the expense of speed. -// If filtering is kFilterBox, averaging is used to produce ever better -// quality image, at further expense of speed. -// Returns 0 if successful. - -LIBYUV_API -int I420Scale(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - int src_width, int src_height, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int dst_width, int dst_height, - enum FilterMode filtering); - -#ifdef __cplusplus -// Legacy API. Deprecated. -LIBYUV_API -int Scale(const uint8* src_y, const uint8* src_u, const uint8* src_v, - int src_stride_y, int src_stride_u, int src_stride_v, - int src_width, int src_height, - uint8* dst_y, uint8* dst_u, uint8* dst_v, - int dst_stride_y, int dst_stride_u, int dst_stride_v, - int dst_width, int dst_height, - LIBYUV_BOOL interpolate); - -// Legacy API. Deprecated. -LIBYUV_API -int ScaleOffset(const uint8* src_i420, int src_width, int src_height, - uint8* dst_i420, int dst_width, int dst_height, int dst_yoffset, - LIBYUV_BOOL interpolate); - -// For testing, allow disabling of specialized scalers. 
-LIBYUV_API -void SetUseReferenceImpl(LIBYUV_BOOL use); -#endif // __cplusplus - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif - -#endif // INCLUDE_LIBYUV_SCALE_H_ NOLINT diff --git a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/scale_argb.h b/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/scale_argb.h deleted file mode 100755 index 0c9b362575..0000000000 --- a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/scale_argb.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright 2012 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef INCLUDE_LIBYUV_SCALE_ARGB_H_ // NOLINT -#define INCLUDE_LIBYUV_SCALE_ARGB_H_ - -#include "libyuv/basic_types.h" -#include "libyuv/scale.h" // For FilterMode - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -LIBYUV_API -int ARGBScale(const uint8* src_argb, int src_stride_argb, - int src_width, int src_height, - uint8* dst_argb, int dst_stride_argb, - int dst_width, int dst_height, - enum FilterMode filtering); - -// Clipped scale takes destination rectangle coordinates for clip values. -LIBYUV_API -int ARGBScaleClip(const uint8* src_argb, int src_stride_argb, - int src_width, int src_height, - uint8* dst_argb, int dst_stride_argb, - int dst_width, int dst_height, - int clip_x, int clip_y, int clip_width, int clip_height, - enum FilterMode filtering); - -// TODO(fbarchard): Implement this. -// Scale with YUV conversion to ARGB and clipping. 
-LIBYUV_API -int YUVToARGBScaleClip(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint32 src_fourcc, - int src_width, int src_height, - uint8* dst_argb, int dst_stride_argb, - uint32 dst_fourcc, - int dst_width, int dst_height, - int clip_x, int clip_y, int clip_width, int clip_height, - enum FilterMode filtering); - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif - -#endif // INCLUDE_LIBYUV_SCALE_ARGB_H_ NOLINT diff --git a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/scale_row.h b/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/scale_row.h deleted file mode 100644 index 13eccc4d77..0000000000 --- a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/scale_row.h +++ /dev/null @@ -1,301 +0,0 @@ -/* - * Copyright 2013 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#ifndef INCLUDE_LIBYUV_SCALE_ROW_H_ // NOLINT -#define INCLUDE_LIBYUV_SCALE_ROW_H_ - -#include "libyuv/basic_types.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -#if defined(__pnacl__) || defined(__CLR_VER) || defined(COVERAGE_ENABLED) || \ - defined(TARGET_IPHONE_SIMULATOR) -#define LIBYUV_DISABLE_X86 -#endif - -// The following are available on all x86 platforms: -#if !defined(LIBYUV_DISABLE_X86) && \ - (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) -#define HAS_SCALEROWDOWN2_SSE2 -#define HAS_SCALEROWDOWN4_SSE2 -#define HAS_SCALEROWDOWN34_SSSE3 -#define HAS_SCALEROWDOWN38_SSSE3 -#define HAS_SCALEADDROWS_SSE2 -#define HAS_SCALEFILTERCOLS_SSSE3 -#define HAS_SCALECOLSUP2_SSE2 -#define HAS_SCALEARGBROWDOWN2_SSE2 -#define HAS_SCALEARGBROWDOWNEVEN_SSE2 -#define HAS_SCALEARGBCOLS_SSE2 -#define HAS_SCALEARGBFILTERCOLS_SSSE3 -#define HAS_SCALEARGBCOLSUP2_SSE2 -#define HAS_FIXEDDIV_X86 -#define HAS_FIXEDDIV1_X86 -#endif - -// The following are available on Neon platforms: -#if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \ - (defined(__ARM_NEON__) || defined(LIBYUV_NEON)) -#define HAS_SCALEROWDOWN2_NEON -#define HAS_SCALEROWDOWN4_NEON -#define HAS_SCALEROWDOWN34_NEON -#define HAS_SCALEROWDOWN38_NEON -#define HAS_SCALEARGBROWDOWNEVEN_NEON -#define HAS_SCALEARGBROWDOWN2_NEON -#endif - -// The following are available on Mips platforms: -#if !defined(LIBYUV_DISABLE_MIPS) && !defined(__native_client__) && \ - defined(__mips__) && defined(__mips_dsp) && (__mips_dsp_rev >= 2) -#define HAS_SCALEROWDOWN2_MIPS_DSPR2 -#define HAS_SCALEROWDOWN4_MIPS_DSPR2 -#define HAS_SCALEROWDOWN34_MIPS_DSPR2 -#define HAS_SCALEROWDOWN38_MIPS_DSPR2 -#endif - -// Scale ARGB vertically with bilinear interpolation. 
-void ScalePlaneVertical(int src_height, - int dst_width, int dst_height, - int src_stride, int dst_stride, - const uint8* src_argb, uint8* dst_argb, - int x, int y, int dy, - int bpp, enum FilterMode filtering); - -// Simplify the filtering based on scale factors. -enum FilterMode ScaleFilterReduce(int src_width, int src_height, - int dst_width, int dst_height, - enum FilterMode filtering); - -// Divide num by div and return as 16.16 fixed point result. -int FixedDiv_C(int num, int div); -int FixedDiv_X86(int num, int div); -// Divide num - 1 by div - 1 and return as 16.16 fixed point result. -int FixedDiv1_C(int num, int div); -int FixedDiv1_X86(int num, int div); -#ifdef HAS_FIXEDDIV_X86 -#define FixedDiv FixedDiv_X86 -#define FixedDiv1 FixedDiv1_X86 -#else -#define FixedDiv FixedDiv_C -#define FixedDiv1 FixedDiv1_C -#endif - -// Compute slope values for stepping. -void ScaleSlope(int src_width, int src_height, - int dst_width, int dst_height, - enum FilterMode filtering, - int* x, int* y, int* dx, int* dy); - -void ScaleRowDown2_C(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleRowDown2Linear_C(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleRowDown2Box_C(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleRowDown4_C(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleRowDown4Box_C(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleRowDown34_C(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleRowDown34_0_Box_C(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* d, int dst_width); -void ScaleRowDown34_1_Box_C(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* d, int dst_width); -void ScaleCols_C(uint8* dst_ptr, const uint8* src_ptr, - int dst_width, int x, int dx); -void ScaleColsUp2_C(uint8* dst_ptr, const uint8* src_ptr, - int dst_width, int, int); 
-void ScaleFilterCols_C(uint8* dst_ptr, const uint8* src_ptr, - int dst_width, int x, int dx); -void ScaleFilterCols64_C(uint8* dst_ptr, const uint8* src_ptr, - int dst_width, int x, int dx); -void ScaleRowDown38_C(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleRowDown38_3_Box_C(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown38_2_Box_C(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleAddRows_C(const uint8* src_ptr, ptrdiff_t src_stride, - uint16* dst_ptr, int src_width, int src_height); -void ScaleARGBRowDown2_C(const uint8* src_argb, - ptrdiff_t src_stride, - uint8* dst_argb, int dst_width); -void ScaleARGBRowDown2Linear_C(const uint8* src_argb, - ptrdiff_t src_stride, - uint8* dst_argb, int dst_width); -void ScaleARGBRowDown2Box_C(const uint8* src_argb, ptrdiff_t src_stride, - uint8* dst_argb, int dst_width); -void ScaleARGBRowDownEven_C(const uint8* src_argb, ptrdiff_t src_stride, - int src_stepx, - uint8* dst_argb, int dst_width); -void ScaleARGBRowDownEvenBox_C(const uint8* src_argb, - ptrdiff_t src_stride, - int src_stepx, - uint8* dst_argb, int dst_width); -void ScaleARGBCols_C(uint8* dst_argb, const uint8* src_argb, - int dst_width, int x, int dx); -void ScaleARGBCols64_C(uint8* dst_argb, const uint8* src_argb, - int dst_width, int x, int dx); -void ScaleARGBColsUp2_C(uint8* dst_argb, const uint8* src_argb, - int dst_width, int, int); -void ScaleARGBFilterCols_C(uint8* dst_argb, const uint8* src_argb, - int dst_width, int x, int dx); -void ScaleARGBFilterCols64_C(uint8* dst_argb, const uint8* src_argb, - int dst_width, int x, int dx); - -void ScaleRowDown2_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown2Linear_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown2Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* 
dst_ptr, int dst_width); -void ScaleRowDown2_Unaligned_SSE2(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown2Linear_Unaligned_SSE2(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown2Box_Unaligned_SSE2(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown4_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown4Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown34_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown34_0_Box_SSSE3(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown38_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleAddRows_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, - uint16* dst_ptr, int src_width, - int src_height); -void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr, - int dst_width, int x, int dx); -void ScaleColsUp2_SSE2(uint8* dst_ptr, const uint8* src_ptr, - int dst_width, int x, int dx); -void ScaleARGBRowDown2_SSE2(const uint8* src_argb, - ptrdiff_t src_stride, - uint8* dst_argb, int dst_width); -void ScaleARGBRowDown2Linear_SSE2(const uint8* src_argb, - ptrdiff_t src_stride, - uint8* dst_argb, int dst_width); -void ScaleARGBRowDown2Box_SSE2(const uint8* src_argb, - ptrdiff_t src_stride, - uint8* dst_argb, int dst_width); -void ScaleARGBRowDownEven_SSE2(const uint8* src_argb, ptrdiff_t src_stride, - int 
src_stepx, - uint8* dst_argb, int dst_width); -void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb, - ptrdiff_t src_stride, - int src_stepx, - uint8* dst_argb, int dst_width); -void ScaleARGBCols_SSE2(uint8* dst_argb, const uint8* src_argb, - int dst_width, int x, int dx); -void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb, - int dst_width, int x, int dx); -void ScaleARGBColsUp2_SSE2(uint8* dst_argb, const uint8* src_argb, - int dst_width, int x, int dx); -// Row functions. -void ScaleARGBRowDownEven_NEON(const uint8* src_argb, int src_stride, - int src_stepx, - uint8* dst_argb, int dst_width); -void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, int src_stride, - int src_stepx, - uint8* dst_argb, int dst_width); -void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); - -// ScaleRowDown2Box also used by planar functions -// NEON downscalers with interpolation. - -// Note - not static due to reuse in convert for 444 to 420. -void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); - -void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); - -void ScaleRowDown4_NEON(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown4Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); - -// Down scale from 4 to 3 pixels. Use the neon multilane read/write -// to load up the every 4th pixel into a 4 different registers. -// Point samples 32 pixels to 24 pixels. 
-void ScaleRowDown34_NEON(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown34_0_Box_NEON(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown34_1_Box_NEON(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); - -// 32 -> 12 -void ScaleRowDown38_NEON(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -// 32x3 -> 12x1 -void ScaleRowDown38_3_Box_NEON(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -// 32x2 -> 12x1 -void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); - -void ScaleRowDown2_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleRowDown2Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleRowDown4_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleRowDown4Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleRowDown34_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleRowDown34_0_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* d, int dst_width); -void ScaleRowDown34_1_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* d, int dst_width); -void ScaleRowDown38_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width); -void ScaleRowDown38_2_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); -void ScaleRowDown38_3_Box_MIPS_DSPR2(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif - -#endif // INCLUDE_LIBYUV_SCALE_ROW_H_ NOLINT diff --git a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/version.h 
b/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/version.h deleted file mode 100755 index 4881861866..0000000000 --- a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/version.h +++ /dev/null @@ -1,16 +0,0 @@ -/* - * Copyright 2012 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT -#define INCLUDE_LIBYUV_VERSION_H_ - -#define LIBYUV_VERSION 998 - -#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT diff --git a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/video_common.h b/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/video_common.h deleted file mode 100755 index 039efb96d1..0000000000 --- a/drivers/theoraplayer/src/YUV/libyuv/include/libyuv/video_common.h +++ /dev/null @@ -1,182 +0,0 @@ -/* - * Copyright 2011 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -// Common definitions for video, including fourcc and VideoFormat. 
- -#ifndef INCLUDE_LIBYUV_VIDEO_COMMON_H_ // NOLINT -#define INCLUDE_LIBYUV_VIDEO_COMMON_H_ - -#include "libyuv/basic_types.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -////////////////////////////////////////////////////////////////////////////// -// Definition of FourCC codes -////////////////////////////////////////////////////////////////////////////// - -// Convert four characters to a FourCC code. -// Needs to be a macro otherwise the OS X compiler complains when the kFormat* -// constants are used in a switch. -#ifdef __cplusplus -#define FOURCC(a, b, c, d) ( \ - (static_cast<uint32>(a)) | (static_cast<uint32>(b) << 8) | \ - (static_cast<uint32>(c) << 16) | (static_cast<uint32>(d) << 24)) -#else -#define FOURCC(a, b, c, d) ( \ - ((uint32)(a)) | ((uint32)(b) << 8) | /* NOLINT */ \ - ((uint32)(c) << 16) | ((uint32)(d) << 24)) /* NOLINT */ -#endif - -// Some pages discussing FourCC codes: -// http://www.fourcc.org/yuv.php -// http://v4l2spec.bytesex.org/spec/book1.htm -// http://developer.apple.com/quicktime/icefloe/dispatch020.html -// http://msdn.microsoft.com/library/windows/desktop/dd206750.aspx#nv12 -// http://people.xiph.org/~xiphmont/containers/nut/nut4cc.txt - -// FourCC codes grouped according to implementation efficiency. -// Primary formats should convert in 1 efficient step. -// Secondary formats are converted in 2 steps. -// Auxilliary formats call primary converters. -enum FourCC { - // 9 Primary YUV formats: 5 planar, 2 biplanar, 2 packed. - FOURCC_I420 = FOURCC('I', '4', '2', '0'), - FOURCC_I422 = FOURCC('I', '4', '2', '2'), - FOURCC_I444 = FOURCC('I', '4', '4', '4'), - FOURCC_I411 = FOURCC('I', '4', '1', '1'), - FOURCC_I400 = FOURCC('I', '4', '0', '0'), - FOURCC_NV21 = FOURCC('N', 'V', '2', '1'), - FOURCC_NV12 = FOURCC('N', 'V', '1', '2'), - FOURCC_YUY2 = FOURCC('Y', 'U', 'Y', '2'), - FOURCC_UYVY = FOURCC('U', 'Y', 'V', 'Y'), - - // 2 Secondary YUV formats: row biplanar. 
- FOURCC_M420 = FOURCC('M', '4', '2', '0'), - FOURCC_Q420 = FOURCC('Q', '4', '2', '0'), - - // 9 Primary RGB formats: 4 32 bpp, 2 24 bpp, 3 16 bpp. - FOURCC_ARGB = FOURCC('A', 'R', 'G', 'B'), - FOURCC_BGRA = FOURCC('B', 'G', 'R', 'A'), - FOURCC_ABGR = FOURCC('A', 'B', 'G', 'R'), - FOURCC_24BG = FOURCC('2', '4', 'B', 'G'), - FOURCC_RAW = FOURCC('r', 'a', 'w', ' '), - FOURCC_RGBA = FOURCC('R', 'G', 'B', 'A'), - FOURCC_RGBP = FOURCC('R', 'G', 'B', 'P'), // rgb565 LE. - FOURCC_RGBO = FOURCC('R', 'G', 'B', 'O'), // argb1555 LE. - FOURCC_R444 = FOURCC('R', '4', '4', '4'), // argb4444 LE. - - // 4 Secondary RGB formats: 4 Bayer Patterns. - FOURCC_RGGB = FOURCC('R', 'G', 'G', 'B'), - FOURCC_BGGR = FOURCC('B', 'G', 'G', 'R'), - FOURCC_GRBG = FOURCC('G', 'R', 'B', 'G'), - FOURCC_GBRG = FOURCC('G', 'B', 'R', 'G'), - - // 1 Primary Compressed YUV format. - FOURCC_MJPG = FOURCC('M', 'J', 'P', 'G'), - - // 5 Auxiliary YUV variations: 3 with U and V planes are swapped, 1 Alias. - FOURCC_YV12 = FOURCC('Y', 'V', '1', '2'), - FOURCC_YV16 = FOURCC('Y', 'V', '1', '6'), - FOURCC_YV24 = FOURCC('Y', 'V', '2', '4'), - FOURCC_YU12 = FOURCC('Y', 'U', '1', '2'), // Linux version of I420. - FOURCC_J420 = FOURCC('J', '4', '2', '0'), - FOURCC_J400 = FOURCC('J', '4', '0', '0'), - - // 14 Auxiliary aliases. CanonicalFourCC() maps these to canonical fourcc. - FOURCC_IYUV = FOURCC('I', 'Y', 'U', 'V'), // Alias for I420. - FOURCC_YU16 = FOURCC('Y', 'U', '1', '6'), // Alias for I422. - FOURCC_YU24 = FOURCC('Y', 'U', '2', '4'), // Alias for I444. - FOURCC_YUYV = FOURCC('Y', 'U', 'Y', 'V'), // Alias for YUY2. - FOURCC_YUVS = FOURCC('y', 'u', 'v', 's'), // Alias for YUY2 on Mac. - FOURCC_HDYC = FOURCC('H', 'D', 'Y', 'C'), // Alias for UYVY. - FOURCC_2VUY = FOURCC('2', 'v', 'u', 'y'), // Alias for UYVY on Mac. - FOURCC_JPEG = FOURCC('J', 'P', 'E', 'G'), // Alias for MJPG. - FOURCC_DMB1 = FOURCC('d', 'm', 'b', '1'), // Alias for MJPG on Mac. - FOURCC_BA81 = FOURCC('B', 'A', '8', '1'), // Alias for BGGR. 
- FOURCC_RGB3 = FOURCC('R', 'G', 'B', '3'), // Alias for RAW. - FOURCC_BGR3 = FOURCC('B', 'G', 'R', '3'), // Alias for 24BG. - FOURCC_CM32 = FOURCC(0, 0, 0, 32), // Alias for BGRA kCMPixelFormat_32ARGB - FOURCC_CM24 = FOURCC(0, 0, 0, 24), // Alias for RAW kCMPixelFormat_24RGB - FOURCC_L555 = FOURCC('L', '5', '5', '5'), // Alias for RGBO. - FOURCC_L565 = FOURCC('L', '5', '6', '5'), // Alias for RGBP. - FOURCC_5551 = FOURCC('5', '5', '5', '1'), // Alias for RGBO. - - // 1 Auxiliary compressed YUV format set aside for capturer. - FOURCC_H264 = FOURCC('H', '2', '6', '4'), - - // Match any fourcc. - FOURCC_ANY = 0xFFFFFFFF, -}; - -enum FourCCBpp { - // Canonical fourcc codes used in our code. - FOURCC_BPP_I420 = 12, - FOURCC_BPP_I422 = 16, - FOURCC_BPP_I444 = 24, - FOURCC_BPP_I411 = 12, - FOURCC_BPP_I400 = 8, - FOURCC_BPP_NV21 = 12, - FOURCC_BPP_NV12 = 12, - FOURCC_BPP_YUY2 = 16, - FOURCC_BPP_UYVY = 16, - FOURCC_BPP_M420 = 12, - FOURCC_BPP_Q420 = 12, - FOURCC_BPP_ARGB = 32, - FOURCC_BPP_BGRA = 32, - FOURCC_BPP_ABGR = 32, - FOURCC_BPP_RGBA = 32, - FOURCC_BPP_24BG = 24, - FOURCC_BPP_RAW = 24, - FOURCC_BPP_RGBP = 16, - FOURCC_BPP_RGBO = 16, - FOURCC_BPP_R444 = 16, - FOURCC_BPP_RGGB = 8, - FOURCC_BPP_BGGR = 8, - FOURCC_BPP_GRBG = 8, - FOURCC_BPP_GBRG = 8, - FOURCC_BPP_YV12 = 12, - FOURCC_BPP_YV16 = 16, - FOURCC_BPP_YV24 = 24, - FOURCC_BPP_YU12 = 12, - FOURCC_BPP_J420 = 12, - FOURCC_BPP_J400 = 8, - FOURCC_BPP_MJPG = 0, // 0 means unknown. - FOURCC_BPP_H264 = 0, - FOURCC_BPP_IYUV = 12, - FOURCC_BPP_YU16 = 16, - FOURCC_BPP_YU24 = 24, - FOURCC_BPP_YUYV = 16, - FOURCC_BPP_YUVS = 16, - FOURCC_BPP_HDYC = 16, - FOURCC_BPP_2VUY = 16, - FOURCC_BPP_JPEG = 1, - FOURCC_BPP_DMB1 = 1, - FOURCC_BPP_BA81 = 8, - FOURCC_BPP_RGB3 = 24, - FOURCC_BPP_BGR3 = 24, - FOURCC_BPP_CM32 = 32, - FOURCC_BPP_CM24 = 24, - - // Match any fourcc. - FOURCC_BPP_ANY = 0, // 0 means unknown. -}; - -// Converts fourcc aliases into canonical ones. 
-LIBYUV_API uint32 CanonicalFourCC(uint32 fourcc); - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif - -#endif // INCLUDE_LIBYUV_VIDEO_COMMON_H_ NOLINT diff --git a/drivers/theoraplayer/src/YUV/libyuv/libtheoraplayer-readme.txt b/drivers/theoraplayer/src/YUV/libyuv/libtheoraplayer-readme.txt deleted file mode 100755 index 680e4a1c36..0000000000 --- a/drivers/theoraplayer/src/YUV/libyuv/libtheoraplayer-readme.txt +++ /dev/null @@ -1,15 +0,0 @@ -libyuv's source code is here provided in minimalist distribution format -with all source files not needed for compiling libtheoraplayer removed. - -- The project files were modified to fit libtheoraplayer's binary output - folder structure. -- Some project files missing in the original source distibution were added to support - compiling the libtheoraplayer on those platforms. -- Also, some code may have been changed to address certain compiler/platform - specific problems and is so indicated in the source code. - -libyuv is owned and maintained by the Google Inc. and this distribution -is present here only for convenience and easier compilation of libtheoraplayer. - -If you want to use libyuv outside of libtheoraplayer, it is encouraged to use the -original source distribution by Google Inc: https://code.google.com/p/libyuv/
\ No newline at end of file diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/compare.cc b/drivers/theoraplayer/src/YUV/libyuv/src/compare.cc deleted file mode 100755 index 9ea81b4e21..0000000000 --- a/drivers/theoraplayer/src/YUV/libyuv/src/compare.cc +++ /dev/null @@ -1,325 +0,0 @@ -/* - * Copyright 2011 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "libyuv/compare.h" - -#include <float.h> -#include <math.h> -#ifdef _OPENMP -#include <omp.h> -#endif - -#include "libyuv/basic_types.h" -#include "libyuv/cpu_id.h" -#include "libyuv/row.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -// hash seed of 5381 recommended. -// Internal C version of HashDjb2 with int sized count for efficiency. -uint32 HashDjb2_C(const uint8* src, int count, uint32 seed); - -// This module is for Visual C x86 -#if !defined(LIBYUV_DISABLE_X86) && \ - (defined(_M_IX86) || \ - (defined(__x86_64__) || (defined(__i386__) && !defined(__pic__)))) -#define HAS_HASHDJB2_SSE41 -uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed); - -#if _MSC_VER >= 1700 -#define HAS_HASHDJB2_AVX2 -uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed); -#endif - -#endif // HAS_HASHDJB2_SSE41 - -// hash seed of 5381 recommended. 
-LIBYUV_API -uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed) { - const int kBlockSize = 1 << 15; // 32768; - int remainder; - uint32 (*HashDjb2_SSE)(const uint8* src, int count, uint32 seed) = HashDjb2_C; -#if defined(HAS_HASHDJB2_SSE41) - if (TestCpuFlag(kCpuHasSSE41)) { - HashDjb2_SSE = HashDjb2_SSE41; - } -#endif -#if defined(HAS_HASHDJB2_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - HashDjb2_SSE = HashDjb2_AVX2; - } -#endif - - while (count >= (uint64)(kBlockSize)) { - seed = HashDjb2_SSE(src, kBlockSize, seed); - src += kBlockSize; - count -= kBlockSize; - } - remainder = (int)(count) & ~15; - if (remainder) { - seed = HashDjb2_SSE(src, remainder, seed); - src += remainder; - count -= remainder; - } - remainder = (int)(count) & 15; - if (remainder) { - seed = HashDjb2_C(src, remainder, seed); - } - return seed; -} - -uint32 SumSquareError_C(const uint8* src_a, const uint8* src_b, int count); -#if !defined(LIBYUV_DISABLE_NEON) && \ - (defined(__ARM_NEON__) || defined(LIBYUV_NEON)) -#define HAS_SUMSQUAREERROR_NEON -uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count); -#endif -#if !defined(LIBYUV_DISABLE_X86) && \ - (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) -#define HAS_SUMSQUAREERROR_SSE2 -uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count); -#endif -// Visual C 2012 required for AVX2. -#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && _MSC_VER >= 1700 -#define HAS_SUMSQUAREERROR_AVX2 -uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count); -#endif - -// TODO(fbarchard): Refactor into row function. -LIBYUV_API -uint64 ComputeSumSquareError(const uint8* src_a, const uint8* src_b, - int count) { - // SumSquareError returns values 0 to 65535 for each squared difference. - // Up to 65536 of those can be summed and remain within a uint32. - // After each block of 65536 pixels, accumulate into a uint64. 
- const int kBlockSize = 65536; - int remainder = count & (kBlockSize - 1) & ~31; - uint64 sse = 0; - int i; - uint32 (*SumSquareError)(const uint8* src_a, const uint8* src_b, int count) = - SumSquareError_C; -#if defined(HAS_SUMSQUAREERROR_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - SumSquareError = SumSquareError_NEON; - } -#endif -#if defined(HAS_SUMSQUAREERROR_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && - IS_ALIGNED(src_a, 16) && IS_ALIGNED(src_b, 16)) { - // Note only used for multiples of 16 so count is not checked. - SumSquareError = SumSquareError_SSE2; - } -#endif -#if defined(HAS_SUMSQUAREERROR_AVX2) - if (TestCpuFlag(kCpuHasAVX2)) { - // Note only used for multiples of 32 so count is not checked. - SumSquareError = SumSquareError_AVX2; - } -#endif -#ifdef _OPENMP -#pragma omp parallel for reduction(+: sse) -#endif - for (i = 0; i < (count - (kBlockSize - 1)); i += kBlockSize) { - sse += SumSquareError(src_a + i, src_b + i, kBlockSize); - } - src_a += count & ~(kBlockSize - 1); - src_b += count & ~(kBlockSize - 1); - if (remainder) { - sse += SumSquareError(src_a, src_b, remainder); - src_a += remainder; - src_b += remainder; - } - remainder = count & 31; - if (remainder) { - sse += SumSquareError_C(src_a, src_b, remainder); - } - return sse; -} - -LIBYUV_API -uint64 ComputeSumSquareErrorPlane(const uint8* src_a, int stride_a, - const uint8* src_b, int stride_b, - int width, int height) { - uint64 sse = 0; - int h; - // Coalesce rows. 
- if (stride_a == width && - stride_b == width) { - width *= height; - height = 1; - stride_a = stride_b = 0; - } - for (h = 0; h < height; ++h) { - sse += ComputeSumSquareError(src_a, src_b, width); - src_a += stride_a; - src_b += stride_b; - } - return sse; -} - -LIBYUV_API -double SumSquareErrorToPsnr(uint64 sse, uint64 count) { - double psnr; - if (sse > 0) { - double mse = (double)(count) / (double)(sse); - psnr = 10.0 * log10(255.0 * 255.0 * mse); - } else { - psnr = kMaxPsnr; // Limit to prevent divide by 0 - } - - if (psnr > kMaxPsnr) - psnr = kMaxPsnr; - - return psnr; -} - -LIBYUV_API -double CalcFramePsnr(const uint8* src_a, int stride_a, - const uint8* src_b, int stride_b, - int width, int height) { - const uint64 samples = width * height; - const uint64 sse = ComputeSumSquareErrorPlane(src_a, stride_a, - src_b, stride_b, - width, height); - return SumSquareErrorToPsnr(sse, samples); -} - -LIBYUV_API -double I420Psnr(const uint8* src_y_a, int stride_y_a, - const uint8* src_u_a, int stride_u_a, - const uint8* src_v_a, int stride_v_a, - const uint8* src_y_b, int stride_y_b, - const uint8* src_u_b, int stride_u_b, - const uint8* src_v_b, int stride_v_b, - int width, int height) { - const uint64 sse_y = ComputeSumSquareErrorPlane(src_y_a, stride_y_a, - src_y_b, stride_y_b, - width, height); - const int width_uv = (width + 1) >> 1; - const int height_uv = (height + 1) >> 1; - const uint64 sse_u = ComputeSumSquareErrorPlane(src_u_a, stride_u_a, - src_u_b, stride_u_b, - width_uv, height_uv); - const uint64 sse_v = ComputeSumSquareErrorPlane(src_v_a, stride_v_a, - src_v_b, stride_v_b, - width_uv, height_uv); - const uint64 samples = width * height + 2 * (width_uv * height_uv); - const uint64 sse = sse_y + sse_u + sse_v; - return SumSquareErrorToPsnr(sse, samples); -} - -static const int64 cc1 = 26634; // (64^2*(.01*255)^2 -static const int64 cc2 = 239708; // (64^2*(.03*255)^2 - -static double Ssim8x8_C(const uint8* src_a, int stride_a, - const uint8* src_b, int 
stride_b) { - int64 sum_a = 0; - int64 sum_b = 0; - int64 sum_sq_a = 0; - int64 sum_sq_b = 0; - int64 sum_axb = 0; - - int i; - for (i = 0; i < 8; ++i) { - int j; - for (j = 0; j < 8; ++j) { - sum_a += src_a[j]; - sum_b += src_b[j]; - sum_sq_a += src_a[j] * src_a[j]; - sum_sq_b += src_b[j] * src_b[j]; - sum_axb += src_a[j] * src_b[j]; - } - - src_a += stride_a; - src_b += stride_b; - } - - { - const int64 count = 64; - // scale the constants by number of pixels - const int64 c1 = (cc1 * count * count) >> 12; - const int64 c2 = (cc2 * count * count) >> 12; - - const int64 sum_a_x_sum_b = sum_a * sum_b; - - const int64 ssim_n = (2 * sum_a_x_sum_b + c1) * - (2 * count * sum_axb - 2 * sum_a_x_sum_b + c2); - - const int64 sum_a_sq = sum_a*sum_a; - const int64 sum_b_sq = sum_b*sum_b; - - const int64 ssim_d = (sum_a_sq + sum_b_sq + c1) * - (count * sum_sq_a - sum_a_sq + - count * sum_sq_b - sum_b_sq + c2); - - if (ssim_d == 0.0) { - return DBL_MAX; - } - return ssim_n * 1.0 / ssim_d; - } -} - -// We are using a 8x8 moving window with starting location of each 8x8 window -// on the 4x4 pixel grid. Such arrangement allows the windows to overlap -// block boundaries to penalize blocking artifacts. 
-LIBYUV_API -double CalcFrameSsim(const uint8* src_a, int stride_a, - const uint8* src_b, int stride_b, - int width, int height) { - int samples = 0; - double ssim_total = 0; - double (*Ssim8x8)(const uint8* src_a, int stride_a, - const uint8* src_b, int stride_b) = Ssim8x8_C; - - // sample point start with each 4x4 location - int i; - for (i = 0; i < height - 8; i += 4) { - int j; - for (j = 0; j < width - 8; j += 4) { - ssim_total += Ssim8x8(src_a + j, stride_a, src_b + j, stride_b); - samples++; - } - - src_a += stride_a * 4; - src_b += stride_b * 4; - } - - ssim_total /= samples; - return ssim_total; -} - -LIBYUV_API -double I420Ssim(const uint8* src_y_a, int stride_y_a, - const uint8* src_u_a, int stride_u_a, - const uint8* src_v_a, int stride_v_a, - const uint8* src_y_b, int stride_y_b, - const uint8* src_u_b, int stride_u_b, - const uint8* src_v_b, int stride_v_b, - int width, int height) { - const double ssim_y = CalcFrameSsim(src_y_a, stride_y_a, - src_y_b, stride_y_b, width, height); - const int width_uv = (width + 1) >> 1; - const int height_uv = (height + 1) >> 1; - const double ssim_u = CalcFrameSsim(src_u_a, stride_u_a, - src_u_b, stride_u_b, - width_uv, height_uv); - const double ssim_v = CalcFrameSsim(src_v_a, stride_v_a, - src_v_b, stride_v_b, - width_uv, height_uv); - return ssim_y * 0.8 + 0.1 * (ssim_u + ssim_v); -} - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/compare_common.cc b/drivers/theoraplayer/src/YUV/libyuv/src/compare_common.cc deleted file mode 100755 index c546b51829..0000000000 --- a/drivers/theoraplayer/src/YUV/libyuv/src/compare_common.cc +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Copyright 2012 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. 
An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "libyuv/basic_types.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -uint32 SumSquareError_C(const uint8* src_a, const uint8* src_b, int count) { - uint32 sse = 0u; - int i; - for (i = 0; i < count; ++i) { - int diff = src_a[i] - src_b[i]; - sse += (uint32)(diff * diff); - } - return sse; -} - -// hash seed of 5381 recommended. -// Internal C version of HashDjb2 with int sized count for efficiency. -uint32 HashDjb2_C(const uint8* src, int count, uint32 seed) { - uint32 hash = seed; - int i; - for (i = 0; i < count; ++i) { - hash += (hash << 5) + src[i]; - } - return hash; -} - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/compare_neon.cc b/drivers/theoraplayer/src/YUV/libyuv/src/compare_neon.cc deleted file mode 100755 index bb843a6ab8..0000000000 --- a/drivers/theoraplayer/src/YUV/libyuv/src/compare_neon.cc +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright 2012 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include "libyuv/basic_types.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__) - -uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) { - volatile uint32 sse; - asm volatile ( -#ifdef _ANDROID - ".fpu neon\n" -#endif - "vmov.u8 q8, #0 \n" - "vmov.u8 q10, #0 \n" - "vmov.u8 q9, #0 \n" - "vmov.u8 q11, #0 \n" - - ".p2align 2 \n" - "1: \n" - "vld1.8 {q0}, [%0]! \n" - "vld1.8 {q1}, [%1]! \n" - "subs %2, %2, #16 \n" - "vsubl.u8 q2, d0, d2 \n" - "vsubl.u8 q3, d1, d3 \n" - "vmlal.s16 q8, d4, d4 \n" - "vmlal.s16 q9, d6, d6 \n" - "vmlal.s16 q10, d5, d5 \n" - "vmlal.s16 q11, d7, d7 \n" - "bgt 1b \n" - - "vadd.u32 q8, q8, q9 \n" - "vadd.u32 q10, q10, q11 \n" - "vadd.u32 q11, q8, q10 \n" - "vpaddl.u32 q1, q11 \n" - "vadd.u64 d0, d2, d3 \n" - "vmov.32 %3, d0[0] \n" - : "+r"(src_a), - "+r"(src_b), - "+r"(count), - "=r"(sse) - : - : "memory", "cc", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11"); - return sse; -} - -#endif // __ARM_NEON__ - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/compare_posix.cc b/drivers/theoraplayer/src/YUV/libyuv/src/compare_posix.cc deleted file mode 100755 index ac361190e8..0000000000 --- a/drivers/theoraplayer/src/YUV/libyuv/src/compare_posix.cc +++ /dev/null @@ -1,158 +0,0 @@ -/* - * Copyright 2012 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include "libyuv/basic_types.h" -#include "libyuv/row.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__)) - -uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) { - uint32 sse; - asm volatile ( // NOLINT - "pxor %%xmm0,%%xmm0 \n" - "pxor %%xmm5,%%xmm5 \n" - LABELALIGN - "1: \n" - "movdqa " MEMACCESS(0) ",%%xmm1 \n" - "lea " MEMLEA(0x10, 0) ",%0 \n" - "movdqa " MEMACCESS(1) ",%%xmm2 \n" - "lea " MEMLEA(0x10, 1) ",%1 \n" - "sub $0x10,%2 \n" - "movdqa %%xmm1,%%xmm3 \n" - "psubusb %%xmm2,%%xmm1 \n" - "psubusb %%xmm3,%%xmm2 \n" - "por %%xmm2,%%xmm1 \n" - "movdqa %%xmm1,%%xmm2 \n" - "punpcklbw %%xmm5,%%xmm1 \n" - "punpckhbw %%xmm5,%%xmm2 \n" - "pmaddwd %%xmm1,%%xmm1 \n" - "pmaddwd %%xmm2,%%xmm2 \n" - "paddd %%xmm1,%%xmm0 \n" - "paddd %%xmm2,%%xmm0 \n" - "jg 1b \n" - - "pshufd $0xee,%%xmm0,%%xmm1 \n" - "paddd %%xmm1,%%xmm0 \n" - "pshufd $0x1,%%xmm0,%%xmm1 \n" - "paddd %%xmm1,%%xmm0 \n" - "movd %%xmm0,%3 \n" - - : "+r"(src_a), // %0 - "+r"(src_b), // %1 - "+r"(count), // %2 - "=g"(sse) // %3 - : - : "memory", "cc" -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" -#endif - ); // NOLINT - return sse; -} - -#endif // defined(__x86_64__) || defined(__i386__) - -#if !defined(LIBYUV_DISABLE_X86) && \ - (defined(__x86_64__) || (defined(__i386__) && !defined(__pic__))) -#define HAS_HASHDJB2_SSE41 -static uvec32 kHash16x33 = { 0x92d9e201, 0, 0, 0 }; // 33 ^ 16 -static uvec32 kHashMul0 = { - 0x0c3525e1, // 33 ^ 15 - 0xa3476dc1, // 33 ^ 14 - 0x3b4039a1, // 33 ^ 13 - 0x4f5f0981, // 33 ^ 12 -}; -static uvec32 kHashMul1 = { - 0x30f35d61, // 33 ^ 11 - 0x855cb541, // 33 ^ 10 - 0x040a9121, // 33 ^ 9 - 0x747c7101, // 33 ^ 8 -}; -static uvec32 kHashMul2 = { - 0xec41d4e1, // 33 ^ 7 - 0x4cfa3cc1, // 33 ^ 6 - 0x025528a1, // 33 ^ 5 - 0x00121881, // 33 ^ 4 -}; -static uvec32 kHashMul3 = { - 0x00008c61, // 33 ^ 3 - 0x00000441, // 33 ^ 2 - 0x00000021, 
// 33 ^ 1 - 0x00000001, // 33 ^ 0 -}; - -uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) { - uint32 hash; - asm volatile ( // NOLINT - "movd %2,%%xmm0 \n" - "pxor %%xmm7,%%xmm7 \n" - "movdqa %4,%%xmm6 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm1 \n" - "lea " MEMLEA(0x10, 0) ",%0 \n" - "pmulld %%xmm6,%%xmm0 \n" - "movdqa %5,%%xmm5 \n" - "movdqa %%xmm1,%%xmm2 \n" - "punpcklbw %%xmm7,%%xmm2 \n" - "movdqa %%xmm2,%%xmm3 \n" - "punpcklwd %%xmm7,%%xmm3 \n" - "pmulld %%xmm5,%%xmm3 \n" - "movdqa %6,%%xmm5 \n" - "movdqa %%xmm2,%%xmm4 \n" - "punpckhwd %%xmm7,%%xmm4 \n" - "pmulld %%xmm5,%%xmm4 \n" - "movdqa %7,%%xmm5 \n" - "punpckhbw %%xmm7,%%xmm1 \n" - "movdqa %%xmm1,%%xmm2 \n" - "punpcklwd %%xmm7,%%xmm2 \n" - "pmulld %%xmm5,%%xmm2 \n" - "movdqa %8,%%xmm5 \n" - "punpckhwd %%xmm7,%%xmm1 \n" - "pmulld %%xmm5,%%xmm1 \n" - "paddd %%xmm4,%%xmm3 \n" - "paddd %%xmm2,%%xmm1 \n" - "sub $0x10,%1 \n" - "paddd %%xmm3,%%xmm1 \n" - "pshufd $0xe,%%xmm1,%%xmm2 \n" - "paddd %%xmm2,%%xmm1 \n" - "pshufd $0x1,%%xmm1,%%xmm2 \n" - "paddd %%xmm2,%%xmm1 \n" - "paddd %%xmm1,%%xmm0 \n" - "jg 1b \n" - "movd %%xmm0,%3 \n" - : "+r"(src), // %0 - "+r"(count), // %1 - "+rm"(seed), // %2 - "=g"(hash) // %3 - : "m"(kHash16x33), // %4 - "m"(kHashMul0), // %5 - "m"(kHashMul1), // %6 - "m"(kHashMul2), // %7 - "m"(kHashMul3) // %8 - : "memory", "cc" -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" -#endif - ); // NOLINT - return hash; -} -#endif // defined(__x86_64__) || (defined(__i386__) && !defined(__pic__))) - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif - diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/compare_win.cc b/drivers/theoraplayer/src/YUV/libyuv/src/compare_win.cc deleted file mode 100755 index 99831651f5..0000000000 --- a/drivers/theoraplayer/src/YUV/libyuv/src/compare_win.cc +++ /dev/null @@ -1,232 +0,0 @@ -/* - * Copyright 2012 The LibYuv Project Authors. All rights reserved. 
- * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "libyuv/basic_types.h" -#include "libyuv/row.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER) - -__declspec(naked) __declspec(align(16)) -uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) { - __asm { - mov eax, [esp + 4] // src_a - mov edx, [esp + 8] // src_b - mov ecx, [esp + 12] // count - pxor xmm0, xmm0 - pxor xmm5, xmm5 - - align 4 - wloop: - movdqa xmm1, [eax] - lea eax, [eax + 16] - movdqa xmm2, [edx] - lea edx, [edx + 16] - sub ecx, 16 - movdqa xmm3, xmm1 // abs trick - psubusb xmm1, xmm2 - psubusb xmm2, xmm3 - por xmm1, xmm2 - movdqa xmm2, xmm1 - punpcklbw xmm1, xmm5 - punpckhbw xmm2, xmm5 - pmaddwd xmm1, xmm1 - pmaddwd xmm2, xmm2 - paddd xmm0, xmm1 - paddd xmm0, xmm2 - jg wloop - - pshufd xmm1, xmm0, 0xee - paddd xmm0, xmm1 - pshufd xmm1, xmm0, 0x01 - paddd xmm0, xmm1 - movd eax, xmm0 - ret - } -} - -// Visual C 2012 required for AVX2. -#if _MSC_VER >= 1700 -// C4752: found Intel(R) Advanced Vector Extensions; consider using /arch:AVX. 
-#pragma warning(disable: 4752) -__declspec(naked) __declspec(align(16)) -uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count) { - __asm { - mov eax, [esp + 4] // src_a - mov edx, [esp + 8] // src_b - mov ecx, [esp + 12] // count - vpxor ymm0, ymm0, ymm0 // sum - vpxor ymm5, ymm5, ymm5 // constant 0 for unpck - sub edx, eax - - align 4 - wloop: - vmovdqu ymm1, [eax] - vmovdqu ymm2, [eax + edx] - lea eax, [eax + 32] - sub ecx, 32 - vpsubusb ymm3, ymm1, ymm2 // abs difference trick - vpsubusb ymm2, ymm2, ymm1 - vpor ymm1, ymm2, ymm3 - vpunpcklbw ymm2, ymm1, ymm5 // u16. mutates order. - vpunpckhbw ymm1, ymm1, ymm5 - vpmaddwd ymm2, ymm2, ymm2 // square + hadd to u32. - vpmaddwd ymm1, ymm1, ymm1 - vpaddd ymm0, ymm0, ymm1 - vpaddd ymm0, ymm0, ymm2 - jg wloop - - vpshufd ymm1, ymm0, 0xee // 3, 2 + 1, 0 both lanes. - vpaddd ymm0, ymm0, ymm1 - vpshufd ymm1, ymm0, 0x01 // 1 + 0 both lanes. - vpaddd ymm0, ymm0, ymm1 - vpermq ymm1, ymm0, 0x02 // high + low lane. - vpaddd ymm0, ymm0, ymm1 - vmovd eax, xmm0 - vzeroupper - ret - } -} -#endif // _MSC_VER >= 1700 - -#define HAS_HASHDJB2_SSE41 -static uvec32 kHash16x33 = { 0x92d9e201, 0, 0, 0 }; // 33 ^ 16 -static uvec32 kHashMul0 = { - 0x0c3525e1, // 33 ^ 15 - 0xa3476dc1, // 33 ^ 14 - 0x3b4039a1, // 33 ^ 13 - 0x4f5f0981, // 33 ^ 12 -}; -static uvec32 kHashMul1 = { - 0x30f35d61, // 33 ^ 11 - 0x855cb541, // 33 ^ 10 - 0x040a9121, // 33 ^ 9 - 0x747c7101, // 33 ^ 8 -}; -static uvec32 kHashMul2 = { - 0xec41d4e1, // 33 ^ 7 - 0x4cfa3cc1, // 33 ^ 6 - 0x025528a1, // 33 ^ 5 - 0x00121881, // 33 ^ 4 -}; -static uvec32 kHashMul3 = { - 0x00008c61, // 33 ^ 3 - 0x00000441, // 33 ^ 2 - 0x00000021, // 33 ^ 1 - 0x00000001, // 33 ^ 0 -}; - -// 27: 66 0F 38 40 C6 pmulld xmm0,xmm6 -// 44: 66 0F 38 40 DD pmulld xmm3,xmm5 -// 59: 66 0F 38 40 E5 pmulld xmm4,xmm5 -// 72: 66 0F 38 40 D5 pmulld xmm2,xmm5 -// 83: 66 0F 38 40 CD pmulld xmm1,xmm5 -#define pmulld(reg) _asm _emit 0x66 _asm _emit 0x0F _asm _emit 0x38 \ - _asm _emit 0x40 
_asm _emit reg - -__declspec(naked) __declspec(align(16)) -uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) { - __asm { - mov eax, [esp + 4] // src - mov ecx, [esp + 8] // count - movd xmm0, [esp + 12] // seed - - pxor xmm7, xmm7 // constant 0 for unpck - movdqa xmm6, kHash16x33 - - align 4 - wloop: - movdqu xmm1, [eax] // src[0-15] - lea eax, [eax + 16] - pmulld(0xc6) // pmulld xmm0,xmm6 hash *= 33 ^ 16 - movdqa xmm5, kHashMul0 - movdqa xmm2, xmm1 - punpcklbw xmm2, xmm7 // src[0-7] - movdqa xmm3, xmm2 - punpcklwd xmm3, xmm7 // src[0-3] - pmulld(0xdd) // pmulld xmm3, xmm5 - movdqa xmm5, kHashMul1 - movdqa xmm4, xmm2 - punpckhwd xmm4, xmm7 // src[4-7] - pmulld(0xe5) // pmulld xmm4, xmm5 - movdqa xmm5, kHashMul2 - punpckhbw xmm1, xmm7 // src[8-15] - movdqa xmm2, xmm1 - punpcklwd xmm2, xmm7 // src[8-11] - pmulld(0xd5) // pmulld xmm2, xmm5 - movdqa xmm5, kHashMul3 - punpckhwd xmm1, xmm7 // src[12-15] - pmulld(0xcd) // pmulld xmm1, xmm5 - paddd xmm3, xmm4 // add 16 results - paddd xmm1, xmm2 - sub ecx, 16 - paddd xmm1, xmm3 - - pshufd xmm2, xmm1, 0x0e // upper 2 dwords - paddd xmm1, xmm2 - pshufd xmm2, xmm1, 0x01 - paddd xmm1, xmm2 - paddd xmm0, xmm1 - jg wloop - - movd eax, xmm0 // return hash - ret - } -} - -// Visual C 2012 required for AVX2. 
-#if _MSC_VER >= 1700 -__declspec(naked) __declspec(align(16)) -uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed) { - __asm { - mov eax, [esp + 4] // src - mov ecx, [esp + 8] // count - movd xmm0, [esp + 12] // seed - movdqa xmm6, kHash16x33 - - align 4 - wloop: - vpmovzxbd xmm3, dword ptr [eax] // src[0-3] - pmulld xmm0, xmm6 // hash *= 33 ^ 16 - vpmovzxbd xmm4, dword ptr [eax + 4] // src[4-7] - pmulld xmm3, kHashMul0 - vpmovzxbd xmm2, dword ptr [eax + 8] // src[8-11] - pmulld xmm4, kHashMul1 - vpmovzxbd xmm1, dword ptr [eax + 12] // src[12-15] - pmulld xmm2, kHashMul2 - lea eax, [eax + 16] - pmulld xmm1, kHashMul3 - paddd xmm3, xmm4 // add 16 results - paddd xmm1, xmm2 - sub ecx, 16 - paddd xmm1, xmm3 - pshufd xmm2, xmm1, 0x0e // upper 2 dwords - paddd xmm1, xmm2 - pshufd xmm2, xmm1, 0x01 - paddd xmm1, xmm2 - paddd xmm0, xmm1 - jg wloop - - movd eax, xmm0 // return hash - ret - } -} -#endif // _MSC_VER >= 1700 - -#endif // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER) - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/convert.cc b/drivers/theoraplayer/src/YUV/libyuv/src/convert.cc deleted file mode 100755 index c8408dc798..0000000000 --- a/drivers/theoraplayer/src/YUV/libyuv/src/convert.cc +++ /dev/null @@ -1,1491 +0,0 @@ -/* - * Copyright 2011 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include "libyuv/convert.h" - -#include "libyuv/basic_types.h" -#include "libyuv/cpu_id.h" -#include "libyuv/planar_functions.h" -#include "libyuv/rotate.h" -#include "libyuv/scale.h" // For ScalePlane() -#include "libyuv/row.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -#define SUBSAMPLE(v, a, s) (v < 0) ? (-((-v + a) >> s)) : ((v + a) >> s) -static __inline int Abs(int v) { - return v >= 0 ? v : -v; -} - -// Any I4xx To I420 format with mirroring. -static int I4xxToI420(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int src_y_width, int src_y_height, - int src_uv_width, int src_uv_height) { - if (src_y_width == 0 || src_y_height == 0 || - src_uv_width == 0 || src_uv_height == 0) { - return -1; - } - const int dst_y_width = Abs(src_y_width); - const int dst_y_height = Abs(src_y_height); - const int dst_uv_width = SUBSAMPLE(dst_y_width, 1, 1); - const int dst_uv_height = SUBSAMPLE(dst_y_height, 1, 1); - ScalePlane(src_y, src_stride_y, src_y_width, src_y_height, - dst_y, dst_stride_y, dst_y_width, dst_y_height, - kFilterBilinear); - ScalePlane(src_u, src_stride_u, src_uv_width, src_uv_height, - dst_u, dst_stride_u, dst_uv_width, dst_uv_height, - kFilterBilinear); - ScalePlane(src_v, src_stride_v, src_uv_width, src_uv_height, - dst_v, dst_stride_v, dst_uv_width, dst_uv_height, - kFilterBilinear); - return 0; -} - -// Copy I420 with optional flipping -// TODO(fbarchard): Use Scale plane which supports mirroring, but ensure -// is does row coalescing. 
-LIBYUV_API -int I420Copy(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { - if (!src_y || !src_u || !src_v || - !dst_y || !dst_u || !dst_v || - width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - const int halfheight = (height + 1) >> 1; - src_y = src_y + (height - 1) * src_stride_y; - src_u = src_u + (halfheight - 1) * src_stride_u; - src_v = src_v + (halfheight - 1) * src_stride_v; - src_stride_y = -src_stride_y; - src_stride_u = -src_stride_u; - src_stride_v = -src_stride_v; - } - - if (dst_y) { - CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); - } - // Copy UV planes. - const int halfwidth = (width + 1) >> 1; - const int halfheight = (height + 1) >> 1; - CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, halfheight); - CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, halfheight); - return 0; -} - -// 422 chroma is 1/2 width, 1x height -// 420 chroma is 1/2 width, 1/2 height -LIBYUV_API -int I422ToI420(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { - const int src_uv_width = SUBSAMPLE(width, 1, 1); - return I4xxToI420(src_y, src_stride_y, - src_u, src_stride_u, - src_v, src_stride_v, - dst_y, dst_stride_y, - dst_u, dst_stride_u, - dst_v, dst_stride_v, - width, height, - src_uv_width, height); -} - -// 444 chroma is 1x width, 1x height -// 420 chroma is 1/2 width, 1/2 height -LIBYUV_API -int I444ToI420(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int 
dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { - return I4xxToI420(src_y, src_stride_y, - src_u, src_stride_u, - src_v, src_stride_v, - dst_y, dst_stride_y, - dst_u, dst_stride_u, - dst_v, dst_stride_v, - width, height, - width, height); -} - -// 411 chroma is 1/4 width, 1x height -// 420 chroma is 1/2 width, 1/2 height -LIBYUV_API -int I411ToI420(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { - const int src_uv_width = SUBSAMPLE(width, 3, 2); - return I4xxToI420(src_y, src_stride_y, - src_u, src_stride_u, - src_v, src_stride_v, - dst_y, dst_stride_y, - dst_u, dst_stride_u, - dst_v, dst_stride_v, - width, height, - src_uv_width, height); -} - -// I400 is greyscale typically used in MJPG -LIBYUV_API -int I400ToI420(const uint8* src_y, int src_stride_y, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { - if (!src_y || !dst_y || !dst_u || !dst_v || - width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. 
- if (height < 0) { - height = -height; - src_y = src_y + (height - 1) * src_stride_y; - src_stride_y = -src_stride_y; - } - int halfwidth = (width + 1) >> 1; - int halfheight = (height + 1) >> 1; - CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); - SetPlane(dst_u, dst_stride_u, halfwidth, halfheight, 128); - SetPlane(dst_v, dst_stride_v, halfwidth, halfheight, 128); - return 0; -} - -static void CopyPlane2(const uint8* src, int src_stride_0, int src_stride_1, - uint8* dst, int dst_stride, - int width, int height) { - void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C; -#if defined(HAS_COPYROW_X86) - if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) { - CopyRow = CopyRow_X86; - } -#endif -#if defined(HAS_COPYROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32) && - IS_ALIGNED(src, 16) && - IS_ALIGNED(src_stride_0, 16) && IS_ALIGNED(src_stride_1, 16) && - IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) { - CopyRow = CopyRow_SSE2; - } -#endif -#if defined(HAS_COPYROW_ERMS) - if (TestCpuFlag(kCpuHasERMS)) { - CopyRow = CopyRow_ERMS; - } -#endif -#if defined(HAS_COPYROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 32)) { - CopyRow = CopyRow_NEON; - } -#endif -#if defined(HAS_COPYROW_MIPS) - if (TestCpuFlag(kCpuHasMIPS)) { - CopyRow = CopyRow_MIPS; - } -#endif - - // Copy plane - for (int y = 0; y < height - 1; y += 2) { - CopyRow(src, dst, width); - CopyRow(src + src_stride_0, dst + dst_stride, width); - src += src_stride_0 + src_stride_1; - dst += dst_stride * 2; - } - if (height & 1) { - CopyRow(src, dst, width); - } -} - -// Support converting from FOURCC_M420 -// Useful for bandwidth constrained transports like USB 1.0 and 2.0 and for -// easy conversion to I420. -// M420 format description: -// M420 is row biplanar 420: 2 rows of Y and 1 row of UV. -// Chroma is half width / half height. (420) -// src_stride_m420 is row planar. Normally this will be the width in pixels. 
-// The UV plane is half width, but 2 values, so src_stride_m420 applies to -// this as well as the two Y planes. -static int X420ToI420(const uint8* src_y, - int src_stride_y0, int src_stride_y1, - const uint8* src_uv, int src_stride_uv, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { - if (!src_y || !src_uv || - !dst_y || !dst_u || !dst_v || - width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - int halfheight = (height + 1) >> 1; - dst_y = dst_y + (height - 1) * dst_stride_y; - dst_u = dst_u + (halfheight - 1) * dst_stride_u; - dst_v = dst_v + (halfheight - 1) * dst_stride_v; - dst_stride_y = -dst_stride_y; - dst_stride_u = -dst_stride_u; - dst_stride_v = -dst_stride_v; - } - // Coalesce rows. - int halfwidth = (width + 1) >> 1; - int halfheight = (height + 1) >> 1; - if (src_stride_y0 == width && - src_stride_y1 == width && - dst_stride_y == width) { - width *= height; - height = 1; - src_stride_y0 = src_stride_y1 = dst_stride_y = 0; - } - // Coalesce rows. 
- if (src_stride_uv == halfwidth * 2 && - dst_stride_u == halfwidth && - dst_stride_v == halfwidth) { - halfwidth *= halfheight; - halfheight = 1; - src_stride_uv = dst_stride_u = dst_stride_v = 0; - } - void (*SplitUVRow)(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) = - SplitUVRow_C; -#if defined(HAS_SPLITUVROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && halfwidth >= 16) { - SplitUVRow = SplitUVRow_Any_SSE2; - if (IS_ALIGNED(halfwidth, 16)) { - SplitUVRow = SplitUVRow_Unaligned_SSE2; - if (IS_ALIGNED(src_uv, 16) && IS_ALIGNED(src_stride_uv, 16) && - IS_ALIGNED(dst_u, 16) && IS_ALIGNED(dst_stride_u, 16) && - IS_ALIGNED(dst_v, 16) && IS_ALIGNED(dst_stride_v, 16)) { - SplitUVRow = SplitUVRow_SSE2; - } - } - } -#endif -#if defined(HAS_SPLITUVROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2) && halfwidth >= 32) { - SplitUVRow = SplitUVRow_Any_AVX2; - if (IS_ALIGNED(halfwidth, 32)) { - SplitUVRow = SplitUVRow_AVX2; - } - } -#endif -#if defined(HAS_SPLITUVROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && halfwidth >= 16) { - SplitUVRow = SplitUVRow_Any_NEON; - if (IS_ALIGNED(halfwidth, 16)) { - SplitUVRow = SplitUVRow_NEON; - } - } -#endif -#if defined(HAS_SPLITUVROW_MIPS_DSPR2) - if (TestCpuFlag(kCpuHasMIPS_DSPR2) && halfwidth >= 16) { - SplitUVRow = SplitUVRow_Any_MIPS_DSPR2; - if (IS_ALIGNED(halfwidth, 16)) { - SplitUVRow = SplitUVRow_Unaligned_MIPS_DSPR2; - if (IS_ALIGNED(src_uv, 4) && IS_ALIGNED(src_stride_uv, 4) && - IS_ALIGNED(dst_u, 4) && IS_ALIGNED(dst_stride_u, 4) && - IS_ALIGNED(dst_v, 4) && IS_ALIGNED(dst_stride_v, 4)) { - SplitUVRow = SplitUVRow_MIPS_DSPR2; - } - } - } -#endif - - if (dst_y) { - if (src_stride_y0 == src_stride_y1) { - CopyPlane(src_y, src_stride_y0, dst_y, dst_stride_y, width, height); - } else { - CopyPlane2(src_y, src_stride_y0, src_stride_y1, dst_y, dst_stride_y, - width, height); - } - } - - for (int y = 0; y < halfheight; ++y) { - // Copy a row of UV. 
- SplitUVRow(src_uv, dst_u, dst_v, halfwidth); - dst_u += dst_stride_u; - dst_v += dst_stride_v; - src_uv += src_stride_uv; - } - return 0; -} - -// Convert NV12 to I420. -LIBYUV_API -int NV12ToI420(const uint8* src_y, int src_stride_y, - const uint8* src_uv, int src_stride_uv, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { - return X420ToI420(src_y, src_stride_y, src_stride_y, - src_uv, src_stride_uv, - dst_y, dst_stride_y, - dst_u, dst_stride_u, - dst_v, dst_stride_v, - width, height); -} - -// Convert NV21 to I420. Same as NV12 but u and v pointers swapped. -LIBYUV_API -int NV21ToI420(const uint8* src_y, int src_stride_y, - const uint8* src_vu, int src_stride_vu, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { - return X420ToI420(src_y, src_stride_y, src_stride_y, - src_vu, src_stride_vu, - dst_y, dst_stride_y, - dst_v, dst_stride_v, - dst_u, dst_stride_u, - width, height); -} - -// Convert M420 to I420. -LIBYUV_API -int M420ToI420(const uint8* src_m420, int src_stride_m420, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { - return X420ToI420(src_m420, src_stride_m420, src_stride_m420 * 2, - src_m420 + src_stride_m420 * 2, src_stride_m420 * 3, - dst_y, dst_stride_y, - dst_u, dst_stride_u, - dst_v, dst_stride_v, - width, height); -} - -// Convert Q420 to I420. -// Format is rows of YY/YUYV -LIBYUV_API -int Q420ToI420(const uint8* src_y, int src_stride_y, - const uint8* src_yuy2, int src_stride_yuy2, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { - if (!src_y || !src_yuy2 || - !dst_y || !dst_u || !dst_v || - width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. 
- if (height < 0) { - height = -height; - int halfheight = (height + 1) >> 1; - dst_y = dst_y + (height - 1) * dst_stride_y; - dst_u = dst_u + (halfheight - 1) * dst_stride_u; - dst_v = dst_v + (halfheight - 1) * dst_stride_v; - dst_stride_y = -dst_stride_y; - dst_stride_u = -dst_stride_u; - dst_stride_v = -dst_stride_v; - } - // CopyRow for rows of just Y in Q420 copied to Y plane of I420. - void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C; -#if defined(HAS_COPYROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 32)) { - CopyRow = CopyRow_NEON; - } -#endif -#if defined(HAS_COPYROW_X86) - if (IS_ALIGNED(width, 4)) { - CopyRow = CopyRow_X86; - } -#endif -#if defined(HAS_COPYROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32) && - IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) && - IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { - CopyRow = CopyRow_SSE2; - } -#endif -#if defined(HAS_COPYROW_ERMS) - if (TestCpuFlag(kCpuHasERMS)) { - CopyRow = CopyRow_ERMS; - } -#endif -#if defined(HAS_COPYROW_MIPS) - if (TestCpuFlag(kCpuHasMIPS)) { - CopyRow = CopyRow_MIPS; - } -#endif - - void (*YUY2ToUV422Row)(const uint8* src_yuy2, uint8* dst_u, uint8* dst_v, - int pix) = YUY2ToUV422Row_C; - void (*YUY2ToYRow)(const uint8* src_yuy2, uint8* dst_y, int pix) = - YUY2ToYRow_C; -#if defined(HAS_YUY2TOYROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && width >= 16) { - YUY2ToUV422Row = YUY2ToUV422Row_Any_SSE2; - YUY2ToYRow = YUY2ToYRow_Any_SSE2; - if (IS_ALIGNED(width, 16)) { - YUY2ToUV422Row = YUY2ToUV422Row_Unaligned_SSE2; - YUY2ToYRow = YUY2ToYRow_Unaligned_SSE2; - if (IS_ALIGNED(src_yuy2, 16) && IS_ALIGNED(src_stride_yuy2, 16)) { - YUY2ToUV422Row = YUY2ToUV422Row_SSE2; - if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { - YUY2ToYRow = YUY2ToYRow_SSE2; - } - } - } - } -#endif -#if defined(HAS_YUY2TOYROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2) && width >= 32) { - YUY2ToUV422Row = YUY2ToUV422Row_Any_AVX2; - YUY2ToYRow = 
YUY2ToYRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - YUY2ToUV422Row = YUY2ToUV422Row_AVX2; - YUY2ToYRow = YUY2ToYRow_AVX2; - } - } -#endif -#if defined(HAS_YUY2TOYROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && width >= 8) { - YUY2ToYRow = YUY2ToYRow_Any_NEON; - if (width >= 16) { - YUY2ToUV422Row = YUY2ToUV422Row_Any_NEON; - } - if (IS_ALIGNED(width, 16)) { - YUY2ToYRow = YUY2ToYRow_NEON; - YUY2ToUV422Row = YUY2ToUV422Row_NEON; - } - } -#endif - - for (int y = 0; y < height - 1; y += 2) { - CopyRow(src_y, dst_y, width); - src_y += src_stride_y; - dst_y += dst_stride_y; - - YUY2ToUV422Row(src_yuy2, dst_u, dst_v, width); - YUY2ToYRow(src_yuy2, dst_y, width); - src_yuy2 += src_stride_yuy2; - dst_y += dst_stride_y; - dst_u += dst_stride_u; - dst_v += dst_stride_v; - } - if (height & 1) { - CopyRow(src_y, dst_y, width); - YUY2ToUV422Row(src_yuy2, dst_u, dst_v, width); - } - return 0; -} - -// Convert YUY2 to I420. -LIBYUV_API -int YUY2ToI420(const uint8* src_yuy2, int src_stride_yuy2, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { - // Negative height means invert the image. 
- if (height < 0) { - height = -height; - src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2; - src_stride_yuy2 = -src_stride_yuy2; - } - void (*YUY2ToUVRow)(const uint8* src_yuy2, int src_stride_yuy2, - uint8* dst_u, uint8* dst_v, int pix); - void (*YUY2ToYRow)(const uint8* src_yuy2, - uint8* dst_y, int pix); - YUY2ToYRow = YUY2ToYRow_C; - YUY2ToUVRow = YUY2ToUVRow_C; -#if defined(HAS_YUY2TOYROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && width >= 16) { - YUY2ToUVRow = YUY2ToUVRow_Any_SSE2; - YUY2ToYRow = YUY2ToYRow_Any_SSE2; - if (IS_ALIGNED(width, 16)) { - YUY2ToUVRow = YUY2ToUVRow_Unaligned_SSE2; - YUY2ToYRow = YUY2ToYRow_Unaligned_SSE2; - if (IS_ALIGNED(src_yuy2, 16) && IS_ALIGNED(src_stride_yuy2, 16)) { - YUY2ToUVRow = YUY2ToUVRow_SSE2; - if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { - YUY2ToYRow = YUY2ToYRow_SSE2; - } - } - } - } -#endif -#if defined(HAS_YUY2TOYROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2) && width >= 32) { - YUY2ToUVRow = YUY2ToUVRow_Any_AVX2; - YUY2ToYRow = YUY2ToYRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - YUY2ToUVRow = YUY2ToUVRow_AVX2; - YUY2ToYRow = YUY2ToYRow_AVX2; - } - } -#endif -#if defined(HAS_YUY2TOYROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && width >= 8) { - YUY2ToYRow = YUY2ToYRow_Any_NEON; - if (width >= 16) { - YUY2ToUVRow = YUY2ToUVRow_Any_NEON; - } - if (IS_ALIGNED(width, 16)) { - YUY2ToYRow = YUY2ToYRow_NEON; - YUY2ToUVRow = YUY2ToUVRow_NEON; - } - } -#endif - - for (int y = 0; y < height - 1; y += 2) { - YUY2ToUVRow(src_yuy2, src_stride_yuy2, dst_u, dst_v, width); - YUY2ToYRow(src_yuy2, dst_y, width); - YUY2ToYRow(src_yuy2 + src_stride_yuy2, dst_y + dst_stride_y, width); - src_yuy2 += src_stride_yuy2 * 2; - dst_y += dst_stride_y * 2; - dst_u += dst_stride_u; - dst_v += dst_stride_v; - } - if (height & 1) { - YUY2ToUVRow(src_yuy2, 0, dst_u, dst_v, width); - YUY2ToYRow(src_yuy2, dst_y, width); - } - return 0; -} - -// Convert UYVY to I420. 
-LIBYUV_API -int UYVYToI420(const uint8* src_uyvy, int src_stride_uyvy, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { - // Negative height means invert the image. - if (height < 0) { - height = -height; - src_uyvy = src_uyvy + (height - 1) * src_stride_uyvy; - src_stride_uyvy = -src_stride_uyvy; - } - void (*UYVYToUVRow)(const uint8* src_uyvy, int src_stride_uyvy, - uint8* dst_u, uint8* dst_v, int pix); - void (*UYVYToYRow)(const uint8* src_uyvy, - uint8* dst_y, int pix); - UYVYToYRow = UYVYToYRow_C; - UYVYToUVRow = UYVYToUVRow_C; -#if defined(HAS_UYVYTOYROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && width >= 16) { - UYVYToUVRow = UYVYToUVRow_Any_SSE2; - UYVYToYRow = UYVYToYRow_Any_SSE2; - if (IS_ALIGNED(width, 16)) { - UYVYToUVRow = UYVYToUVRow_Unaligned_SSE2; - UYVYToYRow = UYVYToYRow_Unaligned_SSE2; - if (IS_ALIGNED(src_uyvy, 16) && IS_ALIGNED(src_stride_uyvy, 16)) { - UYVYToUVRow = UYVYToUVRow_SSE2; - if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { - UYVYToYRow = UYVYToYRow_SSE2; - } - } - } - } -#endif -#if defined(HAS_UYVYTOYROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2) && width >= 32) { - UYVYToUVRow = UYVYToUVRow_Any_AVX2; - UYVYToYRow = UYVYToYRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - UYVYToUVRow = UYVYToUVRow_AVX2; - UYVYToYRow = UYVYToYRow_AVX2; - } - } -#endif -#if defined(HAS_UYVYTOYROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && width >= 8) { - UYVYToYRow = UYVYToYRow_Any_NEON; - if (width >= 16) { - UYVYToUVRow = UYVYToUVRow_Any_NEON; - } - if (IS_ALIGNED(width, 16)) { - UYVYToYRow = UYVYToYRow_NEON; - UYVYToUVRow = UYVYToUVRow_NEON; - } - } -#endif - - for (int y = 0; y < height - 1; y += 2) { - UYVYToUVRow(src_uyvy, src_stride_uyvy, dst_u, dst_v, width); - UYVYToYRow(src_uyvy, dst_y, width); - UYVYToYRow(src_uyvy + src_stride_uyvy, dst_y + dst_stride_y, width); - src_uyvy += src_stride_uyvy * 2; - dst_y += dst_stride_y * 2; - dst_u += dst_stride_u; - dst_v += 
dst_stride_v; - } - if (height & 1) { - UYVYToUVRow(src_uyvy, 0, dst_u, dst_v, width); - UYVYToYRow(src_uyvy, dst_y, width); - } - return 0; -} - -// Convert ARGB to I420. -LIBYUV_API -int ARGBToI420(const uint8* src_argb, int src_stride_argb, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { - if (!src_argb || - !dst_y || !dst_u || !dst_v || - width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - src_argb = src_argb + (height - 1) * src_stride_argb; - src_stride_argb = -src_stride_argb; - } - void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C; - void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = - ARGBToYRow_C; -#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { - ARGBToUVRow = ARGBToUVRow_Any_SSSE3; - ARGBToYRow = ARGBToYRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ARGBToUVRow = ARGBToUVRow_Unaligned_SSSE3; - ARGBToYRow = ARGBToYRow_Unaligned_SSSE3; - if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) { - ARGBToUVRow = ARGBToUVRow_SSSE3; - if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { - ARGBToYRow = ARGBToYRow_SSSE3; - } - } - } - } -#endif -#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2) && width >= 32) { - ARGBToUVRow = ARGBToUVRow_Any_AVX2; - ARGBToYRow = ARGBToYRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - ARGBToUVRow = ARGBToUVRow_AVX2; - ARGBToYRow = ARGBToYRow_AVX2; - } - } -#endif -#if defined(HAS_ARGBTOYROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && width >= 8) { - ARGBToYRow = ARGBToYRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - ARGBToYRow = ARGBToYRow_NEON; - } - if (width >= 16) { - ARGBToUVRow = ARGBToUVRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - ARGBToUVRow = 
ARGBToUVRow_NEON; - } - } - } -#endif - - for (int y = 0; y < height - 1; y += 2) { - ARGBToUVRow(src_argb, src_stride_argb, dst_u, dst_v, width); - ARGBToYRow(src_argb, dst_y, width); - ARGBToYRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width); - src_argb += src_stride_argb * 2; - dst_y += dst_stride_y * 2; - dst_u += dst_stride_u; - dst_v += dst_stride_v; - } - if (height & 1) { - ARGBToUVRow(src_argb, 0, dst_u, dst_v, width); - ARGBToYRow(src_argb, dst_y, width); - } - return 0; -} - -// Convert BGRA to I420. -LIBYUV_API -int BGRAToI420(const uint8* src_bgra, int src_stride_bgra, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { - if (!src_bgra || - !dst_y || !dst_u || !dst_v || - width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - src_bgra = src_bgra + (height - 1) * src_stride_bgra; - src_stride_bgra = -src_stride_bgra; - } - void (*BGRAToUVRow)(const uint8* src_bgra0, int src_stride_bgra, - uint8* dst_u, uint8* dst_v, int width) = BGRAToUVRow_C; - void (*BGRAToYRow)(const uint8* src_bgra, uint8* dst_y, int pix) = - BGRAToYRow_C; -#if defined(HAS_BGRATOYROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { - BGRAToUVRow = BGRAToUVRow_Any_SSSE3; - BGRAToYRow = BGRAToYRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - BGRAToUVRow = BGRAToUVRow_Unaligned_SSSE3; - BGRAToYRow = BGRAToYRow_Unaligned_SSSE3; - if (IS_ALIGNED(src_bgra, 16) && IS_ALIGNED(src_stride_bgra, 16)) { - BGRAToUVRow = BGRAToUVRow_SSSE3; - if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { - BGRAToYRow = BGRAToYRow_SSSE3; - } - } - } - } -#elif defined(HAS_BGRATOYROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && width >= 8) { - BGRAToYRow = BGRAToYRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - BGRAToYRow = BGRAToYRow_NEON; - } - if (width >= 16) { - BGRAToUVRow = BGRAToUVRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - 
BGRAToUVRow = BGRAToUVRow_NEON; - } - } - } -#endif - - for (int y = 0; y < height - 1; y += 2) { - BGRAToUVRow(src_bgra, src_stride_bgra, dst_u, dst_v, width); - BGRAToYRow(src_bgra, dst_y, width); - BGRAToYRow(src_bgra + src_stride_bgra, dst_y + dst_stride_y, width); - src_bgra += src_stride_bgra * 2; - dst_y += dst_stride_y * 2; - dst_u += dst_stride_u; - dst_v += dst_stride_v; - } - if (height & 1) { - BGRAToUVRow(src_bgra, 0, dst_u, dst_v, width); - BGRAToYRow(src_bgra, dst_y, width); - } - return 0; -} - -// Convert ABGR to I420. -LIBYUV_API -int ABGRToI420(const uint8* src_abgr, int src_stride_abgr, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { - if (!src_abgr || - !dst_y || !dst_u || !dst_v || - width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - src_abgr = src_abgr + (height - 1) * src_stride_abgr; - src_stride_abgr = -src_stride_abgr; - } - void (*ABGRToUVRow)(const uint8* src_abgr0, int src_stride_abgr, - uint8* dst_u, uint8* dst_v, int width) = ABGRToUVRow_C; - void (*ABGRToYRow)(const uint8* src_abgr, uint8* dst_y, int pix) = - ABGRToYRow_C; -#if defined(HAS_ABGRTOYROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { - ABGRToUVRow = ABGRToUVRow_Any_SSSE3; - ABGRToYRow = ABGRToYRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ABGRToUVRow = ABGRToUVRow_Unaligned_SSSE3; - ABGRToYRow = ABGRToYRow_Unaligned_SSSE3; - if (IS_ALIGNED(src_abgr, 16) && IS_ALIGNED(src_stride_abgr, 16)) { - ABGRToUVRow = ABGRToUVRow_SSSE3; - if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { - ABGRToYRow = ABGRToYRow_SSSE3; - } - } - } - } -#elif defined(HAS_ABGRTOYROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && width >= 8) { - ABGRToYRow = ABGRToYRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - ABGRToYRow = ABGRToYRow_NEON; - } - if (width >= 16) { - ABGRToUVRow = ABGRToUVRow_Any_NEON; - if (IS_ALIGNED(width, 
16)) { - ABGRToUVRow = ABGRToUVRow_NEON; - } - } - } -#endif - - for (int y = 0; y < height - 1; y += 2) { - ABGRToUVRow(src_abgr, src_stride_abgr, dst_u, dst_v, width); - ABGRToYRow(src_abgr, dst_y, width); - ABGRToYRow(src_abgr + src_stride_abgr, dst_y + dst_stride_y, width); - src_abgr += src_stride_abgr * 2; - dst_y += dst_stride_y * 2; - dst_u += dst_stride_u; - dst_v += dst_stride_v; - } - if (height & 1) { - ABGRToUVRow(src_abgr, 0, dst_u, dst_v, width); - ABGRToYRow(src_abgr, dst_y, width); - } - return 0; -} - -// Convert RGBA to I420. -LIBYUV_API -int RGBAToI420(const uint8* src_rgba, int src_stride_rgba, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { - if (!src_rgba || - !dst_y || !dst_u || !dst_v || - width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - src_rgba = src_rgba + (height - 1) * src_stride_rgba; - src_stride_rgba = -src_stride_rgba; - } - void (*RGBAToUVRow)(const uint8* src_rgba0, int src_stride_rgba, - uint8* dst_u, uint8* dst_v, int width) = RGBAToUVRow_C; - void (*RGBAToYRow)(const uint8* src_rgba, uint8* dst_y, int pix) = - RGBAToYRow_C; -#if defined(HAS_RGBATOYROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { - RGBAToUVRow = RGBAToUVRow_Any_SSSE3; - RGBAToYRow = RGBAToYRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - RGBAToUVRow = RGBAToUVRow_Unaligned_SSSE3; - RGBAToYRow = RGBAToYRow_Unaligned_SSSE3; - if (IS_ALIGNED(src_rgba, 16) && IS_ALIGNED(src_stride_rgba, 16)) { - RGBAToUVRow = RGBAToUVRow_SSSE3; - if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { - RGBAToYRow = RGBAToYRow_SSSE3; - } - } - } - } -#elif defined(HAS_RGBATOYROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && width >= 8) { - RGBAToYRow = RGBAToYRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - RGBAToYRow = RGBAToYRow_NEON; - } - if (width >= 16) { - RGBAToUVRow = RGBAToUVRow_Any_NEON; - if 
(IS_ALIGNED(width, 16)) { - RGBAToUVRow = RGBAToUVRow_NEON; - } - } - } -#endif - - for (int y = 0; y < height - 1; y += 2) { - RGBAToUVRow(src_rgba, src_stride_rgba, dst_u, dst_v, width); - RGBAToYRow(src_rgba, dst_y, width); - RGBAToYRow(src_rgba + src_stride_rgba, dst_y + dst_stride_y, width); - src_rgba += src_stride_rgba * 2; - dst_y += dst_stride_y * 2; - dst_u += dst_stride_u; - dst_v += dst_stride_v; - } - if (height & 1) { - RGBAToUVRow(src_rgba, 0, dst_u, dst_v, width); - RGBAToYRow(src_rgba, dst_y, width); - } - return 0; -} - -// Convert RGB24 to I420. -LIBYUV_API -int RGB24ToI420(const uint8* src_rgb24, int src_stride_rgb24, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { - if (!src_rgb24 || !dst_y || !dst_u || !dst_v || - width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - src_rgb24 = src_rgb24 + (height - 1) * src_stride_rgb24; - src_stride_rgb24 = -src_stride_rgb24; - } - -#if defined(HAS_RGB24TOYROW_NEON) - void (*RGB24ToUVRow)(const uint8* src_rgb24, int src_stride_rgb24, - uint8* dst_u, uint8* dst_v, int width) = RGB24ToUVRow_C; - void (*RGB24ToYRow)(const uint8* src_rgb24, uint8* dst_y, int pix) = - RGB24ToYRow_C; - if (TestCpuFlag(kCpuHasNEON) && width >= 8) { - RGB24ToYRow = RGB24ToYRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - RGB24ToYRow = RGB24ToYRow_NEON; - } - if (width >= 16) { - RGB24ToUVRow = RGB24ToUVRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - RGB24ToUVRow = RGB24ToUVRow_NEON; - } - } - } -#else // HAS_RGB24TOYROW_NEON - - // Allocate 2 rows of ARGB. 
- const int kRowSize = (width * 4 + 15) & ~15; - align_buffer_64(row, kRowSize * 2); - - void (*RGB24ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) = - RGB24ToARGBRow_C; -#if defined(HAS_RGB24TOARGBROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { - RGB24ToARGBRow = RGB24ToARGBRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - RGB24ToARGBRow = RGB24ToARGBRow_SSSE3; - } - } -#endif - void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C; -#if defined(HAS_ARGBTOUVROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { - ARGBToUVRow = ARGBToUVRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ARGBToUVRow = ARGBToUVRow_SSSE3; - } - } -#endif - void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = - ARGBToYRow_C; -#if defined(HAS_ARGBTOUVROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { - ARGBToYRow = ARGBToYRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ARGBToYRow = ARGBToYRow_Unaligned_SSSE3; - if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { - ARGBToYRow = ARGBToYRow_SSSE3; - } - } - } -#endif // HAS_ARGBTOUVROW_SSSE3 -#endif // HAS_RGB24TOYROW_NEON - - for (int y = 0; y < height - 1; y += 2) { -#if defined(HAS_RGB24TOYROW_NEON) - RGB24ToUVRow(src_rgb24, src_stride_rgb24, dst_u, dst_v, width); - RGB24ToYRow(src_rgb24, dst_y, width); - RGB24ToYRow(src_rgb24 + src_stride_rgb24, dst_y + dst_stride_y, width); -#else - RGB24ToARGBRow(src_rgb24, row, width); - RGB24ToARGBRow(src_rgb24 + src_stride_rgb24, row + kRowSize, width); - ARGBToUVRow(row, kRowSize, dst_u, dst_v, width); - ARGBToYRow(row, dst_y, width); - ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width); -#endif - src_rgb24 += src_stride_rgb24 * 2; - dst_y += dst_stride_y * 2; - dst_u += dst_stride_u; - dst_v += dst_stride_v; - } - if (height & 1) { -#if defined(HAS_RGB24TOYROW_NEON) - RGB24ToUVRow(src_rgb24, 0, dst_u, dst_v, width); - RGB24ToYRow(src_rgb24, dst_y, width); -#else - 
RGB24ToARGBRow(src_rgb24, row, width); - ARGBToUVRow(row, 0, dst_u, dst_v, width); - ARGBToYRow(row, dst_y, width); -#endif - } -#if !defined(HAS_RGB24TOYROW_NEON) - free_aligned_buffer_64(row); -#endif - return 0; -} - -// Convert RAW to I420. -LIBYUV_API -int RAWToI420(const uint8* src_raw, int src_stride_raw, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { - if (!src_raw || !dst_y || !dst_u || !dst_v || - width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - src_raw = src_raw + (height - 1) * src_stride_raw; - src_stride_raw = -src_stride_raw; - } - -#if defined(HAS_RAWTOYROW_NEON) - void (*RAWToUVRow)(const uint8* src_raw, int src_stride_raw, - uint8* dst_u, uint8* dst_v, int width) = RAWToUVRow_C; - void (*RAWToYRow)(const uint8* src_raw, uint8* dst_y, int pix) = - RAWToYRow_C; - if (TestCpuFlag(kCpuHasNEON) && width >= 8) { - RAWToYRow = RAWToYRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - RAWToYRow = RAWToYRow_NEON; - } - if (width >= 16) { - RAWToUVRow = RAWToUVRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - RAWToUVRow = RAWToUVRow_NEON; - } - } - } -#else // HAS_RAWTOYROW_NEON - - // Allocate 2 rows of ARGB. 
- const int kRowSize = (width * 4 + 15) & ~15; - align_buffer_64(row, kRowSize * 2); - - void (*RAWToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) = - RAWToARGBRow_C; -#if defined(HAS_RAWTOARGBROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { - RAWToARGBRow = RAWToARGBRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - RAWToARGBRow = RAWToARGBRow_SSSE3; - } - } -#endif - void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C; -#if defined(HAS_ARGBTOUVROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { - ARGBToUVRow = ARGBToUVRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ARGBToUVRow = ARGBToUVRow_SSSE3; - } - } -#endif - void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = - ARGBToYRow_C; -#if defined(HAS_ARGBTOUVROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { - ARGBToYRow = ARGBToYRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ARGBToYRow = ARGBToYRow_Unaligned_SSSE3; - if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { - ARGBToYRow = ARGBToYRow_SSSE3; - } - } - } -#endif // HAS_ARGBTOUVROW_SSSE3 -#endif // HAS_RAWTOYROW_NEON - - for (int y = 0; y < height - 1; y += 2) { -#if defined(HAS_RAWTOYROW_NEON) - RAWToUVRow(src_raw, src_stride_raw, dst_u, dst_v, width); - RAWToYRow(src_raw, dst_y, width); - RAWToYRow(src_raw + src_stride_raw, dst_y + dst_stride_y, width); -#else - RAWToARGBRow(src_raw, row, width); - RAWToARGBRow(src_raw + src_stride_raw, row + kRowSize, width); - ARGBToUVRow(row, kRowSize, dst_u, dst_v, width); - ARGBToYRow(row, dst_y, width); - ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width); -#endif - src_raw += src_stride_raw * 2; - dst_y += dst_stride_y * 2; - dst_u += dst_stride_u; - dst_v += dst_stride_v; - } - if (height & 1) { -#if defined(HAS_RAWTOYROW_NEON) - RAWToUVRow(src_raw, 0, dst_u, dst_v, width); - RAWToYRow(src_raw, dst_y, width); -#else - RAWToARGBRow(src_raw, row, width); - ARGBToUVRow(row, 0, 
dst_u, dst_v, width); - ARGBToYRow(row, dst_y, width); -#endif - } -#if !defined(HAS_RAWTOYROW_NEON) - free_aligned_buffer_64(row); -#endif - return 0; -} - -// Convert RGB565 to I420. -LIBYUV_API -int RGB565ToI420(const uint8* src_rgb565, int src_stride_rgb565, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { - if (!src_rgb565 || !dst_y || !dst_u || !dst_v || - width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - src_rgb565 = src_rgb565 + (height - 1) * src_stride_rgb565; - src_stride_rgb565 = -src_stride_rgb565; - } - -#if defined(HAS_RGB565TOYROW_NEON) - void (*RGB565ToUVRow)(const uint8* src_rgb565, int src_stride_rgb565, - uint8* dst_u, uint8* dst_v, int width) = RGB565ToUVRow_C; - void (*RGB565ToYRow)(const uint8* src_rgb565, uint8* dst_y, int pix) = - RGB565ToYRow_C; - if (TestCpuFlag(kCpuHasNEON) && width >= 8) { - RGB565ToYRow = RGB565ToYRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - RGB565ToYRow = RGB565ToYRow_NEON; - } - if (width >= 16) { - RGB565ToUVRow = RGB565ToUVRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - RGB565ToUVRow = RGB565ToUVRow_NEON; - } - } - } -#else // HAS_RGB565TOYROW_NEON - - // Allocate 2 rows of ARGB. 
- const int kRowSize = (width * 4 + 15) & ~15; - align_buffer_64(row, kRowSize * 2); - - void (*RGB565ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) = - RGB565ToARGBRow_C; -#if defined(HAS_RGB565TOARGBROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && width >= 8) { - RGB565ToARGBRow = RGB565ToARGBRow_Any_SSE2; - if (IS_ALIGNED(width, 8)) { - RGB565ToARGBRow = RGB565ToARGBRow_SSE2; - } - } -#endif - void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C; -#if defined(HAS_ARGBTOUVROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { - ARGBToUVRow = ARGBToUVRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ARGBToUVRow = ARGBToUVRow_SSSE3; - } - } -#endif - void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = - ARGBToYRow_C; -#if defined(HAS_ARGBTOUVROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { - ARGBToYRow = ARGBToYRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ARGBToYRow = ARGBToYRow_Unaligned_SSSE3; - if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { - ARGBToYRow = ARGBToYRow_SSSE3; - } - } - } -#endif // HAS_ARGBTOUVROW_SSSE3 -#endif // HAS_RGB565TOYROW_NEON - - for (int y = 0; y < height - 1; y += 2) { -#if defined(HAS_RGB565TOYROW_NEON) - RGB565ToUVRow(src_rgb565, src_stride_rgb565, dst_u, dst_v, width); - RGB565ToYRow(src_rgb565, dst_y, width); - RGB565ToYRow(src_rgb565 + src_stride_rgb565, dst_y + dst_stride_y, width); -#else - RGB565ToARGBRow(src_rgb565, row, width); - RGB565ToARGBRow(src_rgb565 + src_stride_rgb565, row + kRowSize, width); - ARGBToUVRow(row, kRowSize, dst_u, dst_v, width); - ARGBToYRow(row, dst_y, width); - ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width); -#endif - src_rgb565 += src_stride_rgb565 * 2; - dst_y += dst_stride_y * 2; - dst_u += dst_stride_u; - dst_v += dst_stride_v; - } - if (height & 1) { -#if defined(HAS_RGB565TOYROW_NEON) - RGB565ToUVRow(src_rgb565, 0, dst_u, dst_v, width); - RGB565ToYRow(src_rgb565, 
dst_y, width); -#else - RGB565ToARGBRow(src_rgb565, row, width); - ARGBToUVRow(row, 0, dst_u, dst_v, width); - ARGBToYRow(row, dst_y, width); -#endif - } -#if !defined(HAS_RGB565TOYROW_NEON) - free_aligned_buffer_64(row); -#endif - return 0; -} - -// Convert ARGB1555 to I420. -LIBYUV_API -int ARGB1555ToI420(const uint8* src_argb1555, int src_stride_argb1555, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { - if (!src_argb1555 || !dst_y || !dst_u || !dst_v || - width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - src_argb1555 = src_argb1555 + (height - 1) * src_stride_argb1555; - src_stride_argb1555 = -src_stride_argb1555; - } - -#if defined(HAS_ARGB1555TOYROW_NEON) - void (*ARGB1555ToUVRow)(const uint8* src_argb1555, int src_stride_argb1555, - uint8* dst_u, uint8* dst_v, int width) = ARGB1555ToUVRow_C; - void (*ARGB1555ToYRow)(const uint8* src_argb1555, uint8* dst_y, int pix) = - ARGB1555ToYRow_C; - if (TestCpuFlag(kCpuHasNEON) && width >= 8) { - ARGB1555ToYRow = ARGB1555ToYRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - ARGB1555ToYRow = ARGB1555ToYRow_NEON; - } - if (width >= 16) { - ARGB1555ToUVRow = ARGB1555ToUVRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - ARGB1555ToUVRow = ARGB1555ToUVRow_NEON; - } - } - } -#else // HAS_ARGB1555TOYROW_NEON - - // Allocate 2 rows of ARGB. 
- const int kRowSize = (width * 4 + 15) & ~15; - align_buffer_64(row, kRowSize * 2); - - void (*ARGB1555ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) = - ARGB1555ToARGBRow_C; -#if defined(HAS_ARGB1555TOARGBROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && width >= 8) { - ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_SSE2; - if (IS_ALIGNED(width, 8)) { - ARGB1555ToARGBRow = ARGB1555ToARGBRow_SSE2; - } - } -#endif - void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C; -#if defined(HAS_ARGBTOUVROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { - ARGBToUVRow = ARGBToUVRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ARGBToUVRow = ARGBToUVRow_SSSE3; - } - } -#endif - void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = - ARGBToYRow_C; -#if defined(HAS_ARGBTOUVROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { - ARGBToYRow = ARGBToYRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ARGBToYRow = ARGBToYRow_Unaligned_SSSE3; - if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { - ARGBToYRow = ARGBToYRow_SSSE3; - } - } - } -#endif // HAS_ARGBTOUVROW_SSSE3 -#endif // HAS_ARGB1555TOYROW_NEON - - for (int y = 0; y < height - 1; y += 2) { -#if defined(HAS_ARGB1555TOYROW_NEON) - ARGB1555ToUVRow(src_argb1555, src_stride_argb1555, dst_u, dst_v, width); - ARGB1555ToYRow(src_argb1555, dst_y, width); - ARGB1555ToYRow(src_argb1555 + src_stride_argb1555, dst_y + dst_stride_y, - width); -#else - ARGB1555ToARGBRow(src_argb1555, row, width); - ARGB1555ToARGBRow(src_argb1555 + src_stride_argb1555, row + kRowSize, - width); - ARGBToUVRow(row, kRowSize, dst_u, dst_v, width); - ARGBToYRow(row, dst_y, width); - ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width); -#endif - src_argb1555 += src_stride_argb1555 * 2; - dst_y += dst_stride_y * 2; - dst_u += dst_stride_u; - dst_v += dst_stride_v; - } - if (height & 1) { -#if defined(HAS_ARGB1555TOYROW_NEON) - 
ARGB1555ToUVRow(src_argb1555, 0, dst_u, dst_v, width); - ARGB1555ToYRow(src_argb1555, dst_y, width); -#else - ARGB1555ToARGBRow(src_argb1555, row, width); - ARGBToUVRow(row, 0, dst_u, dst_v, width); - ARGBToYRow(row, dst_y, width); -#endif - } -#if !defined(HAS_ARGB1555TOYROW_NEON) - free_aligned_buffer_64(row); -#endif - return 0; -} - -// Convert ARGB4444 to I420. -LIBYUV_API -int ARGB4444ToI420(const uint8* src_argb4444, int src_stride_argb4444, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { - if (!src_argb4444 || !dst_y || !dst_u || !dst_v || - width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - src_argb4444 = src_argb4444 + (height - 1) * src_stride_argb4444; - src_stride_argb4444 = -src_stride_argb4444; - } - -#if defined(HAS_ARGB4444TOYROW_NEON) - void (*ARGB4444ToUVRow)(const uint8* src_argb4444, int src_stride_argb4444, - uint8* dst_u, uint8* dst_v, int width) = ARGB4444ToUVRow_C; - void (*ARGB4444ToYRow)(const uint8* src_argb4444, uint8* dst_y, int pix) = - ARGB4444ToYRow_C; - if (TestCpuFlag(kCpuHasNEON) && width >= 8) { - ARGB4444ToYRow = ARGB4444ToYRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - ARGB4444ToYRow = ARGB4444ToYRow_NEON; - } - if (width >= 16) { - ARGB4444ToUVRow = ARGB4444ToUVRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - ARGB4444ToUVRow = ARGB4444ToUVRow_NEON; - } - } - } -#else // HAS_ARGB4444TOYROW_NEON - - // Allocate 2 rows of ARGB. 
- const int kRowSize = (width * 4 + 15) & ~15; - align_buffer_64(row, kRowSize * 2); - - void (*ARGB4444ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) = - ARGB4444ToARGBRow_C; -#if defined(HAS_ARGB4444TOARGBROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && width >= 8) { - ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_SSE2; - if (IS_ALIGNED(width, 8)) { - ARGB4444ToARGBRow = ARGB4444ToARGBRow_SSE2; - } - } -#endif - void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C; -#if defined(HAS_ARGBTOUVROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { - ARGBToUVRow = ARGBToUVRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ARGBToUVRow = ARGBToUVRow_SSSE3; - } - } -#endif - void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = - ARGBToYRow_C; -#if defined(HAS_ARGBTOUVROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { - ARGBToYRow = ARGBToYRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ARGBToYRow = ARGBToYRow_Unaligned_SSSE3; - if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { - ARGBToYRow = ARGBToYRow_SSSE3; - } - } - } -#endif // HAS_ARGBTOUVROW_SSSE3 -#endif // HAS_ARGB4444TOYROW_NEON - - for (int y = 0; y < height - 1; y += 2) { -#if defined(HAS_ARGB4444TOYROW_NEON) - ARGB4444ToUVRow(src_argb4444, src_stride_argb4444, dst_u, dst_v, width); - ARGB4444ToYRow(src_argb4444, dst_y, width); - ARGB4444ToYRow(src_argb4444 + src_stride_argb4444, dst_y + dst_stride_y, - width); -#else - ARGB4444ToARGBRow(src_argb4444, row, width); - ARGB4444ToARGBRow(src_argb4444 + src_stride_argb4444, row + kRowSize, - width); - ARGBToUVRow(row, kRowSize, dst_u, dst_v, width); - ARGBToYRow(row, dst_y, width); - ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width); -#endif - src_argb4444 += src_stride_argb4444 * 2; - dst_y += dst_stride_y * 2; - dst_u += dst_stride_u; - dst_v += dst_stride_v; - } - if (height & 1) { -#if defined(HAS_ARGB4444TOYROW_NEON) - 
ARGB4444ToUVRow(src_argb4444, 0, dst_u, dst_v, width); - ARGB4444ToYRow(src_argb4444, dst_y, width); -#else - ARGB4444ToARGBRow(src_argb4444, row, width); - ARGBToUVRow(row, 0, dst_u, dst_v, width); - ARGBToYRow(row, dst_y, width); -#endif - } -#if !defined(HAS_ARGB4444TOYROW_NEON) - free_aligned_buffer_64(row); -#endif - return 0; -} - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/convert_argb.cc b/drivers/theoraplayer/src/YUV/libyuv/src/convert_argb.cc deleted file mode 100755 index a8aab91478..0000000000 --- a/drivers/theoraplayer/src/YUV/libyuv/src/convert_argb.cc +++ /dev/null @@ -1,901 +0,0 @@ -/* - * Copyright 2011 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "libyuv/convert_argb.h" - -#include "libyuv/cpu_id.h" -#include "libyuv/format_conversion.h" -#ifdef HAVE_JPEG -#include "libyuv/mjpeg_decoder.h" -#endif -#include "libyuv/rotate_argb.h" -#include "libyuv/row.h" -#include "libyuv/video_common.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -// Copy ARGB with optional flipping -LIBYUV_API -int ARGBCopy(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, - int width, int height) { - if (!src_argb || !dst_argb || - width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - src_argb = src_argb + (height - 1) * src_stride_argb; - src_stride_argb = -src_stride_argb; - } - - CopyPlane(src_argb, src_stride_argb, dst_argb, dst_stride_argb, - width * 4, height); - return 0; -} - -// Convert I444 to ARGB. 
-LIBYUV_API -int I444ToARGB(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_argb, int dst_stride_argb, - int width, int height) { - if (!src_y || !src_u || !src_v || - !dst_argb || - width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - dst_argb = dst_argb + (height - 1) * dst_stride_argb; - dst_stride_argb = -dst_stride_argb; - } - // Coalesce rows. - if (src_stride_y == width && - src_stride_u == width && - src_stride_v == width && - dst_stride_argb == width * 4) { - width *= height; - height = 1; - src_stride_y = src_stride_u = src_stride_v = dst_stride_argb = 0; - } - void (*I444ToARGBRow)(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width) = I444ToARGBRow_C; -#if defined(HAS_I444TOARGBROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) { - I444ToARGBRow = I444ToARGBRow_Any_SSSE3; - if (IS_ALIGNED(width, 8)) { - I444ToARGBRow = I444ToARGBRow_Unaligned_SSSE3; - if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { - I444ToARGBRow = I444ToARGBRow_SSSE3; - } - } - } -#elif defined(HAS_I444TOARGBROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && width >= 8) { - I444ToARGBRow = I444ToARGBRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - I444ToARGBRow = I444ToARGBRow_NEON; - } - } -#endif - - for (int y = 0; y < height; ++y) { - I444ToARGBRow(src_y, src_u, src_v, dst_argb, width); - dst_argb += dst_stride_argb; - src_y += src_stride_y; - src_u += src_stride_u; - src_v += src_stride_v; - } - return 0; -} - -// Convert I422 to ARGB. 
-LIBYUV_API -int I422ToARGB(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_argb, int dst_stride_argb, - int width, int height) { - if (!src_y || !src_u || !src_v || - !dst_argb || - width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - dst_argb = dst_argb + (height - 1) * dst_stride_argb; - dst_stride_argb = -dst_stride_argb; - } - // Coalesce rows. - if (src_stride_y == width && - src_stride_u * 2 == width && - src_stride_v * 2 == width && - dst_stride_argb == width * 4) { - width *= height; - height = 1; - src_stride_y = src_stride_u = src_stride_v = dst_stride_argb = 0; - } - void (*I422ToARGBRow)(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width) = I422ToARGBRow_C; -#if defined(HAS_I422TOARGBROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) { - I422ToARGBRow = I422ToARGBRow_Any_SSSE3; - if (IS_ALIGNED(width, 8)) { - I422ToARGBRow = I422ToARGBRow_Unaligned_SSSE3; - if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { - I422ToARGBRow = I422ToARGBRow_SSSE3; - } - } - } -#endif -#if defined(HAS_I422TOARGBROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2) && width >= 16) { - I422ToARGBRow = I422ToARGBRow_Any_AVX2; - if (IS_ALIGNED(width, 16)) { - I422ToARGBRow = I422ToARGBRow_AVX2; - } - } -#endif -#if defined(HAS_I422TOARGBROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && width >= 8) { - I422ToARGBRow = I422ToARGBRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - I422ToARGBRow = I422ToARGBRow_NEON; - } - } -#endif -#if defined(HAS_I422TOARGBROW_MIPS_DSPR2) - if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) && - IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) && - IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) && - IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) && - IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) { - 
I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2; - } -#endif - - for (int y = 0; y < height; ++y) { - I422ToARGBRow(src_y, src_u, src_v, dst_argb, width); - dst_argb += dst_stride_argb; - src_y += src_stride_y; - src_u += src_stride_u; - src_v += src_stride_v; - } - return 0; -} - -// Convert I411 to ARGB. -LIBYUV_API -int I411ToARGB(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_argb, int dst_stride_argb, - int width, int height) { - if (!src_y || !src_u || !src_v || - !dst_argb || - width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - dst_argb = dst_argb + (height - 1) * dst_stride_argb; - dst_stride_argb = -dst_stride_argb; - } - // Coalesce rows. - if (src_stride_y == width && - src_stride_u * 4 == width && - src_stride_v * 4 == width && - dst_stride_argb == width * 4) { - width *= height; - height = 1; - src_stride_y = src_stride_u = src_stride_v = dst_stride_argb = 0; - } - void (*I411ToARGBRow)(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width) = I411ToARGBRow_C; -#if defined(HAS_I411TOARGBROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) { - I411ToARGBRow = I411ToARGBRow_Any_SSSE3; - if (IS_ALIGNED(width, 8)) { - I411ToARGBRow = I411ToARGBRow_Unaligned_SSSE3; - if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { - I411ToARGBRow = I411ToARGBRow_SSSE3; - } - } - } -#elif defined(HAS_I411TOARGBROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && width >= 8) { - I411ToARGBRow = I411ToARGBRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - I411ToARGBRow = I411ToARGBRow_NEON; - } - } -#endif - - for (int y = 0; y < height; ++y) { - I411ToARGBRow(src_y, src_u, src_v, dst_argb, width); - dst_argb += dst_stride_argb; - src_y += src_stride_y; - src_u += src_stride_u; - src_v += src_stride_v; - } - return 0; -} - -// Convert I400 to ARGB. 
-LIBYUV_API -int I400ToARGB_Reference(const uint8* src_y, int src_stride_y, - uint8* dst_argb, int dst_stride_argb, - int width, int height) { - if (!src_y || !dst_argb || - width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - dst_argb = dst_argb + (height - 1) * dst_stride_argb; - dst_stride_argb = -dst_stride_argb; - } - // Coalesce rows. - if (src_stride_y == width && - dst_stride_argb == width * 4) { - width *= height; - height = 1; - src_stride_y = dst_stride_argb = 0; - } - void (*YToARGBRow)(const uint8* y_buf, - uint8* rgb_buf, - int width) = YToARGBRow_C; -#if defined(HAS_YTOARGBROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && width >= 8 && - IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { - YToARGBRow = YToARGBRow_Any_SSE2; - if (IS_ALIGNED(width, 8)) { - YToARGBRow = YToARGBRow_SSE2; - } - } -#elif defined(HAS_YTOARGBROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && width >= 8) { - YToARGBRow = YToARGBRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - YToARGBRow = YToARGBRow_NEON; - } - } -#endif - - for (int y = 0; y < height; ++y) { - YToARGBRow(src_y, dst_argb, width); - dst_argb += dst_stride_argb; - src_y += src_stride_y; - } - return 0; -} - -// Convert I400 to ARGB. -LIBYUV_API -int I400ToARGB(const uint8* src_y, int src_stride_y, - uint8* dst_argb, int dst_stride_argb, - int width, int height) { - if (!src_y || !dst_argb || - width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - src_y = src_y + (height - 1) * src_stride_y; - src_stride_y = -src_stride_y; - } - // Coalesce rows. 
- if (src_stride_y == width && - dst_stride_argb == width * 4) { - width *= height; - height = 1; - src_stride_y = dst_stride_argb = 0; - } - void (*I400ToARGBRow)(const uint8* src_y, uint8* dst_argb, int pix) = - I400ToARGBRow_C; -#if defined(HAS_I400TOARGBROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && width >= 8) { - I400ToARGBRow = I400ToARGBRow_Any_SSE2; - if (IS_ALIGNED(width, 8)) { - I400ToARGBRow = I400ToARGBRow_Unaligned_SSE2; - if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { - I400ToARGBRow = I400ToARGBRow_SSE2; - } - } - } -#elif defined(HAS_I400TOARGBROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && width >= 8) { - I400ToARGBRow = I400ToARGBRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - I400ToARGBRow = I400ToARGBRow_NEON; - } - } -#endif - for (int y = 0; y < height; ++y) { - I400ToARGBRow(src_y, dst_argb, width); - src_y += src_stride_y; - dst_argb += dst_stride_argb; - } - return 0; -} - -// Shuffle table for converting BGRA to ARGB. -static uvec8 kShuffleMaskBGRAToARGB = { - 3u, 2u, 1u, 0u, 7u, 6u, 5u, 4u, 11u, 10u, 9u, 8u, 15u, 14u, 13u, 12u -}; - -// Shuffle table for converting ABGR to ARGB. -static uvec8 kShuffleMaskABGRToARGB = { - 2u, 1u, 0u, 3u, 6u, 5u, 4u, 7u, 10u, 9u, 8u, 11u, 14u, 13u, 12u, 15u -}; - -// Shuffle table for converting RGBA to ARGB. -static uvec8 kShuffleMaskRGBAToARGB = { - 1u, 2u, 3u, 0u, 5u, 6u, 7u, 4u, 9u, 10u, 11u, 8u, 13u, 14u, 15u, 12u -}; - -// Convert BGRA to ARGB. -LIBYUV_API -int BGRAToARGB(const uint8* src_bgra, int src_stride_bgra, - uint8* dst_argb, int dst_stride_argb, - int width, int height) { - return ARGBShuffle(src_bgra, src_stride_bgra, - dst_argb, dst_stride_argb, - (const uint8*)(&kShuffleMaskBGRAToARGB), - width, height); -} - -// Convert ABGR to ARGB. 
-LIBYUV_API -int ABGRToARGB(const uint8* src_abgr, int src_stride_abgr, - uint8* dst_argb, int dst_stride_argb, - int width, int height) { - return ARGBShuffle(src_abgr, src_stride_abgr, - dst_argb, dst_stride_argb, - (const uint8*)(&kShuffleMaskABGRToARGB), - width, height); -} - -// Convert RGBA to ARGB. -LIBYUV_API -int RGBAToARGB(const uint8* src_rgba, int src_stride_rgba, - uint8* dst_argb, int dst_stride_argb, - int width, int height) { - return ARGBShuffle(src_rgba, src_stride_rgba, - dst_argb, dst_stride_argb, - (const uint8*)(&kShuffleMaskRGBAToARGB), - width, height); -} - -// Convert RGB24 to ARGB. -LIBYUV_API -int RGB24ToARGB(const uint8* src_rgb24, int src_stride_rgb24, - uint8* dst_argb, int dst_stride_argb, - int width, int height) { - if (!src_rgb24 || !dst_argb || - width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - src_rgb24 = src_rgb24 + (height - 1) * src_stride_rgb24; - src_stride_rgb24 = -src_stride_rgb24; - } - // Coalesce rows. - if (src_stride_rgb24 == width * 3 && - dst_stride_argb == width * 4) { - width *= height; - height = 1; - src_stride_rgb24 = dst_stride_argb = 0; - } - void (*RGB24ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) = - RGB24ToARGBRow_C; -#if defined(HAS_RGB24TOARGBROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 16 && - IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { - RGB24ToARGBRow = RGB24ToARGBRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - RGB24ToARGBRow = RGB24ToARGBRow_SSSE3; - } - } -#elif defined(HAS_RGB24TOARGBROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && width >= 8) { - RGB24ToARGBRow = RGB24ToARGBRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - RGB24ToARGBRow = RGB24ToARGBRow_NEON; - } - } -#endif - - for (int y = 0; y < height; ++y) { - RGB24ToARGBRow(src_rgb24, dst_argb, width); - src_rgb24 += src_stride_rgb24; - dst_argb += dst_stride_argb; - } - return 0; -} - -// Convert RAW to ARGB. 
-LIBYUV_API -int RAWToARGB(const uint8* src_raw, int src_stride_raw, - uint8* dst_argb, int dst_stride_argb, - int width, int height) { - if (!src_raw || !dst_argb || - width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - src_raw = src_raw + (height - 1) * src_stride_raw; - src_stride_raw = -src_stride_raw; - } - // Coalesce rows. - if (src_stride_raw == width * 3 && - dst_stride_argb == width * 4) { - width *= height; - height = 1; - src_stride_raw = dst_stride_argb = 0; - } - void (*RAWToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) = - RAWToARGBRow_C; -#if defined(HAS_RAWTOARGBROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 16 && - IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { - RAWToARGBRow = RAWToARGBRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - RAWToARGBRow = RAWToARGBRow_SSSE3; - } - } -#elif defined(HAS_RAWTOARGBROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && width >= 8) { - RAWToARGBRow = RAWToARGBRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - RAWToARGBRow = RAWToARGBRow_NEON; - } - } -#endif - - for (int y = 0; y < height; ++y) { - RAWToARGBRow(src_raw, dst_argb, width); - src_raw += src_stride_raw; - dst_argb += dst_stride_argb; - } - return 0; -} - -// Convert RGB565 to ARGB. -LIBYUV_API -int RGB565ToARGB(const uint8* src_rgb565, int src_stride_rgb565, - uint8* dst_argb, int dst_stride_argb, - int width, int height) { - if (!src_rgb565 || !dst_argb || - width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - src_rgb565 = src_rgb565 + (height - 1) * src_stride_rgb565; - src_stride_rgb565 = -src_stride_rgb565; - } - // Coalesce rows. 
- if (src_stride_rgb565 == width * 2 && - dst_stride_argb == width * 4) { - width *= height; - height = 1; - src_stride_rgb565 = dst_stride_argb = 0; - } - void (*RGB565ToARGBRow)(const uint8* src_rgb565, uint8* dst_argb, int pix) = - RGB565ToARGBRow_C; -#if defined(HAS_RGB565TOARGBROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && width >= 8 && - IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { - RGB565ToARGBRow = RGB565ToARGBRow_Any_SSE2; - if (IS_ALIGNED(width, 8)) { - RGB565ToARGBRow = RGB565ToARGBRow_SSE2; - } - } -#elif defined(HAS_RGB565TOARGBROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && width >= 8) { - RGB565ToARGBRow = RGB565ToARGBRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - RGB565ToARGBRow = RGB565ToARGBRow_NEON; - } - } -#endif - - for (int y = 0; y < height; ++y) { - RGB565ToARGBRow(src_rgb565, dst_argb, width); - src_rgb565 += src_stride_rgb565; - dst_argb += dst_stride_argb; - } - return 0; -} - -// Convert ARGB1555 to ARGB. -LIBYUV_API -int ARGB1555ToARGB(const uint8* src_argb1555, int src_stride_argb1555, - uint8* dst_argb, int dst_stride_argb, - int width, int height) { - if (!src_argb1555 || !dst_argb || - width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - src_argb1555 = src_argb1555 + (height - 1) * src_stride_argb1555; - src_stride_argb1555 = -src_stride_argb1555; - } - // Coalesce rows. 
- if (src_stride_argb1555 == width * 2 && - dst_stride_argb == width * 4) { - width *= height; - height = 1; - src_stride_argb1555 = dst_stride_argb = 0; - } - void (*ARGB1555ToARGBRow)(const uint8* src_argb1555, uint8* dst_argb, - int pix) = ARGB1555ToARGBRow_C; -#if defined(HAS_ARGB1555TOARGBROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && width >= 8 && - IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { - ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_SSE2; - if (IS_ALIGNED(width, 8)) { - ARGB1555ToARGBRow = ARGB1555ToARGBRow_SSE2; - } - } -#elif defined(HAS_ARGB1555TOARGBROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && width >= 8) { - ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - ARGB1555ToARGBRow = ARGB1555ToARGBRow_NEON; - } - } -#endif - - for (int y = 0; y < height; ++y) { - ARGB1555ToARGBRow(src_argb1555, dst_argb, width); - src_argb1555 += src_stride_argb1555; - dst_argb += dst_stride_argb; - } - return 0; -} - -// Convert ARGB4444 to ARGB. -LIBYUV_API -int ARGB4444ToARGB(const uint8* src_argb4444, int src_stride_argb4444, - uint8* dst_argb, int dst_stride_argb, - int width, int height) { - if (!src_argb4444 || !dst_argb || - width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - src_argb4444 = src_argb4444 + (height - 1) * src_stride_argb4444; - src_stride_argb4444 = -src_stride_argb4444; - } - // Coalesce rows. 
- if (src_stride_argb4444 == width * 2 && - dst_stride_argb == width * 4) { - width *= height; - height = 1; - src_stride_argb4444 = dst_stride_argb = 0; - } - void (*ARGB4444ToARGBRow)(const uint8* src_argb4444, uint8* dst_argb, - int pix) = ARGB4444ToARGBRow_C; -#if defined(HAS_ARGB4444TOARGBROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && width >= 8 && - IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { - ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_SSE2; - if (IS_ALIGNED(width, 8)) { - ARGB4444ToARGBRow = ARGB4444ToARGBRow_SSE2; - } - } -#elif defined(HAS_ARGB4444TOARGBROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && width >= 8) { - ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - ARGB4444ToARGBRow = ARGB4444ToARGBRow_NEON; - } - } -#endif - - for (int y = 0; y < height; ++y) { - ARGB4444ToARGBRow(src_argb4444, dst_argb, width); - src_argb4444 += src_stride_argb4444; - dst_argb += dst_stride_argb; - } - return 0; -} - -// Convert NV12 to ARGB. -LIBYUV_API -int NV12ToARGB(const uint8* src_y, int src_stride_y, - const uint8* src_uv, int src_stride_uv, - uint8* dst_argb, int dst_stride_argb, - int width, int height) { - if (!src_y || !src_uv || !dst_argb || - width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. 
- if (height < 0) { - height = -height; - dst_argb = dst_argb + (height - 1) * dst_stride_argb; - dst_stride_argb = -dst_stride_argb; - } - void (*NV12ToARGBRow)(const uint8* y_buf, - const uint8* uv_buf, - uint8* rgb_buf, - int width) = NV12ToARGBRow_C; -#if defined(HAS_NV12TOARGBROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) { - NV12ToARGBRow = NV12ToARGBRow_Any_SSSE3; - if (IS_ALIGNED(width, 8)) { - NV12ToARGBRow = NV12ToARGBRow_Unaligned_SSSE3; - if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { - NV12ToARGBRow = NV12ToARGBRow_SSSE3; - } - } - } -#elif defined(HAS_NV12TOARGBROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && width >= 8) { - NV12ToARGBRow = NV12ToARGBRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - NV12ToARGBRow = NV12ToARGBRow_NEON; - } - } -#endif - - for (int y = 0; y < height; ++y) { - NV12ToARGBRow(src_y, src_uv, dst_argb, width); - dst_argb += dst_stride_argb; - src_y += src_stride_y; - if (y & 1) { - src_uv += src_stride_uv; - } - } - return 0; -} - -// Convert NV21 to ARGB. -LIBYUV_API -int NV21ToARGB(const uint8* src_y, int src_stride_y, - const uint8* src_uv, int src_stride_uv, - uint8* dst_argb, int dst_stride_argb, - int width, int height) { - if (!src_y || !src_uv || !dst_argb || - width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. 
- if (height < 0) { - height = -height; - dst_argb = dst_argb + (height - 1) * dst_stride_argb; - dst_stride_argb = -dst_stride_argb; - } - void (*NV21ToARGBRow)(const uint8* y_buf, - const uint8* uv_buf, - uint8* rgb_buf, - int width) = NV21ToARGBRow_C; -#if defined(HAS_NV21TOARGBROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) { - NV21ToARGBRow = NV21ToARGBRow_Any_SSSE3; - if (IS_ALIGNED(width, 8)) { - NV21ToARGBRow = NV21ToARGBRow_Unaligned_SSSE3; - if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { - NV21ToARGBRow = NV21ToARGBRow_SSSE3; - } - } - } -#endif -#if defined(HAS_NV21TOARGBROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && width >= 8) { - NV21ToARGBRow = NV21ToARGBRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - NV21ToARGBRow = NV21ToARGBRow_NEON; - } - } -#endif - - for (int y = 0; y < height; ++y) { - NV21ToARGBRow(src_y, src_uv, dst_argb, width); - dst_argb += dst_stride_argb; - src_y += src_stride_y; - if (y & 1) { - src_uv += src_stride_uv; - } - } - return 0; -} - -// Convert M420 to ARGB. -LIBYUV_API -int M420ToARGB(const uint8* src_m420, int src_stride_m420, - uint8* dst_argb, int dst_stride_argb, - int width, int height) { - if (!src_m420 || !dst_argb || - width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. 
- if (height < 0) { - height = -height; - dst_argb = dst_argb + (height - 1) * dst_stride_argb; - dst_stride_argb = -dst_stride_argb; - } - void (*NV12ToARGBRow)(const uint8* y_buf, - const uint8* uv_buf, - uint8* rgb_buf, - int width) = NV12ToARGBRow_C; -#if defined(HAS_NV12TOARGBROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) { - NV12ToARGBRow = NV12ToARGBRow_Any_SSSE3; - if (IS_ALIGNED(width, 8)) { - NV12ToARGBRow = NV12ToARGBRow_Unaligned_SSSE3; - if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { - NV12ToARGBRow = NV12ToARGBRow_SSSE3; - } - } - } -#elif defined(HAS_NV12TOARGBROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && width >= 8) { - NV12ToARGBRow = NV12ToARGBRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - NV12ToARGBRow = NV12ToARGBRow_NEON; - } - } -#endif - - for (int y = 0; y < height - 1; y += 2) { - NV12ToARGBRow(src_m420, src_m420 + src_stride_m420 * 2, dst_argb, width); - NV12ToARGBRow(src_m420 + src_stride_m420, src_m420 + src_stride_m420 * 2, - dst_argb + dst_stride_argb, width); - dst_argb += dst_stride_argb * 2; - src_m420 += src_stride_m420 * 3; - } - if (height & 1) { - NV12ToARGBRow(src_m420, src_m420 + src_stride_m420 * 2, dst_argb, width); - } - return 0; -} - -// Convert YUY2 to ARGB. -LIBYUV_API -int YUY2ToARGB(const uint8* src_yuy2, int src_stride_yuy2, - uint8* dst_argb, int dst_stride_argb, - int width, int height) { - if (!src_yuy2 || !dst_argb || - width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2; - src_stride_yuy2 = -src_stride_yuy2; - } - // Coalesce rows. - if (src_stride_yuy2 == width * 2 && - dst_stride_argb == width * 4) { - width *= height; - height = 1; - src_stride_yuy2 = dst_stride_argb = 0; - } - void (*YUY2ToARGBRow)(const uint8* src_yuy2, uint8* dst_argb, int pix) = - YUY2ToARGBRow_C; -#if defined(HAS_YUY2TOARGBROW_SSSE3) - // Posix is 16, Windows is 8. 
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { - YUY2ToARGBRow = YUY2ToARGBRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - YUY2ToARGBRow = YUY2ToARGBRow_Unaligned_SSSE3; - if (IS_ALIGNED(src_yuy2, 16) && IS_ALIGNED(src_stride_yuy2, 16) && - IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { - YUY2ToARGBRow = YUY2ToARGBRow_SSSE3; - } - } - } -#elif defined(HAS_YUY2TOARGBROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && width >= 8) { - YUY2ToARGBRow = YUY2ToARGBRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - YUY2ToARGBRow = YUY2ToARGBRow_NEON; - } - } -#endif - for (int y = 0; y < height; ++y) { - YUY2ToARGBRow(src_yuy2, dst_argb, width); - src_yuy2 += src_stride_yuy2; - dst_argb += dst_stride_argb; - } - return 0; -} - -// Convert UYVY to ARGB. -LIBYUV_API -int UYVYToARGB(const uint8* src_uyvy, int src_stride_uyvy, - uint8* dst_argb, int dst_stride_argb, - int width, int height) { - if (!src_uyvy || !dst_argb || - width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - src_uyvy = src_uyvy + (height - 1) * src_stride_uyvy; - src_stride_uyvy = -src_stride_uyvy; - } - // Coalesce rows. - if (src_stride_uyvy == width * 2 && - dst_stride_argb == width * 4) { - width *= height; - height = 1; - src_stride_uyvy = dst_stride_argb = 0; - } - void (*UYVYToARGBRow)(const uint8* src_uyvy, uint8* dst_argb, int pix) = - UYVYToARGBRow_C; -#if defined(HAS_UYVYTOARGBROW_SSSE3) - // Posix is 16, Windows is 8. 
- if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { - UYVYToARGBRow = UYVYToARGBRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - UYVYToARGBRow = UYVYToARGBRow_Unaligned_SSSE3; - if (IS_ALIGNED(src_uyvy, 16) && IS_ALIGNED(src_stride_uyvy, 16) && - IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { - UYVYToARGBRow = UYVYToARGBRow_SSSE3; - } - } - } -#elif defined(HAS_UYVYTOARGBROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && width >= 8) { - UYVYToARGBRow = UYVYToARGBRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - UYVYToARGBRow = UYVYToARGBRow_NEON; - } - } -#endif - for (int y = 0; y < height; ++y) { - UYVYToARGBRow(src_uyvy, dst_argb, width); - src_uyvy += src_stride_uyvy; - dst_argb += dst_stride_argb; - } - return 0; -} - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/convert_from.cc b/drivers/theoraplayer/src/YUV/libyuv/src/convert_from.cc deleted file mode 100755 index 1e10832856..0000000000 --- a/drivers/theoraplayer/src/YUV/libyuv/src/convert_from.cc +++ /dev/null @@ -1,1196 +0,0 @@ -/* - * Copyright 2012 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "libyuv/convert_from.h" - -#include "libyuv/basic_types.h" -#include "libyuv/convert.h" // For I420Copy -#include "libyuv/cpu_id.h" -#include "libyuv/format_conversion.h" -#include "libyuv/planar_functions.h" -#include "libyuv/rotate.h" -#include "libyuv/scale.h" // For ScalePlane() -#include "libyuv/video_common.h" -#include "libyuv/row.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -#define SUBSAMPLE(v, a, s) (v < 0) ? 
(-((-v + a) >> s)) : ((v + a) >> s) -static __inline int Abs(int v) { - return v >= 0 ? v : -v; -} - -// I420 To any I4xx YUV format with mirroring. -static int I420ToI4xx(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int src_y_width, int src_y_height, - int dst_uv_width, int dst_uv_height) { - if (src_y_width == 0 || src_y_height == 0 || - dst_uv_width <= 0 || dst_uv_height <= 0) { - return -1; - } - const int dst_y_width = Abs(src_y_width); - const int dst_y_height = Abs(src_y_height); - const int src_uv_width = SUBSAMPLE(src_y_width, 1, 1); - const int src_uv_height = SUBSAMPLE(src_y_height, 1, 1); - ScalePlane(src_y, src_stride_y, src_y_width, src_y_height, - dst_y, dst_stride_y, dst_y_width, dst_y_height, - kFilterBilinear); - ScalePlane(src_u, src_stride_u, src_uv_width, src_uv_height, - dst_u, dst_stride_u, dst_uv_width, dst_uv_height, - kFilterBilinear); - ScalePlane(src_v, src_stride_v, src_uv_width, src_uv_height, - dst_v, dst_stride_v, dst_uv_width, dst_uv_height, - kFilterBilinear); - return 0; -} - -// 420 chroma is 1/2 width, 1/2 height -// 422 chroma is 1/2 width, 1x height -LIBYUV_API -int I420ToI422(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { - const int dst_uv_width = (Abs(width) + 1) >> 1; - const int dst_uv_height = Abs(height); - return I420ToI4xx(src_y, src_stride_y, - src_u, src_stride_u, - src_v, src_stride_v, - dst_y, dst_stride_y, - dst_u, dst_stride_u, - dst_v, dst_stride_v, - width, height, - dst_uv_width, dst_uv_height); -} - -// 420 chroma is 1/2 width, 1/2 height -// 444 chroma is 1x width, 1x height -LIBYUV_API -int I420ToI444(const uint8* src_y, int 
src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { - const int dst_uv_width = Abs(width); - const int dst_uv_height = Abs(height); - return I420ToI4xx(src_y, src_stride_y, - src_u, src_stride_u, - src_v, src_stride_v, - dst_y, dst_stride_y, - dst_u, dst_stride_u, - dst_v, dst_stride_v, - width, height, - dst_uv_width, dst_uv_height); -} - -// 420 chroma is 1/2 width, 1/2 height -// 411 chroma is 1/4 width, 1x height -LIBYUV_API -int I420ToI411(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { - const int dst_uv_width = (Abs(width) + 3) >> 2; - const int dst_uv_height = Abs(height); - return I420ToI4xx(src_y, src_stride_y, - src_u, src_stride_u, - src_v, src_stride_v, - dst_y, dst_stride_y, - dst_u, dst_stride_u, - dst_v, dst_stride_v, - width, height, - dst_uv_width, dst_uv_height); -} - -// Copy to I400. Source can be I420,422,444,400,NV12,NV21 -LIBYUV_API -int I400Copy(const uint8* src_y, int src_stride_y, - uint8* dst_y, int dst_stride_y, - int width, int height) { - if (!src_y || !dst_y || - width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. 
- if (height < 0) { - height = -height; - src_y = src_y + (height - 1) * src_stride_y; - src_stride_y = -src_stride_y; - } - CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); - return 0; -} - -LIBYUV_API -int I422ToYUY2(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_yuy2, int dst_stride_yuy2, - int width, int height) { - if (!src_y || !src_u || !src_v || !dst_yuy2 || - width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - dst_yuy2 = dst_yuy2 + (height - 1) * dst_stride_yuy2; - dst_stride_yuy2 = -dst_stride_yuy2; - } - // Coalesce rows. - if (src_stride_y == width && - src_stride_u * 2 == width && - src_stride_v * 2 == width && - dst_stride_yuy2 == width * 2) { - width *= height; - height = 1; - src_stride_y = src_stride_u = src_stride_v = dst_stride_yuy2 = 0; - } - void (*I422ToYUY2Row)(const uint8* src_y, const uint8* src_u, - const uint8* src_v, uint8* dst_yuy2, int width) = - I422ToYUY2Row_C; -#if defined(HAS_I422TOYUY2ROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && width >= 16) { - I422ToYUY2Row = I422ToYUY2Row_Any_SSE2; - if (IS_ALIGNED(width, 16)) { - I422ToYUY2Row = I422ToYUY2Row_SSE2; - } - } -#elif defined(HAS_I422TOYUY2ROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && width >= 16) { - I422ToYUY2Row = I422ToYUY2Row_Any_NEON; - if (IS_ALIGNED(width, 16)) { - I422ToYUY2Row = I422ToYUY2Row_NEON; - } - } -#endif - - for (int y = 0; y < height; ++y) { - I422ToYUY2Row(src_y, src_u, src_v, dst_yuy2, width); - src_y += src_stride_y; - src_u += src_stride_u; - src_v += src_stride_v; - dst_yuy2 += dst_stride_yuy2; - } - return 0; -} - -LIBYUV_API -int I420ToYUY2(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_yuy2, int dst_stride_yuy2, - int width, int height) { - if (!src_y || !src_u || !src_v || !dst_yuy2 || - 
width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - dst_yuy2 = dst_yuy2 + (height - 1) * dst_stride_yuy2; - dst_stride_yuy2 = -dst_stride_yuy2; - } - void (*I422ToYUY2Row)(const uint8* src_y, const uint8* src_u, - const uint8* src_v, uint8* dst_yuy2, int width) = - I422ToYUY2Row_C; -#if defined(HAS_I422TOYUY2ROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && width >= 16) { - I422ToYUY2Row = I422ToYUY2Row_Any_SSE2; - if (IS_ALIGNED(width, 16)) { - I422ToYUY2Row = I422ToYUY2Row_SSE2; - } - } -#elif defined(HAS_I422TOYUY2ROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && width >= 16) { - I422ToYUY2Row = I422ToYUY2Row_Any_NEON; - if (IS_ALIGNED(width, 16)) { - I422ToYUY2Row = I422ToYUY2Row_NEON; - } - } -#endif - - for (int y = 0; y < height - 1; y += 2) { - I422ToYUY2Row(src_y, src_u, src_v, dst_yuy2, width); - I422ToYUY2Row(src_y + src_stride_y, src_u, src_v, - dst_yuy2 + dst_stride_yuy2, width); - src_y += src_stride_y * 2; - src_u += src_stride_u; - src_v += src_stride_v; - dst_yuy2 += dst_stride_yuy2 * 2; - } - if (height & 1) { - I422ToYUY2Row(src_y, src_u, src_v, dst_yuy2, width); - } - return 0; -} - -LIBYUV_API -int I422ToUYVY(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_uyvy, int dst_stride_uyvy, - int width, int height) { - if (!src_y || !src_u || !src_v || !dst_uyvy || - width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - dst_uyvy = dst_uyvy + (height - 1) * dst_stride_uyvy; - dst_stride_uyvy = -dst_stride_uyvy; - } - // Coalesce rows. 
- if (src_stride_y == width && - src_stride_u * 2 == width && - src_stride_v * 2 == width && - dst_stride_uyvy == width * 2) { - width *= height; - height = 1; - src_stride_y = src_stride_u = src_stride_v = dst_stride_uyvy = 0; - } - void (*I422ToUYVYRow)(const uint8* src_y, const uint8* src_u, - const uint8* src_v, uint8* dst_uyvy, int width) = - I422ToUYVYRow_C; -#if defined(HAS_I422TOUYVYROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && width >= 16) { - I422ToUYVYRow = I422ToUYVYRow_Any_SSE2; - if (IS_ALIGNED(width, 16)) { - I422ToUYVYRow = I422ToUYVYRow_SSE2; - } - } -#elif defined(HAS_I422TOUYVYROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && width >= 16) { - I422ToUYVYRow = I422ToUYVYRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - I422ToUYVYRow = I422ToUYVYRow_NEON; - } - } -#endif - - for (int y = 0; y < height; ++y) { - I422ToUYVYRow(src_y, src_u, src_v, dst_uyvy, width); - src_y += src_stride_y; - src_u += src_stride_u; - src_v += src_stride_v; - dst_uyvy += dst_stride_uyvy; - } - return 0; -} - -LIBYUV_API -int I420ToUYVY(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_uyvy, int dst_stride_uyvy, - int width, int height) { - if (!src_y || !src_u || !src_v || !dst_uyvy || - width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. 
- if (height < 0) { - height = -height; - dst_uyvy = dst_uyvy + (height - 1) * dst_stride_uyvy; - dst_stride_uyvy = -dst_stride_uyvy; - } - void (*I422ToUYVYRow)(const uint8* src_y, const uint8* src_u, - const uint8* src_v, uint8* dst_uyvy, int width) = - I422ToUYVYRow_C; -#if defined(HAS_I422TOUYVYROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && width >= 16) { - I422ToUYVYRow = I422ToUYVYRow_Any_SSE2; - if (IS_ALIGNED(width, 16)) { - I422ToUYVYRow = I422ToUYVYRow_SSE2; - } - } -#elif defined(HAS_I422TOUYVYROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && width >= 16) { - I422ToUYVYRow = I422ToUYVYRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - I422ToUYVYRow = I422ToUYVYRow_NEON; - } - } -#endif - - for (int y = 0; y < height - 1; y += 2) { - I422ToUYVYRow(src_y, src_u, src_v, dst_uyvy, width); - I422ToUYVYRow(src_y + src_stride_y, src_u, src_v, - dst_uyvy + dst_stride_uyvy, width); - src_y += src_stride_y * 2; - src_u += src_stride_u; - src_v += src_stride_v; - dst_uyvy += dst_stride_uyvy * 2; - } - if (height & 1) { - I422ToUYVYRow(src_y, src_u, src_v, dst_uyvy, width); - } - return 0; -} - -LIBYUV_API -int I420ToNV12(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_uv, int dst_stride_uv, - int width, int height) { - if (!src_y || !src_u || !src_v || !dst_y || !dst_uv || - width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - int halfheight = (height + 1) >> 1; - dst_y = dst_y + (height - 1) * dst_stride_y; - dst_uv = dst_uv + (halfheight - 1) * dst_stride_uv; - dst_stride_y = -dst_stride_y; - dst_stride_uv = -dst_stride_uv; - } - // Coalesce rows. - int halfwidth = (width + 1) >> 1; - int halfheight = (height + 1) >> 1; - if (src_stride_y == width && - dst_stride_y == width) { - width *= height; - height = 1; - src_stride_y = dst_stride_y = 0; - } - // Coalesce rows. 
- if (src_stride_u == halfwidth && - src_stride_v == halfwidth && - dst_stride_uv == halfwidth * 2) { - halfwidth *= halfheight; - halfheight = 1; - src_stride_u = src_stride_v = dst_stride_uv = 0; - } - void (*MergeUVRow_)(const uint8* src_u, const uint8* src_v, uint8* dst_uv, - int width) = MergeUVRow_C; -#if defined(HAS_MERGEUVROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && halfwidth >= 16) { - MergeUVRow_ = MergeUVRow_Any_SSE2; - if (IS_ALIGNED(halfwidth, 16)) { - MergeUVRow_ = MergeUVRow_Unaligned_SSE2; - if (IS_ALIGNED(src_u, 16) && IS_ALIGNED(src_stride_u, 16) && - IS_ALIGNED(src_v, 16) && IS_ALIGNED(src_stride_v, 16) && - IS_ALIGNED(dst_uv, 16) && IS_ALIGNED(dst_stride_uv, 16)) { - MergeUVRow_ = MergeUVRow_SSE2; - } - } - } -#endif -#if defined(HAS_MERGEUVROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2) && halfwidth >= 32) { - MergeUVRow_ = MergeUVRow_Any_AVX2; - if (IS_ALIGNED(halfwidth, 32)) { - MergeUVRow_ = MergeUVRow_AVX2; - } - } -#endif -#if defined(HAS_MERGEUVROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && halfwidth >= 16) { - MergeUVRow_ = MergeUVRow_Any_NEON; - if (IS_ALIGNED(halfwidth, 16)) { - MergeUVRow_ = MergeUVRow_NEON; - } - } -#endif - - CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); - for (int y = 0; y < halfheight; ++y) { - // Merge a row of U and V into a row of UV. - MergeUVRow_(src_u, src_v, dst_uv, halfwidth); - src_u += src_stride_u; - src_v += src_stride_v; - dst_uv += dst_stride_uv; - } - return 0; -} - -LIBYUV_API -int I420ToNV21(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_vu, int dst_stride_vu, - int width, int height) { - return I420ToNV12(src_y, src_stride_y, - src_v, src_stride_v, - src_u, src_stride_u, - dst_y, src_stride_y, - dst_vu, dst_stride_vu, - width, height); -} - -// Convert I420 to ARGB. 
-LIBYUV_API -int I420ToARGB(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_argb, int dst_stride_argb, - int width, int height) { - if (!src_y || !src_u || !src_v || !dst_argb || - width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - dst_argb = dst_argb + (height - 1) * dst_stride_argb; - dst_stride_argb = -dst_stride_argb; - } - void (*I422ToARGBRow)(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width) = I422ToARGBRow_C; -#if defined(HAS_I422TOARGBROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) { - I422ToARGBRow = I422ToARGBRow_Any_SSSE3; - if (IS_ALIGNED(width, 8)) { - I422ToARGBRow = I422ToARGBRow_Unaligned_SSSE3; - if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { - I422ToARGBRow = I422ToARGBRow_SSSE3; - } - } - } -#endif -#if defined(HAS_I422TOARGBROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2) && width >= 16) { - I422ToARGBRow = I422ToARGBRow_Any_AVX2; - if (IS_ALIGNED(width, 16)) { - I422ToARGBRow = I422ToARGBRow_AVX2; - } - } -#endif -#if defined(HAS_I422TOARGBROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && width >= 8) { - I422ToARGBRow = I422ToARGBRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - I422ToARGBRow = I422ToARGBRow_NEON; - } - } -#endif -#if defined(HAS_I422TOARGBROW_MIPS_DSPR2) - if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) && - IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) && - IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) && - IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) && - IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) { - I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2; - } -#endif - - for (int y = 0; y < height; ++y) { - I422ToARGBRow(src_y, src_u, src_v, dst_argb, width); - dst_argb += dst_stride_argb; - src_y += src_stride_y; - if (y & 1) { - src_u += src_stride_u; - src_v += 
src_stride_v; - } - } - return 0; -} - -// Convert I420 to BGRA. -LIBYUV_API -int I420ToBGRA(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_bgra, int dst_stride_bgra, - int width, int height) { - if (!src_y || !src_u || !src_v || !dst_bgra || - width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - dst_bgra = dst_bgra + (height - 1) * dst_stride_bgra; - dst_stride_bgra = -dst_stride_bgra; - } - void (*I422ToBGRARow)(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width) = I422ToBGRARow_C; -#if defined(HAS_I422TOBGRAROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) { - I422ToBGRARow = I422ToBGRARow_Any_SSSE3; - if (IS_ALIGNED(width, 8)) { - I422ToBGRARow = I422ToBGRARow_Unaligned_SSSE3; - if (IS_ALIGNED(dst_bgra, 16) && IS_ALIGNED(dst_stride_bgra, 16)) { - I422ToBGRARow = I422ToBGRARow_SSSE3; - } - } - } -#elif defined(HAS_I422TOBGRAROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && width >= 8) { - I422ToBGRARow = I422ToBGRARow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - I422ToBGRARow = I422ToBGRARow_NEON; - } - } -#elif defined(HAS_I422TOBGRAROW_MIPS_DSPR2) - if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) && - IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) && - IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) && - IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) && - IS_ALIGNED(dst_bgra, 4) && IS_ALIGNED(dst_stride_bgra, 4)) { - I422ToBGRARow = I422ToBGRARow_MIPS_DSPR2; - } -#endif - - for (int y = 0; y < height; ++y) { - I422ToBGRARow(src_y, src_u, src_v, dst_bgra, width); - dst_bgra += dst_stride_bgra; - src_y += src_stride_y; - if (y & 1) { - src_u += src_stride_u; - src_v += src_stride_v; - } - } - return 0; -} - -// Convert I420 to ABGR. 
-LIBYUV_API -int I420ToABGR(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_abgr, int dst_stride_abgr, - int width, int height) { - if (!src_y || !src_u || !src_v || !dst_abgr || - width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - dst_abgr = dst_abgr + (height - 1) * dst_stride_abgr; - dst_stride_abgr = -dst_stride_abgr; - } - void (*I422ToABGRRow)(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width) = I422ToABGRRow_C; -#if defined(HAS_I422TOABGRROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) { - I422ToABGRRow = I422ToABGRRow_Any_SSSE3; - if (IS_ALIGNED(width, 8)) { - I422ToABGRRow = I422ToABGRRow_Unaligned_SSSE3; - if (IS_ALIGNED(dst_abgr, 16) && IS_ALIGNED(dst_stride_abgr, 16)) { - I422ToABGRRow = I422ToABGRRow_SSSE3; - } - } - } -#elif defined(HAS_I422TOABGRROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && width >= 8) { - I422ToABGRRow = I422ToABGRRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - I422ToABGRRow = I422ToABGRRow_NEON; - } - } -#endif - - for (int y = 0; y < height; ++y) { - I422ToABGRRow(src_y, src_u, src_v, dst_abgr, width); - dst_abgr += dst_stride_abgr; - src_y += src_stride_y; - if (y & 1) { - src_u += src_stride_u; - src_v += src_stride_v; - } - } - return 0; -} - -// Convert I420 to RGBA. -LIBYUV_API -int I420ToRGBA(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_rgba, int dst_stride_rgba, - int width, int height) { - if (!src_y || !src_u || !src_v || !dst_rgba || - width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. 
- if (height < 0) { - height = -height; - dst_rgba = dst_rgba + (height - 1) * dst_stride_rgba; - dst_stride_rgba = -dst_stride_rgba; - } - void (*I422ToRGBARow)(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width) = I422ToRGBARow_C; -#if defined(HAS_I422TORGBAROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) { - I422ToRGBARow = I422ToRGBARow_Any_SSSE3; - if (IS_ALIGNED(width, 8)) { - I422ToRGBARow = I422ToRGBARow_Unaligned_SSSE3; - if (IS_ALIGNED(dst_rgba, 16) && IS_ALIGNED(dst_stride_rgba, 16)) { - I422ToRGBARow = I422ToRGBARow_SSSE3; - } - } - } -#elif defined(HAS_I422TORGBAROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && width >= 8) { - I422ToRGBARow = I422ToRGBARow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - I422ToRGBARow = I422ToRGBARow_NEON; - } - } -#endif - - for (int y = 0; y < height; ++y) { - I422ToRGBARow(src_y, src_u, src_v, dst_rgba, width); - dst_rgba += dst_stride_rgba; - src_y += src_stride_y; - if (y & 1) { - src_u += src_stride_u; - src_v += src_stride_v; - } - } - return 0; -} - -// Convert I420 to RGB24. -LIBYUV_API -int I420ToRGB24(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_rgb24, int dst_stride_rgb24, - int width, int height) { - if (!src_y || !src_u || !src_v || !dst_rgb24 || - width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. 
- if (height < 0) { - height = -height; - dst_rgb24 = dst_rgb24 + (height - 1) * dst_stride_rgb24; - dst_stride_rgb24 = -dst_stride_rgb24; - } - void (*I422ToRGB24Row)(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width) = I422ToRGB24Row_C; -#if defined(HAS_I422TORGB24ROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) { - I422ToRGB24Row = I422ToRGB24Row_Any_SSSE3; - if (IS_ALIGNED(width, 8)) { - I422ToRGB24Row = I422ToRGB24Row_SSSE3; - } - } -#elif defined(HAS_I422TORGB24ROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && width >= 8) { - I422ToRGB24Row = I422ToRGB24Row_Any_NEON; - if (IS_ALIGNED(width, 8)) { - I422ToRGB24Row = I422ToRGB24Row_NEON; - } - } -#endif - - for (int y = 0; y < height; ++y) { - I422ToRGB24Row(src_y, src_u, src_v, dst_rgb24, width); - dst_rgb24 += dst_stride_rgb24; - src_y += src_stride_y; - if (y & 1) { - src_u += src_stride_u; - src_v += src_stride_v; - } - } - return 0; -} - -// Convert I420 to RAW. -LIBYUV_API -int I420ToRAW(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_raw, int dst_stride_raw, - int width, int height) { - if (!src_y || !src_u || !src_v || !dst_raw || - width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. 
- if (height < 0) { - height = -height; - dst_raw = dst_raw + (height - 1) * dst_stride_raw; - dst_stride_raw = -dst_stride_raw; - } - void (*I422ToRAWRow)(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width) = I422ToRAWRow_C; -#if defined(HAS_I422TORAWROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) { - I422ToRAWRow = I422ToRAWRow_Any_SSSE3; - if (IS_ALIGNED(width, 8)) { - I422ToRAWRow = I422ToRAWRow_SSSE3; - } - } -#elif defined(HAS_I422TORAWROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && width >= 8) { - I422ToRAWRow = I422ToRAWRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - I422ToRAWRow = I422ToRAWRow_NEON; - } - } -#endif - - for (int y = 0; y < height; ++y) { - I422ToRAWRow(src_y, src_u, src_v, dst_raw, width); - dst_raw += dst_stride_raw; - src_y += src_stride_y; - if (y & 1) { - src_u += src_stride_u; - src_v += src_stride_v; - } - } - return 0; -} - -// Convert I420 to ARGB1555. -LIBYUV_API -int I420ToARGB1555(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_argb1555, int dst_stride_argb1555, - int width, int height) { - if (!src_y || !src_u || !src_v || !dst_argb1555 || - width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. 
- if (height < 0) { - height = -height; - dst_argb1555 = dst_argb1555 + (height - 1) * dst_stride_argb1555; - dst_stride_argb1555 = -dst_stride_argb1555; - } - void (*I422ToARGB1555Row)(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width) = I422ToARGB1555Row_C; -#if defined(HAS_I422TOARGB1555ROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) { - I422ToARGB1555Row = I422ToARGB1555Row_Any_SSSE3; - if (IS_ALIGNED(width, 8)) { - I422ToARGB1555Row = I422ToARGB1555Row_SSSE3; - } - } -#elif defined(HAS_I422TOARGB1555ROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && width >= 8) { - I422ToARGB1555Row = I422ToARGB1555Row_Any_NEON; - if (IS_ALIGNED(width, 8)) { - I422ToARGB1555Row = I422ToARGB1555Row_NEON; - } - } -#endif - - for (int y = 0; y < height; ++y) { - I422ToARGB1555Row(src_y, src_u, src_v, dst_argb1555, width); - dst_argb1555 += dst_stride_argb1555; - src_y += src_stride_y; - if (y & 1) { - src_u += src_stride_u; - src_v += src_stride_v; - } - } - return 0; -} - - -// Convert I420 to ARGB4444. -LIBYUV_API -int I420ToARGB4444(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_argb4444, int dst_stride_argb4444, - int width, int height) { - if (!src_y || !src_u || !src_v || !dst_argb4444 || - width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. 
- if (height < 0) { - height = -height; - dst_argb4444 = dst_argb4444 + (height - 1) * dst_stride_argb4444; - dst_stride_argb4444 = -dst_stride_argb4444; - } - void (*I422ToARGB4444Row)(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width) = I422ToARGB4444Row_C; -#if defined(HAS_I422TOARGB4444ROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) { - I422ToARGB4444Row = I422ToARGB4444Row_Any_SSSE3; - if (IS_ALIGNED(width, 8)) { - I422ToARGB4444Row = I422ToARGB4444Row_SSSE3; - } - } -#elif defined(HAS_I422TOARGB4444ROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && width >= 8) { - I422ToARGB4444Row = I422ToARGB4444Row_Any_NEON; - if (IS_ALIGNED(width, 8)) { - I422ToARGB4444Row = I422ToARGB4444Row_NEON; - } - } -#endif - - for (int y = 0; y < height; ++y) { - I422ToARGB4444Row(src_y, src_u, src_v, dst_argb4444, width); - dst_argb4444 += dst_stride_argb4444; - src_y += src_stride_y; - if (y & 1) { - src_u += src_stride_u; - src_v += src_stride_v; - } - } - return 0; -} - -// Convert I420 to RGB565. -LIBYUV_API -int I420ToRGB565(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_rgb565, int dst_stride_rgb565, - int width, int height) { - if (!src_y || !src_u || !src_v || !dst_rgb565 || - width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. 
- if (height < 0) { - height = -height; - dst_rgb565 = dst_rgb565 + (height - 1) * dst_stride_rgb565; - dst_stride_rgb565 = -dst_stride_rgb565; - } - void (*I422ToRGB565Row)(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width) = I422ToRGB565Row_C; -#if defined(HAS_I422TORGB565ROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) { - I422ToRGB565Row = I422ToRGB565Row_Any_SSSE3; - if (IS_ALIGNED(width, 8)) { - I422ToRGB565Row = I422ToRGB565Row_SSSE3; - } - } -#elif defined(HAS_I422TORGB565ROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && width >= 8) { - I422ToRGB565Row = I422ToRGB565Row_Any_NEON; - if (IS_ALIGNED(width, 8)) { - I422ToRGB565Row = I422ToRGB565Row_NEON; - } - } -#endif - - for (int y = 0; y < height; ++y) { - I422ToRGB565Row(src_y, src_u, src_v, dst_rgb565, width); - dst_rgb565 += dst_stride_rgb565; - src_y += src_stride_y; - if (y & 1) { - src_u += src_stride_u; - src_v += src_stride_v; - } - } - return 0; -} - -// Convert I420 to specified format -LIBYUV_API -int ConvertFromI420(const uint8* y, int y_stride, - const uint8* u, int u_stride, - const uint8* v, int v_stride, - uint8* dst_sample, int dst_sample_stride, - int width, int height, - uint32 fourcc) { - uint32 format = CanonicalFourCC(fourcc); - if (!y || !u|| !v || !dst_sample || - width <= 0 || height == 0) { - return -1; - } - int r = 0; - switch (format) { - // Single plane formats - case FOURCC_YUY2: - r = I420ToYUY2(y, y_stride, - u, u_stride, - v, v_stride, - dst_sample, - dst_sample_stride ? dst_sample_stride : width * 2, - width, height); - break; - case FOURCC_UYVY: - r = I420ToUYVY(y, y_stride, - u, u_stride, - v, v_stride, - dst_sample, - dst_sample_stride ? dst_sample_stride : width * 2, - width, height); - break; - case FOURCC_RGBP: - r = I420ToRGB565(y, y_stride, - u, u_stride, - v, v_stride, - dst_sample, - dst_sample_stride ? 
dst_sample_stride : width * 2, - width, height); - break; - case FOURCC_RGBO: - r = I420ToARGB1555(y, y_stride, - u, u_stride, - v, v_stride, - dst_sample, - dst_sample_stride ? dst_sample_stride : width * 2, - width, height); - break; - case FOURCC_R444: - r = I420ToARGB4444(y, y_stride, - u, u_stride, - v, v_stride, - dst_sample, - dst_sample_stride ? dst_sample_stride : width * 2, - width, height); - break; - case FOURCC_24BG: - r = I420ToRGB24(y, y_stride, - u, u_stride, - v, v_stride, - dst_sample, - dst_sample_stride ? dst_sample_stride : width * 3, - width, height); - break; - case FOURCC_RAW: - r = I420ToRAW(y, y_stride, - u, u_stride, - v, v_stride, - dst_sample, - dst_sample_stride ? dst_sample_stride : width * 3, - width, height); - break; - case FOURCC_ARGB: - r = I420ToARGB(y, y_stride, - u, u_stride, - v, v_stride, - dst_sample, - dst_sample_stride ? dst_sample_stride : width * 4, - width, height); - break; - case FOURCC_BGRA: - r = I420ToBGRA(y, y_stride, - u, u_stride, - v, v_stride, - dst_sample, - dst_sample_stride ? dst_sample_stride : width * 4, - width, height); - break; - case FOURCC_ABGR: - r = I420ToABGR(y, y_stride, - u, u_stride, - v, v_stride, - dst_sample, - dst_sample_stride ? dst_sample_stride : width * 4, - width, height); - break; - case FOURCC_RGBA: - r = I420ToRGBA(y, y_stride, - u, u_stride, - v, v_stride, - dst_sample, - dst_sample_stride ? dst_sample_stride : width * 4, - width, height); - break; - case FOURCC_BGGR: - r = I420ToBayerBGGR(y, y_stride, - u, u_stride, - v, v_stride, - dst_sample, - dst_sample_stride ? dst_sample_stride : width, - width, height); - break; - case FOURCC_GBRG: - r = I420ToBayerGBRG(y, y_stride, - u, u_stride, - v, v_stride, - dst_sample, - dst_sample_stride ? dst_sample_stride : width, - width, height); - break; - case FOURCC_GRBG: - r = I420ToBayerGRBG(y, y_stride, - u, u_stride, - v, v_stride, - dst_sample, - dst_sample_stride ? 
dst_sample_stride : width, - width, height); - break; - case FOURCC_RGGB: - r = I420ToBayerRGGB(y, y_stride, - u, u_stride, - v, v_stride, - dst_sample, - dst_sample_stride ? dst_sample_stride : width, - width, height); - break; - case FOURCC_I400: - r = I400Copy(y, y_stride, - dst_sample, - dst_sample_stride ? dst_sample_stride : width, - width, height); - break; - case FOURCC_NV12: { - uint8* dst_uv = dst_sample + width * height; - r = I420ToNV12(y, y_stride, - u, u_stride, - v, v_stride, - dst_sample, - dst_sample_stride ? dst_sample_stride : width, - dst_uv, - dst_sample_stride ? dst_sample_stride : width, - width, height); - break; - } - case FOURCC_NV21: { - uint8* dst_vu = dst_sample + width * height; - r = I420ToNV21(y, y_stride, - u, u_stride, - v, v_stride, - dst_sample, - dst_sample_stride ? dst_sample_stride : width, - dst_vu, - dst_sample_stride ? dst_sample_stride : width, - width, height); - break; - } - // TODO(fbarchard): Add M420 and Q420. - // Triplanar formats - // TODO(fbarchard): halfstride instead of halfwidth - case FOURCC_I420: - case FOURCC_YU12: - case FOURCC_YV12: { - int halfwidth = (width + 1) / 2; - int halfheight = (height + 1) / 2; - uint8* dst_u; - uint8* dst_v; - if (format == FOURCC_YV12) { - dst_v = dst_sample + width * height; - dst_u = dst_v + halfwidth * halfheight; - } else { - dst_u = dst_sample + width * height; - dst_v = dst_u + halfwidth * halfheight; - } - r = I420Copy(y, y_stride, - u, u_stride, - v, v_stride, - dst_sample, width, - dst_u, halfwidth, - dst_v, halfwidth, - width, height); - break; - } - case FOURCC_I422: - case FOURCC_YV16: { - int halfwidth = (width + 1) / 2; - uint8* dst_u; - uint8* dst_v; - if (format == FOURCC_YV16) { - dst_v = dst_sample + width * height; - dst_u = dst_v + halfwidth * height; - } else { - dst_u = dst_sample + width * height; - dst_v = dst_u + halfwidth * height; - } - r = I420ToI422(y, y_stride, - u, u_stride, - v, v_stride, - dst_sample, width, - dst_u, halfwidth, - dst_v, 
halfwidth, - width, height); - break; - } - case FOURCC_I444: - case FOURCC_YV24: { - uint8* dst_u; - uint8* dst_v; - if (format == FOURCC_YV24) { - dst_v = dst_sample + width * height; - dst_u = dst_v + width * height; - } else { - dst_u = dst_sample + width * height; - dst_v = dst_u + width * height; - } - r = I420ToI444(y, y_stride, - u, u_stride, - v, v_stride, - dst_sample, width, - dst_u, width, - dst_v, width, - width, height); - break; - } - case FOURCC_I411: { - int quarterwidth = (width + 3) / 4; - uint8* dst_u = dst_sample + width * height; - uint8* dst_v = dst_u + quarterwidth * height; - r = I420ToI411(y, y_stride, - u, u_stride, - v, v_stride, - dst_sample, width, - dst_u, quarterwidth, - dst_v, quarterwidth, - width, height); - break; - } - - // Formats not supported - MJPG, biplanar, some rgb formats. - default: - return -1; // unknown fourcc - return failure code. - } - return r; -} - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/convert_from_argb.cc b/drivers/theoraplayer/src/YUV/libyuv/src/convert_from_argb.cc deleted file mode 100755 index 41421fb30b..0000000000 --- a/drivers/theoraplayer/src/YUV/libyuv/src/convert_from_argb.cc +++ /dev/null @@ -1,1096 +0,0 @@ -/* - * Copyright 2012 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include "libyuv/convert_from_argb.h" - -#include "libyuv/basic_types.h" -#include "libyuv/cpu_id.h" -#include "libyuv/format_conversion.h" -#include "libyuv/planar_functions.h" -#include "libyuv/row.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -// ARGB little endian (bgra in memory) to I444 -LIBYUV_API -int ARGBToI444(const uint8* src_argb, int src_stride_argb, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { - if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { - return -1; - } - if (height < 0) { - height = -height; - src_argb = src_argb + (height - 1) * src_stride_argb; - src_stride_argb = -src_stride_argb; - } - // Coalesce rows. - if (src_stride_argb == width * 4 && - dst_stride_y == width && - dst_stride_u == width && - dst_stride_v == width) { - width *= height; - height = 1; - src_stride_argb = dst_stride_y = dst_stride_u = dst_stride_v = 0; - } - void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = - ARGBToYRow_C; - void (*ARGBToUV444Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v, - int pix) = ARGBToUV444Row_C; -#if defined(HAS_ARGBTOUV444ROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { - ARGBToUV444Row = ARGBToUV444Row_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ARGBToUV444Row = ARGBToUV444Row_Unaligned_SSSE3; - if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) { - ARGBToUV444Row = ARGBToUV444Row_SSSE3; - } - } - } -#endif -#if defined(HAS_ARGBTOYROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { - ARGBToYRow = ARGBToYRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ARGBToYRow = ARGBToYRow_Unaligned_SSSE3; - if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) && - IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { - ARGBToYRow = ARGBToYRow_SSSE3; - } - } - } - -#elif defined(HAS_ARGBTOYROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && width >= 8) { - 
ARGBToYRow = ARGBToYRow_Any_NEON; - ARGBToUV444Row = ARGBToUV444Row_Any_NEON; - if (IS_ALIGNED(width, 8)) { - ARGBToYRow = ARGBToYRow_NEON; - ARGBToUV444Row = ARGBToUV444Row_NEON; - } - } -#endif - - for (int y = 0; y < height; ++y) { - ARGBToUV444Row(src_argb, dst_u, dst_v, width); - ARGBToYRow(src_argb, dst_y, width); - src_argb += src_stride_argb; - dst_y += dst_stride_y; - dst_u += dst_stride_u; - dst_v += dst_stride_v; - } - return 0; -} - -// ARGB little endian (bgra in memory) to I422 -LIBYUV_API -int ARGBToI422(const uint8* src_argb, int src_stride_argb, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { - if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { - return -1; - } - if (height < 0) { - height = -height; - src_argb = src_argb + (height - 1) * src_stride_argb; - src_stride_argb = -src_stride_argb; - } - // Coalesce rows. - if (src_stride_argb == width * 4 && - dst_stride_y == width && - dst_stride_u * 2 == width && - dst_stride_v * 2 == width) { - width *= height; - height = 1; - src_stride_argb = dst_stride_y = dst_stride_u = dst_stride_v = 0; - } - void (*ARGBToUV422Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v, - int pix) = ARGBToUV422Row_C; -#if defined(HAS_ARGBTOUV422ROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { - ARGBToUV422Row = ARGBToUV422Row_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ARGBToUV422Row = ARGBToUV422Row_Unaligned_SSSE3; - if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) { - ARGBToUV422Row = ARGBToUV422Row_SSSE3; - } - } - } -#endif - - void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = - ARGBToYRow_C; -#if defined(HAS_ARGBTOYROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { - ARGBToYRow = ARGBToYRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ARGBToYRow = ARGBToYRow_Unaligned_SSSE3; - if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) && - 
IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { - ARGBToYRow = ARGBToYRow_SSSE3; - } - } - } -#elif defined(HAS_ARGBTOYROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && width >= 8) { - ARGBToYRow = ARGBToYRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - ARGBToYRow = ARGBToYRow_NEON; - } - if (width >= 16) { - ARGBToUV422Row = ARGBToUV422Row_Any_NEON; - if (IS_ALIGNED(width, 16)) { - ARGBToUV422Row = ARGBToUV422Row_NEON; - } - } - } -#endif - - for (int y = 0; y < height; ++y) { - ARGBToUV422Row(src_argb, dst_u, dst_v, width); - ARGBToYRow(src_argb, dst_y, width); - src_argb += src_stride_argb; - dst_y += dst_stride_y; - dst_u += dst_stride_u; - dst_v += dst_stride_v; - } - return 0; -} - -// ARGB little endian (bgra in memory) to I411 -LIBYUV_API -int ARGBToI411(const uint8* src_argb, int src_stride_argb, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { - if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { - return -1; - } - if (height < 0) { - height = -height; - src_argb = src_argb + (height - 1) * src_stride_argb; - src_stride_argb = -src_stride_argb; - } - // Coalesce rows. 
- if (src_stride_argb == width * 4 && - dst_stride_y == width && - dst_stride_u * 4 == width && - dst_stride_v * 4 == width) { - width *= height; - height = 1; - src_stride_argb = dst_stride_y = dst_stride_u = dst_stride_v = 0; - } - void (*ARGBToUV411Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v, - int pix) = ARGBToUV411Row_C; - void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = - ARGBToYRow_C; -#if defined(HAS_ARGBTOYROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { - ARGBToYRow = ARGBToYRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ARGBToYRow = ARGBToYRow_Unaligned_SSSE3; - if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) && - IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { - ARGBToYRow = ARGBToYRow_SSSE3; - } - } - } -#endif -#if defined(HAS_ARGBTOYROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2) && width >= 32) { - ARGBToYRow = ARGBToYRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - ARGBToYRow = ARGBToYRow_AVX2; - } - } -#endif -#if defined(HAS_ARGBTOYROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && width >= 8) { - ARGBToYRow = ARGBToYRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - ARGBToYRow = ARGBToYRow_NEON; - } - if (width >= 32) { - ARGBToUV411Row = ARGBToUV411Row_Any_NEON; - if (IS_ALIGNED(width, 32)) { - ARGBToUV411Row = ARGBToUV411Row_NEON; - } - } - } -#endif - - for (int y = 0; y < height; ++y) { - ARGBToUV411Row(src_argb, dst_u, dst_v, width); - ARGBToYRow(src_argb, dst_y, width); - src_argb += src_stride_argb; - dst_y += dst_stride_y; - dst_u += dst_stride_u; - dst_v += dst_stride_v; - } - return 0; -} - -LIBYUV_API -int ARGBToNV12(const uint8* src_argb, int src_stride_argb, - uint8* dst_y, int dst_stride_y, - uint8* dst_uv, int dst_stride_uv, - int width, int height) { - if (!src_argb || - !dst_y || !dst_uv || - width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. 
- if (height < 0) { - height = -height; - src_argb = src_argb + (height - 1) * src_stride_argb; - src_stride_argb = -src_stride_argb; - } - void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C; - void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = - ARGBToYRow_C; -#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { - ARGBToUVRow = ARGBToUVRow_Any_SSSE3; - ARGBToYRow = ARGBToYRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ARGBToUVRow = ARGBToUVRow_Unaligned_SSSE3; - ARGBToYRow = ARGBToYRow_Unaligned_SSSE3; - if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) { - ARGBToUVRow = ARGBToUVRow_SSSE3; - if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { - ARGBToYRow = ARGBToYRow_SSSE3; - } - } - } - } -#elif defined(HAS_ARGBTOYROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && width >= 8) { - ARGBToYRow = ARGBToYRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - ARGBToYRow = ARGBToYRow_NEON; - } - if (width >= 16) { - ARGBToUVRow = ARGBToUVRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - ARGBToUVRow = ARGBToUVRow_NEON; - } - } - } -#endif - int halfwidth = (width + 1) >> 1; - void (*MergeUVRow_)(const uint8* src_u, const uint8* src_v, uint8* dst_uv, - int width) = MergeUVRow_C; -#if defined(HAS_MERGEUVROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && halfwidth >= 16) { - MergeUVRow_ = MergeUVRow_Any_SSE2; - if (IS_ALIGNED(halfwidth, 16)) { - MergeUVRow_ = MergeUVRow_Unaligned_SSE2; - if (IS_ALIGNED(dst_uv, 16) && IS_ALIGNED(dst_stride_uv, 16)) { - MergeUVRow_ = MergeUVRow_SSE2; - } - } - } -#endif -#if defined(HAS_MERGEUVROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2) && halfwidth >= 32) { - MergeUVRow_ = MergeUVRow_Any_AVX2; - if (IS_ALIGNED(halfwidth, 32)) { - MergeUVRow_ = MergeUVRow_AVX2; - } - } -#endif -#if defined(HAS_MERGEUVROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && halfwidth >= 16) { - MergeUVRow_ = 
MergeUVRow_Any_NEON; - if (IS_ALIGNED(halfwidth, 16)) { - MergeUVRow_ = MergeUVRow_NEON; - } - } -#endif - - // Allocate a rows of uv. - align_buffer_64(row_u, ((halfwidth + 15) & ~15) * 2); - uint8* row_v = row_u + ((halfwidth + 15) & ~15); - - for (int y = 0; y < height - 1; y += 2) { - ARGBToUVRow(src_argb, src_stride_argb, row_u, row_v, width); - MergeUVRow_(row_u, row_v, dst_uv, halfwidth); - ARGBToYRow(src_argb, dst_y, width); - ARGBToYRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width); - src_argb += src_stride_argb * 2; - dst_y += dst_stride_y * 2; - dst_uv += dst_stride_uv; - } - if (height & 1) { - ARGBToUVRow(src_argb, 0, row_u, row_v, width); - MergeUVRow_(row_u, row_v, dst_uv, halfwidth); - ARGBToYRow(src_argb, dst_y, width); - } - free_aligned_buffer_64(row_u); - return 0; -} - -// Same as NV12 but U and V swapped. -LIBYUV_API -int ARGBToNV21(const uint8* src_argb, int src_stride_argb, - uint8* dst_y, int dst_stride_y, - uint8* dst_uv, int dst_stride_uv, - int width, int height) { - if (!src_argb || - !dst_y || !dst_uv || - width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. 
- if (height < 0) { - height = -height; - src_argb = src_argb + (height - 1) * src_stride_argb; - src_stride_argb = -src_stride_argb; - } - void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C; - void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = - ARGBToYRow_C; -#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { - ARGBToUVRow = ARGBToUVRow_Any_SSSE3; - ARGBToYRow = ARGBToYRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ARGBToUVRow = ARGBToUVRow_Unaligned_SSSE3; - ARGBToYRow = ARGBToYRow_Unaligned_SSSE3; - if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) { - ARGBToUVRow = ARGBToUVRow_SSSE3; - if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { - ARGBToYRow = ARGBToYRow_SSSE3; - } - } - } - } -#elif defined(HAS_ARGBTOYROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && width >= 8) { - ARGBToYRow = ARGBToYRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - ARGBToYRow = ARGBToYRow_NEON; - } - if (width >= 16) { - ARGBToUVRow = ARGBToUVRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - ARGBToUVRow = ARGBToUVRow_NEON; - } - } - } -#endif - int halfwidth = (width + 1) >> 1; - void (*MergeUVRow_)(const uint8* src_u, const uint8* src_v, uint8* dst_uv, - int width) = MergeUVRow_C; -#if defined(HAS_MERGEUVROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && halfwidth >= 16) { - MergeUVRow_ = MergeUVRow_Any_SSE2; - if (IS_ALIGNED(halfwidth, 16)) { - MergeUVRow_ = MergeUVRow_Unaligned_SSE2; - if (IS_ALIGNED(dst_uv, 16) && IS_ALIGNED(dst_stride_uv, 16)) { - MergeUVRow_ = MergeUVRow_SSE2; - } - } - } -#endif -#if defined(HAS_MERGEUVROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2) && halfwidth >= 32) { - MergeUVRow_ = MergeUVRow_Any_AVX2; - if (IS_ALIGNED(halfwidth, 32)) { - MergeUVRow_ = MergeUVRow_AVX2; - } - } -#endif -#if defined(HAS_MERGEUVROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && halfwidth >= 16) { - MergeUVRow_ = 
MergeUVRow_Any_NEON; - if (IS_ALIGNED(halfwidth, 16)) { - MergeUVRow_ = MergeUVRow_NEON; - } - } -#endif - - // Allocate a rows of uv. - align_buffer_64(row_u, ((halfwidth + 15) & ~15) * 2); - uint8* row_v = row_u + ((halfwidth + 15) & ~15); - - for (int y = 0; y < height - 1; y += 2) { - ARGBToUVRow(src_argb, src_stride_argb, row_u, row_v, width); - MergeUVRow_(row_v, row_u, dst_uv, halfwidth); - ARGBToYRow(src_argb, dst_y, width); - ARGBToYRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width); - src_argb += src_stride_argb * 2; - dst_y += dst_stride_y * 2; - dst_uv += dst_stride_uv; - } - if (height & 1) { - ARGBToUVRow(src_argb, 0, row_u, row_v, width); - MergeUVRow_(row_v, row_u, dst_uv, halfwidth); - ARGBToYRow(src_argb, dst_y, width); - } - free_aligned_buffer_64(row_u); - return 0; -} - -// Convert ARGB to YUY2. -LIBYUV_API -int ARGBToYUY2(const uint8* src_argb, int src_stride_argb, - uint8* dst_yuy2, int dst_stride_yuy2, - int width, int height) { - if (!src_argb || !dst_yuy2 || - width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - dst_yuy2 = dst_yuy2 + (height - 1) * dst_stride_yuy2; - dst_stride_yuy2 = -dst_stride_yuy2; - } - // Coalesce rows. 
- if (src_stride_argb == width * 4 && - dst_stride_yuy2 == width * 2) { - width *= height; - height = 1; - src_stride_argb = dst_stride_yuy2 = 0; - } - void (*ARGBToUV422Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v, - int pix) = ARGBToUV422Row_C; -#if defined(HAS_ARGBTOUV422ROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { - ARGBToUV422Row = ARGBToUV422Row_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ARGBToUV422Row = ARGBToUV422Row_Unaligned_SSSE3; - if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) { - ARGBToUV422Row = ARGBToUV422Row_SSSE3; - } - } - } -#endif - void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = - ARGBToYRow_C; -#if defined(HAS_ARGBTOYROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { - ARGBToYRow = ARGBToYRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ARGBToYRow = ARGBToYRow_Unaligned_SSSE3; - if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) { - ARGBToYRow = ARGBToYRow_SSSE3; - } - } - } -#elif defined(HAS_ARGBTOYROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && width >= 8) { - ARGBToYRow = ARGBToYRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - ARGBToYRow = ARGBToYRow_NEON; - } - if (width >= 16) { - ARGBToUV422Row = ARGBToUV422Row_Any_NEON; - if (IS_ALIGNED(width, 16)) { - ARGBToUV422Row = ARGBToUV422Row_NEON; - } - } - } -#endif - - void (*I422ToYUY2Row)(const uint8* src_y, const uint8* src_u, - const uint8* src_v, uint8* dst_yuy2, int width) = - I422ToYUY2Row_C; -#if defined(HAS_I422TOYUY2ROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && width >= 16) { - I422ToYUY2Row = I422ToYUY2Row_Any_SSE2; - if (IS_ALIGNED(width, 16)) { - I422ToYUY2Row = I422ToYUY2Row_SSE2; - } - } -#elif defined(HAS_I422TOYUY2ROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && width >= 16) { - I422ToYUY2Row = I422ToYUY2Row_Any_NEON; - if (IS_ALIGNED(width, 16)) { - I422ToYUY2Row = I422ToYUY2Row_NEON; - } - } -#endif - - // Allocate a rows of yuv. 
- align_buffer_64(row_y, ((width + 63) & ~63) * 2); - uint8* row_u = row_y + ((width + 63) & ~63); - uint8* row_v = row_u + ((width + 63) & ~63) / 2; - - for (int y = 0; y < height; ++y) { - ARGBToUV422Row(src_argb, row_u, row_v, width); - ARGBToYRow(src_argb, row_y, width); - I422ToYUY2Row(row_y, row_u, row_v, dst_yuy2, width); - src_argb += src_stride_argb; - dst_yuy2 += dst_stride_yuy2; - } - - free_aligned_buffer_64(row_y); - return 0; -} - -// Convert ARGB to UYVY. -LIBYUV_API -int ARGBToUYVY(const uint8* src_argb, int src_stride_argb, - uint8* dst_uyvy, int dst_stride_uyvy, - int width, int height) { - if (!src_argb || !dst_uyvy || - width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - dst_uyvy = dst_uyvy + (height - 1) * dst_stride_uyvy; - dst_stride_uyvy = -dst_stride_uyvy; - } - // Coalesce rows. - if (src_stride_argb == width * 4 && - dst_stride_uyvy == width * 2) { - width *= height; - height = 1; - src_stride_argb = dst_stride_uyvy = 0; - } - void (*ARGBToUV422Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v, - int pix) = ARGBToUV422Row_C; -#if defined(HAS_ARGBTOUV422ROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { - ARGBToUV422Row = ARGBToUV422Row_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ARGBToUV422Row = ARGBToUV422Row_Unaligned_SSSE3; - if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) { - ARGBToUV422Row = ARGBToUV422Row_SSSE3; - } - } - } -#endif - void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = - ARGBToYRow_C; -#if defined(HAS_ARGBTOYROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { - ARGBToYRow = ARGBToYRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ARGBToYRow = ARGBToYRow_Unaligned_SSSE3; - if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) { - ARGBToYRow = ARGBToYRow_SSSE3; - } - } - } -#elif defined(HAS_ARGBTOYROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && width >= 8) { - ARGBToYRow = 
ARGBToYRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - ARGBToYRow = ARGBToYRow_NEON; - } - if (width >= 16) { - ARGBToUV422Row = ARGBToUV422Row_Any_NEON; - if (IS_ALIGNED(width, 16)) { - ARGBToUV422Row = ARGBToUV422Row_NEON; - } - } - } -#endif - - void (*I422ToUYVYRow)(const uint8* src_y, const uint8* src_u, - const uint8* src_v, uint8* dst_uyvy, int width) = - I422ToUYVYRow_C; -#if defined(HAS_I422TOUYVYROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && width >= 16) { - I422ToUYVYRow = I422ToUYVYRow_Any_SSE2; - if (IS_ALIGNED(width, 16)) { - I422ToUYVYRow = I422ToUYVYRow_SSE2; - } - } -#elif defined(HAS_I422TOUYVYROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && width >= 16) { - I422ToUYVYRow = I422ToUYVYRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - I422ToUYVYRow = I422ToUYVYRow_NEON; - } - } -#endif - - // Allocate a rows of yuv. - align_buffer_64(row_y, ((width + 63) & ~63) * 2); - uint8* row_u = row_y + ((width + 63) & ~63); - uint8* row_v = row_u + ((width + 63) & ~63) / 2; - - for (int y = 0; y < height; ++y) { - ARGBToUV422Row(src_argb, row_u, row_v, width); - ARGBToYRow(src_argb, row_y, width); - I422ToUYVYRow(row_y, row_u, row_v, dst_uyvy, width); - src_argb += src_stride_argb; - dst_uyvy += dst_stride_uyvy; - } - - free_aligned_buffer_64(row_y); - return 0; -} - -// Convert ARGB to I400. -LIBYUV_API -int ARGBToI400(const uint8* src_argb, int src_stride_argb, - uint8* dst_y, int dst_stride_y, - int width, int height) { - if (!src_argb || !dst_y || width <= 0 || height == 0) { - return -1; - } - if (height < 0) { - height = -height; - src_argb = src_argb + (height - 1) * src_stride_argb; - src_stride_argb = -src_stride_argb; - } - // Coalesce rows. 
- if (src_stride_argb == width * 4 && - dst_stride_y == width) { - width *= height; - height = 1; - src_stride_argb = dst_stride_y = 0; - } - void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = - ARGBToYRow_C; -#if defined(HAS_ARGBTOYROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { - ARGBToYRow = ARGBToYRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ARGBToYRow = ARGBToYRow_Unaligned_SSSE3; - if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) && - IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { - ARGBToYRow = ARGBToYRow_SSSE3; - } - } - } -#endif -#if defined(HAS_ARGBTOYROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2) && width >= 32) { - ARGBToYRow = ARGBToYRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - ARGBToYRow = ARGBToYRow_AVX2; - } - } -#endif -#if defined(HAS_ARGBTOYROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && width >= 8) { - ARGBToYRow = ARGBToYRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - ARGBToYRow = ARGBToYRow_NEON; - } - } -#endif - - for (int y = 0; y < height; ++y) { - ARGBToYRow(src_argb, dst_y, width); - src_argb += src_stride_argb; - dst_y += dst_stride_y; - } - return 0; -} - -// Shuffle table for converting ARGB to RGBA. -static uvec8 kShuffleMaskARGBToRGBA = { - 3u, 0u, 1u, 2u, 7u, 4u, 5u, 6u, 11u, 8u, 9u, 10u, 15u, 12u, 13u, 14u -}; - -// Convert ARGB to RGBA. -LIBYUV_API -int ARGBToRGBA(const uint8* src_argb, int src_stride_argb, - uint8* dst_rgba, int dst_stride_rgba, - int width, int height) { - return ARGBShuffle(src_argb, src_stride_argb, - dst_rgba, dst_stride_rgba, - (const uint8*)(&kShuffleMaskARGBToRGBA), - width, height); -} - -// Convert ARGB To RGB24. 
-LIBYUV_API -int ARGBToRGB24(const uint8* src_argb, int src_stride_argb, - uint8* dst_rgb24, int dst_stride_rgb24, - int width, int height) { - if (!src_argb || !dst_rgb24 || width <= 0 || height == 0) { - return -1; - } - if (height < 0) { - height = -height; - src_argb = src_argb + (height - 1) * src_stride_argb; - src_stride_argb = -src_stride_argb; - } - // Coalesce rows. - if (src_stride_argb == width * 4 && - dst_stride_rgb24 == width * 3) { - width *= height; - height = 1; - src_stride_argb = dst_stride_rgb24 = 0; - } - void (*ARGBToRGB24Row)(const uint8* src_argb, uint8* dst_rgb, int pix) = - ARGBToRGB24Row_C; -#if defined(HAS_ARGBTORGB24ROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { - ARGBToRGB24Row = ARGBToRGB24Row_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ARGBToRGB24Row = ARGBToRGB24Row_SSSE3; - } - } -#elif defined(HAS_ARGBTORGB24ROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && width >= 8) { - ARGBToRGB24Row = ARGBToRGB24Row_Any_NEON; - if (IS_ALIGNED(width, 8)) { - ARGBToRGB24Row = ARGBToRGB24Row_NEON; - } - } -#endif - - for (int y = 0; y < height; ++y) { - ARGBToRGB24Row(src_argb, dst_rgb24, width); - src_argb += src_stride_argb; - dst_rgb24 += dst_stride_rgb24; - } - return 0; -} - -// Convert ARGB To RAW. -LIBYUV_API -int ARGBToRAW(const uint8* src_argb, int src_stride_argb, - uint8* dst_raw, int dst_stride_raw, - int width, int height) { - if (!src_argb || !dst_raw || width <= 0 || height == 0) { - return -1; - } - if (height < 0) { - height = -height; - src_argb = src_argb + (height - 1) * src_stride_argb; - src_stride_argb = -src_stride_argb; - } - // Coalesce rows. 
- if (src_stride_argb == width * 4 && - dst_stride_raw == width * 3) { - width *= height; - height = 1; - src_stride_argb = dst_stride_raw = 0; - } - void (*ARGBToRAWRow)(const uint8* src_argb, uint8* dst_rgb, int pix) = - ARGBToRAWRow_C; -#if defined(HAS_ARGBTORAWROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { - ARGBToRAWRow = ARGBToRAWRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ARGBToRAWRow = ARGBToRAWRow_SSSE3; - } - } -#elif defined(HAS_ARGBTORAWROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && width >= 8) { - ARGBToRAWRow = ARGBToRAWRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - ARGBToRAWRow = ARGBToRAWRow_NEON; - } - } -#endif - - for (int y = 0; y < height; ++y) { - ARGBToRAWRow(src_argb, dst_raw, width); - src_argb += src_stride_argb; - dst_raw += dst_stride_raw; - } - return 0; -} - -// Convert ARGB To RGB565. -LIBYUV_API -int ARGBToRGB565(const uint8* src_argb, int src_stride_argb, - uint8* dst_rgb565, int dst_stride_rgb565, - int width, int height) { - if (!src_argb || !dst_rgb565 || width <= 0 || height == 0) { - return -1; - } - if (height < 0) { - height = -height; - src_argb = src_argb + (height - 1) * src_stride_argb; - src_stride_argb = -src_stride_argb; - } - // Coalesce rows. 
- if (src_stride_argb == width * 4 && - dst_stride_rgb565 == width * 2) { - width *= height; - height = 1; - src_stride_argb = dst_stride_rgb565 = 0; - } - void (*ARGBToRGB565Row)(const uint8* src_argb, uint8* dst_rgb, int pix) = - ARGBToRGB565Row_C; -#if defined(HAS_ARGBTORGB565ROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && width >= 4 && - IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) { - ARGBToRGB565Row = ARGBToRGB565Row_Any_SSE2; - if (IS_ALIGNED(width, 4)) { - ARGBToRGB565Row = ARGBToRGB565Row_SSE2; - } - } -#elif defined(HAS_ARGBTORGB565ROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && width >= 8) { - ARGBToRGB565Row = ARGBToRGB565Row_Any_NEON; - if (IS_ALIGNED(width, 8)) { - ARGBToRGB565Row = ARGBToRGB565Row_NEON; - } - } -#endif - - for (int y = 0; y < height; ++y) { - ARGBToRGB565Row(src_argb, dst_rgb565, width); - src_argb += src_stride_argb; - dst_rgb565 += dst_stride_rgb565; - } - return 0; -} - -// Convert ARGB To ARGB1555. -LIBYUV_API -int ARGBToARGB1555(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb1555, int dst_stride_argb1555, - int width, int height) { - if (!src_argb || !dst_argb1555 || width <= 0 || height == 0) { - return -1; - } - if (height < 0) { - height = -height; - src_argb = src_argb + (height - 1) * src_stride_argb; - src_stride_argb = -src_stride_argb; - } - // Coalesce rows. 
- if (src_stride_argb == width * 4 && - dst_stride_argb1555 == width * 2) { - width *= height; - height = 1; - src_stride_argb = dst_stride_argb1555 = 0; - } - void (*ARGBToARGB1555Row)(const uint8* src_argb, uint8* dst_rgb, int pix) = - ARGBToARGB1555Row_C; -#if defined(HAS_ARGBTOARGB1555ROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && width >= 4 && - IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) { - ARGBToARGB1555Row = ARGBToARGB1555Row_Any_SSE2; - if (IS_ALIGNED(width, 4)) { - ARGBToARGB1555Row = ARGBToARGB1555Row_SSE2; - } - } -#elif defined(HAS_ARGBTOARGB1555ROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && width >= 8) { - ARGBToARGB1555Row = ARGBToARGB1555Row_Any_NEON; - if (IS_ALIGNED(width, 8)) { - ARGBToARGB1555Row = ARGBToARGB1555Row_NEON; - } - } -#endif - - for (int y = 0; y < height; ++y) { - ARGBToARGB1555Row(src_argb, dst_argb1555, width); - src_argb += src_stride_argb; - dst_argb1555 += dst_stride_argb1555; - } - return 0; -} - -// Convert ARGB To ARGB4444. -LIBYUV_API -int ARGBToARGB4444(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb4444, int dst_stride_argb4444, - int width, int height) { - if (!src_argb || !dst_argb4444 || width <= 0 || height == 0) { - return -1; - } - if (height < 0) { - height = -height; - src_argb = src_argb + (height - 1) * src_stride_argb; - src_stride_argb = -src_stride_argb; - } - // Coalesce rows. 
- if (src_stride_argb == width * 4 && - dst_stride_argb4444 == width * 2) { - width *= height; - height = 1; - src_stride_argb = dst_stride_argb4444 = 0; - } - void (*ARGBToARGB4444Row)(const uint8* src_argb, uint8* dst_rgb, int pix) = - ARGBToARGB4444Row_C; -#if defined(HAS_ARGBTOARGB4444ROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && width >= 4 && - IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) { - ARGBToARGB4444Row = ARGBToARGB4444Row_Any_SSE2; - if (IS_ALIGNED(width, 4)) { - ARGBToARGB4444Row = ARGBToARGB4444Row_SSE2; - } - } -#elif defined(HAS_ARGBTOARGB4444ROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && width >= 8) { - ARGBToARGB4444Row = ARGBToARGB4444Row_Any_NEON; - if (IS_ALIGNED(width, 8)) { - ARGBToARGB4444Row = ARGBToARGB4444Row_NEON; - } - } -#endif - - for (int y = 0; y < height; ++y) { - ARGBToARGB4444Row(src_argb, dst_argb4444, width); - src_argb += src_stride_argb; - dst_argb4444 += dst_stride_argb4444; - } - return 0; -} - -// Convert ARGB to J420. (JPeg full range I420). -LIBYUV_API -int ARGBToJ420(const uint8* src_argb, int src_stride_argb, - uint8* dst_yj, int dst_stride_yj, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { - if (!src_argb || - !dst_yj || !dst_u || !dst_v || - width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. 
- if (height < 0) { - height = -height; - src_argb = src_argb + (height - 1) * src_stride_argb; - src_stride_argb = -src_stride_argb; - } - void (*ARGBToUVJRow)(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width) = ARGBToUVJRow_C; - void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_yj, int pix) = - ARGBToYJRow_C; -#if defined(HAS_ARGBTOYJROW_SSSE3) && defined(HAS_ARGBTOUVJROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { - ARGBToUVJRow = ARGBToUVJRow_Any_SSSE3; - ARGBToYJRow = ARGBToYJRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ARGBToUVJRow = ARGBToUVJRow_Unaligned_SSSE3; - ARGBToYJRow = ARGBToYJRow_Unaligned_SSSE3; - if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) { - ARGBToUVJRow = ARGBToUVJRow_SSSE3; - if (IS_ALIGNED(dst_yj, 16) && IS_ALIGNED(dst_stride_yj, 16)) { - ARGBToYJRow = ARGBToYJRow_SSSE3; - } - } - } - } -#endif -#if defined(HAS_ARGBTOYJROW_AVX2) && defined(HAS_ARGBTOUVJROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2) && width >= 32) { - ARGBToYJRow = ARGBToYJRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - ARGBToYJRow = ARGBToYJRow_AVX2; - } - } -#endif -#if defined(HAS_ARGBTOYJROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && width >= 8) { - ARGBToYJRow = ARGBToYJRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - ARGBToYJRow = ARGBToYJRow_NEON; - } - if (width >= 16) { - ARGBToUVJRow = ARGBToUVJRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - ARGBToUVJRow = ARGBToUVJRow_NEON; - } - } - } -#endif - - for (int y = 0; y < height - 1; y += 2) { - ARGBToUVJRow(src_argb, src_stride_argb, dst_u, dst_v, width); - ARGBToYJRow(src_argb, dst_yj, width); - ARGBToYJRow(src_argb + src_stride_argb, dst_yj + dst_stride_yj, width); - src_argb += src_stride_argb * 2; - dst_yj += dst_stride_yj * 2; - dst_u += dst_stride_u; - dst_v += dst_stride_v; - } - if (height & 1) { - ARGBToUVJRow(src_argb, 0, dst_u, dst_v, width); - ARGBToYJRow(src_argb, dst_yj, width); - } - return 0; -} - -// Convert ARGB to J400. 
-LIBYUV_API -int ARGBToJ400(const uint8* src_argb, int src_stride_argb, - uint8* dst_yj, int dst_stride_yj, - int width, int height) { - if (!src_argb || !dst_yj || width <= 0 || height == 0) { - return -1; - } - if (height < 0) { - height = -height; - src_argb = src_argb + (height - 1) * src_stride_argb; - src_stride_argb = -src_stride_argb; - } - // Coalesce rows. - if (src_stride_argb == width * 4 && - dst_stride_yj == width) { - width *= height; - height = 1; - src_stride_argb = dst_stride_yj = 0; - } - void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_yj, int pix) = - ARGBToYJRow_C; -#if defined(HAS_ARGBTOYJROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { - ARGBToYJRow = ARGBToYJRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ARGBToYJRow = ARGBToYJRow_Unaligned_SSSE3; - if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) && - IS_ALIGNED(dst_yj, 16) && IS_ALIGNED(dst_stride_yj, 16)) { - ARGBToYJRow = ARGBToYJRow_SSSE3; - } - } - } -#endif -#if defined(HAS_ARGBTOYJROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2) && width >= 32) { - ARGBToYJRow = ARGBToYJRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - ARGBToYJRow = ARGBToYJRow_AVX2; - } - } -#endif -#if defined(HAS_ARGBTOYJROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && width >= 8) { - ARGBToYJRow = ARGBToYJRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - ARGBToYJRow = ARGBToYJRow_NEON; - } - } -#endif - - for (int y = 0; y < height; ++y) { - ARGBToYJRow(src_argb, dst_yj, width); - src_argb += src_stride_argb; - dst_yj += dst_stride_yj; - } - return 0; -} - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/convert_jpeg.cc b/drivers/theoraplayer/src/YUV/libyuv/src/convert_jpeg.cc deleted file mode 100755 index bcb980f7f1..0000000000 --- a/drivers/theoraplayer/src/YUV/libyuv/src/convert_jpeg.cc +++ /dev/null @@ -1,392 +0,0 @@ -/* - * Copyright 2011 The LibYuv Project Authors. All rights reserved. 
- * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "libyuv/convert.h" - -#ifdef HAVE_JPEG -#include "libyuv/mjpeg_decoder.h" -#endif - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -#ifdef HAVE_JPEG -struct I420Buffers { - uint8* y; - int y_stride; - uint8* u; - int u_stride; - uint8* v; - int v_stride; - int w; - int h; -}; - -static void JpegCopyI420(void* opaque, - const uint8* const* data, - const int* strides, - int rows) { - I420Buffers* dest = (I420Buffers*)(opaque); - I420Copy(data[0], strides[0], - data[1], strides[1], - data[2], strides[2], - dest->y, dest->y_stride, - dest->u, dest->u_stride, - dest->v, dest->v_stride, - dest->w, rows); - dest->y += rows * dest->y_stride; - dest->u += ((rows + 1) >> 1) * dest->u_stride; - dest->v += ((rows + 1) >> 1) * dest->v_stride; - dest->h -= rows; -} - -static void JpegI422ToI420(void* opaque, - const uint8* const* data, - const int* strides, - int rows) { - I420Buffers* dest = (I420Buffers*)(opaque); - I422ToI420(data[0], strides[0], - data[1], strides[1], - data[2], strides[2], - dest->y, dest->y_stride, - dest->u, dest->u_stride, - dest->v, dest->v_stride, - dest->w, rows); - dest->y += rows * dest->y_stride; - dest->u += ((rows + 1) >> 1) * dest->u_stride; - dest->v += ((rows + 1) >> 1) * dest->v_stride; - dest->h -= rows; -} - -static void JpegI444ToI420(void* opaque, - const uint8* const* data, - const int* strides, - int rows) { - I420Buffers* dest = (I420Buffers*)(opaque); - I444ToI420(data[0], strides[0], - data[1], strides[1], - data[2], strides[2], - dest->y, dest->y_stride, - dest->u, dest->u_stride, - dest->v, dest->v_stride, - dest->w, rows); - dest->y += rows * dest->y_stride; - dest->u 
+= ((rows + 1) >> 1) * dest->u_stride; - dest->v += ((rows + 1) >> 1) * dest->v_stride; - dest->h -= rows; -} - -static void JpegI411ToI420(void* opaque, - const uint8* const* data, - const int* strides, - int rows) { - I420Buffers* dest = (I420Buffers*)(opaque); - I411ToI420(data[0], strides[0], - data[1], strides[1], - data[2], strides[2], - dest->y, dest->y_stride, - dest->u, dest->u_stride, - dest->v, dest->v_stride, - dest->w, rows); - dest->y += rows * dest->y_stride; - dest->u += ((rows + 1) >> 1) * dest->u_stride; - dest->v += ((rows + 1) >> 1) * dest->v_stride; - dest->h -= rows; -} - -static void JpegI400ToI420(void* opaque, - const uint8* const* data, - const int* strides, - int rows) { - I420Buffers* dest = (I420Buffers*)(opaque); - I400ToI420(data[0], strides[0], - dest->y, dest->y_stride, - dest->u, dest->u_stride, - dest->v, dest->v_stride, - dest->w, rows); - dest->y += rows * dest->y_stride; - dest->u += ((rows + 1) >> 1) * dest->u_stride; - dest->v += ((rows + 1) >> 1) * dest->v_stride; - dest->h -= rows; -} - -// Query size of MJPG in pixels. -LIBYUV_API -int MJPGSize(const uint8* sample, size_t sample_size, - int* width, int* height) { - MJpegDecoder mjpeg_decoder; - LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(sample, sample_size); - if (ret) { - *width = mjpeg_decoder.GetWidth(); - *height = mjpeg_decoder.GetHeight(); - } - mjpeg_decoder.UnloadFrame(); - return ret ? 0 : -1; // -1 for runtime failure. -} - -// MJPG (Motion JPeg) to I420 -// TODO(fbarchard): review w and h requirement. dw and dh may be enough. -LIBYUV_API -int MJPGToI420(const uint8* sample, - size_t sample_size, - uint8* y, int y_stride, - uint8* u, int u_stride, - uint8* v, int v_stride, - int w, int h, - int dw, int dh) { - if (sample_size == kUnknownDataSize) { - // ERROR: MJPEG frame size unknown - return -1; - } - - // TODO(fbarchard): Port MJpeg to C. 
- MJpegDecoder mjpeg_decoder; - LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(sample, sample_size); - if (ret && (mjpeg_decoder.GetWidth() != w || - mjpeg_decoder.GetHeight() != h)) { - // ERROR: MJPEG frame has unexpected dimensions - mjpeg_decoder.UnloadFrame(); - return 1; // runtime failure - } - if (ret) { - I420Buffers bufs = { y, y_stride, u, u_stride, v, v_stride, dw, dh }; - // YUV420 - if (mjpeg_decoder.GetColorSpace() == - MJpegDecoder::kColorSpaceYCbCr && - mjpeg_decoder.GetNumComponents() == 3 && - mjpeg_decoder.GetVertSampFactor(0) == 2 && - mjpeg_decoder.GetHorizSampFactor(0) == 2 && - mjpeg_decoder.GetVertSampFactor(1) == 1 && - mjpeg_decoder.GetHorizSampFactor(1) == 1 && - mjpeg_decoder.GetVertSampFactor(2) == 1 && - mjpeg_decoder.GetHorizSampFactor(2) == 1) { - ret = mjpeg_decoder.DecodeToCallback(&JpegCopyI420, &bufs, dw, dh); - // YUV422 - } else if (mjpeg_decoder.GetColorSpace() == - MJpegDecoder::kColorSpaceYCbCr && - mjpeg_decoder.GetNumComponents() == 3 && - mjpeg_decoder.GetVertSampFactor(0) == 1 && - mjpeg_decoder.GetHorizSampFactor(0) == 2 && - mjpeg_decoder.GetVertSampFactor(1) == 1 && - mjpeg_decoder.GetHorizSampFactor(1) == 1 && - mjpeg_decoder.GetVertSampFactor(2) == 1 && - mjpeg_decoder.GetHorizSampFactor(2) == 1) { - ret = mjpeg_decoder.DecodeToCallback(&JpegI422ToI420, &bufs, dw, dh); - // YUV444 - } else if (mjpeg_decoder.GetColorSpace() == - MJpegDecoder::kColorSpaceYCbCr && - mjpeg_decoder.GetNumComponents() == 3 && - mjpeg_decoder.GetVertSampFactor(0) == 1 && - mjpeg_decoder.GetHorizSampFactor(0) == 1 && - mjpeg_decoder.GetVertSampFactor(1) == 1 && - mjpeg_decoder.GetHorizSampFactor(1) == 1 && - mjpeg_decoder.GetVertSampFactor(2) == 1 && - mjpeg_decoder.GetHorizSampFactor(2) == 1) { - ret = mjpeg_decoder.DecodeToCallback(&JpegI444ToI420, &bufs, dw, dh); - // YUV411 - } else if (mjpeg_decoder.GetColorSpace() == - MJpegDecoder::kColorSpaceYCbCr && - mjpeg_decoder.GetNumComponents() == 3 && - mjpeg_decoder.GetVertSampFactor(0) == 1 && 
- mjpeg_decoder.GetHorizSampFactor(0) == 4 && - mjpeg_decoder.GetVertSampFactor(1) == 1 && - mjpeg_decoder.GetHorizSampFactor(1) == 1 && - mjpeg_decoder.GetVertSampFactor(2) == 1 && - mjpeg_decoder.GetHorizSampFactor(2) == 1) { - ret = mjpeg_decoder.DecodeToCallback(&JpegI411ToI420, &bufs, dw, dh); - // YUV400 - } else if (mjpeg_decoder.GetColorSpace() == - MJpegDecoder::kColorSpaceGrayscale && - mjpeg_decoder.GetNumComponents() == 1 && - mjpeg_decoder.GetVertSampFactor(0) == 1 && - mjpeg_decoder.GetHorizSampFactor(0) == 1) { - ret = mjpeg_decoder.DecodeToCallback(&JpegI400ToI420, &bufs, dw, dh); - } else { - // TODO(fbarchard): Implement conversion for any other colorspace/sample - // factors that occur in practice. 411 is supported by libjpeg - // ERROR: Unable to convert MJPEG frame because format is not supported - mjpeg_decoder.UnloadFrame(); - return 1; - } - } - return ret ? 0 : 1; -} - -#ifdef HAVE_JPEG -struct ARGBBuffers { - uint8* argb; - int argb_stride; - int w; - int h; -}; - -static void JpegI420ToARGB(void* opaque, - const uint8* const* data, - const int* strides, - int rows) { - ARGBBuffers* dest = (ARGBBuffers*)(opaque); - I420ToARGB(data[0], strides[0], - data[1], strides[1], - data[2], strides[2], - dest->argb, dest->argb_stride, - dest->w, rows); - dest->argb += rows * dest->argb_stride; - dest->h -= rows; -} - -static void JpegI422ToARGB(void* opaque, - const uint8* const* data, - const int* strides, - int rows) { - ARGBBuffers* dest = (ARGBBuffers*)(opaque); - I422ToARGB(data[0], strides[0], - data[1], strides[1], - data[2], strides[2], - dest->argb, dest->argb_stride, - dest->w, rows); - dest->argb += rows * dest->argb_stride; - dest->h -= rows; -} - -static void JpegI444ToARGB(void* opaque, - const uint8* const* data, - const int* strides, - int rows) { - ARGBBuffers* dest = (ARGBBuffers*)(opaque); - I444ToARGB(data[0], strides[0], - data[1], strides[1], - data[2], strides[2], - dest->argb, dest->argb_stride, - dest->w, rows); - dest->argb 
+= rows * dest->argb_stride; - dest->h -= rows; -} - -static void JpegI411ToARGB(void* opaque, - const uint8* const* data, - const int* strides, - int rows) { - ARGBBuffers* dest = (ARGBBuffers*)(opaque); - I411ToARGB(data[0], strides[0], - data[1], strides[1], - data[2], strides[2], - dest->argb, dest->argb_stride, - dest->w, rows); - dest->argb += rows * dest->argb_stride; - dest->h -= rows; -} - -static void JpegI400ToARGB(void* opaque, - const uint8* const* data, - const int* strides, - int rows) { - ARGBBuffers* dest = (ARGBBuffers*)(opaque); - I400ToARGB(data[0], strides[0], - dest->argb, dest->argb_stride, - dest->w, rows); - dest->argb += rows * dest->argb_stride; - dest->h -= rows; -} - -// MJPG (Motion JPeg) to ARGB -// TODO(fbarchard): review w and h requirement. dw and dh may be enough. -LIBYUV_API -int MJPGToARGB(const uint8* sample, - size_t sample_size, - uint8* argb, int argb_stride, - int w, int h, - int dw, int dh) { - if (sample_size == kUnknownDataSize) { - // ERROR: MJPEG frame size unknown - return -1; - } - - // TODO(fbarchard): Port MJpeg to C. 
- MJpegDecoder mjpeg_decoder; - LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(sample, sample_size); - if (ret && (mjpeg_decoder.GetWidth() != w || - mjpeg_decoder.GetHeight() != h)) { - // ERROR: MJPEG frame has unexpected dimensions - mjpeg_decoder.UnloadFrame(); - return 1; // runtime failure - } - if (ret) { - ARGBBuffers bufs = { argb, argb_stride, dw, dh }; - // YUV420 - if (mjpeg_decoder.GetColorSpace() == - MJpegDecoder::kColorSpaceYCbCr && - mjpeg_decoder.GetNumComponents() == 3 && - mjpeg_decoder.GetVertSampFactor(0) == 2 && - mjpeg_decoder.GetHorizSampFactor(0) == 2 && - mjpeg_decoder.GetVertSampFactor(1) == 1 && - mjpeg_decoder.GetHorizSampFactor(1) == 1 && - mjpeg_decoder.GetVertSampFactor(2) == 1 && - mjpeg_decoder.GetHorizSampFactor(2) == 1) { - ret = mjpeg_decoder.DecodeToCallback(&JpegI420ToARGB, &bufs, dw, dh); - // YUV422 - } else if (mjpeg_decoder.GetColorSpace() == - MJpegDecoder::kColorSpaceYCbCr && - mjpeg_decoder.GetNumComponents() == 3 && - mjpeg_decoder.GetVertSampFactor(0) == 1 && - mjpeg_decoder.GetHorizSampFactor(0) == 2 && - mjpeg_decoder.GetVertSampFactor(1) == 1 && - mjpeg_decoder.GetHorizSampFactor(1) == 1 && - mjpeg_decoder.GetVertSampFactor(2) == 1 && - mjpeg_decoder.GetHorizSampFactor(2) == 1) { - ret = mjpeg_decoder.DecodeToCallback(&JpegI422ToARGB, &bufs, dw, dh); - // YUV444 - } else if (mjpeg_decoder.GetColorSpace() == - MJpegDecoder::kColorSpaceYCbCr && - mjpeg_decoder.GetNumComponents() == 3 && - mjpeg_decoder.GetVertSampFactor(0) == 1 && - mjpeg_decoder.GetHorizSampFactor(0) == 1 && - mjpeg_decoder.GetVertSampFactor(1) == 1 && - mjpeg_decoder.GetHorizSampFactor(1) == 1 && - mjpeg_decoder.GetVertSampFactor(2) == 1 && - mjpeg_decoder.GetHorizSampFactor(2) == 1) { - ret = mjpeg_decoder.DecodeToCallback(&JpegI444ToARGB, &bufs, dw, dh); - // YUV411 - } else if (mjpeg_decoder.GetColorSpace() == - MJpegDecoder::kColorSpaceYCbCr && - mjpeg_decoder.GetNumComponents() == 3 && - mjpeg_decoder.GetVertSampFactor(0) == 1 && - 
mjpeg_decoder.GetHorizSampFactor(0) == 4 && - mjpeg_decoder.GetVertSampFactor(1) == 1 && - mjpeg_decoder.GetHorizSampFactor(1) == 1 && - mjpeg_decoder.GetVertSampFactor(2) == 1 && - mjpeg_decoder.GetHorizSampFactor(2) == 1) { - ret = mjpeg_decoder.DecodeToCallback(&JpegI411ToARGB, &bufs, dw, dh); - // YUV400 - } else if (mjpeg_decoder.GetColorSpace() == - MJpegDecoder::kColorSpaceGrayscale && - mjpeg_decoder.GetNumComponents() == 1 && - mjpeg_decoder.GetVertSampFactor(0) == 1 && - mjpeg_decoder.GetHorizSampFactor(0) == 1) { - ret = mjpeg_decoder.DecodeToCallback(&JpegI400ToARGB, &bufs, dw, dh); - } else { - // TODO(fbarchard): Implement conversion for any other colorspace/sample - // factors that occur in practice. 411 is supported by libjpeg - // ERROR: Unable to convert MJPEG frame because format is not supported - mjpeg_decoder.UnloadFrame(); - return 1; - } - } - return ret ? 0 : 1; -} -#endif - -#endif - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/convert_to_argb.cc b/drivers/theoraplayer/src/YUV/libyuv/src/convert_to_argb.cc deleted file mode 100755 index 1b228a7b4d..0000000000 --- a/drivers/theoraplayer/src/YUV/libyuv/src/convert_to_argb.cc +++ /dev/null @@ -1,327 +0,0 @@ -/* - * Copyright 2011 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include "libyuv/convert_argb.h" - -#include "libyuv/cpu_id.h" -#include "libyuv/format_conversion.h" -#ifdef HAVE_JPEG -#include "libyuv/mjpeg_decoder.h" -#endif -#include "libyuv/rotate_argb.h" -#include "libyuv/row.h" -#include "libyuv/video_common.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -// Convert camera sample to I420 with cropping, rotation and vertical flip. -// src_width is used for source stride computation -// src_height is used to compute location of planes, and indicate inversion -// sample_size is measured in bytes and is the size of the frame. -// With MJPEG it is the compressed size of the frame. -LIBYUV_API -int ConvertToARGB(const uint8* sample, size_t sample_size, - uint8* crop_argb, int argb_stride, - int crop_x, int crop_y, - int src_width, int src_height, - int crop_width, int crop_height, - enum RotationMode rotation, - uint32 fourcc) { - uint32 format = CanonicalFourCC(fourcc); - int aligned_src_width = (src_width + 1) & ~1; - const uint8* src; - const uint8* src_uv; - int abs_src_height = (src_height < 0) ? -src_height : src_height; - int inv_crop_height = (crop_height < 0) ? -crop_height : crop_height; - int r = 0; - - // One pass rotation is available for some formats. For the rest, convert - // to I420 (with optional vertical flipping) into a temporary I420 buffer, - // and then rotate the I420 to the final destination buffer. - // For in-place conversion, if destination crop_argb is same as source sample, - // also enable temporary buffer. - LIBYUV_BOOL need_buf = (rotation && format != FOURCC_ARGB) || - crop_argb == sample; - uint8* tmp_argb = crop_argb; - int tmp_argb_stride = argb_stride; - uint8* rotate_buffer = NULL; - int abs_crop_height = (crop_height < 0) ? 
-crop_height : crop_height; - - if (crop_argb == NULL || sample == NULL || - src_width <= 0 || crop_width <= 0 || - src_height == 0 || crop_height == 0) { - return -1; - } - if (src_height < 0) { - inv_crop_height = -inv_crop_height; - } - - if (need_buf) { - int argb_size = crop_width * abs_crop_height * 4; - rotate_buffer = (uint8*)malloc(argb_size); - if (!rotate_buffer) { - return 1; // Out of memory runtime error. - } - crop_argb = rotate_buffer; - argb_stride = crop_width; - } - - switch (format) { - // Single plane formats - case FOURCC_YUY2: - src = sample + (aligned_src_width * crop_y + crop_x) * 2; - r = YUY2ToARGB(src, aligned_src_width * 2, - crop_argb, argb_stride, - crop_width, inv_crop_height); - break; - case FOURCC_UYVY: - src = sample + (aligned_src_width * crop_y + crop_x) * 2; - r = UYVYToARGB(src, aligned_src_width * 2, - crop_argb, argb_stride, - crop_width, inv_crop_height); - break; - case FOURCC_24BG: - src = sample + (src_width * crop_y + crop_x) * 3; - r = RGB24ToARGB(src, src_width * 3, - crop_argb, argb_stride, - crop_width, inv_crop_height); - break; - case FOURCC_RAW: - src = sample + (src_width * crop_y + crop_x) * 3; - r = RAWToARGB(src, src_width * 3, - crop_argb, argb_stride, - crop_width, inv_crop_height); - break; - case FOURCC_ARGB: - src = sample + (src_width * crop_y + crop_x) * 4; - r = ARGBToARGB(src, src_width * 4, - crop_argb, argb_stride, - crop_width, inv_crop_height); - break; - case FOURCC_BGRA: - src = sample + (src_width * crop_y + crop_x) * 4; - r = BGRAToARGB(src, src_width * 4, - crop_argb, argb_stride, - crop_width, inv_crop_height); - break; - case FOURCC_ABGR: - src = sample + (src_width * crop_y + crop_x) * 4; - r = ABGRToARGB(src, src_width * 4, - crop_argb, argb_stride, - crop_width, inv_crop_height); - break; - case FOURCC_RGBA: - src = sample + (src_width * crop_y + crop_x) * 4; - r = RGBAToARGB(src, src_width * 4, - crop_argb, argb_stride, - crop_width, inv_crop_height); - break; - case FOURCC_RGBP: - 
src = sample + (src_width * crop_y + crop_x) * 2; - r = RGB565ToARGB(src, src_width * 2, - crop_argb, argb_stride, - crop_width, inv_crop_height); - break; - case FOURCC_RGBO: - src = sample + (src_width * crop_y + crop_x) * 2; - r = ARGB1555ToARGB(src, src_width * 2, - crop_argb, argb_stride, - crop_width, inv_crop_height); - break; - case FOURCC_R444: - src = sample + (src_width * crop_y + crop_x) * 2; - r = ARGB4444ToARGB(src, src_width * 2, - crop_argb, argb_stride, - crop_width, inv_crop_height); - break; - // TODO(fbarchard): Support cropping Bayer by odd numbers - // by adjusting fourcc. - case FOURCC_BGGR: - src = sample + (src_width * crop_y + crop_x); - r = BayerBGGRToARGB(src, src_width, - crop_argb, argb_stride, - crop_width, inv_crop_height); - break; - - case FOURCC_GBRG: - src = sample + (src_width * crop_y + crop_x); - r = BayerGBRGToARGB(src, src_width, - crop_argb, argb_stride, - crop_width, inv_crop_height); - break; - - case FOURCC_GRBG: - src = sample + (src_width * crop_y + crop_x); - r = BayerGRBGToARGB(src, src_width, - crop_argb, argb_stride, - crop_width, inv_crop_height); - break; - - case FOURCC_RGGB: - src = sample + (src_width * crop_y + crop_x); - r = BayerRGGBToARGB(src, src_width, - crop_argb, argb_stride, - crop_width, inv_crop_height); - break; - - case FOURCC_I400: - src = sample + src_width * crop_y + crop_x; - r = I400ToARGB(src, src_width, - crop_argb, argb_stride, - crop_width, inv_crop_height); - break; - - // Biplanar formats - case FOURCC_NV12: - src = sample + (src_width * crop_y + crop_x); - src_uv = sample + aligned_src_width * (src_height + crop_y / 2) + crop_x; - r = NV12ToARGB(src, src_width, - src_uv, aligned_src_width, - crop_argb, argb_stride, - crop_width, inv_crop_height); - break; - case FOURCC_NV21: - src = sample + (src_width * crop_y + crop_x); - src_uv = sample + aligned_src_width * (src_height + crop_y / 2) + crop_x; - // Call NV12 but with u and v parameters swapped. 
- r = NV21ToARGB(src, src_width, - src_uv, aligned_src_width, - crop_argb, argb_stride, - crop_width, inv_crop_height); - break; - case FOURCC_M420: - src = sample + (src_width * crop_y) * 12 / 8 + crop_x; - r = M420ToARGB(src, src_width, - crop_argb, argb_stride, - crop_width, inv_crop_height); - break; -// case FOURCC_Q420: -// src = sample + (src_width + aligned_src_width * 2) * crop_y + crop_x; -// src_uv = sample + (src_width + aligned_src_width * 2) * crop_y + -// src_width + crop_x * 2; -// r = Q420ToARGB(src, src_width * 3, -// src_uv, src_width * 3, -// crop_argb, argb_stride, -// crop_width, inv_crop_height); -// break; - // Triplanar formats - case FOURCC_I420: - case FOURCC_YU12: - case FOURCC_YV12: { - const uint8* src_y = sample + (src_width * crop_y + crop_x); - const uint8* src_u; - const uint8* src_v; - int halfwidth = (src_width + 1) / 2; - int halfheight = (abs_src_height + 1) / 2; - if (format == FOURCC_YV12) { - src_v = sample + src_width * abs_src_height + - (halfwidth * crop_y + crop_x) / 2; - src_u = sample + src_width * abs_src_height + - halfwidth * (halfheight + crop_y / 2) + crop_x / 2; - } else { - src_u = sample + src_width * abs_src_height + - (halfwidth * crop_y + crop_x) / 2; - src_v = sample + src_width * abs_src_height + - halfwidth * (halfheight + crop_y / 2) + crop_x / 2; - } - r = I420ToARGB(src_y, src_width, - src_u, halfwidth, - src_v, halfwidth, - crop_argb, argb_stride, - crop_width, inv_crop_height); - break; - } - case FOURCC_I422: - case FOURCC_YV16: { - const uint8* src_y = sample + src_width * crop_y + crop_x; - const uint8* src_u; - const uint8* src_v; - int halfwidth = (src_width + 1) / 2; - if (format == FOURCC_YV16) { - src_v = sample + src_width * abs_src_height + - halfwidth * crop_y + crop_x / 2; - src_u = sample + src_width * abs_src_height + - halfwidth * (abs_src_height + crop_y) + crop_x / 2; - } else { - src_u = sample + src_width * abs_src_height + - halfwidth * crop_y + crop_x / 2; - src_v = sample + 
src_width * abs_src_height + - halfwidth * (abs_src_height + crop_y) + crop_x / 2; - } - r = I422ToARGB(src_y, src_width, - src_u, halfwidth, - src_v, halfwidth, - crop_argb, argb_stride, - crop_width, inv_crop_height); - break; - } - case FOURCC_I444: - case FOURCC_YV24: { - const uint8* src_y = sample + src_width * crop_y + crop_x; - const uint8* src_u; - const uint8* src_v; - if (format == FOURCC_YV24) { - src_v = sample + src_width * (abs_src_height + crop_y) + crop_x; - src_u = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x; - } else { - src_u = sample + src_width * (abs_src_height + crop_y) + crop_x; - src_v = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x; - } - r = I444ToARGB(src_y, src_width, - src_u, src_width, - src_v, src_width, - crop_argb, argb_stride, - crop_width, inv_crop_height); - break; - } - case FOURCC_I411: { - int quarterwidth = (src_width + 3) / 4; - const uint8* src_y = sample + src_width * crop_y + crop_x; - const uint8* src_u = sample + src_width * abs_src_height + - quarterwidth * crop_y + crop_x / 4; - const uint8* src_v = sample + src_width * abs_src_height + - quarterwidth * (abs_src_height + crop_y) + crop_x / 4; - r = I411ToARGB(src_y, src_width, - src_u, quarterwidth, - src_v, quarterwidth, - crop_argb, argb_stride, - crop_width, inv_crop_height); - break; - } -#ifdef HAVE_JPEG - case FOURCC_MJPG: - r = MJPGToARGB(sample, sample_size, - crop_argb, argb_stride, - src_width, abs_src_height, crop_width, inv_crop_height); - break; -#endif - default: - r = -1; // unknown fourcc - return failure code. 
- } - - if (need_buf) { - if (!r) { - r = ARGBRotate(crop_argb, argb_stride, - tmp_argb, tmp_argb_stride, - crop_width, abs_crop_height, rotation); - } - free(rotate_buffer); - } - - return r; -} - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/convert_to_i420.cc b/drivers/theoraplayer/src/YUV/libyuv/src/convert_to_i420.cc deleted file mode 100755 index 7b194fff72..0000000000 --- a/drivers/theoraplayer/src/YUV/libyuv/src/convert_to_i420.cc +++ /dev/null @@ -1,383 +0,0 @@ -/* - * Copyright 2011 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include <stdlib.h> - -#include "libyuv/convert.h" - -#include "libyuv/format_conversion.h" -#include "libyuv/video_common.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -// Convert camera sample to I420 with cropping, rotation and vertical flip. -// src_width is used for source stride computation -// src_height is used to compute location of planes, and indicate inversion -// sample_size is measured in bytes and is the size of the frame. -// With MJPEG it is the compressed size of the frame. -LIBYUV_API -int ConvertToI420(const uint8* sample, - size_t sample_size, - uint8* y, int y_stride, - uint8* u, int u_stride, - uint8* v, int v_stride, - int crop_x, int crop_y, - int src_width, int src_height, - int crop_width, int crop_height, - enum RotationMode rotation, - uint32 fourcc) { - uint32 format = CanonicalFourCC(fourcc); - int aligned_src_width = (src_width + 1) & ~1; - const uint8* src; - const uint8* src_uv; - int abs_src_height = (src_height < 0) ? 
-src_height : src_height; - int inv_crop_height = (crop_height < 0) ? -crop_height : crop_height; - int r = 0; - LIBYUV_BOOL need_buf = (rotation && format != FOURCC_I420 && - format != FOURCC_NV12 && format != FOURCC_NV21 && - format != FOURCC_YU12 && format != FOURCC_YV12) || y == sample; - uint8* tmp_y = y; - uint8* tmp_u = u; - uint8* tmp_v = v; - int tmp_y_stride = y_stride; - int tmp_u_stride = u_stride; - int tmp_v_stride = v_stride; - uint8* rotate_buffer = NULL; - int abs_crop_height = (crop_height < 0) ? -crop_height : crop_height; - - if (!y || !u || !v || !sample || - src_width <= 0 || crop_width <= 0 || - src_height == 0 || crop_height == 0) { - return -1; - } - if (src_height < 0) { - inv_crop_height = -inv_crop_height; - } - - // One pass rotation is available for some formats. For the rest, convert - // to I420 (with optional vertical flipping) into a temporary I420 buffer, - // and then rotate the I420 to the final destination buffer. - // For in-place conversion, if destination y is same as source sample, - // also enable temporary buffer. - if (need_buf) { - int y_size = crop_width * abs_crop_height; - int uv_size = ((crop_width + 1) / 2) * ((abs_crop_height + 1) / 2); - rotate_buffer = (uint8*)malloc(y_size + uv_size * 2); - if (!rotate_buffer) { - return 1; // Out of memory runtime error. 
- } - y = rotate_buffer; - u = y + y_size; - v = u + uv_size; - y_stride = crop_width; - u_stride = v_stride = ((crop_width + 1) / 2); - } - - switch (format) { - // Single plane formats - case FOURCC_YUY2: - src = sample + (aligned_src_width * crop_y + crop_x) * 2; - r = YUY2ToI420(src, aligned_src_width * 2, - y, y_stride, - u, u_stride, - v, v_stride, - crop_width, inv_crop_height); - break; - case FOURCC_UYVY: - src = sample + (aligned_src_width * crop_y + crop_x) * 2; - r = UYVYToI420(src, aligned_src_width * 2, - y, y_stride, - u, u_stride, - v, v_stride, - crop_width, inv_crop_height); - break; - case FOURCC_RGBP: - src = sample + (src_width * crop_y + crop_x) * 2; - r = RGB565ToI420(src, src_width * 2, - y, y_stride, - u, u_stride, - v, v_stride, - crop_width, inv_crop_height); - break; - case FOURCC_RGBO: - src = sample + (src_width * crop_y + crop_x) * 2; - r = ARGB1555ToI420(src, src_width * 2, - y, y_stride, - u, u_stride, - v, v_stride, - crop_width, inv_crop_height); - break; - case FOURCC_R444: - src = sample + (src_width * crop_y + crop_x) * 2; - r = ARGB4444ToI420(src, src_width * 2, - y, y_stride, - u, u_stride, - v, v_stride, - crop_width, inv_crop_height); - break; - case FOURCC_24BG: - src = sample + (src_width * crop_y + crop_x) * 3; - r = RGB24ToI420(src, src_width * 3, - y, y_stride, - u, u_stride, - v, v_stride, - crop_width, inv_crop_height); - break; - case FOURCC_RAW: - src = sample + (src_width * crop_y + crop_x) * 3; - r = RAWToI420(src, src_width * 3, - y, y_stride, - u, u_stride, - v, v_stride, - crop_width, inv_crop_height); - break; - case FOURCC_ARGB: - src = sample + (src_width * crop_y + crop_x) * 4; - r = ARGBToI420(src, src_width * 4, - y, y_stride, - u, u_stride, - v, v_stride, - crop_width, inv_crop_height); - break; - case FOURCC_BGRA: - src = sample + (src_width * crop_y + crop_x) * 4; - r = BGRAToI420(src, src_width * 4, - y, y_stride, - u, u_stride, - v, v_stride, - crop_width, inv_crop_height); - break; - case 
FOURCC_ABGR: - src = sample + (src_width * crop_y + crop_x) * 4; - r = ABGRToI420(src, src_width * 4, - y, y_stride, - u, u_stride, - v, v_stride, - crop_width, inv_crop_height); - break; - case FOURCC_RGBA: - src = sample + (src_width * crop_y + crop_x) * 4; - r = RGBAToI420(src, src_width * 4, - y, y_stride, - u, u_stride, - v, v_stride, - crop_width, inv_crop_height); - break; - // TODO(fbarchard): Support cropping Bayer by odd numbers - // by adjusting fourcc. - case FOURCC_BGGR: - src = sample + (src_width * crop_y + crop_x); - r = BayerBGGRToI420(src, src_width, - y, y_stride, - u, u_stride, - v, v_stride, - crop_width, inv_crop_height); - break; - case FOURCC_GBRG: - src = sample + (src_width * crop_y + crop_x); - r = BayerGBRGToI420(src, src_width, - y, y_stride, - u, u_stride, - v, v_stride, - crop_width, inv_crop_height); - break; - case FOURCC_GRBG: - src = sample + (src_width * crop_y + crop_x); - r = BayerGRBGToI420(src, src_width, - y, y_stride, - u, u_stride, - v, v_stride, - crop_width, inv_crop_height); - break; - case FOURCC_RGGB: - src = sample + (src_width * crop_y + crop_x); - r = BayerRGGBToI420(src, src_width, - y, y_stride, - u, u_stride, - v, v_stride, - crop_width, inv_crop_height); - break; - case FOURCC_I400: - src = sample + src_width * crop_y + crop_x; - r = I400ToI420(src, src_width, - y, y_stride, - u, u_stride, - v, v_stride, - crop_width, inv_crop_height); - break; - // Biplanar formats - case FOURCC_NV12: - src = sample + (src_width * crop_y + crop_x); - src_uv = sample + aligned_src_width * (src_height + crop_y / 2) + crop_x; - r = NV12ToI420Rotate(src, src_width, - src_uv, aligned_src_width, - y, y_stride, - u, u_stride, - v, v_stride, - crop_width, inv_crop_height, rotation); - break; - case FOURCC_NV21: - src = sample + (src_width * crop_y + crop_x); - src_uv = sample + aligned_src_width * (src_height + crop_y / 2) + crop_x; - // Call NV12 but with u and v parameters swapped. 
- r = NV12ToI420Rotate(src, src_width, - src_uv, aligned_src_width, - y, y_stride, - v, v_stride, - u, u_stride, - crop_width, inv_crop_height, rotation); - break; - case FOURCC_M420: - src = sample + (src_width * crop_y) * 12 / 8 + crop_x; - r = M420ToI420(src, src_width, - y, y_stride, - u, u_stride, - v, v_stride, - crop_width, inv_crop_height); - break; - case FOURCC_Q420: - src = sample + (src_width + aligned_src_width * 2) * crop_y + crop_x; - src_uv = sample + (src_width + aligned_src_width * 2) * crop_y + - src_width + crop_x * 2; - r = Q420ToI420(src, src_width * 3, - src_uv, src_width * 3, - y, y_stride, - u, u_stride, - v, v_stride, - crop_width, inv_crop_height); - break; - // Triplanar formats - case FOURCC_I420: - case FOURCC_YU12: - case FOURCC_YV12: { - const uint8* src_y = sample + (src_width * crop_y + crop_x); - const uint8* src_u; - const uint8* src_v; - int halfwidth = (src_width + 1) / 2; - int halfheight = (abs_src_height + 1) / 2; - if (format == FOURCC_YV12) { - src_v = sample + src_width * abs_src_height + - (halfwidth * crop_y + crop_x) / 2; - src_u = sample + src_width * abs_src_height + - halfwidth * (halfheight + crop_y / 2) + crop_x / 2; - } else { - src_u = sample + src_width * abs_src_height + - (halfwidth * crop_y + crop_x) / 2; - src_v = sample + src_width * abs_src_height + - halfwidth * (halfheight + crop_y / 2) + crop_x / 2; - } - r = I420Rotate(src_y, src_width, - src_u, halfwidth, - src_v, halfwidth, - y, y_stride, - u, u_stride, - v, v_stride, - crop_width, inv_crop_height, rotation); - break; - } - case FOURCC_I422: - case FOURCC_YV16: { - const uint8* src_y = sample + src_width * crop_y + crop_x; - const uint8* src_u; - const uint8* src_v; - int halfwidth = (src_width + 1) / 2; - if (format == FOURCC_YV16) { - src_v = sample + src_width * abs_src_height + - halfwidth * crop_y + crop_x / 2; - src_u = sample + src_width * abs_src_height + - halfwidth * (abs_src_height + crop_y) + crop_x / 2; - } else { - src_u = sample + 
src_width * abs_src_height + - halfwidth * crop_y + crop_x / 2; - src_v = sample + src_width * abs_src_height + - halfwidth * (abs_src_height + crop_y) + crop_x / 2; - } - r = I422ToI420(src_y, src_width, - src_u, halfwidth, - src_v, halfwidth, - y, y_stride, - u, u_stride, - v, v_stride, - crop_width, inv_crop_height); - break; - } - case FOURCC_I444: - case FOURCC_YV24: { - const uint8* src_y = sample + src_width * crop_y + crop_x; - const uint8* src_u; - const uint8* src_v; - if (format == FOURCC_YV24) { - src_v = sample + src_width * (abs_src_height + crop_y) + crop_x; - src_u = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x; - } else { - src_u = sample + src_width * (abs_src_height + crop_y) + crop_x; - src_v = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x; - } - r = I444ToI420(src_y, src_width, - src_u, src_width, - src_v, src_width, - y, y_stride, - u, u_stride, - v, v_stride, - crop_width, inv_crop_height); - break; - } - case FOURCC_I411: { - int quarterwidth = (src_width + 3) / 4; - const uint8* src_y = sample + src_width * crop_y + crop_x; - const uint8* src_u = sample + src_width * abs_src_height + - quarterwidth * crop_y + crop_x / 4; - const uint8* src_v = sample + src_width * abs_src_height + - quarterwidth * (abs_src_height + crop_y) + crop_x / 4; - r = I411ToI420(src_y, src_width, - src_u, quarterwidth, - src_v, quarterwidth, - y, y_stride, - u, u_stride, - v, v_stride, - crop_width, inv_crop_height); - break; - } -#ifdef HAVE_JPEG - case FOURCC_MJPG: - r = MJPGToI420(sample, sample_size, - y, y_stride, - u, u_stride, - v, v_stride, - src_width, abs_src_height, crop_width, inv_crop_height); - break; -#endif - default: - r = -1; // unknown fourcc - return failure code. 
- } - - if (need_buf) { - if (!r) { - r = I420Rotate(y, y_stride, - u, u_stride, - v, v_stride, - tmp_y, tmp_y_stride, - tmp_u, tmp_u_stride, - tmp_v, tmp_v_stride, - crop_width, abs_crop_height, rotation); - } - free(rotate_buffer); - } - - return r; -} - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/cpu_id.cc b/drivers/theoraplayer/src/YUV/libyuv/src/cpu_id.cc deleted file mode 100755 index f52bd95551..0000000000 --- a/drivers/theoraplayer/src/YUV/libyuv/src/cpu_id.cc +++ /dev/null @@ -1,300 +0,0 @@ -/* - * Copyright 2011 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "libyuv/cpu_id.h" - -#ifdef _ANDROID //libtheoraplayer addition for cpu feature detection -#include "cpu-features.h" -#endif - -#ifdef _MSC_VER -#include <intrin.h> // For __cpuidex() -#endif -#if !defined(__pnacl__) && !defined(__CLR_VER) && \ - !defined(__native_client__) && defined(_M_X64) && \ - defined(_MSC_VER) && (_MSC_FULL_VER >= 160040219) -#include <immintrin.h> // For _xgetbv() -#endif - -#if !defined(__native_client__) -#include <stdlib.h> // For getenv() -#endif - -// For ArmCpuCaps() but unittested on all platforms -#include <stdio.h> -#include <string.h> - -#include "libyuv/basic_types.h" // For CPU_X86 - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -// For functions that use the stack and have runtime checks for overflow, -// use SAFEBUFFERS to avoid additional check. -#if defined(_MSC_VER) && (_MSC_FULL_VER >= 160040219) -#define SAFEBUFFERS __declspec(safebuffers) -#else -#define SAFEBUFFERS -#endif - -// Low level cpuid for X86. 
Returns zeros on other CPUs. -#if !defined(__pnacl__) && !defined(__CLR_VER) && \ - (defined(_M_IX86) || defined(_M_X64) || \ - defined(__i386__) || defined(__x86_64__)) -LIBYUV_API -void CpuId(uint32 info_eax, uint32 info_ecx, uint32* cpu_info) { -#if defined(_MSC_VER) -#if (_MSC_FULL_VER >= 160040219) - __cpuidex((int*)(cpu_info), info_eax, info_ecx); -#elif defined(_M_IX86) - __asm { - mov eax, info_eax - mov ecx, info_ecx - mov edi, cpu_info - cpuid - mov [edi], eax - mov [edi + 4], ebx - mov [edi + 8], ecx - mov [edi + 12], edx - } -#else - if (info_ecx == 0) { - __cpuid((int*)(cpu_info), info_eax); - } else { - cpu_info[3] = cpu_info[2] = cpu_info[1] = cpu_info[0] = 0; - } -#endif -#else // defined(_MSC_VER) - uint32 info_ebx, info_edx; - asm volatile ( // NOLINT -#if defined( __i386__) && defined(__PIC__) - // Preserve ebx for fpic 32 bit. - "mov %%ebx, %%edi \n" - "cpuid \n" - "xchg %%edi, %%ebx \n" - : "=D" (info_ebx), -#else - "cpuid \n" - : "=b" (info_ebx), -#endif // defined( __i386__) && defined(__PIC__) - "+a" (info_eax), "+c" (info_ecx), "=d" (info_edx)); - cpu_info[0] = info_eax; - cpu_info[1] = info_ebx; - cpu_info[2] = info_ecx; - cpu_info[3] = info_edx; -#endif // defined(_MSC_VER) -} - -#if !defined(__native_client__) -#define HAS_XGETBV -// X86 CPUs have xgetbv to detect OS saves high parts of ymm registers. -int TestOsSaveYmm() { - uint32 xcr0 = 0u; -#if defined(_MSC_VER) && (_MSC_FULL_VER >= 160040219) - xcr0 = (uint32)(_xgetbv(0)); // VS2010 SP1 required. -#elif defined(_M_IX86) - __asm { - xor ecx, ecx // xcr 0 - _asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0 // For VS2010 and earlier. - mov xcr0, eax - } -#elif defined(__i386__) || defined(__x86_64__) - asm(".byte 0x0f, 0x01, 0xd0" : "=a" (xcr0) : "c" (0) : "%edx"); -#endif // defined(_MSC_VER) - return((xcr0 & 6) == 6); // Is ymm saved? 
-} -#endif // !defined(__native_client__) -#else -LIBYUV_API -void CpuId(uint32 eax, uint32 ecx, uint32* cpu_info) { - cpu_info[0] = cpu_info[1] = cpu_info[2] = cpu_info[3] = 0; -} -#endif - -// based on libvpx arm_cpudetect.c -// For Arm, but public to allow testing on any CPU -LIBYUV_API SAFEBUFFERS -int ArmCpuCaps(const char* cpuinfo_name) { - char cpuinfo_line[512]; - FILE* f = fopen(cpuinfo_name, "r"); - if (!f) { - // Assume Neon if /proc/cpuinfo is unavailable. - // This will occur for Chrome sandbox for Pepper or Render process. - return kCpuHasNEON; - } - while (fgets(cpuinfo_line, sizeof(cpuinfo_line) - 1, f)) { - if (memcmp(cpuinfo_line, "Features", 8) == 0) { - char* p = strstr(cpuinfo_line, " neon"); - if (p && (p[5] == ' ' || p[5] == '\n')) { - fclose(f); - return kCpuHasNEON; - } - } - } - fclose(f); - return 0; -} - -#if defined(__mips__) && defined(__linux__) -static int MipsCpuCaps(const char* search_string) { - char cpuinfo_line[512]; - const char* file_name = "/proc/cpuinfo"; - FILE* f = fopen(file_name, "r"); - if (!f) { - // Assume DSP if /proc/cpuinfo is unavailable. - // This will occur for Chrome sandbox for Pepper or Render process. - return kCpuHasMIPS_DSP; - } - while (fgets(cpuinfo_line, sizeof(cpuinfo_line) - 1, f) != NULL) { - if (strstr(cpuinfo_line, search_string) != NULL) { - fclose(f); - return kCpuHasMIPS_DSP; - } - } - fclose(f); - return 0; -} -#endif - -// CPU detect function for SIMD instruction sets. -LIBYUV_API -int cpu_info_ = kCpuInit; // cpu_info is not initialized yet. - -// Test environment variable for disabling CPU features. Any non-zero value -// to disable. Zero ignored to make it easy to set the variable on/off. -#if !defined(__native_client__) && !defined(_M_ARM) - -static LIBYUV_BOOL TestEnv(const char* name) { -#ifndef _WINRT - const char* var = getenv(name); - if (var) { - if (var[0] != '0') { - return LIBYUV_TRUE; - } - } -#endif - return LIBYUV_FALSE; -} -#else // nacl does not support getenv(). 
-static LIBYUV_BOOL TestEnv(const char*) { - return LIBYUV_FALSE; -} -#endif - -LIBYUV_API SAFEBUFFERS -int InitCpuFlags(void) { -#if !defined(__pnacl__) && !defined(__CLR_VER) && defined(CPU_X86) - - uint32 cpu_info1[4] = { 0, 0, 0, 0 }; - uint32 cpu_info7[4] = { 0, 0, 0, 0 }; - CpuId(1, 0, cpu_info1); - CpuId(7, 0, cpu_info7); - cpu_info_ = ((cpu_info1[3] & 0x04000000) ? kCpuHasSSE2 : 0) | - ((cpu_info1[2] & 0x00000200) ? kCpuHasSSSE3 : 0) | - ((cpu_info1[2] & 0x00080000) ? kCpuHasSSE41 : 0) | - ((cpu_info1[2] & 0x00100000) ? kCpuHasSSE42 : 0) | - ((cpu_info7[1] & 0x00000200) ? kCpuHasERMS : 0) | - ((cpu_info1[2] & 0x00001000) ? kCpuHasFMA3 : 0) | - kCpuHasX86; -#ifdef HAS_XGETBV - if ((cpu_info1[2] & 0x18000000) == 0x18000000 && // AVX and OSSave - TestOsSaveYmm()) { // Saves YMM. - cpu_info_ |= ((cpu_info7[1] & 0x00000020) ? kCpuHasAVX2 : 0) | - kCpuHasAVX; - } -#endif - // Environment variable overrides for testing. - if (TestEnv("LIBYUV_DISABLE_X86")) { - cpu_info_ &= ~kCpuHasX86; - } - if (TestEnv("LIBYUV_DISABLE_SSE2")) { - cpu_info_ &= ~kCpuHasSSE2; - } - if (TestEnv("LIBYUV_DISABLE_SSSE3")) { - cpu_info_ &= ~kCpuHasSSSE3; - } - if (TestEnv("LIBYUV_DISABLE_SSE41")) { - cpu_info_ &= ~kCpuHasSSE41; - } - if (TestEnv("LIBYUV_DISABLE_SSE42")) { - cpu_info_ &= ~kCpuHasSSE42; - } - if (TestEnv("LIBYUV_DISABLE_AVX")) { - cpu_info_ &= ~kCpuHasAVX; - } - if (TestEnv("LIBYUV_DISABLE_AVX2")) { - cpu_info_ &= ~kCpuHasAVX2; - } - if (TestEnv("LIBYUV_DISABLE_ERMS")) { - cpu_info_ &= ~kCpuHasERMS; - } - if (TestEnv("LIBYUV_DISABLE_FMA3")) { - cpu_info_ &= ~kCpuHasFMA3; - } -#elif defined(__mips__) && defined(__linux__) - // Linux mips parse text file for dsp detect. - cpu_info_ = MipsCpuCaps("dsp"); // set kCpuHasMIPS_DSP. 
-#if defined(__mips_dspr2) - cpu_info_ |= kCpuHasMIPS_DSPR2; -#endif - cpu_info_ |= kCpuHasMIPS; - - if (getenv("LIBYUV_DISABLE_MIPS")) { - cpu_info_ &= ~kCpuHasMIPS; - } - if (getenv("LIBYUV_DISABLE_MIPS_DSP")) { - cpu_info_ &= ~kCpuHasMIPS_DSP; - } - if (getenv("LIBYUV_DISABLE_MIPS_DSPR2")) { - cpu_info_ &= ~kCpuHasMIPS_DSPR2; - } -#elif defined(__arm__) -// gcc -mfpu=neon defines __ARM_NEON__ -// __ARM_NEON__ generates code that requires Neon. NaCL also requires Neon. -// For Linux, /proc/cpuinfo can be tested but without that assume Neon. -#if defined(__ARM_NEON__) || defined(__native_client__) || !defined(__linux__) -#ifdef _ANDROID - cpu_info_ = ArmCpuCaps("/proc/cpuinfo"); // libtheoraplayer #ifdef addition, just in case, android gave us troubles -#else - cpu_info_ = kCpuHasNEON; -#endif -#else - // Linux arm parse text file for neon detect. - cpu_info_ = ArmCpuCaps("/proc/cpuinfo"); -#endif - cpu_info_ |= kCpuHasARM; - if (TestEnv("LIBYUV_DISABLE_NEON")) { - cpu_info_ &= ~kCpuHasNEON; - } -#ifdef _ANDROID - // libtheoraplayer addition to disable NEON support on android devices that don't support it, once again, just in case - if ((android_getCpuFeaturesExt() & ANDROID_CPU_ARM_FEATURE_NEON) == 0) - { - cpu_info_ = kCpuHasARM; - } -#endif -#endif // __arm__ - if (TestEnv("LIBYUV_DISABLE_ASM")) { - cpu_info_ = 0; - } - return cpu_info_; -} - -LIBYUV_API -void MaskCpuFlags(int enable_flags) { - cpu_info_ = InitCpuFlags() & enable_flags; -} - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/format_conversion.cc b/drivers/theoraplayer/src/YUV/libyuv/src/format_conversion.cc deleted file mode 100755 index a3daf96a98..0000000000 --- a/drivers/theoraplayer/src/YUV/libyuv/src/format_conversion.cc +++ /dev/null @@ -1,552 +0,0 @@ -/* - * Copyright 2011 The LibYuv Project Authors. All rights reserved. 
- * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "libyuv/format_conversion.h" - -#include "libyuv/basic_types.h" -#include "libyuv/cpu_id.h" -#include "libyuv/video_common.h" -#include "libyuv/row.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -// generate a selector mask useful for pshufb -static uint32 GenerateSelector(int select0, int select1) { - return (uint32)(select0) | - (uint32)((select1 + 4) << 8) | - (uint32)((select0 + 8) << 16) | - (uint32)((select1 + 12) << 24); -} - -static int MakeSelectors(const int blue_index, - const int green_index, - const int red_index, - uint32 dst_fourcc_bayer, - uint32* index_map) { - // Now build a lookup table containing the indices for the four pixels in each - // 2x2 Bayer grid. - switch (dst_fourcc_bayer) { - case FOURCC_BGGR: - index_map[0] = GenerateSelector(blue_index, green_index); - index_map[1] = GenerateSelector(green_index, red_index); - break; - case FOURCC_GBRG: - index_map[0] = GenerateSelector(green_index, blue_index); - index_map[1] = GenerateSelector(red_index, green_index); - break; - case FOURCC_RGGB: - index_map[0] = GenerateSelector(red_index, green_index); - index_map[1] = GenerateSelector(green_index, blue_index); - break; - case FOURCC_GRBG: - index_map[0] = GenerateSelector(green_index, red_index); - index_map[1] = GenerateSelector(blue_index, green_index); - break; - default: - return -1; // Bad FourCC - } - return 0; -} - -// Converts 32 bit ARGB to Bayer RGB formats. 
-LIBYUV_API -int ARGBToBayer(const uint8* src_argb, int src_stride_argb, - uint8* dst_bayer, int dst_stride_bayer, - int width, int height, - uint32 dst_fourcc_bayer) { - int y; - const int blue_index = 0; // Offsets for ARGB format - const int green_index = 1; - const int red_index = 2; - uint32 index_map[2]; - void (*ARGBToBayerRow)(const uint8* src_argb, uint8* dst_bayer, - uint32 selector, int pix) = ARGBToBayerRow_C; - if (height < 0) { - height = -height; - src_argb = src_argb + (height - 1) * src_stride_argb; - src_stride_argb = -src_stride_argb; - } -#if defined(HAS_ARGBTOBAYERROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 8 && - IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) { - ARGBToBayerRow = ARGBToBayerRow_Any_SSSE3; - if (IS_ALIGNED(width, 8)) { - ARGBToBayerRow = ARGBToBayerRow_SSSE3; - } - } -#elif defined(HAS_ARGBTOBAYERROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && width >= 8) { - ARGBToBayerRow = ARGBToBayerRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - ARGBToBayerRow = ARGBToBayerRow_NEON; - } - } -#endif - if (MakeSelectors(blue_index, green_index, red_index, - dst_fourcc_bayer, index_map)) { - return -1; // Bad FourCC - } - - for (y = 0; y < height; ++y) { - ARGBToBayerRow(src_argb, dst_bayer, index_map[y & 1], width); - src_argb += src_stride_argb; - dst_bayer += dst_stride_bayer; - } - return 0; -} - -#define AVG(a, b) (((a) + (b)) >> 1) - -static void BayerRowBG(const uint8* src_bayer0, int src_stride_bayer, - uint8* dst_argb, int pix) { - const uint8* src_bayer1 = src_bayer0 + src_stride_bayer; - uint8 g = src_bayer0[1]; - uint8 r = src_bayer1[1]; - int x; - for (x = 0; x < pix - 2; x += 2) { - dst_argb[0] = src_bayer0[0]; - dst_argb[1] = AVG(g, src_bayer0[1]); - dst_argb[2] = AVG(r, src_bayer1[1]); - dst_argb[3] = 255U; - dst_argb[4] = AVG(src_bayer0[0], src_bayer0[2]); - dst_argb[5] = src_bayer0[1]; - dst_argb[6] = src_bayer1[1]; - dst_argb[7] = 255U; - g = src_bayer0[1]; - r = src_bayer1[1]; - src_bayer0 += 2; - 
src_bayer1 += 2; - dst_argb += 8; - } - dst_argb[0] = src_bayer0[0]; - dst_argb[1] = AVG(g, src_bayer0[1]); - dst_argb[2] = AVG(r, src_bayer1[1]); - dst_argb[3] = 255U; - if (!(pix & 1)) { - dst_argb[4] = src_bayer0[0]; - dst_argb[5] = src_bayer0[1]; - dst_argb[6] = src_bayer1[1]; - dst_argb[7] = 255U; - } -} - -static void BayerRowRG(const uint8* src_bayer0, int src_stride_bayer, - uint8* dst_argb, int pix) { - const uint8* src_bayer1 = src_bayer0 + src_stride_bayer; - uint8 g = src_bayer0[1]; - uint8 b = src_bayer1[1]; - int x; - for (x = 0; x < pix - 2; x += 2) { - dst_argb[0] = AVG(b, src_bayer1[1]); - dst_argb[1] = AVG(g, src_bayer0[1]); - dst_argb[2] = src_bayer0[0]; - dst_argb[3] = 255U; - dst_argb[4] = src_bayer1[1]; - dst_argb[5] = src_bayer0[1]; - dst_argb[6] = AVG(src_bayer0[0], src_bayer0[2]); - dst_argb[7] = 255U; - g = src_bayer0[1]; - b = src_bayer1[1]; - src_bayer0 += 2; - src_bayer1 += 2; - dst_argb += 8; - } - dst_argb[0] = AVG(b, src_bayer1[1]); - dst_argb[1] = AVG(g, src_bayer0[1]); - dst_argb[2] = src_bayer0[0]; - dst_argb[3] = 255U; - if (!(pix & 1)) { - dst_argb[4] = src_bayer1[1]; - dst_argb[5] = src_bayer0[1]; - dst_argb[6] = src_bayer0[0]; - dst_argb[7] = 255U; - } -} - -static void BayerRowGB(const uint8* src_bayer0, int src_stride_bayer, - uint8* dst_argb, int pix) { - const uint8* src_bayer1 = src_bayer0 + src_stride_bayer; - uint8 b = src_bayer0[1]; - int x; - for (x = 0; x < pix - 2; x += 2) { - dst_argb[0] = AVG(b, src_bayer0[1]); - dst_argb[1] = src_bayer0[0]; - dst_argb[2] = src_bayer1[0]; - dst_argb[3] = 255U; - dst_argb[4] = src_bayer0[1]; - dst_argb[5] = AVG(src_bayer0[0], src_bayer0[2]); - dst_argb[6] = AVG(src_bayer1[0], src_bayer1[2]); - dst_argb[7] = 255U; - b = src_bayer0[1]; - src_bayer0 += 2; - src_bayer1 += 2; - dst_argb += 8; - } - dst_argb[0] = AVG(b, src_bayer0[1]); - dst_argb[1] = src_bayer0[0]; - dst_argb[2] = src_bayer1[0]; - dst_argb[3] = 255U; - if (!(pix & 1)) { - dst_argb[4] = src_bayer0[1]; - dst_argb[5] = 
src_bayer0[0]; - dst_argb[6] = src_bayer1[0]; - dst_argb[7] = 255U; - } -} - -static void BayerRowGR(const uint8* src_bayer0, int src_stride_bayer, - uint8* dst_argb, int pix) { - const uint8* src_bayer1 = src_bayer0 + src_stride_bayer; - uint8 r = src_bayer0[1]; - int x; - for (x = 0; x < pix - 2; x += 2) { - dst_argb[0] = src_bayer1[0]; - dst_argb[1] = src_bayer0[0]; - dst_argb[2] = AVG(r, src_bayer0[1]); - dst_argb[3] = 255U; - dst_argb[4] = AVG(src_bayer1[0], src_bayer1[2]); - dst_argb[5] = AVG(src_bayer0[0], src_bayer0[2]); - dst_argb[6] = src_bayer0[1]; - dst_argb[7] = 255U; - r = src_bayer0[1]; - src_bayer0 += 2; - src_bayer1 += 2; - dst_argb += 8; - } - dst_argb[0] = src_bayer1[0]; - dst_argb[1] = src_bayer0[0]; - dst_argb[2] = AVG(r, src_bayer0[1]); - dst_argb[3] = 255U; - if (!(pix & 1)) { - dst_argb[4] = src_bayer1[0]; - dst_argb[5] = src_bayer0[0]; - dst_argb[6] = src_bayer0[1]; - dst_argb[7] = 255U; - } -} - -// Converts any Bayer RGB format to ARGB. -LIBYUV_API -int BayerToARGB(const uint8* src_bayer, int src_stride_bayer, - uint8* dst_argb, int dst_stride_argb, - int width, int height, - uint32 src_fourcc_bayer) { - int y; - void (*BayerRow0)(const uint8* src_bayer, int src_stride_bayer, - uint8* dst_argb, int pix); - void (*BayerRow1)(const uint8* src_bayer, int src_stride_bayer, - uint8* dst_argb, int pix); - if (height < 0) { - height = -height; - dst_argb = dst_argb + (height - 1) * dst_stride_argb; - dst_stride_argb = -dst_stride_argb; - } - switch (src_fourcc_bayer) { - case FOURCC_BGGR: - BayerRow0 = BayerRowBG; - BayerRow1 = BayerRowGR; - break; - case FOURCC_GBRG: - BayerRow0 = BayerRowGB; - BayerRow1 = BayerRowRG; - break; - case FOURCC_GRBG: - BayerRow0 = BayerRowGR; - BayerRow1 = BayerRowBG; - break; - case FOURCC_RGGB: - BayerRow0 = BayerRowRG; - BayerRow1 = BayerRowGB; - break; - default: - return -1; // Bad FourCC - } - - for (y = 0; y < height - 1; y += 2) { - BayerRow0(src_bayer, src_stride_bayer, dst_argb, width); - 
BayerRow1(src_bayer + src_stride_bayer, -src_stride_bayer, - dst_argb + dst_stride_argb, width); - src_bayer += src_stride_bayer * 2; - dst_argb += dst_stride_argb * 2; - } - if (height & 1) { - BayerRow0(src_bayer, src_stride_bayer, dst_argb, width); - } - return 0; -} - -// Converts any Bayer RGB format to ARGB. -LIBYUV_API -int BayerToI420(const uint8* src_bayer, int src_stride_bayer, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height, - uint32 src_fourcc_bayer) { - void (*BayerRow0)(const uint8* src_bayer, int src_stride_bayer, - uint8* dst_argb, int pix); - void (*BayerRow1)(const uint8* src_bayer, int src_stride_bayer, - uint8* dst_argb, int pix); - - void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C; - void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) = - ARGBToYRow_C; - // Negative height means invert the image. - if (height < 0) { - int halfheight; - height = -height; - halfheight = (height + 1) >> 1; - dst_y = dst_y + (height - 1) * dst_stride_y; - dst_u = dst_u + (halfheight - 1) * dst_stride_u; - dst_v = dst_v + (halfheight - 1) * dst_stride_v; - dst_stride_y = -dst_stride_y; - dst_stride_u = -dst_stride_u; - dst_stride_v = -dst_stride_v; - } -#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) { - ARGBToUVRow = ARGBToUVRow_Any_SSSE3; - ARGBToYRow = ARGBToYRow_Any_SSSE3; - if (IS_ALIGNED(width, 16)) { - ARGBToYRow = ARGBToYRow_Unaligned_SSSE3; - ARGBToUVRow = ARGBToUVRow_SSSE3; - if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { - ARGBToYRow = ARGBToYRow_SSSE3; - } - } - } -#elif defined(HAS_ARGBTOYROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && width >= 8) { - ARGBToYRow = ARGBToYRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - ARGBToYRow = ARGBToYRow_NEON; - } - if (width >= 16) { - ARGBToUVRow = ARGBToUVRow_Any_NEON; - if 
(IS_ALIGNED(width, 16)) { - ARGBToUVRow = ARGBToUVRow_NEON; - } - } - } -#endif - - switch (src_fourcc_bayer) { - case FOURCC_BGGR: - BayerRow0 = BayerRowBG; - BayerRow1 = BayerRowGR; - break; - case FOURCC_GBRG: - BayerRow0 = BayerRowGB; - BayerRow1 = BayerRowRG; - break; - case FOURCC_GRBG: - BayerRow0 = BayerRowGR; - BayerRow1 = BayerRowBG; - break; - case FOURCC_RGGB: - BayerRow0 = BayerRowRG; - BayerRow1 = BayerRowGB; - break; - default: - return -1; // Bad FourCC - } - - { - // Allocate 2 rows of ARGB. - const int kRowSize = (width * 4 + 15) & ~15; - align_buffer_64(row, kRowSize * 2); - int y; - for (y = 0; y < height - 1; y += 2) { - BayerRow0(src_bayer, src_stride_bayer, row, width); - BayerRow1(src_bayer + src_stride_bayer, -src_stride_bayer, - row + kRowSize, width); - ARGBToUVRow(row, kRowSize, dst_u, dst_v, width); - ARGBToYRow(row, dst_y, width); - ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width); - src_bayer += src_stride_bayer * 2; - dst_y += dst_stride_y * 2; - dst_u += dst_stride_u; - dst_v += dst_stride_v; - } - if (height & 1) { - BayerRow0(src_bayer, src_stride_bayer, row, width); - ARGBToUVRow(row, 0, dst_u, dst_v, width); - ARGBToYRow(row, dst_y, width); - } - free_aligned_buffer_64(row); - } - return 0; -} - -// Convert I420 to Bayer. -LIBYUV_API -int I420ToBayer(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_bayer, int dst_stride_bayer, - int width, int height, - uint32 dst_fourcc_bayer) { - void (*I422ToARGBRow)(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width) = I422ToARGBRow_C; - void (*ARGBToBayerRow)(const uint8* src_argb, uint8* dst_bayer, - uint32 selector, int pix) = ARGBToBayerRow_C; - const int blue_index = 0; // Offsets for ARGB format - const int green_index = 1; - const int red_index = 2; - uint32 index_map[2]; - // Negative height means invert the image. 
- if (height < 0) { - int halfheight; - height = -height; - halfheight = (height + 1) >> 1; - src_y = src_y + (height - 1) * src_stride_y; - src_u = src_u + (halfheight - 1) * src_stride_u; - src_v = src_v + (halfheight - 1) * src_stride_v; - src_stride_y = -src_stride_y; - src_stride_u = -src_stride_u; - src_stride_v = -src_stride_v; - } -#if defined(HAS_I422TOARGBROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) { - I422ToARGBRow = I422ToARGBRow_Any_SSSE3; - if (IS_ALIGNED(width, 8)) { - I422ToARGBRow = I422ToARGBRow_SSSE3; - } - } -#endif -#if defined(HAS_I422TOARGBROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2) && width >= 16) { - I422ToARGBRow = I422ToARGBRow_Any_AVX2; - if (IS_ALIGNED(width, 16)) { - I422ToARGBRow = I422ToARGBRow_AVX2; - } - } -#endif -#if defined(HAS_I422TOARGBROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && width >= 8) { - I422ToARGBRow = I422ToARGBRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - I422ToARGBRow = I422ToARGBRow_NEON; - } - } -#endif -#if defined(HAS_I422TOARGBROW_MIPS_DSPR2) - if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) && - IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) && - IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) && - IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2)) { - I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2; - } -#endif - -#if defined(HAS_ARGBTOBAYERROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) { - ARGBToBayerRow = ARGBToBayerRow_Any_SSSE3; - if (IS_ALIGNED(width, 8)) { - ARGBToBayerRow = ARGBToBayerRow_SSSE3; - } - } -#elif defined(HAS_ARGBTOBAYERROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && width >= 8) { - ARGBToBayerRow = ARGBToBayerRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - ARGBToBayerRow = ARGBToBayerRow_NEON; - } - } -#endif - - if (MakeSelectors(blue_index, green_index, red_index, - dst_fourcc_bayer, index_map)) { - return -1; // Bad FourCC - } - { - // Allocate a row of ARGB. 
- align_buffer_64(row, width * 4); - int y; - for (y = 0; y < height; ++y) { - I422ToARGBRow(src_y, src_u, src_v, row, width); - ARGBToBayerRow(row, dst_bayer, index_map[y & 1], width); - dst_bayer += dst_stride_bayer; - src_y += src_stride_y; - if (y & 1) { - src_u += src_stride_u; - src_v += src_stride_v; - } - } - free_aligned_buffer_64(row); - } - return 0; -} - -#define MAKEBAYERFOURCC(BAYER) \ -LIBYUV_API \ -int Bayer##BAYER##ToI420(const uint8* src_bayer, int src_stride_bayer, \ - uint8* dst_y, int dst_stride_y, \ - uint8* dst_u, int dst_stride_u, \ - uint8* dst_v, int dst_stride_v, \ - int width, int height) { \ - return BayerToI420(src_bayer, src_stride_bayer, \ - dst_y, dst_stride_y, \ - dst_u, dst_stride_u, \ - dst_v, dst_stride_v, \ - width, height, \ - FOURCC_##BAYER); \ -} \ - \ -LIBYUV_API \ -int I420ToBayer##BAYER(const uint8* src_y, int src_stride_y, \ - const uint8* src_u, int src_stride_u, \ - const uint8* src_v, int src_stride_v, \ - uint8* dst_bayer, int dst_stride_bayer, \ - int width, int height) { \ - return I420ToBayer(src_y, src_stride_y, \ - src_u, src_stride_u, \ - src_v, src_stride_v, \ - dst_bayer, dst_stride_bayer, \ - width, height, \ - FOURCC_##BAYER); \ -} \ - \ -LIBYUV_API \ -int ARGBToBayer##BAYER(const uint8* src_argb, int src_stride_argb, \ - uint8* dst_bayer, int dst_stride_bayer, \ - int width, int height) { \ - return ARGBToBayer(src_argb, src_stride_argb, \ - dst_bayer, dst_stride_bayer, \ - width, height, \ - FOURCC_##BAYER); \ -} \ - \ -LIBYUV_API \ -int Bayer##BAYER##ToARGB(const uint8* src_bayer, int src_stride_bayer, \ - uint8* dst_argb, int dst_stride_argb, \ - int width, int height) { \ - return BayerToARGB(src_bayer, src_stride_bayer, \ - dst_argb, dst_stride_argb, \ - width, height, \ - FOURCC_##BAYER); \ -} - -MAKEBAYERFOURCC(BGGR) -MAKEBAYERFOURCC(GBRG) -MAKEBAYERFOURCC(GRBG) -MAKEBAYERFOURCC(RGGB) - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif diff --git 
a/drivers/theoraplayer/src/YUV/libyuv/src/mjpeg_decoder.cc b/drivers/theoraplayer/src/YUV/libyuv/src/mjpeg_decoder.cc deleted file mode 100755 index 193b829ba9..0000000000 --- a/drivers/theoraplayer/src/YUV/libyuv/src/mjpeg_decoder.cc +++ /dev/null @@ -1,558 +0,0 @@ -/* - * Copyright 2012 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "libyuv/mjpeg_decoder.h" - -#ifdef HAVE_JPEG -#include <assert.h> - -#if !defined(__pnacl__) && !defined(__CLR_VER) && !defined(COVERAGE_ENABLED) &&\ - !defined(TARGET_IPHONE_SIMULATOR) -// Must be included before jpeglib. -#include <setjmp.h> -#define HAVE_SETJMP -#endif -struct FILE; // For jpeglib.h. - -// C++ build requires extern C for jpeg internals. -#ifdef __cplusplus -extern "C" { -#endif - -#include <jpeglib.h> - -#ifdef __cplusplus -} // extern "C" -#endif - -#include "libyuv/planar_functions.h" // For CopyPlane(). 
- -namespace libyuv { - -#ifdef HAVE_SETJMP -struct SetJmpErrorMgr { - jpeg_error_mgr base; // Must be at the top - jmp_buf setjmp_buffer; -}; -#endif - -const int MJpegDecoder::kColorSpaceUnknown = JCS_UNKNOWN; -const int MJpegDecoder::kColorSpaceGrayscale = JCS_GRAYSCALE; -const int MJpegDecoder::kColorSpaceRgb = JCS_RGB; -const int MJpegDecoder::kColorSpaceYCbCr = JCS_YCbCr; -const int MJpegDecoder::kColorSpaceCMYK = JCS_CMYK; -const int MJpegDecoder::kColorSpaceYCCK = JCS_YCCK; - -MJpegDecoder::MJpegDecoder() - : has_scanline_padding_(LIBYUV_FALSE), - num_outbufs_(0), - scanlines_(NULL), - scanlines_sizes_(NULL), - databuf_(NULL), - databuf_strides_(NULL) { - decompress_struct_ = new jpeg_decompress_struct; - source_mgr_ = new jpeg_source_mgr; -#ifdef HAVE_SETJMP - error_mgr_ = new SetJmpErrorMgr; - decompress_struct_->err = jpeg_std_error(&error_mgr_->base); - // Override standard exit()-based error handler. - error_mgr_->base.error_exit = &ErrorHandler; -#endif - decompress_struct_->client_data = NULL; - source_mgr_->init_source = &init_source; - source_mgr_->fill_input_buffer = &fill_input_buffer; - source_mgr_->skip_input_data = &skip_input_data; - source_mgr_->resync_to_restart = &jpeg_resync_to_restart; - source_mgr_->term_source = &term_source; - jpeg_create_decompress(decompress_struct_); - decompress_struct_->src = source_mgr_; - buf_vec_.buffers = &buf_; - buf_vec_.len = 1; -} - -MJpegDecoder::~MJpegDecoder() { - jpeg_destroy_decompress(decompress_struct_); - delete decompress_struct_; - delete source_mgr_; -#ifdef HAVE_SETJMP - delete error_mgr_; -#endif - DestroyOutputBuffers(); -} - -LIBYUV_BOOL MJpegDecoder::LoadFrame(const uint8* src, size_t src_len) { - if (!ValidateJpeg(src, src_len)) { - return LIBYUV_FALSE; - } - - buf_.data = src; - buf_.len = (int)(src_len); - buf_vec_.pos = 0; - decompress_struct_->client_data = &buf_vec_; -#ifdef HAVE_SETJMP - if (setjmp(error_mgr_->setjmp_buffer)) { - // We called jpeg_read_header, it experienced an 
error, and we called - // longjmp() and rewound the stack to here. Return error. - return LIBYUV_FALSE; - } -#endif - if (jpeg_read_header(decompress_struct_, TRUE) != JPEG_HEADER_OK) { - // ERROR: Bad MJPEG header - return LIBYUV_FALSE; - } - AllocOutputBuffers(GetNumComponents()); - for (int i = 0; i < num_outbufs_; ++i) { - int scanlines_size = GetComponentScanlinesPerImcuRow(i); - if (scanlines_sizes_[i] != scanlines_size) { - if (scanlines_[i]) { - delete scanlines_[i]; - } - scanlines_[i] = new uint8* [scanlines_size]; - scanlines_sizes_[i] = scanlines_size; - } - - // We allocate padding for the final scanline to pad it up to DCTSIZE bytes - // to avoid memory errors, since jpeglib only reads full MCUs blocks. For - // the preceding scanlines, the padding is not needed/wanted because the - // following addresses will already be valid (they are the initial bytes of - // the next scanline) and will be overwritten when jpeglib writes out that - // next scanline. - int databuf_stride = GetComponentStride(i); - int databuf_size = scanlines_size * databuf_stride; - if (databuf_strides_[i] != databuf_stride) { - if (databuf_[i]) { - delete databuf_[i]; - } - databuf_[i] = new uint8[databuf_size]; - databuf_strides_[i] = databuf_stride; - } - - if (GetComponentStride(i) != GetComponentWidth(i)) { - has_scanline_padding_ = LIBYUV_TRUE; - } - } - return LIBYUV_TRUE; -} - -static int DivideAndRoundUp(int numerator, int denominator) { - return (numerator + denominator - 1) / denominator; -} - -static int DivideAndRoundDown(int numerator, int denominator) { - return numerator / denominator; -} - -// Returns width of the last loaded frame. -int MJpegDecoder::GetWidth() { - return decompress_struct_->image_width; -} - -// Returns height of the last loaded frame. -int MJpegDecoder::GetHeight() { - return decompress_struct_->image_height; -} - -// Returns format of the last loaded frame. The return value is one of the -// kColorSpace* constants. 
-int MJpegDecoder::GetColorSpace() { - return decompress_struct_->jpeg_color_space; -} - -// Number of color components in the color space. -int MJpegDecoder::GetNumComponents() { - return decompress_struct_->num_components; -} - -// Sample factors of the n-th component. -int MJpegDecoder::GetHorizSampFactor(int component) { - return decompress_struct_->comp_info[component].h_samp_factor; -} - -int MJpegDecoder::GetVertSampFactor(int component) { - return decompress_struct_->comp_info[component].v_samp_factor; -} - -int MJpegDecoder::GetHorizSubSampFactor(int component) { - return decompress_struct_->max_h_samp_factor / - GetHorizSampFactor(component); -} - -int MJpegDecoder::GetVertSubSampFactor(int component) { - return decompress_struct_->max_v_samp_factor / - GetVertSampFactor(component); -} - -int MJpegDecoder::GetImageScanlinesPerImcuRow() { - return decompress_struct_->max_v_samp_factor * DCTSIZE; -} - -int MJpegDecoder::GetComponentScanlinesPerImcuRow(int component) { - int vs = GetVertSubSampFactor(component); - return DivideAndRoundUp(GetImageScanlinesPerImcuRow(), vs); -} - -int MJpegDecoder::GetComponentWidth(int component) { - int hs = GetHorizSubSampFactor(component); - return DivideAndRoundUp(GetWidth(), hs); -} - -int MJpegDecoder::GetComponentHeight(int component) { - int vs = GetVertSubSampFactor(component); - return DivideAndRoundUp(GetHeight(), vs); -} - -// Get width in bytes padded out to a multiple of DCTSIZE -int MJpegDecoder::GetComponentStride(int component) { - return (GetComponentWidth(component) + DCTSIZE - 1) & ~(DCTSIZE - 1); -} - -int MJpegDecoder::GetComponentSize(int component) { - return GetComponentWidth(component) * GetComponentHeight(component); -} - -LIBYUV_BOOL MJpegDecoder::UnloadFrame() { -#ifdef HAVE_SETJMP - if (setjmp(error_mgr_->setjmp_buffer)) { - // We called jpeg_abort_decompress, it experienced an error, and we called - // longjmp() and rewound the stack to here. Return error. 
- return LIBYUV_FALSE; - } -#endif - jpeg_abort_decompress(decompress_struct_); - return LIBYUV_TRUE; -} - -// TODO(fbarchard): Allow rectangle to be specified: x, y, width, height. -LIBYUV_BOOL MJpegDecoder::DecodeToBuffers( - uint8** planes, int dst_width, int dst_height) { - if (dst_width != GetWidth() || - dst_height > GetHeight()) { - // ERROR: Bad dimensions - return LIBYUV_FALSE; - } -#ifdef HAVE_SETJMP - if (setjmp(error_mgr_->setjmp_buffer)) { - // We called into jpeglib, it experienced an error sometime during this - // function call, and we called longjmp() and rewound the stack to here. - // Return error. - return LIBYUV_FALSE; - } -#endif - if (!StartDecode()) { - return LIBYUV_FALSE; - } - SetScanlinePointers(databuf_); - int lines_left = dst_height; - // Compute amount of lines to skip to implement vertical crop. - // TODO(fbarchard): Ensure skip is a multiple of maximum component - // subsample. ie 2 - int skip = (GetHeight() - dst_height) / 2; - if (skip > 0) { - // There is no API to skip lines in the output data, so we read them - // into the temp buffer. - while (skip >= GetImageScanlinesPerImcuRow()) { - if (!DecodeImcuRow()) { - FinishDecode(); - return LIBYUV_FALSE; - } - skip -= GetImageScanlinesPerImcuRow(); - } - if (skip > 0) { - // Have a partial iMCU row left over to skip. Must read it and then - // copy the parts we want into the destination. 
- if (!DecodeImcuRow()) { - FinishDecode(); - return LIBYUV_FALSE; - } - for (int i = 0; i < num_outbufs_; ++i) { - // TODO(fbarchard): Compute skip to avoid this - assert(skip % GetVertSubSampFactor(i) == 0); - int rows_to_skip = - DivideAndRoundDown(skip, GetVertSubSampFactor(i)); - int scanlines_to_copy = GetComponentScanlinesPerImcuRow(i) - - rows_to_skip; - int data_to_skip = rows_to_skip * GetComponentStride(i); - CopyPlane(databuf_[i] + data_to_skip, GetComponentStride(i), - planes[i], GetComponentWidth(i), - GetComponentWidth(i), scanlines_to_copy); - planes[i] += scanlines_to_copy * GetComponentWidth(i); - } - lines_left -= (GetImageScanlinesPerImcuRow() - skip); - } - } - - // Read full MCUs but cropped horizontally - for (; lines_left > GetImageScanlinesPerImcuRow(); - lines_left -= GetImageScanlinesPerImcuRow()) { - if (!DecodeImcuRow()) { - FinishDecode(); - return LIBYUV_FALSE; - } - for (int i = 0; i < num_outbufs_; ++i) { - int scanlines_to_copy = GetComponentScanlinesPerImcuRow(i); - CopyPlane(databuf_[i], GetComponentStride(i), - planes[i], GetComponentWidth(i), - GetComponentWidth(i), scanlines_to_copy); - planes[i] += scanlines_to_copy * GetComponentWidth(i); - } - } - - if (lines_left > 0) { - // Have a partial iMCU row left over to decode. 
- if (!DecodeImcuRow()) { - FinishDecode(); - return LIBYUV_FALSE; - } - for (int i = 0; i < num_outbufs_; ++i) { - int scanlines_to_copy = - DivideAndRoundUp(lines_left, GetVertSubSampFactor(i)); - CopyPlane(databuf_[i], GetComponentStride(i), - planes[i], GetComponentWidth(i), - GetComponentWidth(i), scanlines_to_copy); - planes[i] += scanlines_to_copy * GetComponentWidth(i); - } - } - return FinishDecode(); -} - -LIBYUV_BOOL MJpegDecoder::DecodeToCallback(CallbackFunction fn, void* opaque, - int dst_width, int dst_height) { - if (dst_width != GetWidth() || - dst_height > GetHeight()) { - // ERROR: Bad dimensions - return LIBYUV_FALSE; - } -#ifdef HAVE_SETJMP - if (setjmp(error_mgr_->setjmp_buffer)) { - // We called into jpeglib, it experienced an error sometime during this - // function call, and we called longjmp() and rewound the stack to here. - // Return error. - return LIBYUV_FALSE; - } -#endif - if (!StartDecode()) { - return LIBYUV_FALSE; - } - SetScanlinePointers(databuf_); - int lines_left = dst_height; - // TODO(fbarchard): Compute amount of lines to skip to implement vertical crop - int skip = (GetHeight() - dst_height) / 2; - if (skip > 0) { - while (skip >= GetImageScanlinesPerImcuRow()) { - if (!DecodeImcuRow()) { - FinishDecode(); - return LIBYUV_FALSE; - } - skip -= GetImageScanlinesPerImcuRow(); - } - if (skip > 0) { - // Have a partial iMCU row left over to skip. - if (!DecodeImcuRow()) { - FinishDecode(); - return LIBYUV_FALSE; - } - for (int i = 0; i < num_outbufs_; ++i) { - // TODO(fbarchard): Compute skip to avoid this - assert(skip % GetVertSubSampFactor(i) == 0); - int rows_to_skip = DivideAndRoundDown(skip, GetVertSubSampFactor(i)); - int data_to_skip = rows_to_skip * GetComponentStride(i); - // Change our own data buffer pointers so we can pass them to the - // callback. 
- databuf_[i] += data_to_skip; - } - int scanlines_to_copy = GetImageScanlinesPerImcuRow() - skip; - (*fn)(opaque, databuf_, databuf_strides_, scanlines_to_copy); - // Now change them back. - for (int i = 0; i < num_outbufs_; ++i) { - int rows_to_skip = DivideAndRoundDown(skip, GetVertSubSampFactor(i)); - int data_to_skip = rows_to_skip * GetComponentStride(i); - databuf_[i] -= data_to_skip; - } - lines_left -= scanlines_to_copy; - } - } - // Read full MCUs until we get to the crop point. - for (; lines_left >= GetImageScanlinesPerImcuRow(); - lines_left -= GetImageScanlinesPerImcuRow()) { - if (!DecodeImcuRow()) { - FinishDecode(); - return LIBYUV_FALSE; - } - (*fn)(opaque, databuf_, databuf_strides_, GetImageScanlinesPerImcuRow()); - } - if (lines_left > 0) { - // Have a partial iMCU row left over to decode. - if (!DecodeImcuRow()) { - FinishDecode(); - return LIBYUV_FALSE; - } - (*fn)(opaque, databuf_, databuf_strides_, lines_left); - } - return FinishDecode(); -} - -void MJpegDecoder::init_source(j_decompress_ptr cinfo) { - fill_input_buffer(cinfo); -} - -boolean MJpegDecoder::fill_input_buffer(j_decompress_ptr cinfo) { - BufferVector* buf_vec = (BufferVector*)(cinfo->client_data); - if (buf_vec->pos >= buf_vec->len) { - assert(0 && "No more data"); - // ERROR: No more data - return FALSE; - } - cinfo->src->next_input_byte = buf_vec->buffers[buf_vec->pos].data; - cinfo->src->bytes_in_buffer = buf_vec->buffers[buf_vec->pos].len; - ++buf_vec->pos; - return TRUE; -} - -void MJpegDecoder::skip_input_data(j_decompress_ptr cinfo, - long num_bytes) { // NOLINT - cinfo->src->next_input_byte += num_bytes; -} - -void MJpegDecoder::term_source(j_decompress_ptr cinfo) { - // Nothing to do. -} - -#ifdef HAVE_SETJMP -void MJpegDecoder::ErrorHandler(j_common_ptr cinfo) { - // This is called when a jpeglib command experiences an error. 
Unfortunately - // jpeglib's error handling model is not very flexible, because it expects the - // error handler to not return--i.e., it wants the program to terminate. To - // recover from errors we use setjmp() as shown in their example. setjmp() is - // C's implementation for the "call with current continuation" functionality - // seen in some functional programming languages. - // A formatted message can be output, but is unsafe for release. -#ifdef DEBUG - char buf[JMSG_LENGTH_MAX]; - (*cinfo->err->format_message)(cinfo, buf); - // ERROR: Error in jpeglib: buf -#endif - - SetJmpErrorMgr* mgr = (SetJmpErrorMgr*)(cinfo->err); - // This rewinds the call stack to the point of the corresponding setjmp() - // and causes it to return (for a second time) with value 1. - longjmp(mgr->setjmp_buffer, 1); -} -#endif - -void MJpegDecoder::AllocOutputBuffers(int num_outbufs) { - if (num_outbufs != num_outbufs_) { - // We could perhaps optimize this case to resize the output buffers without - // necessarily having to delete and recreate each one, but it's not worth - // it. - DestroyOutputBuffers(); - - scanlines_ = new uint8** [num_outbufs]; - scanlines_sizes_ = new int[num_outbufs]; - databuf_ = new uint8* [num_outbufs]; - databuf_strides_ = new int[num_outbufs]; - - for (int i = 0; i < num_outbufs; ++i) { - scanlines_[i] = NULL; - scanlines_sizes_[i] = 0; - databuf_[i] = NULL; - databuf_strides_[i] = 0; - } - - num_outbufs_ = num_outbufs; - } -} - -void MJpegDecoder::DestroyOutputBuffers() { - for (int i = 0; i < num_outbufs_; ++i) { - delete [] scanlines_[i]; - delete [] databuf_[i]; - } - delete [] scanlines_; - delete [] databuf_; - delete [] scanlines_sizes_; - delete [] databuf_strides_; - scanlines_ = NULL; - databuf_ = NULL; - scanlines_sizes_ = NULL; - databuf_strides_ = NULL; - num_outbufs_ = 0; -} - -// JDCT_IFAST and do_block_smoothing improve performance substantially. 
-LIBYUV_BOOL MJpegDecoder::StartDecode() { - decompress_struct_->raw_data_out = TRUE; - decompress_struct_->dct_method = JDCT_IFAST; // JDCT_ISLOW is default - decompress_struct_->dither_mode = JDITHER_NONE; - // Not applicable to 'raw': - decompress_struct_->do_fancy_upsampling = LIBYUV_FALSE; - // Only for buffered mode: - decompress_struct_->enable_2pass_quant = LIBYUV_FALSE; - // Blocky but fast: - decompress_struct_->do_block_smoothing = LIBYUV_FALSE; - - if (!jpeg_start_decompress(decompress_struct_)) { - // ERROR: Couldn't start JPEG decompressor"; - return LIBYUV_FALSE; - } - return LIBYUV_TRUE; -} - -LIBYUV_BOOL MJpegDecoder::FinishDecode() { - // jpeglib considers it an error if we finish without decoding the whole - // image, so we call "abort" rather than "finish". - jpeg_abort_decompress(decompress_struct_); - return LIBYUV_TRUE; -} - -void MJpegDecoder::SetScanlinePointers(uint8** data) { - for (int i = 0; i < num_outbufs_; ++i) { - uint8* data_i = data[i]; - for (int j = 0; j < scanlines_sizes_[i]; ++j) { - scanlines_[i][j] = data_i; - data_i += GetComponentStride(i); - } - } -} - -inline LIBYUV_BOOL MJpegDecoder::DecodeImcuRow() { - return (unsigned int)(GetImageScanlinesPerImcuRow()) == - jpeg_read_raw_data(decompress_struct_, - scanlines_, - GetImageScanlinesPerImcuRow()); -} - -// The helper function which recognizes the jpeg sub-sampling type. -JpegSubsamplingType MJpegDecoder::JpegSubsamplingTypeHelper( - int* subsample_x, int* subsample_y, int number_of_components) { - if (number_of_components == 3) { // Color images. 
- if (subsample_x[0] == 1 && subsample_y[0] == 1 && - subsample_x[1] == 2 && subsample_y[1] == 2 && - subsample_x[2] == 2 && subsample_y[2] == 2) { - return kJpegYuv420; - } else if (subsample_x[0] == 1 && subsample_y[0] == 1 && - subsample_x[1] == 2 && subsample_y[1] == 1 && - subsample_x[2] == 2 && subsample_y[2] == 1) { - return kJpegYuv422; - } else if (subsample_x[0] == 1 && subsample_y[0] == 1 && - subsample_x[1] == 1 && subsample_y[1] == 1 && - subsample_x[2] == 1 && subsample_y[2] == 1) { - return kJpegYuv444; - } - } else if (number_of_components == 1) { // Grey-scale images. - if (subsample_x[0] == 1 && subsample_y[0] == 1) { - return kJpegYuv400; - } - } - return kJpegUnknown; -} - -} // namespace libyuv -#endif // HAVE_JPEG - diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/mjpeg_validate.cc b/drivers/theoraplayer/src/YUV/libyuv/src/mjpeg_validate.cc deleted file mode 100755 index 23d22d099b..0000000000 --- a/drivers/theoraplayer/src/YUV/libyuv/src/mjpeg_validate.cc +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright 2012 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "libyuv/mjpeg_decoder.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -// Helper function to validate the jpeg appears intact. -// TODO(fbarchard): Optimize case where SOI is found but EOI is not. 
-LIBYUV_BOOL ValidateJpeg(const uint8* sample, size_t sample_size) { - size_t i; - if (sample_size < 64) { - // ERROR: Invalid jpeg size: sample_size - return LIBYUV_FALSE; - } - if (sample[0] != 0xff || sample[1] != 0xd8) { // Start Of Image - // ERROR: Invalid jpeg initial start code - return LIBYUV_FALSE; - } - for (i = sample_size - 2; i > 1;) { - if (sample[i] != 0xd9) { - if (sample[i] == 0xff && sample[i + 1] == 0xd9) { // End Of Image - return LIBYUV_TRUE; // Success: Valid jpeg. - } - --i; - } - --i; - } - // ERROR: Invalid jpeg end code not found. Size sample_size - return LIBYUV_FALSE; -} - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif - diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/planar_functions.cc b/drivers/theoraplayer/src/YUV/libyuv/src/planar_functions.cc deleted file mode 100755 index f0a8989051..0000000000 --- a/drivers/theoraplayer/src/YUV/libyuv/src/planar_functions.cc +++ /dev/null @@ -1,2238 +0,0 @@ -/* - * Copyright 2011 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "libyuv/planar_functions.h" - -#include <string.h> // for memset() - -#include "libyuv/cpu_id.h" -#ifdef HAVE_JPEG -#include "libyuv/mjpeg_decoder.h" -#endif -#include "libyuv/row.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -// Copy a plane of data -LIBYUV_API -void CopyPlane(const uint8* src_y, int src_stride_y, - uint8* dst_y, int dst_stride_y, - int width, int height) { - int y; - void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C; - // Coalesce rows. 
- if (src_stride_y == width && - dst_stride_y == width) { - width *= height; - height = 1; - src_stride_y = dst_stride_y = 0; - } -#if defined(HAS_COPYROW_X86) - if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) { - CopyRow = CopyRow_X86; - } -#endif -#if defined(HAS_COPYROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32) && - IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) && - IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { - CopyRow = CopyRow_SSE2; - } -#endif -#if defined(HAS_COPYROW_ERMS) - if (TestCpuFlag(kCpuHasERMS)) { - CopyRow = CopyRow_ERMS; - } -#endif -#if defined(HAS_COPYROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 32)) { - CopyRow = CopyRow_NEON; - } -#endif -#if defined(HAS_COPYROW_MIPS) - if (TestCpuFlag(kCpuHasMIPS)) { - CopyRow = CopyRow_MIPS; - } -#endif - - // Copy plane - for (y = 0; y < height; ++y) { - CopyRow(src_y, dst_y, width); - src_y += src_stride_y; - dst_y += dst_stride_y; - } -} - -// Copy I422. -LIBYUV_API -int I422Copy(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { - int halfwidth = (width + 1) >> 1; - if (!src_y || !src_u || !src_v || - !dst_y || !dst_u || !dst_v || - width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - src_y = src_y + (height - 1) * src_stride_y; - src_u = src_u + (height - 1) * src_stride_u; - src_v = src_v + (height - 1) * src_stride_v; - src_stride_y = -src_stride_y; - src_stride_u = -src_stride_u; - src_stride_v = -src_stride_v; - } - CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); - CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, height); - CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, height); - return 0; -} - -// Copy I444. 
-LIBYUV_API -int I444Copy(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { - if (!src_y || !src_u || !src_v || - !dst_y || !dst_u || !dst_v || - width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - src_y = src_y + (height - 1) * src_stride_y; - src_u = src_u + (height - 1) * src_stride_u; - src_v = src_v + (height - 1) * src_stride_v; - src_stride_y = -src_stride_y; - src_stride_u = -src_stride_u; - src_stride_v = -src_stride_v; - } - - CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); - CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, width, height); - CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, width, height); - return 0; -} - -// Copy I400. -LIBYUV_API -int I400ToI400(const uint8* src_y, int src_stride_y, - uint8* dst_y, int dst_stride_y, - int width, int height) { - if (!src_y || !dst_y || width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - src_y = src_y + (height - 1) * src_stride_y; - src_stride_y = -src_stride_y; - } - CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); - return 0; -} - -// Convert I420 to I400. -LIBYUV_API -int I420ToI400(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - int width, int height) { - if (!src_y || !dst_y || width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. 
- if (height < 0) { - height = -height; - src_y = src_y + (height - 1) * src_stride_y; - src_stride_y = -src_stride_y; - } - CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); - return 0; -} - -// Mirror a plane of data. -void MirrorPlane(const uint8* src_y, int src_stride_y, - uint8* dst_y, int dst_stride_y, - int width, int height) { - int y; - void (*MirrorRow)(const uint8* src, uint8* dst, int width) = MirrorRow_C; - // Negative height means invert the image. - if (height < 0) { - height = -height; - src_y = src_y + (height - 1) * src_stride_y; - src_stride_y = -src_stride_y; - } -#if defined(HAS_MIRRORROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) { - MirrorRow = MirrorRow_NEON; - } -#endif -#if defined(HAS_MIRRORROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16)) { - MirrorRow = MirrorRow_SSE2; - } -#endif -#if defined(HAS_MIRRORROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16) && - IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) && - IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { - MirrorRow = MirrorRow_SSSE3; - } -#endif -#if defined(HAS_MIRRORROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 32)) { - MirrorRow = MirrorRow_AVX2; - } -#endif - - // Mirror plane - for (y = 0; y < height; ++y) { - MirrorRow(src_y, dst_y, width); - src_y += src_stride_y; - dst_y += dst_stride_y; - } -} - -// Convert YUY2 to I422. -LIBYUV_API -int YUY2ToI422(const uint8* src_yuy2, int src_stride_yuy2, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { - int y; - void (*YUY2ToUV422Row)(const uint8* src_yuy2, - uint8* dst_u, uint8* dst_v, int pix) = - YUY2ToUV422Row_C; - void (*YUY2ToYRow)(const uint8* src_yuy2, uint8* dst_y, int pix) = - YUY2ToYRow_C; - // Negative height means invert the image. 
- if (height < 0) { - height = -height; - src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2; - src_stride_yuy2 = -src_stride_yuy2; - } - // Coalesce rows. - if (src_stride_yuy2 == width * 2 && - dst_stride_y == width && - dst_stride_u * 2 == width && - dst_stride_v * 2 == width) { - width *= height; - height = 1; - src_stride_yuy2 = dst_stride_y = dst_stride_u = dst_stride_v = 0; - } -#if defined(HAS_YUY2TOYROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && width >= 16) { - YUY2ToUV422Row = YUY2ToUV422Row_Any_SSE2; - YUY2ToYRow = YUY2ToYRow_Any_SSE2; - if (IS_ALIGNED(width, 16)) { - YUY2ToUV422Row = YUY2ToUV422Row_Unaligned_SSE2; - YUY2ToYRow = YUY2ToYRow_Unaligned_SSE2; - if (IS_ALIGNED(src_yuy2, 16) && IS_ALIGNED(src_stride_yuy2, 16)) { - YUY2ToUV422Row = YUY2ToUV422Row_SSE2; - if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { - YUY2ToYRow = YUY2ToYRow_SSE2; - } - } - } - } -#endif -#if defined(HAS_YUY2TOYROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2) && width >= 32) { - YUY2ToUV422Row = YUY2ToUV422Row_Any_AVX2; - YUY2ToYRow = YUY2ToYRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - YUY2ToUV422Row = YUY2ToUV422Row_AVX2; - YUY2ToYRow = YUY2ToYRow_AVX2; - } - } -#endif -#if defined(HAS_YUY2TOYROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && width >= 8) { - YUY2ToYRow = YUY2ToYRow_Any_NEON; - if (width >= 16) { - YUY2ToUV422Row = YUY2ToUV422Row_Any_NEON; - } - if (IS_ALIGNED(width, 16)) { - YUY2ToYRow = YUY2ToYRow_NEON; - YUY2ToUV422Row = YUY2ToUV422Row_NEON; - } - } -#endif - - for (y = 0; y < height; ++y) { - YUY2ToUV422Row(src_yuy2, dst_u, dst_v, width); - YUY2ToYRow(src_yuy2, dst_y, width); - src_yuy2 += src_stride_yuy2; - dst_y += dst_stride_y; - dst_u += dst_stride_u; - dst_v += dst_stride_v; - } - return 0; -} - -// Convert UYVY to I422. 
-LIBYUV_API -int UYVYToI422(const uint8* src_uyvy, int src_stride_uyvy, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { - int y; - void (*UYVYToUV422Row)(const uint8* src_uyvy, - uint8* dst_u, uint8* dst_v, int pix) = - UYVYToUV422Row_C; - void (*UYVYToYRow)(const uint8* src_uyvy, - uint8* dst_y, int pix) = UYVYToYRow_C; - // Negative height means invert the image. - if (height < 0) { - height = -height; - src_uyvy = src_uyvy + (height - 1) * src_stride_uyvy; - src_stride_uyvy = -src_stride_uyvy; - } - // Coalesce rows. - if (src_stride_uyvy == width * 2 && - dst_stride_y == width && - dst_stride_u * 2 == width && - dst_stride_v * 2 == width) { - width *= height; - height = 1; - src_stride_uyvy = dst_stride_y = dst_stride_u = dst_stride_v = 0; - } -#if defined(HAS_UYVYTOYROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && width >= 16) { - UYVYToUV422Row = UYVYToUV422Row_Any_SSE2; - UYVYToYRow = UYVYToYRow_Any_SSE2; - if (IS_ALIGNED(width, 16)) { - UYVYToUV422Row = UYVYToUV422Row_Unaligned_SSE2; - UYVYToYRow = UYVYToYRow_Unaligned_SSE2; - if (IS_ALIGNED(src_uyvy, 16) && IS_ALIGNED(src_stride_uyvy, 16)) { - UYVYToUV422Row = UYVYToUV422Row_SSE2; - if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { - UYVYToYRow = UYVYToYRow_SSE2; - } - } - } - } -#endif -#if defined(HAS_UYVYTOYROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2) && width >= 32) { - UYVYToUV422Row = UYVYToUV422Row_Any_AVX2; - UYVYToYRow = UYVYToYRow_Any_AVX2; - if (IS_ALIGNED(width, 32)) { - UYVYToUV422Row = UYVYToUV422Row_AVX2; - UYVYToYRow = UYVYToYRow_AVX2; - } - } -#endif -#if defined(HAS_UYVYTOYROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && width >= 8) { - UYVYToYRow = UYVYToYRow_Any_NEON; - if (width >= 16) { - UYVYToUV422Row = UYVYToUV422Row_Any_NEON; - } - if (IS_ALIGNED(width, 16)) { - UYVYToYRow = UYVYToYRow_NEON; - UYVYToUV422Row = UYVYToUV422Row_NEON; - } - } -#endif - - for (y = 0; y < height; ++y) { - 
UYVYToUV422Row(src_uyvy, dst_u, dst_v, width); - UYVYToYRow(src_uyvy, dst_y, width); - src_uyvy += src_stride_uyvy; - dst_y += dst_stride_y; - dst_u += dst_stride_u; - dst_v += dst_stride_v; - } - return 0; -} - -// Mirror I400 with optional flipping -LIBYUV_API -int I400Mirror(const uint8* src_y, int src_stride_y, - uint8* dst_y, int dst_stride_y, - int width, int height) { - if (!src_y || !dst_y || - width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - src_y = src_y + (height - 1) * src_stride_y; - src_stride_y = -src_stride_y; - } - - MirrorPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); - return 0; -} - -// Mirror I420 with optional flipping -LIBYUV_API -int I420Mirror(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height) { - int halfwidth = (width + 1) >> 1; - int halfheight = (height + 1) >> 1; - if (!src_y || !src_u || !src_v || !dst_y || !dst_u || !dst_v || - width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - halfheight = (height + 1) >> 1; - src_y = src_y + (height - 1) * src_stride_y; - src_u = src_u + (halfheight - 1) * src_stride_u; - src_v = src_v + (halfheight - 1) * src_stride_v; - src_stride_y = -src_stride_y; - src_stride_u = -src_stride_u; - src_stride_v = -src_stride_v; - } - - if (dst_y) { - MirrorPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); - } - MirrorPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, halfheight); - MirrorPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, halfheight); - return 0; -} - -// ARGB mirror. 
-LIBYUV_API -int ARGBMirror(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, - int width, int height) { - int y; - void (*ARGBMirrorRow)(const uint8* src, uint8* dst, int width) = - ARGBMirrorRow_C; - if (!src_argb || !dst_argb || width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - src_argb = src_argb + (height - 1) * src_stride_argb; - src_stride_argb = -src_stride_argb; - } - -#if defined(HAS_ARGBMIRRORROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4) && - IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) && - IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { - ARGBMirrorRow = ARGBMirrorRow_SSSE3; - } -#endif -#if defined(HAS_ARGBMIRRORROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 8)) { - ARGBMirrorRow = ARGBMirrorRow_AVX2; - } -#endif -#if defined(HAS_ARGBMIRRORROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 4)) { - ARGBMirrorRow = ARGBMirrorRow_NEON; - } -#endif - - // Mirror plane - for (y = 0; y < height; ++y) { - ARGBMirrorRow(src_argb, dst_argb, width); - src_argb += src_stride_argb; - dst_argb += dst_stride_argb; - } - return 0; -} - -// Get a blender that optimized for the CPU, alignment and pixel count. -// As there are 6 blenders to choose from, the caller should try to use -// the same blend function for all pixels if possible. 
-LIBYUV_API -ARGBBlendRow GetARGBBlend() { - void (*ARGBBlendRow)(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width) = ARGBBlendRow_C; -#if defined(HAS_ARGBBLENDROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3)) { - ARGBBlendRow = ARGBBlendRow_SSSE3; - return ARGBBlendRow; - } -#endif -#if defined(HAS_ARGBBLENDROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2)) { - ARGBBlendRow = ARGBBlendRow_SSE2; - } -#endif -#if defined(HAS_ARGBBLENDROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - ARGBBlendRow = ARGBBlendRow_NEON; - } -#endif - return ARGBBlendRow; -} - -// Alpha Blend 2 ARGB images and store to destination. -LIBYUV_API -int ARGBBlend(const uint8* src_argb0, int src_stride_argb0, - const uint8* src_argb1, int src_stride_argb1, - uint8* dst_argb, int dst_stride_argb, - int width, int height) { - int y; - void (*ARGBBlendRow)(const uint8* src_argb, const uint8* src_argb1, - uint8* dst_argb, int width) = GetARGBBlend(); - if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - dst_argb = dst_argb + (height - 1) * dst_stride_argb; - dst_stride_argb = -dst_stride_argb; - } - // Coalesce rows. - if (src_stride_argb0 == width * 4 && - src_stride_argb1 == width * 4 && - dst_stride_argb == width * 4) { - width *= height; - height = 1; - src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0; - } - - for (y = 0; y < height; ++y) { - ARGBBlendRow(src_argb0, src_argb1, dst_argb, width); - src_argb0 += src_stride_argb0; - src_argb1 += src_stride_argb1; - dst_argb += dst_stride_argb; - } - return 0; -} - -// Multiply 2 ARGB images and store to destination. 
-LIBYUV_API -int ARGBMultiply(const uint8* src_argb0, int src_stride_argb0, - const uint8* src_argb1, int src_stride_argb1, - uint8* dst_argb, int dst_stride_argb, - int width, int height) { - int y; - void (*ARGBMultiplyRow)(const uint8* src0, const uint8* src1, uint8* dst, - int width) = ARGBMultiplyRow_C; - if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - dst_argb = dst_argb + (height - 1) * dst_stride_argb; - dst_stride_argb = -dst_stride_argb; - } - // Coalesce rows. - if (src_stride_argb0 == width * 4 && - src_stride_argb1 == width * 4 && - dst_stride_argb == width * 4) { - width *= height; - height = 1; - src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0; - } -#if defined(HAS_ARGBMULTIPLYROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && width >= 4) { - ARGBMultiplyRow = ARGBMultiplyRow_Any_SSE2; - if (IS_ALIGNED(width, 4)) { - ARGBMultiplyRow = ARGBMultiplyRow_SSE2; - } - } -#endif -#if defined(HAS_ARGBMULTIPLYROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2) && width >= 8) { - ARGBMultiplyRow = ARGBMultiplyRow_Any_AVX2; - if (IS_ALIGNED(width, 8)) { - ARGBMultiplyRow = ARGBMultiplyRow_AVX2; - } - } -#endif -#if defined(HAS_ARGBMULTIPLYROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && width >= 8) { - ARGBMultiplyRow = ARGBMultiplyRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - ARGBMultiplyRow = ARGBMultiplyRow_NEON; - } - } -#endif - - // Multiply plane - for (y = 0; y < height; ++y) { - ARGBMultiplyRow(src_argb0, src_argb1, dst_argb, width); - src_argb0 += src_stride_argb0; - src_argb1 += src_stride_argb1; - dst_argb += dst_stride_argb; - } - return 0; -} - -// Add 2 ARGB images and store to destination. 
-LIBYUV_API -int ARGBAdd(const uint8* src_argb0, int src_stride_argb0, - const uint8* src_argb1, int src_stride_argb1, - uint8* dst_argb, int dst_stride_argb, - int width, int height) { - int y; - void (*ARGBAddRow)(const uint8* src0, const uint8* src1, uint8* dst, - int width) = ARGBAddRow_C; - if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - dst_argb = dst_argb + (height - 1) * dst_stride_argb; - dst_stride_argb = -dst_stride_argb; - } - // Coalesce rows. - if (src_stride_argb0 == width * 4 && - src_stride_argb1 == width * 4 && - dst_stride_argb == width * 4) { - width *= height; - height = 1; - src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0; - } -#if defined(HAS_ARGBADDROW_SSE2) && defined(_MSC_VER) - if (TestCpuFlag(kCpuHasSSE2)) { - ARGBAddRow = ARGBAddRow_SSE2; - } -#endif -#if defined(HAS_ARGBADDROW_SSE2) && !defined(_MSC_VER) - if (TestCpuFlag(kCpuHasSSE2) && width >= 4) { - ARGBAddRow = ARGBAddRow_Any_SSE2; - if (IS_ALIGNED(width, 4)) { - ARGBAddRow = ARGBAddRow_SSE2; - } - } -#endif -#if defined(HAS_ARGBADDROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2) && width >= 8) { - ARGBAddRow = ARGBAddRow_Any_AVX2; - if (IS_ALIGNED(width, 8)) { - ARGBAddRow = ARGBAddRow_AVX2; - } - } -#endif -#if defined(HAS_ARGBADDROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && width >= 8) { - ARGBAddRow = ARGBAddRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - ARGBAddRow = ARGBAddRow_NEON; - } - } -#endif - - // Add plane - for (y = 0; y < height; ++y) { - ARGBAddRow(src_argb0, src_argb1, dst_argb, width); - src_argb0 += src_stride_argb0; - src_argb1 += src_stride_argb1; - dst_argb += dst_stride_argb; - } - return 0; -} - -// Subtract 2 ARGB images and store to destination. 
-LIBYUV_API -int ARGBSubtract(const uint8* src_argb0, int src_stride_argb0, - const uint8* src_argb1, int src_stride_argb1, - uint8* dst_argb, int dst_stride_argb, - int width, int height) { - int y; - void (*ARGBSubtractRow)(const uint8* src0, const uint8* src1, uint8* dst, - int width) = ARGBSubtractRow_C; - if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - dst_argb = dst_argb + (height - 1) * dst_stride_argb; - dst_stride_argb = -dst_stride_argb; - } - // Coalesce rows. - if (src_stride_argb0 == width * 4 && - src_stride_argb1 == width * 4 && - dst_stride_argb == width * 4) { - width *= height; - height = 1; - src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0; - } -#if defined(HAS_ARGBSUBTRACTROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && width >= 4) { - ARGBSubtractRow = ARGBSubtractRow_Any_SSE2; - if (IS_ALIGNED(width, 4)) { - ARGBSubtractRow = ARGBSubtractRow_SSE2; - } - } -#endif -#if defined(HAS_ARGBSUBTRACTROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2) && width >= 8) { - ARGBSubtractRow = ARGBSubtractRow_Any_AVX2; - if (IS_ALIGNED(width, 8)) { - ARGBSubtractRow = ARGBSubtractRow_AVX2; - } - } -#endif -#if defined(HAS_ARGBSUBTRACTROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && width >= 8) { - ARGBSubtractRow = ARGBSubtractRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - ARGBSubtractRow = ARGBSubtractRow_NEON; - } - } -#endif - - // Subtract plane - for (y = 0; y < height; ++y) { - ARGBSubtractRow(src_argb0, src_argb1, dst_argb, width); - src_argb0 += src_stride_argb0; - src_argb1 += src_stride_argb1; - dst_argb += dst_stride_argb; - } - return 0; -} - -// Convert I422 to BGRA. 
-LIBYUV_API -int I422ToBGRA(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_bgra, int dst_stride_bgra, - int width, int height) { - int y; - void (*I422ToBGRARow)(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width) = I422ToBGRARow_C; - if (!src_y || !src_u || !src_v || - !dst_bgra || - width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - dst_bgra = dst_bgra + (height - 1) * dst_stride_bgra; - dst_stride_bgra = -dst_stride_bgra; - } - // Coalesce rows. - if (src_stride_y == width && - src_stride_u * 2 == width && - src_stride_v * 2 == width && - dst_stride_bgra == width * 4) { - width *= height; - height = 1; - src_stride_y = src_stride_u = src_stride_v = dst_stride_bgra = 0; - } -#if defined(HAS_I422TOBGRAROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - I422ToBGRARow = I422ToBGRARow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - I422ToBGRARow = I422ToBGRARow_NEON; - } - } -#elif defined(HAS_I422TOBGRAROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) { - I422ToBGRARow = I422ToBGRARow_Any_SSSE3; - if (IS_ALIGNED(width, 8)) { - I422ToBGRARow = I422ToBGRARow_Unaligned_SSSE3; - if (IS_ALIGNED(dst_bgra, 16) && IS_ALIGNED(dst_stride_bgra, 16)) { - I422ToBGRARow = I422ToBGRARow_SSSE3; - } - } - } -#elif defined(HAS_I422TOBGRAROW_MIPS_DSPR2) - if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) && - IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) && - IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) && - IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) && - IS_ALIGNED(dst_bgra, 4) && IS_ALIGNED(dst_stride_bgra, 4)) { - I422ToBGRARow = I422ToBGRARow_MIPS_DSPR2; - } -#endif - - for (y = 0; y < height; ++y) { - I422ToBGRARow(src_y, src_u, src_v, dst_bgra, width); - dst_bgra += dst_stride_bgra; - src_y += src_stride_y; - src_u += src_stride_u; - src_v += 
src_stride_v; - } - return 0; -} - -// Convert I422 to ABGR. -LIBYUV_API -int I422ToABGR(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_abgr, int dst_stride_abgr, - int width, int height) { - int y; - void (*I422ToABGRRow)(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width) = I422ToABGRRow_C; - if (!src_y || !src_u || !src_v || - !dst_abgr || - width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - dst_abgr = dst_abgr + (height - 1) * dst_stride_abgr; - dst_stride_abgr = -dst_stride_abgr; - } - // Coalesce rows. - if (src_stride_y == width && - src_stride_u * 2 == width && - src_stride_v * 2 == width && - dst_stride_abgr == width * 4) { - width *= height; - height = 1; - src_stride_y = src_stride_u = src_stride_v = dst_stride_abgr = 0; - } -#if defined(HAS_I422TOABGRROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - I422ToABGRRow = I422ToABGRRow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - I422ToABGRRow = I422ToABGRRow_NEON; - } - } -#elif defined(HAS_I422TOABGRROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) { - I422ToABGRRow = I422ToABGRRow_Any_SSSE3; - if (IS_ALIGNED(width, 8)) { - I422ToABGRRow = I422ToABGRRow_Unaligned_SSSE3; - if (IS_ALIGNED(dst_abgr, 16) && IS_ALIGNED(dst_stride_abgr, 16)) { - I422ToABGRRow = I422ToABGRRow_SSSE3; - } - } - } -#endif - - for (y = 0; y < height; ++y) { - I422ToABGRRow(src_y, src_u, src_v, dst_abgr, width); - dst_abgr += dst_stride_abgr; - src_y += src_stride_y; - src_u += src_stride_u; - src_v += src_stride_v; - } - return 0; -} - -// Convert I422 to RGBA. 
-LIBYUV_API -int I422ToRGBA(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_rgba, int dst_stride_rgba, - int width, int height) { - int y; - void (*I422ToRGBARow)(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width) = I422ToRGBARow_C; - if (!src_y || !src_u || !src_v || - !dst_rgba || - width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - dst_rgba = dst_rgba + (height - 1) * dst_stride_rgba; - dst_stride_rgba = -dst_stride_rgba; - } - // Coalesce rows. - if (src_stride_y == width && - src_stride_u * 2 == width && - src_stride_v * 2 == width && - dst_stride_rgba == width * 4) { - width *= height; - height = 1; - src_stride_y = src_stride_u = src_stride_v = dst_stride_rgba = 0; - } -#if defined(HAS_I422TORGBAROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - I422ToRGBARow = I422ToRGBARow_Any_NEON; - if (IS_ALIGNED(width, 16)) { - I422ToRGBARow = I422ToRGBARow_NEON; - } - } -#elif defined(HAS_I422TORGBAROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) { - I422ToRGBARow = I422ToRGBARow_Any_SSSE3; - if (IS_ALIGNED(width, 8)) { - I422ToRGBARow = I422ToRGBARow_Unaligned_SSSE3; - if (IS_ALIGNED(dst_rgba, 16) && IS_ALIGNED(dst_stride_rgba, 16)) { - I422ToRGBARow = I422ToRGBARow_SSSE3; - } - } - } -#endif - - for (y = 0; y < height; ++y) { - I422ToRGBARow(src_y, src_u, src_v, dst_rgba, width); - dst_rgba += dst_stride_rgba; - src_y += src_stride_y; - src_u += src_stride_u; - src_v += src_stride_v; - } - return 0; -} - -// Convert NV12 to RGB565. 
-LIBYUV_API -int NV12ToRGB565(const uint8* src_y, int src_stride_y, - const uint8* src_uv, int src_stride_uv, - uint8* dst_rgb565, int dst_stride_rgb565, - int width, int height) { - int y; - void (*NV12ToRGB565Row)(const uint8* y_buf, - const uint8* uv_buf, - uint8* rgb_buf, - int width) = NV12ToRGB565Row_C; - if (!src_y || !src_uv || !dst_rgb565 || - width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - dst_rgb565 = dst_rgb565 + (height - 1) * dst_stride_rgb565; - dst_stride_rgb565 = -dst_stride_rgb565; - } -#if defined(HAS_NV12TORGB565ROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) { - NV12ToRGB565Row = NV12ToRGB565Row_Any_SSSE3; - if (IS_ALIGNED(width, 8)) { - NV12ToRGB565Row = NV12ToRGB565Row_SSSE3; - } - } -#elif defined(HAS_NV12TORGB565ROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && width >= 8) { - NV12ToRGB565Row = NV12ToRGB565Row_Any_NEON; - if (IS_ALIGNED(width, 8)) { - NV12ToRGB565Row = NV12ToRGB565Row_NEON; - } - } -#endif - - for (y = 0; y < height; ++y) { - NV12ToRGB565Row(src_y, src_uv, dst_rgb565, width); - dst_rgb565 += dst_stride_rgb565; - src_y += src_stride_y; - if (y & 1) { - src_uv += src_stride_uv; - } - } - return 0; -} - -// Convert NV21 to RGB565. -LIBYUV_API -int NV21ToRGB565(const uint8* src_y, int src_stride_y, - const uint8* src_vu, int src_stride_vu, - uint8* dst_rgb565, int dst_stride_rgb565, - int width, int height) { - int y; - void (*NV21ToRGB565Row)(const uint8* y_buf, - const uint8* src_vu, - uint8* rgb_buf, - int width) = NV21ToRGB565Row_C; - if (!src_y || !src_vu || !dst_rgb565 || - width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. 
- if (height < 0) { - height = -height; - dst_rgb565 = dst_rgb565 + (height - 1) * dst_stride_rgb565; - dst_stride_rgb565 = -dst_stride_rgb565; - } -#if defined(HAS_NV21TORGB565ROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) { - NV21ToRGB565Row = NV21ToRGB565Row_Any_SSSE3; - if (IS_ALIGNED(width, 8)) { - NV21ToRGB565Row = NV21ToRGB565Row_SSSE3; - } - } -#elif defined(HAS_NV21TORGB565ROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && width >= 8) { - NV21ToRGB565Row = NV21ToRGB565Row_Any_NEON; - if (IS_ALIGNED(width, 8)) { - NV21ToRGB565Row = NV21ToRGB565Row_NEON; - } - } -#endif - - for (y = 0; y < height; ++y) { - NV21ToRGB565Row(src_y, src_vu, dst_rgb565, width); - dst_rgb565 += dst_stride_rgb565; - src_y += src_stride_y; - if (y & 1) { - src_vu += src_stride_vu; - } - } - return 0; -} - -LIBYUV_API -void SetPlane(uint8* dst_y, int dst_stride_y, - int width, int height, - uint32 value) { - int y; - uint32 v32 = value | (value << 8) | (value << 16) | (value << 24); - void (*SetRow)(uint8* dst, uint32 value, int pix) = SetRow_C; - // Coalesce rows. 
- if (dst_stride_y == width) { - width *= height; - height = 1; - dst_stride_y = 0; - } -#if defined(HAS_SETROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && - IS_ALIGNED(width, 16) && - IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { - SetRow = SetRow_NEON; - } -#endif -#if defined(HAS_SETROW_X86) - if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) { - SetRow = SetRow_X86; - } -#endif - - // Set plane - for (y = 0; y < height; ++y) { - SetRow(dst_y, v32, width); - dst_y += dst_stride_y; - } -} - -// Draw a rectangle into I420 -LIBYUV_API -int I420Rect(uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int x, int y, - int width, int height, - int value_y, int value_u, int value_v) { - int halfwidth = (width + 1) >> 1; - int halfheight = (height + 1) >> 1; - uint8* start_y = dst_y + y * dst_stride_y + x; - uint8* start_u = dst_u + (y / 2) * dst_stride_u + (x / 2); - uint8* start_v = dst_v + (y / 2) * dst_stride_v + (x / 2); - if (!dst_y || !dst_u || !dst_v || - width <= 0 || height <= 0 || - x < 0 || y < 0 || - value_y < 0 || value_y > 255 || - value_u < 0 || value_u > 255 || - value_v < 0 || value_v > 255) { - return -1; - } - - SetPlane(start_y, dst_stride_y, width, height, value_y); - SetPlane(start_u, dst_stride_u, halfwidth, halfheight, value_u); - SetPlane(start_v, dst_stride_v, halfwidth, halfheight, value_v); - return 0; -} - -// Draw a rectangle into ARGB -LIBYUV_API -int ARGBRect(uint8* dst_argb, int dst_stride_argb, - int dst_x, int dst_y, - int width, int height, - uint32 value) { - if (!dst_argb || - width <= 0 || height <= 0 || - dst_x < 0 || dst_y < 0) { - return -1; - } - dst_argb += dst_y * dst_stride_argb + dst_x * 4; - // Coalesce rows. 
- if (dst_stride_argb == width * 4) { - width *= height; - height = 1; - dst_stride_argb = 0; - } -#if defined(HAS_SETROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16) && - IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { - ARGBSetRows_NEON(dst_argb, value, width, dst_stride_argb, height); - return 0; - } -#endif -#if defined(HAS_SETROW_X86) - if (TestCpuFlag(kCpuHasX86)) { - ARGBSetRows_X86(dst_argb, value, width, dst_stride_argb, height); - return 0; - } -#endif - ARGBSetRows_C(dst_argb, value, width, dst_stride_argb, height); - return 0; -} - -// Convert unattentuated ARGB to preattenuated ARGB. -// An unattenutated ARGB alpha blend uses the formula -// p = a * f + (1 - a) * b -// where -// p is output pixel -// f is foreground pixel -// b is background pixel -// a is alpha value from foreground pixel -// An preattenutated ARGB alpha blend uses the formula -// p = f + (1 - a) * b -// where -// f is foreground pixel premultiplied by alpha - -LIBYUV_API -int ARGBAttenuate(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, - int width, int height) { - int y; - void (*ARGBAttenuateRow)(const uint8* src_argb, uint8* dst_argb, - int width) = ARGBAttenuateRow_C; - if (!src_argb || !dst_argb || width <= 0 || height == 0) { - return -1; - } - if (height < 0) { - height = -height; - src_argb = src_argb + (height - 1) * src_stride_argb; - src_stride_argb = -src_stride_argb; - } - // Coalesce rows. 
- if (src_stride_argb == width * 4 && - dst_stride_argb == width * 4) { - width *= height; - height = 1; - src_stride_argb = dst_stride_argb = 0; - } -#if defined(HAS_ARGBATTENUATEROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && width >= 4 && - IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) && - IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { - ARGBAttenuateRow = ARGBAttenuateRow_Any_SSE2; - if (IS_ALIGNED(width, 4)) { - ARGBAttenuateRow = ARGBAttenuateRow_SSE2; - } - } -#endif -#if defined(HAS_ARGBATTENUATEROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 4) { - ARGBAttenuateRow = ARGBAttenuateRow_Any_SSSE3; - if (IS_ALIGNED(width, 4)) { - ARGBAttenuateRow = ARGBAttenuateRow_SSSE3; - } - } -#endif -#if defined(HAS_ARGBATTENUATEROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2) && width >= 8) { - ARGBAttenuateRow = ARGBAttenuateRow_Any_AVX2; - if (IS_ALIGNED(width, 8)) { - ARGBAttenuateRow = ARGBAttenuateRow_AVX2; - } - } -#endif -#if defined(HAS_ARGBATTENUATEROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && width >= 8) { - ARGBAttenuateRow = ARGBAttenuateRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - ARGBAttenuateRow = ARGBAttenuateRow_NEON; - } - } -#endif - - for (y = 0; y < height; ++y) { - ARGBAttenuateRow(src_argb, dst_argb, width); - src_argb += src_stride_argb; - dst_argb += dst_stride_argb; - } - return 0; -} - -// Convert preattentuated ARGB to unattenuated ARGB. -LIBYUV_API -int ARGBUnattenuate(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, - int width, int height) { - int y; - void (*ARGBUnattenuateRow)(const uint8* src_argb, uint8* dst_argb, - int width) = ARGBUnattenuateRow_C; - if (!src_argb || !dst_argb || width <= 0 || height == 0) { - return -1; - } - if (height < 0) { - height = -height; - src_argb = src_argb + (height - 1) * src_stride_argb; - src_stride_argb = -src_stride_argb; - } - // Coalesce rows. 
- if (src_stride_argb == width * 4 && - dst_stride_argb == width * 4) { - width *= height; - height = 1; - src_stride_argb = dst_stride_argb = 0; - } -#if defined(HAS_ARGBUNATTENUATEROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && width >= 4) { - ARGBUnattenuateRow = ARGBUnattenuateRow_Any_SSE2; - if (IS_ALIGNED(width, 4)) { - ARGBUnattenuateRow = ARGBUnattenuateRow_SSE2; - } - } -#endif -#if defined(HAS_ARGBUNATTENUATEROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2) && width >= 8) { - ARGBUnattenuateRow = ARGBUnattenuateRow_Any_AVX2; - if (IS_ALIGNED(width, 8)) { - ARGBUnattenuateRow = ARGBUnattenuateRow_AVX2; - } - } -#endif -// TODO(fbarchard): Neon version. - - for (y = 0; y < height; ++y) { - ARGBUnattenuateRow(src_argb, dst_argb, width); - src_argb += src_stride_argb; - dst_argb += dst_stride_argb; - } - return 0; -} - -// Convert ARGB to Grayed ARGB. -LIBYUV_API -int ARGBGrayTo(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, - int width, int height) { - int y; - void (*ARGBGrayRow)(const uint8* src_argb, uint8* dst_argb, - int width) = ARGBGrayRow_C; - if (!src_argb || !dst_argb || width <= 0 || height == 0) { - return -1; - } - if (height < 0) { - height = -height; - src_argb = src_argb + (height - 1) * src_stride_argb; - src_stride_argb = -src_stride_argb; - } - // Coalesce rows. 
- if (src_stride_argb == width * 4 && - dst_stride_argb == width * 4) { - width *= height; - height = 1; - src_stride_argb = dst_stride_argb = 0; - } -#if defined(HAS_ARGBGRAYROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8) && - IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) && - IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { - ARGBGrayRow = ARGBGrayRow_SSSE3; - } -#elif defined(HAS_ARGBGRAYROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) { - ARGBGrayRow = ARGBGrayRow_NEON; - } -#endif - - for (y = 0; y < height; ++y) { - ARGBGrayRow(src_argb, dst_argb, width); - src_argb += src_stride_argb; - dst_argb += dst_stride_argb; - } - return 0; -} - -// Make a rectangle of ARGB gray scale. -LIBYUV_API -int ARGBGray(uint8* dst_argb, int dst_stride_argb, - int dst_x, int dst_y, - int width, int height) { - int y; - void (*ARGBGrayRow)(const uint8* src_argb, uint8* dst_argb, - int width) = ARGBGrayRow_C; - uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4; - if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) { - return -1; - } - // Coalesce rows. - if (dst_stride_argb == width * 4) { - width *= height; - height = 1; - dst_stride_argb = 0; - } -#if defined(HAS_ARGBGRAYROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8) && - IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { - ARGBGrayRow = ARGBGrayRow_SSSE3; - } -#elif defined(HAS_ARGBGRAYROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) { - ARGBGrayRow = ARGBGrayRow_NEON; - } -#endif - for (y = 0; y < height; ++y) { - ARGBGrayRow(dst, dst, width); - dst += dst_stride_argb; - } - return 0; -} - -// Make a rectangle of ARGB Sepia tone. 
-LIBYUV_API -int ARGBSepia(uint8* dst_argb, int dst_stride_argb, - int dst_x, int dst_y, int width, int height) { - int y; - void (*ARGBSepiaRow)(uint8* dst_argb, int width) = ARGBSepiaRow_C; - uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4; - if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) { - return -1; - } - // Coalesce rows. - if (dst_stride_argb == width * 4) { - width *= height; - height = 1; - dst_stride_argb = 0; - } -#if defined(HAS_ARGBSEPIAROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8) && - IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { - ARGBSepiaRow = ARGBSepiaRow_SSSE3; - } -#elif defined(HAS_ARGBSEPIAROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) { - ARGBSepiaRow = ARGBSepiaRow_NEON; - } -#endif - for (y = 0; y < height; ++y) { - ARGBSepiaRow(dst, width); - dst += dst_stride_argb; - } - return 0; -} - -// Apply a 4x4 matrix to each ARGB pixel. -// Note: Normally for shading, but can be used to swizzle or invert. -LIBYUV_API -int ARGBColorMatrix(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, - const int8* matrix_argb, - int width, int height) { - int y; - void (*ARGBColorMatrixRow)(const uint8* src_argb, uint8* dst_argb, - const int8* matrix_argb, int width) = ARGBColorMatrixRow_C; - if (!src_argb || !dst_argb || !matrix_argb || width <= 0 || height == 0) { - return -1; - } - if (height < 0) { - height = -height; - src_argb = src_argb + (height - 1) * src_stride_argb; - src_stride_argb = -src_stride_argb; - } - // Coalesce rows. 
- if (src_stride_argb == width * 4 && - dst_stride_argb == width * 4) { - width *= height; - height = 1; - src_stride_argb = dst_stride_argb = 0; - } -#if defined(HAS_ARGBCOLORMATRIXROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8) && - IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { - ARGBColorMatrixRow = ARGBColorMatrixRow_SSSE3; - } -#elif defined(HAS_ARGBCOLORMATRIXROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) { - ARGBColorMatrixRow = ARGBColorMatrixRow_NEON; - } -#endif - for (y = 0; y < height; ++y) { - ARGBColorMatrixRow(src_argb, dst_argb, matrix_argb, width); - src_argb += src_stride_argb; - dst_argb += dst_stride_argb; - } - return 0; -} - -// Apply a 4x3 matrix to each ARGB pixel. -// Deprecated. -LIBYUV_API -int RGBColorMatrix(uint8* dst_argb, int dst_stride_argb, - const int8* matrix_rgb, - int dst_x, int dst_y, int width, int height) { - SIMD_ALIGNED(int8 matrix_argb[16]); - uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4; - if (!dst_argb || !matrix_rgb || width <= 0 || height <= 0 || - dst_x < 0 || dst_y < 0) { - return -1; - } - - // Convert 4x3 7 bit matrix to 4x4 6 bit matrix. - matrix_argb[0] = matrix_rgb[0] / 2; - matrix_argb[1] = matrix_rgb[1] / 2; - matrix_argb[2] = matrix_rgb[2] / 2; - matrix_argb[3] = matrix_rgb[3] / 2; - matrix_argb[4] = matrix_rgb[4] / 2; - matrix_argb[5] = matrix_rgb[5] / 2; - matrix_argb[6] = matrix_rgb[6] / 2; - matrix_argb[7] = matrix_rgb[7] / 2; - matrix_argb[8] = matrix_rgb[8] / 2; - matrix_argb[9] = matrix_rgb[9] / 2; - matrix_argb[10] = matrix_rgb[10] / 2; - matrix_argb[11] = matrix_rgb[11] / 2; - matrix_argb[14] = matrix_argb[13] = matrix_argb[12] = 0; - matrix_argb[15] = 64; // 1.0 - - return ARGBColorMatrix((const uint8*)(dst), dst_stride_argb, - dst, dst_stride_argb, - &matrix_argb[0], width, height); -} - -// Apply a color table each ARGB pixel. -// Table contains 256 ARGB values. 
-LIBYUV_API -int ARGBColorTable(uint8* dst_argb, int dst_stride_argb, - const uint8* table_argb, - int dst_x, int dst_y, int width, int height) { - int y; - void (*ARGBColorTableRow)(uint8* dst_argb, const uint8* table_argb, - int width) = ARGBColorTableRow_C; - uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4; - if (!dst_argb || !table_argb || width <= 0 || height <= 0 || - dst_x < 0 || dst_y < 0) { - return -1; - } - // Coalesce rows. - if (dst_stride_argb == width * 4) { - width *= height; - height = 1; - dst_stride_argb = 0; - } -#if defined(HAS_ARGBCOLORTABLEROW_X86) - if (TestCpuFlag(kCpuHasX86)) { - ARGBColorTableRow = ARGBColorTableRow_X86; - } -#endif - for (y = 0; y < height; ++y) { - ARGBColorTableRow(dst, table_argb, width); - dst += dst_stride_argb; - } - return 0; -} - -// Apply a color table each ARGB pixel but preserve destination alpha. -// Table contains 256 ARGB values. -LIBYUV_API -int RGBColorTable(uint8* dst_argb, int dst_stride_argb, - const uint8* table_argb, - int dst_x, int dst_y, int width, int height) { - int y; - void (*RGBColorTableRow)(uint8* dst_argb, const uint8* table_argb, - int width) = RGBColorTableRow_C; - uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4; - if (!dst_argb || !table_argb || width <= 0 || height <= 0 || - dst_x < 0 || dst_y < 0) { - return -1; - } - // Coalesce rows. - if (dst_stride_argb == width * 4) { - width *= height; - height = 1; - dst_stride_argb = 0; - } -#if defined(HAS_RGBCOLORTABLEROW_X86) - if (TestCpuFlag(kCpuHasX86)) { - RGBColorTableRow = RGBColorTableRow_X86; - } -#endif - for (y = 0; y < height; ++y) { - RGBColorTableRow(dst, table_argb, width); - dst += dst_stride_argb; - } - return 0; -} - -// ARGBQuantize is used to posterize art. -// e.g. rgb / qvalue * qvalue + qvalue / 2 -// But the low levels implement efficiently with 3 parameters, and could be -// used for other high level operations. 
-// dst_argb[0] = (b * scale >> 16) * interval_size + interval_offset; -// where scale is 1 / interval_size as a fixed point value. -// The divide is replaces with a multiply by reciprocal fixed point multiply. -// Caveat - although SSE2 saturates, the C function does not and should be used -// with care if doing anything but quantization. -LIBYUV_API -int ARGBQuantize(uint8* dst_argb, int dst_stride_argb, - int scale, int interval_size, int interval_offset, - int dst_x, int dst_y, int width, int height) { - int y; - void (*ARGBQuantizeRow)(uint8* dst_argb, int scale, int interval_size, - int interval_offset, int width) = ARGBQuantizeRow_C; - uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4; - if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0 || - interval_size < 1 || interval_size > 255) { - return -1; - } - // Coalesce rows. - if (dst_stride_argb == width * 4) { - width *= height; - height = 1; - dst_stride_argb = 0; - } -#if defined(HAS_ARGBQUANTIZEROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4) && - IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { - ARGBQuantizeRow = ARGBQuantizeRow_SSE2; - } -#elif defined(HAS_ARGBQUANTIZEROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) { - ARGBQuantizeRow = ARGBQuantizeRow_NEON; - } -#endif - for (y = 0; y < height; ++y) { - ARGBQuantizeRow(dst, scale, interval_size, interval_offset, width); - dst += dst_stride_argb; - } - return 0; -} - -// Computes table of cumulative sum for image where the value is the sum -// of all values above and to the left of the entry. Used by ARGBBlur. 
-LIBYUV_API -int ARGBComputeCumulativeSum(const uint8* src_argb, int src_stride_argb, - int32* dst_cumsum, int dst_stride32_cumsum, - int width, int height) { - int y; - void (*ComputeCumulativeSumRow)(const uint8* row, int32* cumsum, - const int32* previous_cumsum, int width) = ComputeCumulativeSumRow_C; - int32* previous_cumsum = dst_cumsum; - if (!dst_cumsum || !src_argb || width <= 0 || height <= 0) { - return -1; - } -#if defined(HAS_CUMULATIVESUMTOAVERAGEROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2)) { - ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2; - } -#endif - memset(dst_cumsum, 0, width * sizeof(dst_cumsum[0]) * 4); // 4 int per pixel. - for (y = 0; y < height; ++y) { - ComputeCumulativeSumRow(src_argb, dst_cumsum, previous_cumsum, width); - previous_cumsum = dst_cumsum; - dst_cumsum += dst_stride32_cumsum; - src_argb += src_stride_argb; - } - return 0; -} - -// Blur ARGB image. -// Caller should allocate CumulativeSum table of width * height * 16 bytes -// aligned to 16 byte boundary. height can be radius * 2 + 2 to save memory -// as the buffer is treated as circular. 
-LIBYUV_API -int ARGBBlur(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, - int32* dst_cumsum, int dst_stride32_cumsum, - int width, int height, int radius) { - int y; - void (*ComputeCumulativeSumRow)(const uint8 *row, int32 *cumsum, - const int32* previous_cumsum, int width) = ComputeCumulativeSumRow_C; - void (*CumulativeSumToAverageRow)(const int32* topleft, const int32* botleft, - int width, int area, uint8* dst, int count) = CumulativeSumToAverageRow_C; - int32* cumsum_bot_row; - int32* max_cumsum_bot_row; - int32* cumsum_top_row; - - if (!src_argb || !dst_argb || width <= 0 || height == 0) { - return -1; - } - if (height < 0) { - height = -height; - src_argb = src_argb + (height - 1) * src_stride_argb; - src_stride_argb = -src_stride_argb; - } - if (radius > height) { - radius = height; - } - if (radius > (width / 2 - 1)) { - radius = width / 2 - 1; - } - if (radius <= 0) { - return -1; - } -#if defined(HAS_CUMULATIVESUMTOAVERAGEROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2)) { - ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2; - CumulativeSumToAverageRow = CumulativeSumToAverageRow_SSE2; - } -#endif - // Compute enough CumulativeSum for first row to be blurred. After this - // one row of CumulativeSum is updated at a time. - ARGBComputeCumulativeSum(src_argb, src_stride_argb, - dst_cumsum, dst_stride32_cumsum, - width, radius); - - src_argb = src_argb + radius * src_stride_argb; - cumsum_bot_row = &dst_cumsum[(radius - 1) * dst_stride32_cumsum]; - - max_cumsum_bot_row = &dst_cumsum[(radius * 2 + 2) * dst_stride32_cumsum]; - cumsum_top_row = &dst_cumsum[0]; - - for (y = 0; y < height; ++y) { - int top_y = ((y - radius - 1) >= 0) ? (y - radius - 1) : 0; - int bot_y = ((y + radius) < height) ? (y + radius) : (height - 1); - int area = radius * (bot_y - top_y); - int boxwidth = radius * 4; - int x; - int n; - - // Increment cumsum_top_row pointer with circular buffer wrap around. 
- if (top_y) { - cumsum_top_row += dst_stride32_cumsum; - if (cumsum_top_row >= max_cumsum_bot_row) { - cumsum_top_row = dst_cumsum; - } - } - // Increment cumsum_bot_row pointer with circular buffer wrap around and - // then fill in a row of CumulativeSum. - if ((y + radius) < height) { - const int32* prev_cumsum_bot_row = cumsum_bot_row; - cumsum_bot_row += dst_stride32_cumsum; - if (cumsum_bot_row >= max_cumsum_bot_row) { - cumsum_bot_row = dst_cumsum; - } - ComputeCumulativeSumRow(src_argb, cumsum_bot_row, prev_cumsum_bot_row, - width); - src_argb += src_stride_argb; - } - - // Left clipped. - for (x = 0; x < radius + 1; ++x) { - CumulativeSumToAverageRow(cumsum_top_row, cumsum_bot_row, - boxwidth, area, &dst_argb[x * 4], 1); - area += (bot_y - top_y); - boxwidth += 4; - } - - // Middle unclipped. - n = (width - 1) - radius - x + 1; - CumulativeSumToAverageRow(cumsum_top_row, cumsum_bot_row, - boxwidth, area, &dst_argb[x * 4], n); - - // Right clipped. - for (x += n; x <= width - 1; ++x) { - area -= (bot_y - top_y); - boxwidth -= 4; - CumulativeSumToAverageRow(cumsum_top_row + (x - radius - 1) * 4, - cumsum_bot_row + (x - radius - 1) * 4, - boxwidth, area, &dst_argb[x * 4], 1); - } - dst_argb += dst_stride_argb; - } - return 0; -} - -// Multiply ARGB image by a specified ARGB value. -LIBYUV_API -int ARGBShade(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, - int width, int height, uint32 value) { - int y; - void (*ARGBShadeRow)(const uint8* src_argb, uint8* dst_argb, - int width, uint32 value) = ARGBShadeRow_C; - if (!src_argb || !dst_argb || width <= 0 || height == 0 || value == 0u) { - return -1; - } - if (height < 0) { - height = -height; - src_argb = src_argb + (height - 1) * src_stride_argb; - src_stride_argb = -src_stride_argb; - } - // Coalesce rows. 
- if (src_stride_argb == width * 4 && - dst_stride_argb == width * 4) { - width *= height; - height = 1; - src_stride_argb = dst_stride_argb = 0; - } -#if defined(HAS_ARGBSHADEROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4) && - IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) && - IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { - ARGBShadeRow = ARGBShadeRow_SSE2; - } -#elif defined(HAS_ARGBSHADEROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) { - ARGBShadeRow = ARGBShadeRow_NEON; - } -#endif - - for (y = 0; y < height; ++y) { - ARGBShadeRow(src_argb, dst_argb, width, value); - src_argb += src_stride_argb; - dst_argb += dst_stride_argb; - } - return 0; -} - -// Interpolate 2 ARGB images by specified amount (0 to 255). -LIBYUV_API -int ARGBInterpolate(const uint8* src_argb0, int src_stride_argb0, - const uint8* src_argb1, int src_stride_argb1, - uint8* dst_argb, int dst_stride_argb, - int width, int height, int interpolation) { - int y; - void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr, - ptrdiff_t src_stride, int dst_width, - int source_y_fraction) = InterpolateRow_C; - if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - dst_argb = dst_argb + (height - 1) * dst_stride_argb; - dst_stride_argb = -dst_stride_argb; - } - // Coalesce rows. 
- if (src_stride_argb0 == width * 4 && - src_stride_argb1 == width * 4 && - dst_stride_argb == width * 4) { - width *= height; - height = 1; - src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0; - } -#if defined(HAS_INTERPOLATEROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && width >= 4) { - InterpolateRow = InterpolateRow_Any_SSE2; - if (IS_ALIGNED(width, 4)) { - InterpolateRow = InterpolateRow_Unaligned_SSE2; - if (IS_ALIGNED(src_argb0, 16) && IS_ALIGNED(src_stride_argb0, 16) && - IS_ALIGNED(src_argb1, 16) && IS_ALIGNED(src_stride_argb1, 16) && - IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { - InterpolateRow = InterpolateRow_SSE2; - } - } - } -#endif -#if defined(HAS_INTERPOLATEROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 4) { - InterpolateRow = InterpolateRow_Any_SSSE3; - if (IS_ALIGNED(width, 4)) { - InterpolateRow = InterpolateRow_Unaligned_SSSE3; - if (IS_ALIGNED(src_argb0, 16) && IS_ALIGNED(src_stride_argb0, 16) && - IS_ALIGNED(src_argb1, 16) && IS_ALIGNED(src_stride_argb1, 16) && - IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { - InterpolateRow = InterpolateRow_SSSE3; - } - } - } -#endif -#if defined(HAS_INTERPOLATEROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2) && width >= 8) { - InterpolateRow = InterpolateRow_Any_AVX2; - if (IS_ALIGNED(width, 8)) { - InterpolateRow = InterpolateRow_AVX2; - } - } -#endif -#if defined(HAS_INTERPOLATEROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && width >= 4) { - InterpolateRow = InterpolateRow_Any_NEON; - if (IS_ALIGNED(width, 4)) { - InterpolateRow = InterpolateRow_NEON; - } - } -#endif -#if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2) - if (TestCpuFlag(kCpuHasMIPS_DSPR2) && width >= 1 && - IS_ALIGNED(src_argb0, 4) && IS_ALIGNED(src_stride_argb0, 4) && - IS_ALIGNED(src_argb1, 4) && IS_ALIGNED(src_stride_argb1, 4) && - IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) { - ScaleARGBFilterRows = InterpolateRow_MIPS_DSPR2; - } -#endif - - for (y = 0; y < height; ++y) { - 
InterpolateRow(dst_argb, src_argb0, src_argb1 - src_argb0, - width * 4, interpolation); - src_argb0 += src_stride_argb0; - src_argb1 += src_stride_argb1; - dst_argb += dst_stride_argb; - } - return 0; -} - -// Shuffle ARGB channel order. e.g. BGRA to ARGB. -LIBYUV_API -int ARGBShuffle(const uint8* src_bgra, int src_stride_bgra, - uint8* dst_argb, int dst_stride_argb, - const uint8* shuffler, int width, int height) { - int y; - void (*ARGBShuffleRow)(const uint8* src_bgra, uint8* dst_argb, - const uint8* shuffler, int pix) = ARGBShuffleRow_C; - if (!src_bgra || !dst_argb || - width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - src_bgra = src_bgra + (height - 1) * src_stride_bgra; - src_stride_bgra = -src_stride_bgra; - } - // Coalesce rows. - if (src_stride_bgra == width * 4 && - dst_stride_argb == width * 4) { - width *= height; - height = 1; - src_stride_bgra = dst_stride_argb = 0; - } -#if defined(HAS_ARGBSHUFFLEROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && width >= 4) { - ARGBShuffleRow = ARGBShuffleRow_Any_SSE2; - if (IS_ALIGNED(width, 4)) { - ARGBShuffleRow = ARGBShuffleRow_SSE2; - } - } -#endif -#if defined(HAS_ARGBSHUFFLEROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) { - ARGBShuffleRow = ARGBShuffleRow_Any_SSSE3; - if (IS_ALIGNED(width, 8)) { - ARGBShuffleRow = ARGBShuffleRow_Unaligned_SSSE3; - if (IS_ALIGNED(src_bgra, 16) && IS_ALIGNED(src_stride_bgra, 16) && - IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { - ARGBShuffleRow = ARGBShuffleRow_SSSE3; - } - } - } -#endif -#if defined(HAS_ARGBSHUFFLEROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2) && width >= 16) { - ARGBShuffleRow = ARGBShuffleRow_Any_AVX2; - if (IS_ALIGNED(width, 16)) { - ARGBShuffleRow = ARGBShuffleRow_AVX2; - } - } -#endif -#if defined(HAS_ARGBSHUFFLEROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && width >= 4) { - ARGBShuffleRow = ARGBShuffleRow_Any_NEON; - if (IS_ALIGNED(width, 4)) { - 
ARGBShuffleRow = ARGBShuffleRow_NEON; - } - } -#endif - - for (y = 0; y < height; ++y) { - ARGBShuffleRow(src_bgra, dst_argb, shuffler, width); - src_bgra += src_stride_bgra; - dst_argb += dst_stride_argb; - } - return 0; -} - -// Sobel ARGB effect. -static int ARGBSobelize(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, - int width, int height, - void (*SobelRow)(const uint8* src_sobelx, - const uint8* src_sobely, - uint8* dst, int width)) { - int y; - void (*ARGBToBayerRow)(const uint8* src_argb, uint8* dst_bayer, - uint32 selector, int pix) = ARGBToBayerGGRow_C; - void (*SobelYRow)(const uint8* src_y0, const uint8* src_y1, - uint8* dst_sobely, int width) = SobelYRow_C; - void (*SobelXRow)(const uint8* src_y0, const uint8* src_y1, - const uint8* src_y2, uint8* dst_sobely, int width) = - SobelXRow_C; - const int kEdge = 16; // Extra pixels at start of row for extrude/align. - if (!src_argb || !dst_argb || width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - src_argb = src_argb + (height - 1) * src_stride_argb; - src_stride_argb = -src_stride_argb; - } - // ARGBToBayer used to select G channel from ARGB. 
-#if defined(HAS_ARGBTOBAYERGGROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && width >= 8 && - IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) { - ARGBToBayerRow = ARGBToBayerGGRow_Any_SSE2; - if (IS_ALIGNED(width, 8)) { - ARGBToBayerRow = ARGBToBayerGGRow_SSE2; - } - } -#endif -#if defined(HAS_ARGBTOBAYERROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && width >= 8 && - IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) { - ARGBToBayerRow = ARGBToBayerRow_Any_SSSE3; - if (IS_ALIGNED(width, 8)) { - ARGBToBayerRow = ARGBToBayerRow_SSSE3; - } - } -#endif -#if defined(HAS_ARGBTOBAYERGGROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && width >= 8) { - ARGBToBayerRow = ARGBToBayerGGRow_Any_NEON; - if (IS_ALIGNED(width, 8)) { - ARGBToBayerRow = ARGBToBayerGGRow_NEON; - } - } -#endif -#if defined(HAS_SOBELYROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2)) { - SobelYRow = SobelYRow_SSE2; - } -#endif -#if defined(HAS_SOBELYROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - SobelYRow = SobelYRow_NEON; - } -#endif -#if defined(HAS_SOBELXROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2)) { - SobelXRow = SobelXRow_SSE2; - } -#endif -#if defined(HAS_SOBELXROW_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - SobelXRow = SobelXRow_NEON; - } -#endif - { - // 3 rows with edges before/after. - const int kRowSize = (width + kEdge + 15) & ~15; - align_buffer_64(rows, kRowSize * 2 + (kEdge + kRowSize * 3 + kEdge)); - uint8* row_sobelx = rows; - uint8* row_sobely = rows + kRowSize; - uint8* row_y = rows + kRowSize * 2; - - // Convert first row. - uint8* row_y0 = row_y + kEdge; - uint8* row_y1 = row_y0 + kRowSize; - uint8* row_y2 = row_y1 + kRowSize; - ARGBToBayerRow(src_argb, row_y0, 0x0d090501, width); - row_y0[-1] = row_y0[0]; - memset(row_y0 + width, row_y0[width - 1], 16); // Extrude 16 for valgrind. 
- ARGBToBayerRow(src_argb, row_y1, 0x0d090501, width); - row_y1[-1] = row_y1[0]; - memset(row_y1 + width, row_y1[width - 1], 16); - memset(row_y2 + width, 0, 16); - - for (y = 0; y < height; ++y) { - // Convert next row of ARGB to Y. - if (y < (height - 1)) { - src_argb += src_stride_argb; - } - ARGBToBayerRow(src_argb, row_y2, 0x0d090501, width); - row_y2[-1] = row_y2[0]; - row_y2[width] = row_y2[width - 1]; - - SobelXRow(row_y0 - 1, row_y1 - 1, row_y2 - 1, row_sobelx, width); - SobelYRow(row_y0 - 1, row_y2 - 1, row_sobely, width); - SobelRow(row_sobelx, row_sobely, dst_argb, width); - - // Cycle thru circular queue of 3 row_y buffers. - { - uint8* row_yt = row_y0; - row_y0 = row_y1; - row_y1 = row_y2; - row_y2 = row_yt; - } - - dst_argb += dst_stride_argb; - } - free_aligned_buffer_64(rows); - } - return 0; -} - -// Sobel ARGB effect. -LIBYUV_API -int ARGBSobel(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, - int width, int height) { - void (*SobelRow)(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_argb, int width) = SobelRow_C; -#if defined(HAS_SOBELROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16) && - IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { - SobelRow = SobelRow_SSE2; - } -#endif -#if defined(HAS_SOBELROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) { - SobelRow = SobelRow_NEON; - } -#endif - return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb, - width, height, SobelRow); -} - -// Sobel ARGB effect with planar output. 
-LIBYUV_API -int ARGBSobelToPlane(const uint8* src_argb, int src_stride_argb, - uint8* dst_y, int dst_stride_y, - int width, int height) { - void (*SobelToPlaneRow)(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_, int width) = SobelToPlaneRow_C; -#if defined(HAS_SOBELTOPLANEROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16) && - IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { - SobelToPlaneRow = SobelToPlaneRow_SSE2; - } -#endif -#if defined(HAS_SOBELTOPLANEROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) { - SobelToPlaneRow = SobelToPlaneRow_NEON; - } -#endif - return ARGBSobelize(src_argb, src_stride_argb, dst_y, dst_stride_y, - width, height, SobelToPlaneRow); -} - -// SobelXY ARGB effect. -// Similar to Sobel, but also stores Sobel X in R and Sobel Y in B. G = Sobel. -LIBYUV_API -int ARGBSobelXY(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, - int width, int height) { - void (*SobelXYRow)(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_argb, int width) = SobelXYRow_C; -#if defined(HAS_SOBELXYROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16) && - IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { - SobelXYRow = SobelXYRow_SSE2; - } -#endif -#if defined(HAS_SOBELXYROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) { - SobelXYRow = SobelXYRow_NEON; - } -#endif - return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb, - width, height, SobelXYRow); -} - -// Apply a 4x4 polynomial to each ARGB pixel. 
-LIBYUV_API -int ARGBPolynomial(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, - const float* poly, - int width, int height) { - int y; - void (*ARGBPolynomialRow)(const uint8* src_argb, - uint8* dst_argb, const float* poly, - int width) = ARGBPolynomialRow_C; - if (!src_argb || !dst_argb || !poly || width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - src_argb = src_argb + (height - 1) * src_stride_argb; - src_stride_argb = -src_stride_argb; - } - // Coalesce rows. - if (src_stride_argb == width * 4 && - dst_stride_argb == width * 4) { - width *= height; - height = 1; - src_stride_argb = dst_stride_argb = 0; - } -#if defined(HAS_ARGBPOLYNOMIALROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 2)) { - ARGBPolynomialRow = ARGBPolynomialRow_SSE2; - } -#endif -#if defined(HAS_ARGBPOLYNOMIALROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2) && TestCpuFlag(kCpuHasFMA3) && - IS_ALIGNED(width, 2)) { - ARGBPolynomialRow = ARGBPolynomialRow_AVX2; - } -#endif - - for (y = 0; y < height; ++y) { - ARGBPolynomialRow(src_argb, dst_argb, poly, width); - src_argb += src_stride_argb; - dst_argb += dst_stride_argb; - } - return 0; -} - -// Apply a lumacolortable to each ARGB pixel. -LIBYUV_API -int ARGBLumaColorTable(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, - const uint8* luma, - int width, int height) { - int y; - void (*ARGBLumaColorTableRow)(const uint8* src_argb, uint8* dst_argb, - int width, const uint8* luma, const uint32 lumacoeff) = - ARGBLumaColorTableRow_C; - if (!src_argb || !dst_argb || !luma || width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - src_argb = src_argb + (height - 1) * src_stride_argb; - src_stride_argb = -src_stride_argb; - } - // Coalesce rows. 
- if (src_stride_argb == width * 4 && - dst_stride_argb == width * 4) { - width *= height; - height = 1; - src_stride_argb = dst_stride_argb = 0; - } -#if defined(HAS_ARGBLUMACOLORTABLEROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4)) { - ARGBLumaColorTableRow = ARGBLumaColorTableRow_SSSE3; - } -#endif - - for (y = 0; y < height; ++y) { - ARGBLumaColorTableRow(src_argb, dst_argb, width, luma, 0x00264b0f); - src_argb += src_stride_argb; - dst_argb += dst_stride_argb; - } - return 0; -} - -// Copy Alpha from one ARGB image to another. -LIBYUV_API -int ARGBCopyAlpha(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, - int width, int height) { - int y; - void (*ARGBCopyAlphaRow)(const uint8* src_argb, uint8* dst_argb, int width) = - ARGBCopyAlphaRow_C; - if (!src_argb || !dst_argb || width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - src_argb = src_argb + (height - 1) * src_stride_argb; - src_stride_argb = -src_stride_argb; - } - // Coalesce rows. - if (src_stride_argb == width * 4 && - dst_stride_argb == width * 4) { - width *= height; - height = 1; - src_stride_argb = dst_stride_argb = 0; - } -#if defined(HAS_ARGBCOPYALPHAROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && - IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) && - IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16) && - IS_ALIGNED(width, 8)) { - ARGBCopyAlphaRow = ARGBCopyAlphaRow_SSE2; - } -#endif -#if defined(HAS_ARGBCOPYALPHAROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 16)) { - ARGBCopyAlphaRow = ARGBCopyAlphaRow_AVX2; - } -#endif - - for (y = 0; y < height; ++y) { - ARGBCopyAlphaRow(src_argb, dst_argb, width); - src_argb += src_stride_argb; - dst_argb += dst_stride_argb; - } - return 0; -} - -// Copy a planar Y channel to the alpha channel of a destination ARGB image. 
-LIBYUV_API -int ARGBCopyYToAlpha(const uint8* src_y, int src_stride_y, - uint8* dst_argb, int dst_stride_argb, - int width, int height) { - int y; - void (*ARGBCopyYToAlphaRow)(const uint8* src_y, uint8* dst_argb, int width) = - ARGBCopyYToAlphaRow_C; - if (!src_y || !dst_argb || width <= 0 || height == 0) { - return -1; - } - // Negative height means invert the image. - if (height < 0) { - height = -height; - src_y = src_y + (height - 1) * src_stride_y; - src_stride_y = -src_stride_y; - } - // Coalesce rows. - if (src_stride_y == width && - dst_stride_argb == width * 4) { - width *= height; - height = 1; - src_stride_y = dst_stride_argb = 0; - } -#if defined(HAS_ARGBCOPYYTOALPHAROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && - IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) && - IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16) && - IS_ALIGNED(width, 8)) { - ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_SSE2; - } -#endif -#if defined(HAS_ARGBCOPYYTOALPHAROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 16)) { - ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_AVX2; - } -#endif - - for (y = 0; y < height; ++y) { - ARGBCopyYToAlphaRow(src_y, dst_argb, width); - src_y += src_stride_y; - dst_argb += dst_stride_argb; - } - return 0; -} - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/rotate.cc b/drivers/theoraplayer/src/YUV/libyuv/src/rotate.cc deleted file mode 100755 index b052ac1dc4..0000000000 --- a/drivers/theoraplayer/src/YUV/libyuv/src/rotate.cc +++ /dev/null @@ -1,1301 +0,0 @@ -/* - * Copyright 2011 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include "libyuv/rotate.h" - -#include "libyuv/cpu_id.h" -#include "libyuv/convert.h" -#include "libyuv/planar_functions.h" -#include "libyuv/row.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -#if !defined(LIBYUV_DISABLE_X86) && \ - (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) -#if defined(__APPLE__) && defined(__i386__) -#define DECLARE_FUNCTION(name) \ - ".text \n" \ - ".private_extern _" #name " \n" \ - ".align 4,0x90 \n" \ -"_" #name ": \n" -#elif defined(__MINGW32__) || defined(__CYGWIN__) && defined(__i386__) -#define DECLARE_FUNCTION(name) \ - ".text \n" \ - ".align 4,0x90 \n" \ -"_" #name ": \n" -#else -#define DECLARE_FUNCTION(name) \ - ".text \n" \ - ".align 4,0x90 \n" \ -#name ": \n" -#endif -#endif - -#if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \ - (defined(__ARM_NEON__) || defined(LIBYUV_NEON)) -#define HAS_MIRRORROW_NEON -void MirrorRow_NEON(const uint8* src, uint8* dst, int width); -#define HAS_MIRRORROW_UV_NEON -void MirrorUVRow_NEON(const uint8* src, uint8* dst_a, uint8* dst_b, int width); -#define HAS_TRANSPOSE_WX8_NEON -void TransposeWx8_NEON(const uint8* src, int src_stride, - uint8* dst, int dst_stride, int width); -#define HAS_TRANSPOSE_UVWX8_NEON -void TransposeUVWx8_NEON(const uint8* src, int src_stride, - uint8* dst_a, int dst_stride_a, - uint8* dst_b, int dst_stride_b, - int width); -#endif // defined(__ARM_NEON__) - -#if !defined(LIBYUV_DISABLE_MIPS) && !defined(__native_client__) && \ - defined(__mips__) && \ - defined(__mips_dsp) && (__mips_dsp_rev >= 2) -#define HAS_TRANSPOSE_WX8_MIPS_DSPR2 -void TransposeWx8_MIPS_DSPR2(const uint8* src, int src_stride, - uint8* dst, int dst_stride, int width); - -void TransposeWx8_FAST_MIPS_DSPR2(const uint8* src, int src_stride, - uint8* dst, int dst_stride, int width); -#define HAS_TRANSPOSE_UVWx8_MIPS_DSPR2 -void TransposeUVWx8_MIPS_DSPR2(const uint8* src, int src_stride, - uint8* dst_a, int dst_stride_a, - uint8* dst_b, int 
dst_stride_b, - int width); -#endif // defined(__mips__) - -#if !defined(LIBYUV_DISABLE_X86) && \ - defined(_M_IX86) && defined(_MSC_VER) -#define HAS_TRANSPOSE_WX8_SSSE3 -__declspec(naked) __declspec(align(16)) -static void TransposeWx8_SSSE3(const uint8* src, int src_stride, - uint8* dst, int dst_stride, int width) { - __asm { - push edi - push esi - push ebp - mov eax, [esp + 12 + 4] // src - mov edi, [esp + 12 + 8] // src_stride - mov edx, [esp + 12 + 12] // dst - mov esi, [esp + 12 + 16] // dst_stride - mov ecx, [esp + 12 + 20] // width - - // Read in the data from the source pointer. - // First round of bit swap. - align 4 - convertloop: - movq xmm0, qword ptr [eax] - lea ebp, [eax + 8] - movq xmm1, qword ptr [eax + edi] - lea eax, [eax + 2 * edi] - punpcklbw xmm0, xmm1 - movq xmm2, qword ptr [eax] - movdqa xmm1, xmm0 - palignr xmm1, xmm1, 8 - movq xmm3, qword ptr [eax + edi] - lea eax, [eax + 2 * edi] - punpcklbw xmm2, xmm3 - movdqa xmm3, xmm2 - movq xmm4, qword ptr [eax] - palignr xmm3, xmm3, 8 - movq xmm5, qword ptr [eax + edi] - punpcklbw xmm4, xmm5 - lea eax, [eax + 2 * edi] - movdqa xmm5, xmm4 - movq xmm6, qword ptr [eax] - palignr xmm5, xmm5, 8 - movq xmm7, qword ptr [eax + edi] - punpcklbw xmm6, xmm7 - mov eax, ebp - movdqa xmm7, xmm6 - palignr xmm7, xmm7, 8 - // Second round of bit swap. - punpcklwd xmm0, xmm2 - punpcklwd xmm1, xmm3 - movdqa xmm2, xmm0 - movdqa xmm3, xmm1 - palignr xmm2, xmm2, 8 - palignr xmm3, xmm3, 8 - punpcklwd xmm4, xmm6 - punpcklwd xmm5, xmm7 - movdqa xmm6, xmm4 - movdqa xmm7, xmm5 - palignr xmm6, xmm6, 8 - palignr xmm7, xmm7, 8 - // Third round of bit swap. - // Write to the destination pointer. 
- punpckldq xmm0, xmm4 - movq qword ptr [edx], xmm0 - movdqa xmm4, xmm0 - palignr xmm4, xmm4, 8 - movq qword ptr [edx + esi], xmm4 - lea edx, [edx + 2 * esi] - punpckldq xmm2, xmm6 - movdqa xmm6, xmm2 - palignr xmm6, xmm6, 8 - movq qword ptr [edx], xmm2 - punpckldq xmm1, xmm5 - movq qword ptr [edx + esi], xmm6 - lea edx, [edx + 2 * esi] - movdqa xmm5, xmm1 - movq qword ptr [edx], xmm1 - palignr xmm5, xmm5, 8 - punpckldq xmm3, xmm7 - movq qword ptr [edx + esi], xmm5 - lea edx, [edx + 2 * esi] - movq qword ptr [edx], xmm3 - movdqa xmm7, xmm3 - palignr xmm7, xmm7, 8 - sub ecx, 8 - movq qword ptr [edx + esi], xmm7 - lea edx, [edx + 2 * esi] - jg convertloop - - pop ebp - pop esi - pop edi - ret - } -} - -#define HAS_TRANSPOSE_UVWX8_SSE2 -__declspec(naked) __declspec(align(16)) -static void TransposeUVWx8_SSE2(const uint8* src, int src_stride, - uint8* dst_a, int dst_stride_a, - uint8* dst_b, int dst_stride_b, - int w) { - __asm { - push ebx - push esi - push edi - push ebp - mov eax, [esp + 16 + 4] // src - mov edi, [esp + 16 + 8] // src_stride - mov edx, [esp + 16 + 12] // dst_a - mov esi, [esp + 16 + 16] // dst_stride_a - mov ebx, [esp + 16 + 20] // dst_b - mov ebp, [esp + 16 + 24] // dst_stride_b - mov ecx, esp - sub esp, 4 + 16 - and esp, ~15 - mov [esp + 16], ecx - mov ecx, [ecx + 16 + 28] // w - - align 4 - convertloop: - // Read in the data from the source pointer. - // First round of bit swap. - movdqa xmm0, [eax] - movdqa xmm1, [eax + edi] - lea eax, [eax + 2 * edi] - movdqa xmm7, xmm0 // use xmm7 as temp register. 
- punpcklbw xmm0, xmm1 - punpckhbw xmm7, xmm1 - movdqa xmm1, xmm7 - movdqa xmm2, [eax] - movdqa xmm3, [eax + edi] - lea eax, [eax + 2 * edi] - movdqa xmm7, xmm2 - punpcklbw xmm2, xmm3 - punpckhbw xmm7, xmm3 - movdqa xmm3, xmm7 - movdqa xmm4, [eax] - movdqa xmm5, [eax + edi] - lea eax, [eax + 2 * edi] - movdqa xmm7, xmm4 - punpcklbw xmm4, xmm5 - punpckhbw xmm7, xmm5 - movdqa xmm5, xmm7 - movdqa xmm6, [eax] - movdqa xmm7, [eax + edi] - lea eax, [eax + 2 * edi] - movdqa [esp], xmm5 // backup xmm5 - neg edi - movdqa xmm5, xmm6 // use xmm5 as temp register. - punpcklbw xmm6, xmm7 - punpckhbw xmm5, xmm7 - movdqa xmm7, xmm5 - lea eax, [eax + 8 * edi + 16] - neg edi - // Second round of bit swap. - movdqa xmm5, xmm0 - punpcklwd xmm0, xmm2 - punpckhwd xmm5, xmm2 - movdqa xmm2, xmm5 - movdqa xmm5, xmm1 - punpcklwd xmm1, xmm3 - punpckhwd xmm5, xmm3 - movdqa xmm3, xmm5 - movdqa xmm5, xmm4 - punpcklwd xmm4, xmm6 - punpckhwd xmm5, xmm6 - movdqa xmm6, xmm5 - movdqa xmm5, [esp] // restore xmm5 - movdqa [esp], xmm6 // backup xmm6 - movdqa xmm6, xmm5 // use xmm6 as temp register. - punpcklwd xmm5, xmm7 - punpckhwd xmm6, xmm7 - movdqa xmm7, xmm6 - // Third round of bit swap. - // Write to the destination pointer. - movdqa xmm6, xmm0 - punpckldq xmm0, xmm4 - punpckhdq xmm6, xmm4 - movdqa xmm4, xmm6 - movdqa xmm6, [esp] // restore xmm6 - movlpd qword ptr [edx], xmm0 - movhpd qword ptr [ebx], xmm0 - movlpd qword ptr [edx + esi], xmm4 - lea edx, [edx + 2 * esi] - movhpd qword ptr [ebx + ebp], xmm4 - lea ebx, [ebx + 2 * ebp] - movdqa xmm0, xmm2 // use xmm0 as the temp register. - punpckldq xmm2, xmm6 - movlpd qword ptr [edx], xmm2 - movhpd qword ptr [ebx], xmm2 - punpckhdq xmm0, xmm6 - movlpd qword ptr [edx + esi], xmm0 - lea edx, [edx + 2 * esi] - movhpd qword ptr [ebx + ebp], xmm0 - lea ebx, [ebx + 2 * ebp] - movdqa xmm0, xmm1 // use xmm0 as the temp register. 
- punpckldq xmm1, xmm5 - movlpd qword ptr [edx], xmm1 - movhpd qword ptr [ebx], xmm1 - punpckhdq xmm0, xmm5 - movlpd qword ptr [edx + esi], xmm0 - lea edx, [edx + 2 * esi] - movhpd qword ptr [ebx + ebp], xmm0 - lea ebx, [ebx + 2 * ebp] - movdqa xmm0, xmm3 // use xmm0 as the temp register. - punpckldq xmm3, xmm7 - movlpd qword ptr [edx], xmm3 - movhpd qword ptr [ebx], xmm3 - punpckhdq xmm0, xmm7 - sub ecx, 8 - movlpd qword ptr [edx + esi], xmm0 - lea edx, [edx + 2 * esi] - movhpd qword ptr [ebx + ebp], xmm0 - lea ebx, [ebx + 2 * ebp] - jg convertloop - - mov esp, [esp + 16] - pop ebp - pop edi - pop esi - pop ebx - ret - } -} -#elif !defined(LIBYUV_DISABLE_X86) && \ - (defined(__i386__) || (defined(__x86_64__) && !defined(__native_client__))) -#define HAS_TRANSPOSE_WX8_SSSE3 -static void TransposeWx8_SSSE3(const uint8* src, int src_stride, - uint8* dst, int dst_stride, int width) { - asm volatile ( - // Read in the data from the source pointer. - // First round of bit swap. - ".p2align 2 \n" - "1: \n" - "movq (%0),%%xmm0 \n" - "movq (%0,%3),%%xmm1 \n" - "lea (%0,%3,2),%0 \n" - "punpcklbw %%xmm1,%%xmm0 \n" - "movq (%0),%%xmm2 \n" - "movdqa %%xmm0,%%xmm1 \n" - "palignr $0x8,%%xmm1,%%xmm1 \n" - "movq (%0,%3),%%xmm3 \n" - "lea (%0,%3,2),%0 \n" - "punpcklbw %%xmm3,%%xmm2 \n" - "movdqa %%xmm2,%%xmm3 \n" - "movq (%0),%%xmm4 \n" - "palignr $0x8,%%xmm3,%%xmm3 \n" - "movq (%0,%3),%%xmm5 \n" - "lea (%0,%3,2),%0 \n" - "punpcklbw %%xmm5,%%xmm4 \n" - "movdqa %%xmm4,%%xmm5 \n" - "movq (%0),%%xmm6 \n" - "palignr $0x8,%%xmm5,%%xmm5 \n" - "movq (%0,%3),%%xmm7 \n" - "lea (%0,%3,2),%0 \n" - "punpcklbw %%xmm7,%%xmm6 \n" - "neg %3 \n" - "movdqa %%xmm6,%%xmm7 \n" - "lea 0x8(%0,%3,8),%0 \n" - "palignr $0x8,%%xmm7,%%xmm7 \n" - "neg %3 \n" - // Second round of bit swap. 
- "punpcklwd %%xmm2,%%xmm0 \n" - "punpcklwd %%xmm3,%%xmm1 \n" - "movdqa %%xmm0,%%xmm2 \n" - "movdqa %%xmm1,%%xmm3 \n" - "palignr $0x8,%%xmm2,%%xmm2 \n" - "palignr $0x8,%%xmm3,%%xmm3 \n" - "punpcklwd %%xmm6,%%xmm4 \n" - "punpcklwd %%xmm7,%%xmm5 \n" - "movdqa %%xmm4,%%xmm6 \n" - "movdqa %%xmm5,%%xmm7 \n" - "palignr $0x8,%%xmm6,%%xmm6 \n" - "palignr $0x8,%%xmm7,%%xmm7 \n" - // Third round of bit swap. - // Write to the destination pointer. - "punpckldq %%xmm4,%%xmm0 \n" - "movq %%xmm0,(%1) \n" - "movdqa %%xmm0,%%xmm4 \n" - "palignr $0x8,%%xmm4,%%xmm4 \n" - "movq %%xmm4,(%1,%4) \n" - "lea (%1,%4,2),%1 \n" - "punpckldq %%xmm6,%%xmm2 \n" - "movdqa %%xmm2,%%xmm6 \n" - "movq %%xmm2,(%1) \n" - "palignr $0x8,%%xmm6,%%xmm6 \n" - "punpckldq %%xmm5,%%xmm1 \n" - "movq %%xmm6,(%1,%4) \n" - "lea (%1,%4,2),%1 \n" - "movdqa %%xmm1,%%xmm5 \n" - "movq %%xmm1,(%1) \n" - "palignr $0x8,%%xmm5,%%xmm5 \n" - "movq %%xmm5,(%1,%4) \n" - "lea (%1,%4,2),%1 \n" - "punpckldq %%xmm7,%%xmm3 \n" - "movq %%xmm3,(%1) \n" - "movdqa %%xmm3,%%xmm7 \n" - "palignr $0x8,%%xmm7,%%xmm7 \n" - "sub $0x8,%2 \n" - "movq %%xmm7,(%1,%4) \n" - "lea (%1,%4,2),%1 \n" - "jg 1b \n" - : "+r"(src), // %0 - "+r"(dst), // %1 - "+r"(width) // %2 - : "r"((intptr_t)(src_stride)), // %3 - "r"((intptr_t)(dst_stride)) // %4 - : "memory", "cc" - #if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" - #endif - ); -} - -#if !defined(LIBYUV_DISABLE_X86) && defined(__i386__) -#define HAS_TRANSPOSE_UVWX8_SSE2 -extern "C" void TransposeUVWx8_SSE2(const uint8* src, int src_stride, - uint8* dst_a, int dst_stride_a, - uint8* dst_b, int dst_stride_b, - int w); - asm ( - DECLARE_FUNCTION(TransposeUVWx8_SSE2) - "push %ebx \n" - "push %esi \n" - "push %edi \n" - "push %ebp \n" - "mov 0x14(%esp),%eax \n" - "mov 0x18(%esp),%edi \n" - "mov 0x1c(%esp),%edx \n" - "mov 0x20(%esp),%esi \n" - "mov 0x24(%esp),%ebx \n" - "mov 0x28(%esp),%ebp \n" - "mov %esp,%ecx \n" - "sub $0x14,%esp \n" - "and $0xfffffff0,%esp \n" - 
"mov %ecx,0x10(%esp) \n" - "mov 0x2c(%ecx),%ecx \n" - -"1: \n" - "movdqa (%eax),%xmm0 \n" - "movdqa (%eax,%edi,1),%xmm1 \n" - "lea (%eax,%edi,2),%eax \n" - "movdqa %xmm0,%xmm7 \n" - "punpcklbw %xmm1,%xmm0 \n" - "punpckhbw %xmm1,%xmm7 \n" - "movdqa %xmm7,%xmm1 \n" - "movdqa (%eax),%xmm2 \n" - "movdqa (%eax,%edi,1),%xmm3 \n" - "lea (%eax,%edi,2),%eax \n" - "movdqa %xmm2,%xmm7 \n" - "punpcklbw %xmm3,%xmm2 \n" - "punpckhbw %xmm3,%xmm7 \n" - "movdqa %xmm7,%xmm3 \n" - "movdqa (%eax),%xmm4 \n" - "movdqa (%eax,%edi,1),%xmm5 \n" - "lea (%eax,%edi,2),%eax \n" - "movdqa %xmm4,%xmm7 \n" - "punpcklbw %xmm5,%xmm4 \n" - "punpckhbw %xmm5,%xmm7 \n" - "movdqa %xmm7,%xmm5 \n" - "movdqa (%eax),%xmm6 \n" - "movdqa (%eax,%edi,1),%xmm7 \n" - "lea (%eax,%edi,2),%eax \n" - "movdqa %xmm5,(%esp) \n" - "neg %edi \n" - "movdqa %xmm6,%xmm5 \n" - "punpcklbw %xmm7,%xmm6 \n" - "punpckhbw %xmm7,%xmm5 \n" - "movdqa %xmm5,%xmm7 \n" - "lea 0x10(%eax,%edi,8),%eax \n" - "neg %edi \n" - "movdqa %xmm0,%xmm5 \n" - "punpcklwd %xmm2,%xmm0 \n" - "punpckhwd %xmm2,%xmm5 \n" - "movdqa %xmm5,%xmm2 \n" - "movdqa %xmm1,%xmm5 \n" - "punpcklwd %xmm3,%xmm1 \n" - "punpckhwd %xmm3,%xmm5 \n" - "movdqa %xmm5,%xmm3 \n" - "movdqa %xmm4,%xmm5 \n" - "punpcklwd %xmm6,%xmm4 \n" - "punpckhwd %xmm6,%xmm5 \n" - "movdqa %xmm5,%xmm6 \n" - "movdqa (%esp),%xmm5 \n" - "movdqa %xmm6,(%esp) \n" - "movdqa %xmm5,%xmm6 \n" - "punpcklwd %xmm7,%xmm5 \n" - "punpckhwd %xmm7,%xmm6 \n" - "movdqa %xmm6,%xmm7 \n" - "movdqa %xmm0,%xmm6 \n" - "punpckldq %xmm4,%xmm0 \n" - "punpckhdq %xmm4,%xmm6 \n" - "movdqa %xmm6,%xmm4 \n" - "movdqa (%esp),%xmm6 \n" - "movlpd %xmm0,(%edx) \n" - "movhpd %xmm0,(%ebx) \n" - "movlpd %xmm4,(%edx,%esi,1) \n" - "lea (%edx,%esi,2),%edx \n" - "movhpd %xmm4,(%ebx,%ebp,1) \n" - "lea (%ebx,%ebp,2),%ebx \n" - "movdqa %xmm2,%xmm0 \n" - "punpckldq %xmm6,%xmm2 \n" - "movlpd %xmm2,(%edx) \n" - "movhpd %xmm2,(%ebx) \n" - "punpckhdq %xmm6,%xmm0 \n" - "movlpd %xmm0,(%edx,%esi,1) \n" - "lea (%edx,%esi,2),%edx \n" - "movhpd 
%xmm0,(%ebx,%ebp,1) \n" - "lea (%ebx,%ebp,2),%ebx \n" - "movdqa %xmm1,%xmm0 \n" - "punpckldq %xmm5,%xmm1 \n" - "movlpd %xmm1,(%edx) \n" - "movhpd %xmm1,(%ebx) \n" - "punpckhdq %xmm5,%xmm0 \n" - "movlpd %xmm0,(%edx,%esi,1) \n" - "lea (%edx,%esi,2),%edx \n" - "movhpd %xmm0,(%ebx,%ebp,1) \n" - "lea (%ebx,%ebp,2),%ebx \n" - "movdqa %xmm3,%xmm0 \n" - "punpckldq %xmm7,%xmm3 \n" - "movlpd %xmm3,(%edx) \n" - "movhpd %xmm3,(%ebx) \n" - "punpckhdq %xmm7,%xmm0 \n" - "sub $0x8,%ecx \n" - "movlpd %xmm0,(%edx,%esi,1) \n" - "lea (%edx,%esi,2),%edx \n" - "movhpd %xmm0,(%ebx,%ebp,1) \n" - "lea (%ebx,%ebp,2),%ebx \n" - "jg 1b \n" - "mov 0x10(%esp),%esp \n" - "pop %ebp \n" - "pop %edi \n" - "pop %esi \n" - "pop %ebx \n" -#if defined(__native_client__) - "pop %ecx \n" - "and $0xffffffe0,%ecx \n" - "jmp *%ecx \n" -#else - "ret \n" -#endif -); -#elif !defined(LIBYUV_DISABLE_X86) && !defined(__native_client__) && \ - defined(__x86_64__) -// 64 bit version has enough registers to do 16x8 to 8x16 at a time. -#define HAS_TRANSPOSE_WX8_FAST_SSSE3 -static void TransposeWx8_FAST_SSSE3(const uint8* src, int src_stride, - uint8* dst, int dst_stride, int width) { - asm volatile ( - // Read in the data from the source pointer. - // First round of bit swap. 
- ".p2align 2 \n" -"1: \n" - "movdqa (%0),%%xmm0 \n" - "movdqa (%0,%3),%%xmm1 \n" - "lea (%0,%3,2),%0 \n" - "movdqa %%xmm0,%%xmm8 \n" - "punpcklbw %%xmm1,%%xmm0 \n" - "punpckhbw %%xmm1,%%xmm8 \n" - "movdqa (%0),%%xmm2 \n" - "movdqa %%xmm0,%%xmm1 \n" - "movdqa %%xmm8,%%xmm9 \n" - "palignr $0x8,%%xmm1,%%xmm1 \n" - "palignr $0x8,%%xmm9,%%xmm9 \n" - "movdqa (%0,%3),%%xmm3 \n" - "lea (%0,%3,2),%0 \n" - "movdqa %%xmm2,%%xmm10 \n" - "punpcklbw %%xmm3,%%xmm2 \n" - "punpckhbw %%xmm3,%%xmm10 \n" - "movdqa %%xmm2,%%xmm3 \n" - "movdqa %%xmm10,%%xmm11 \n" - "movdqa (%0),%%xmm4 \n" - "palignr $0x8,%%xmm3,%%xmm3 \n" - "palignr $0x8,%%xmm11,%%xmm11 \n" - "movdqa (%0,%3),%%xmm5 \n" - "lea (%0,%3,2),%0 \n" - "movdqa %%xmm4,%%xmm12 \n" - "punpcklbw %%xmm5,%%xmm4 \n" - "punpckhbw %%xmm5,%%xmm12 \n" - "movdqa %%xmm4,%%xmm5 \n" - "movdqa %%xmm12,%%xmm13 \n" - "movdqa (%0),%%xmm6 \n" - "palignr $0x8,%%xmm5,%%xmm5 \n" - "palignr $0x8,%%xmm13,%%xmm13 \n" - "movdqa (%0,%3),%%xmm7 \n" - "lea (%0,%3,2),%0 \n" - "movdqa %%xmm6,%%xmm14 \n" - "punpcklbw %%xmm7,%%xmm6 \n" - "punpckhbw %%xmm7,%%xmm14 \n" - "neg %3 \n" - "movdqa %%xmm6,%%xmm7 \n" - "movdqa %%xmm14,%%xmm15 \n" - "lea 0x10(%0,%3,8),%0 \n" - "palignr $0x8,%%xmm7,%%xmm7 \n" - "palignr $0x8,%%xmm15,%%xmm15 \n" - "neg %3 \n" - // Second round of bit swap. 
- "punpcklwd %%xmm2,%%xmm0 \n" - "punpcklwd %%xmm3,%%xmm1 \n" - "movdqa %%xmm0,%%xmm2 \n" - "movdqa %%xmm1,%%xmm3 \n" - "palignr $0x8,%%xmm2,%%xmm2 \n" - "palignr $0x8,%%xmm3,%%xmm3 \n" - "punpcklwd %%xmm6,%%xmm4 \n" - "punpcklwd %%xmm7,%%xmm5 \n" - "movdqa %%xmm4,%%xmm6 \n" - "movdqa %%xmm5,%%xmm7 \n" - "palignr $0x8,%%xmm6,%%xmm6 \n" - "palignr $0x8,%%xmm7,%%xmm7 \n" - "punpcklwd %%xmm10,%%xmm8 \n" - "punpcklwd %%xmm11,%%xmm9 \n" - "movdqa %%xmm8,%%xmm10 \n" - "movdqa %%xmm9,%%xmm11 \n" - "palignr $0x8,%%xmm10,%%xmm10 \n" - "palignr $0x8,%%xmm11,%%xmm11 \n" - "punpcklwd %%xmm14,%%xmm12 \n" - "punpcklwd %%xmm15,%%xmm13 \n" - "movdqa %%xmm12,%%xmm14 \n" - "movdqa %%xmm13,%%xmm15 \n" - "palignr $0x8,%%xmm14,%%xmm14 \n" - "palignr $0x8,%%xmm15,%%xmm15 \n" - // Third round of bit swap. - // Write to the destination pointer. - "punpckldq %%xmm4,%%xmm0 \n" - "movq %%xmm0,(%1) \n" - "movdqa %%xmm0,%%xmm4 \n" - "palignr $0x8,%%xmm4,%%xmm4 \n" - "movq %%xmm4,(%1,%4) \n" - "lea (%1,%4,2),%1 \n" - "punpckldq %%xmm6,%%xmm2 \n" - "movdqa %%xmm2,%%xmm6 \n" - "movq %%xmm2,(%1) \n" - "palignr $0x8,%%xmm6,%%xmm6 \n" - "punpckldq %%xmm5,%%xmm1 \n" - "movq %%xmm6,(%1,%4) \n" - "lea (%1,%4,2),%1 \n" - "movdqa %%xmm1,%%xmm5 \n" - "movq %%xmm1,(%1) \n" - "palignr $0x8,%%xmm5,%%xmm5 \n" - "movq %%xmm5,(%1,%4) \n" - "lea (%1,%4,2),%1 \n" - "punpckldq %%xmm7,%%xmm3 \n" - "movq %%xmm3,(%1) \n" - "movdqa %%xmm3,%%xmm7 \n" - "palignr $0x8,%%xmm7,%%xmm7 \n" - "movq %%xmm7,(%1,%4) \n" - "lea (%1,%4,2),%1 \n" - "punpckldq %%xmm12,%%xmm8 \n" - "movq %%xmm8,(%1) \n" - "movdqa %%xmm8,%%xmm12 \n" - "palignr $0x8,%%xmm12,%%xmm12 \n" - "movq %%xmm12,(%1,%4) \n" - "lea (%1,%4,2),%1 \n" - "punpckldq %%xmm14,%%xmm10 \n" - "movdqa %%xmm10,%%xmm14 \n" - "movq %%xmm10,(%1) \n" - "palignr $0x8,%%xmm14,%%xmm14 \n" - "punpckldq %%xmm13,%%xmm9 \n" - "movq %%xmm14,(%1,%4) \n" - "lea (%1,%4,2),%1 \n" - "movdqa %%xmm9,%%xmm13 \n" - "movq %%xmm9,(%1) \n" - "palignr $0x8,%%xmm13,%%xmm13 \n" - "movq %%xmm13,(%1,%4) 
\n" - "lea (%1,%4,2),%1 \n" - "punpckldq %%xmm15,%%xmm11 \n" - "movq %%xmm11,(%1) \n" - "movdqa %%xmm11,%%xmm15 \n" - "palignr $0x8,%%xmm15,%%xmm15 \n" - "sub $0x10,%2 \n" - "movq %%xmm15,(%1,%4) \n" - "lea (%1,%4,2),%1 \n" - "jg 1b \n" - : "+r"(src), // %0 - "+r"(dst), // %1 - "+r"(width) // %2 - : "r"((intptr_t)(src_stride)), // %3 - "r"((intptr_t)(dst_stride)) // %4 - : "memory", "cc", - "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", - "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15" -); -} - -#define HAS_TRANSPOSE_UVWX8_SSE2 -static void TransposeUVWx8_SSE2(const uint8* src, int src_stride, - uint8* dst_a, int dst_stride_a, - uint8* dst_b, int dst_stride_b, - int w) { - asm volatile ( - // Read in the data from the source pointer. - // First round of bit swap. - ".p2align 2 \n" -"1: \n" - "movdqa (%0),%%xmm0 \n" - "movdqa (%0,%4),%%xmm1 \n" - "lea (%0,%4,2),%0 \n" - "movdqa %%xmm0,%%xmm8 \n" - "punpcklbw %%xmm1,%%xmm0 \n" - "punpckhbw %%xmm1,%%xmm8 \n" - "movdqa %%xmm8,%%xmm1 \n" - "movdqa (%0),%%xmm2 \n" - "movdqa (%0,%4),%%xmm3 \n" - "lea (%0,%4,2),%0 \n" - "movdqa %%xmm2,%%xmm8 \n" - "punpcklbw %%xmm3,%%xmm2 \n" - "punpckhbw %%xmm3,%%xmm8 \n" - "movdqa %%xmm8,%%xmm3 \n" - "movdqa (%0),%%xmm4 \n" - "movdqa (%0,%4),%%xmm5 \n" - "lea (%0,%4,2),%0 \n" - "movdqa %%xmm4,%%xmm8 \n" - "punpcklbw %%xmm5,%%xmm4 \n" - "punpckhbw %%xmm5,%%xmm8 \n" - "movdqa %%xmm8,%%xmm5 \n" - "movdqa (%0),%%xmm6 \n" - "movdqa (%0,%4),%%xmm7 \n" - "lea (%0,%4,2),%0 \n" - "movdqa %%xmm6,%%xmm8 \n" - "punpcklbw %%xmm7,%%xmm6 \n" - "neg %4 \n" - "lea 0x10(%0,%4,8),%0 \n" - "punpckhbw %%xmm7,%%xmm8 \n" - "movdqa %%xmm8,%%xmm7 \n" - "neg %4 \n" - // Second round of bit swap. 
- "movdqa %%xmm0,%%xmm8 \n" - "movdqa %%xmm1,%%xmm9 \n" - "punpckhwd %%xmm2,%%xmm8 \n" - "punpckhwd %%xmm3,%%xmm9 \n" - "punpcklwd %%xmm2,%%xmm0 \n" - "punpcklwd %%xmm3,%%xmm1 \n" - "movdqa %%xmm8,%%xmm2 \n" - "movdqa %%xmm9,%%xmm3 \n" - "movdqa %%xmm4,%%xmm8 \n" - "movdqa %%xmm5,%%xmm9 \n" - "punpckhwd %%xmm6,%%xmm8 \n" - "punpckhwd %%xmm7,%%xmm9 \n" - "punpcklwd %%xmm6,%%xmm4 \n" - "punpcklwd %%xmm7,%%xmm5 \n" - "movdqa %%xmm8,%%xmm6 \n" - "movdqa %%xmm9,%%xmm7 \n" - // Third round of bit swap. - // Write to the destination pointer. - "movdqa %%xmm0,%%xmm8 \n" - "punpckldq %%xmm4,%%xmm0 \n" - "movlpd %%xmm0,(%1) \n" // Write back U channel - "movhpd %%xmm0,(%2) \n" // Write back V channel - "punpckhdq %%xmm4,%%xmm8 \n" - "movlpd %%xmm8,(%1,%5) \n" - "lea (%1,%5,2),%1 \n" - "movhpd %%xmm8,(%2,%6) \n" - "lea (%2,%6,2),%2 \n" - "movdqa %%xmm2,%%xmm8 \n" - "punpckldq %%xmm6,%%xmm2 \n" - "movlpd %%xmm2,(%1) \n" - "movhpd %%xmm2,(%2) \n" - "punpckhdq %%xmm6,%%xmm8 \n" - "movlpd %%xmm8,(%1,%5) \n" - "lea (%1,%5,2),%1 \n" - "movhpd %%xmm8,(%2,%6) \n" - "lea (%2,%6,2),%2 \n" - "movdqa %%xmm1,%%xmm8 \n" - "punpckldq %%xmm5,%%xmm1 \n" - "movlpd %%xmm1,(%1) \n" - "movhpd %%xmm1,(%2) \n" - "punpckhdq %%xmm5,%%xmm8 \n" - "movlpd %%xmm8,(%1,%5) \n" - "lea (%1,%5,2),%1 \n" - "movhpd %%xmm8,(%2,%6) \n" - "lea (%2,%6,2),%2 \n" - "movdqa %%xmm3,%%xmm8 \n" - "punpckldq %%xmm7,%%xmm3 \n" - "movlpd %%xmm3,(%1) \n" - "movhpd %%xmm3,(%2) \n" - "punpckhdq %%xmm7,%%xmm8 \n" - "sub $0x8,%3 \n" - "movlpd %%xmm8,(%1,%5) \n" - "lea (%1,%5,2),%1 \n" - "movhpd %%xmm8,(%2,%6) \n" - "lea (%2,%6,2),%2 \n" - "jg 1b \n" - : "+r"(src), // %0 - "+r"(dst_a), // %1 - "+r"(dst_b), // %2 - "+r"(w) // %3 - : "r"((intptr_t)(src_stride)), // %4 - "r"((intptr_t)(dst_stride_a)), // %5 - "r"((intptr_t)(dst_stride_b)) // %6 - : "memory", "cc", - "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", - "xmm8", "xmm9" -); -} -#endif -#endif - -static void TransposeWx8_C(const uint8* src, int src_stride, - 
uint8* dst, int dst_stride, - int width) { - int i; - for (i = 0; i < width; ++i) { - dst[0] = src[0 * src_stride]; - dst[1] = src[1 * src_stride]; - dst[2] = src[2 * src_stride]; - dst[3] = src[3 * src_stride]; - dst[4] = src[4 * src_stride]; - dst[5] = src[5 * src_stride]; - dst[6] = src[6 * src_stride]; - dst[7] = src[7 * src_stride]; - ++src; - dst += dst_stride; - } -} - -static void TransposeWxH_C(const uint8* src, int src_stride, - uint8* dst, int dst_stride, - int width, int height) { - int i; - for (i = 0; i < width; ++i) { - int j; - for (j = 0; j < height; ++j) { - dst[i * dst_stride + j] = src[j * src_stride + i]; - } - } -} - -LIBYUV_API -void TransposePlane(const uint8* src, int src_stride, - uint8* dst, int dst_stride, - int width, int height) { - int i = height; - void (*TransposeWx8)(const uint8* src, int src_stride, - uint8* dst, int dst_stride, - int width) = TransposeWx8_C; -#if defined(HAS_TRANSPOSE_WX8_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - TransposeWx8 = TransposeWx8_NEON; - } -#endif -#if defined(HAS_TRANSPOSE_WX8_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) { - TransposeWx8 = TransposeWx8_SSSE3; - } -#endif -#if defined(HAS_TRANSPOSE_WX8_FAST_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && - IS_ALIGNED(width, 16) && - IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16)) { - TransposeWx8 = TransposeWx8_FAST_SSSE3; - } -#endif -#if defined(HAS_TRANSPOSE_WX8_MIPS_DSPR2) - if (TestCpuFlag(kCpuHasMIPS_DSPR2)) { - if (IS_ALIGNED(width, 4) && - IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4)) { - TransposeWx8 = TransposeWx8_FAST_MIPS_DSPR2; - } else { - TransposeWx8 = TransposeWx8_MIPS_DSPR2; - } - } -#endif - - // Work across the source in 8x8 tiles - while (i >= 8) { - TransposeWx8(src, src_stride, dst, dst_stride, width); - src += 8 * src_stride; // Go down 8 rows. - dst += 8; // Move over 8 columns. 
- i -= 8; - } - - TransposeWxH_C(src, src_stride, dst, dst_stride, width, i); -} - -LIBYUV_API -void RotatePlane90(const uint8* src, int src_stride, - uint8* dst, int dst_stride, - int width, int height) { - // Rotate by 90 is a transpose with the source read - // from bottom to top. So set the source pointer to the end - // of the buffer and flip the sign of the source stride. - src += src_stride * (height - 1); - src_stride = -src_stride; - TransposePlane(src, src_stride, dst, dst_stride, width, height); -} - -LIBYUV_API -void RotatePlane270(const uint8* src, int src_stride, - uint8* dst, int dst_stride, - int width, int height) { - // Rotate by 270 is a transpose with the destination written - // from bottom to top. So set the destination pointer to the end - // of the buffer and flip the sign of the destination stride. - dst += dst_stride * (width - 1); - dst_stride = -dst_stride; - TransposePlane(src, src_stride, dst, dst_stride, width, height); -} - -LIBYUV_API -void RotatePlane180(const uint8* src, int src_stride, - uint8* dst, int dst_stride, - int width, int height) { - // Swap first and last row and mirror the content. Uses a temporary row. 
- align_buffer_64(row, width); - const uint8* src_bot = src + src_stride * (height - 1); - uint8* dst_bot = dst + dst_stride * (height - 1); - int half_height = (height + 1) >> 1; - int y; - void (*MirrorRow)(const uint8* src, uint8* dst, int width) = MirrorRow_C; - void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C; -#if defined(HAS_MIRRORROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) { - MirrorRow = MirrorRow_NEON; - } -#endif -#if defined(HAS_MIRRORROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16) && - IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) && - IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) { - MirrorRow = MirrorRow_SSE2; - } -#endif -#if defined(HAS_MIRRORROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16) && - IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) && - IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) { - MirrorRow = MirrorRow_SSSE3; - } -#endif -#if defined(HAS_MIRRORROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 32)) { - MirrorRow = MirrorRow_AVX2; - } -#endif -#if defined(HAS_MIRRORROW_MIPS_DSPR2) - if (TestCpuFlag(kCpuHasMIPS_DSPR2) && - IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4) && - IS_ALIGNED(dst, 4) && IS_ALIGNED(dst_stride, 4)) { - MirrorRow = MirrorRow_MIPS_DSPR2; - } -#endif -#if defined(HAS_COPYROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 32)) { - CopyRow = CopyRow_NEON; - } -#endif -#if defined(HAS_COPYROW_X86) - if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) { - CopyRow = CopyRow_X86; - } -#endif -#if defined(HAS_COPYROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32) && - IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) && - IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) { - CopyRow = CopyRow_SSE2; - } -#endif -#if defined(HAS_COPYROW_ERMS) - if (TestCpuFlag(kCpuHasERMS)) { - CopyRow = CopyRow_ERMS; - } -#endif -#if defined(HAS_COPYROW_MIPS) - if (TestCpuFlag(kCpuHasMIPS)) { - CopyRow = 
CopyRow_MIPS; - } -#endif - - // Odd height will harmlessly mirror the middle row twice. - for (y = 0; y < half_height; ++y) { - MirrorRow(src, row, width); // Mirror first row into a buffer - src += src_stride; - MirrorRow(src_bot, dst, width); // Mirror last row into first row - dst += dst_stride; - CopyRow(row, dst_bot, width); // Copy first mirrored row into last - src_bot -= src_stride; - dst_bot -= dst_stride; - } - free_aligned_buffer_64(row); -} - -static void TransposeUVWx8_C(const uint8* src, int src_stride, - uint8* dst_a, int dst_stride_a, - uint8* dst_b, int dst_stride_b, - int width) { - int i; - for (i = 0; i < width; ++i) { - dst_a[0] = src[0 * src_stride + 0]; - dst_b[0] = src[0 * src_stride + 1]; - dst_a[1] = src[1 * src_stride + 0]; - dst_b[1] = src[1 * src_stride + 1]; - dst_a[2] = src[2 * src_stride + 0]; - dst_b[2] = src[2 * src_stride + 1]; - dst_a[3] = src[3 * src_stride + 0]; - dst_b[3] = src[3 * src_stride + 1]; - dst_a[4] = src[4 * src_stride + 0]; - dst_b[4] = src[4 * src_stride + 1]; - dst_a[5] = src[5 * src_stride + 0]; - dst_b[5] = src[5 * src_stride + 1]; - dst_a[6] = src[6 * src_stride + 0]; - dst_b[6] = src[6 * src_stride + 1]; - dst_a[7] = src[7 * src_stride + 0]; - dst_b[7] = src[7 * src_stride + 1]; - src += 2; - dst_a += dst_stride_a; - dst_b += dst_stride_b; - } -} - -static void TransposeUVWxH_C(const uint8* src, int src_stride, - uint8* dst_a, int dst_stride_a, - uint8* dst_b, int dst_stride_b, - int width, int height) { - int i; - for (i = 0; i < width * 2; i += 2) { - int j; - for (j = 0; j < height; ++j) { - dst_a[j + ((i >> 1) * dst_stride_a)] = src[i + (j * src_stride)]; - dst_b[j + ((i >> 1) * dst_stride_b)] = src[i + (j * src_stride) + 1]; - } - } -} - -LIBYUV_API -void TransposeUV(const uint8* src, int src_stride, - uint8* dst_a, int dst_stride_a, - uint8* dst_b, int dst_stride_b, - int width, int height) { - int i = height; - void (*TransposeUVWx8)(const uint8* src, int src_stride, - uint8* dst_a, int dst_stride_a, 
- uint8* dst_b, int dst_stride_b, - int width) = TransposeUVWx8_C; -#if defined(HAS_TRANSPOSE_UVWX8_NEON) - if (TestCpuFlag(kCpuHasNEON)) { - TransposeUVWx8 = TransposeUVWx8_NEON; - } -#elif defined(HAS_TRANSPOSE_UVWX8_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && - IS_ALIGNED(width, 8) && - IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16)) { - TransposeUVWx8 = TransposeUVWx8_SSE2; - } -#elif defined(HAS_TRANSPOSE_UVWx8_MIPS_DSPR2) - if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 2) && - IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4)) { - TransposeUVWx8 = TransposeUVWx8_MIPS_DSPR2; - } -#endif - - // Work through the source in 8x8 tiles. - while (i >= 8) { - TransposeUVWx8(src, src_stride, - dst_a, dst_stride_a, - dst_b, dst_stride_b, - width); - src += 8 * src_stride; // Go down 8 rows. - dst_a += 8; // Move over 8 columns. - dst_b += 8; // Move over 8 columns. - i -= 8; - } - - TransposeUVWxH_C(src, src_stride, - dst_a, dst_stride_a, - dst_b, dst_stride_b, - width, i); -} - -LIBYUV_API -void RotateUV90(const uint8* src, int src_stride, - uint8* dst_a, int dst_stride_a, - uint8* dst_b, int dst_stride_b, - int width, int height) { - src += src_stride * (height - 1); - src_stride = -src_stride; - - TransposeUV(src, src_stride, - dst_a, dst_stride_a, - dst_b, dst_stride_b, - width, height); -} - -LIBYUV_API -void RotateUV270(const uint8* src, int src_stride, - uint8* dst_a, int dst_stride_a, - uint8* dst_b, int dst_stride_b, - int width, int height) { - dst_a += dst_stride_a * (width - 1); - dst_b += dst_stride_b * (width - 1); - dst_stride_a = -dst_stride_a; - dst_stride_b = -dst_stride_b; - - TransposeUV(src, src_stride, - dst_a, dst_stride_a, - dst_b, dst_stride_b, - width, height); -} - -// Rotate 180 is a horizontal and vertical flip. 
-LIBYUV_API -void RotateUV180(const uint8* src, int src_stride, - uint8* dst_a, int dst_stride_a, - uint8* dst_b, int dst_stride_b, - int width, int height) { - int i; - void (*MirrorRowUV)(const uint8* src, uint8* dst_u, uint8* dst_v, int width) = - MirrorUVRow_C; -#if defined(HAS_MIRRORUVROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) { - MirrorRowUV = MirrorUVRow_NEON; - } -#elif defined(HAS_MIRRORROW_UV_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16) && - IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16)) { - MirrorRowUV = MirrorUVRow_SSSE3; - } -#elif defined(HAS_MIRRORUVROW_MIPS_DSPR2) - if (TestCpuFlag(kCpuHasMIPS_DSPR2) && - IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4)) { - MirrorRowUV = MirrorUVRow_MIPS_DSPR2; - } -#endif - - dst_a += dst_stride_a * (height - 1); - dst_b += dst_stride_b * (height - 1); - - for (i = 0; i < height; ++i) { - MirrorRowUV(src, dst_a, dst_b, width); - src += src_stride; - dst_a -= dst_stride_a; - dst_b -= dst_stride_b; - } -} - -LIBYUV_API -int RotatePlane(const uint8* src, int src_stride, - uint8* dst, int dst_stride, - int width, int height, - enum RotationMode mode) { - if (!src || width <= 0 || height == 0 || !dst) { - return -1; - } - - // Negative height means invert the image. 
- if (height < 0) { - height = -height; - src = src + (height - 1) * src_stride; - src_stride = -src_stride; - } - - switch (mode) { - case kRotate0: - // copy frame - CopyPlane(src, src_stride, - dst, dst_stride, - width, height); - return 0; - case kRotate90: - RotatePlane90(src, src_stride, - dst, dst_stride, - width, height); - return 0; - case kRotate270: - RotatePlane270(src, src_stride, - dst, dst_stride, - width, height); - return 0; - case kRotate180: - RotatePlane180(src, src_stride, - dst, dst_stride, - width, height); - return 0; - default: - break; - } - return -1; -} - -LIBYUV_API -int I420Rotate(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height, - enum RotationMode mode) { - int halfwidth = (width + 1) >> 1; - int halfheight = (height + 1) >> 1; - if (!src_y || !src_u || !src_v || width <= 0 || height == 0 || - !dst_y || !dst_u || !dst_v) { - return -1; - } - - // Negative height means invert the image. 
- if (height < 0) { - height = -height; - halfheight = (height + 1) >> 1; - src_y = src_y + (height - 1) * src_stride_y; - src_u = src_u + (halfheight - 1) * src_stride_u; - src_v = src_v + (halfheight - 1) * src_stride_v; - src_stride_y = -src_stride_y; - src_stride_u = -src_stride_u; - src_stride_v = -src_stride_v; - } - - switch (mode) { - case kRotate0: - // copy frame - return I420Copy(src_y, src_stride_y, - src_u, src_stride_u, - src_v, src_stride_v, - dst_y, dst_stride_y, - dst_u, dst_stride_u, - dst_v, dst_stride_v, - width, height); - case kRotate90: - RotatePlane90(src_y, src_stride_y, - dst_y, dst_stride_y, - width, height); - RotatePlane90(src_u, src_stride_u, - dst_u, dst_stride_u, - halfwidth, halfheight); - RotatePlane90(src_v, src_stride_v, - dst_v, dst_stride_v, - halfwidth, halfheight); - return 0; - case kRotate270: - RotatePlane270(src_y, src_stride_y, - dst_y, dst_stride_y, - width, height); - RotatePlane270(src_u, src_stride_u, - dst_u, dst_stride_u, - halfwidth, halfheight); - RotatePlane270(src_v, src_stride_v, - dst_v, dst_stride_v, - halfwidth, halfheight); - return 0; - case kRotate180: - RotatePlane180(src_y, src_stride_y, - dst_y, dst_stride_y, - width, height); - RotatePlane180(src_u, src_stride_u, - dst_u, dst_stride_u, - halfwidth, halfheight); - RotatePlane180(src_v, src_stride_v, - dst_v, dst_stride_v, - halfwidth, halfheight); - return 0; - default: - break; - } - return -1; -} - -LIBYUV_API -int NV12ToI420Rotate(const uint8* src_y, int src_stride_y, - const uint8* src_uv, int src_stride_uv, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int width, int height, - enum RotationMode mode) { - int halfwidth = (width + 1) >> 1; - int halfheight = (height + 1) >> 1; - if (!src_y || !src_uv || width <= 0 || height == 0 || - !dst_y || !dst_u || !dst_v) { - return -1; - } - - // Negative height means invert the image. 
- if (height < 0) { - height = -height; - halfheight = (height + 1) >> 1; - src_y = src_y + (height - 1) * src_stride_y; - src_uv = src_uv + (halfheight - 1) * src_stride_uv; - src_stride_y = -src_stride_y; - src_stride_uv = -src_stride_uv; - } - - switch (mode) { - case kRotate0: - // copy frame - return NV12ToI420(src_y, src_stride_y, - src_uv, src_stride_uv, - dst_y, dst_stride_y, - dst_u, dst_stride_u, - dst_v, dst_stride_v, - width, height); - case kRotate90: - RotatePlane90(src_y, src_stride_y, - dst_y, dst_stride_y, - width, height); - RotateUV90(src_uv, src_stride_uv, - dst_u, dst_stride_u, - dst_v, dst_stride_v, - halfwidth, halfheight); - return 0; - case kRotate270: - RotatePlane270(src_y, src_stride_y, - dst_y, dst_stride_y, - width, height); - RotateUV270(src_uv, src_stride_uv, - dst_u, dst_stride_u, - dst_v, dst_stride_v, - halfwidth, halfheight); - return 0; - case kRotate180: - RotatePlane180(src_y, src_stride_y, - dst_y, dst_stride_y, - width, height); - RotateUV180(src_uv, src_stride_uv, - dst_u, dst_stride_u, - dst_v, dst_stride_v, - halfwidth, halfheight); - return 0; - default: - break; - } - return -1; -} - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/rotate_argb.cc b/drivers/theoraplayer/src/YUV/libyuv/src/rotate_argb.cc deleted file mode 100755 index ab0f9ce070..0000000000 --- a/drivers/theoraplayer/src/YUV/libyuv/src/rotate_argb.cc +++ /dev/null @@ -1,209 +0,0 @@ -/* - * Copyright 2012 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include "libyuv/rotate.h" - -#include "libyuv/cpu_id.h" -#include "libyuv/convert.h" -#include "libyuv/planar_functions.h" -#include "libyuv/row.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -// ARGBScale has a function to copy pixels to a row, striding each source -// pixel by a constant. -#if !defined(LIBYUV_DISABLE_X86) && \ - (defined(_M_IX86) || \ - (defined(__x86_64__) && !defined(__native_client__)) || defined(__i386__)) -#define HAS_SCALEARGBROWDOWNEVEN_SSE2 -void ScaleARGBRowDownEven_SSE2(const uint8* src_ptr, int src_stride, - int src_stepx, - uint8* dst_ptr, int dst_width); -#endif -#if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \ - (defined(__ARM_NEON__) || defined(LIBYUV_NEON)) -#define HAS_SCALEARGBROWDOWNEVEN_NEON -void ScaleARGBRowDownEven_NEON(const uint8* src_ptr, int src_stride, - int src_stepx, - uint8* dst_ptr, int dst_width); -#endif - -void ScaleARGBRowDownEven_C(const uint8* src_ptr, int, - int src_stepx, - uint8* dst_ptr, int dst_width); - -static void ARGBTranspose(const uint8* src, int src_stride, - uint8* dst, int dst_stride, - int width, int height) { - int i; - int src_pixel_step = src_stride >> 2; - void (*ScaleARGBRowDownEven)(const uint8* src_ptr, int src_stride, - int src_step, uint8* dst_ptr, int dst_width) = ScaleARGBRowDownEven_C; -#if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(height, 4) && // Width of dest. - IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) { - ScaleARGBRowDownEven = ScaleARGBRowDownEven_SSE2; - } -#elif defined(HAS_SCALEARGBROWDOWNEVEN_NEON) - if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(height, 4) && // Width of dest. - IS_ALIGNED(src, 4)) { - ScaleARGBRowDownEven = ScaleARGBRowDownEven_NEON; - } -#endif - - for (i = 0; i < width; ++i) { // column of source to row of dest. 
- ScaleARGBRowDownEven(src, 0, src_pixel_step, dst, height); - dst += dst_stride; - src += 4; - } -} - -void ARGBRotate90(const uint8* src, int src_stride, - uint8* dst, int dst_stride, - int width, int height) { - // Rotate by 90 is a ARGBTranspose with the source read - // from bottom to top. So set the source pointer to the end - // of the buffer and flip the sign of the source stride. - src += src_stride * (height - 1); - src_stride = -src_stride; - ARGBTranspose(src, src_stride, dst, dst_stride, width, height); -} - -void ARGBRotate270(const uint8* src, int src_stride, - uint8* dst, int dst_stride, - int width, int height) { - // Rotate by 270 is a ARGBTranspose with the destination written - // from bottom to top. So set the destination pointer to the end - // of the buffer and flip the sign of the destination stride. - dst += dst_stride * (width - 1); - dst_stride = -dst_stride; - ARGBTranspose(src, src_stride, dst, dst_stride, width, height); -} - -void ARGBRotate180(const uint8* src, int src_stride, - uint8* dst, int dst_stride, - int width, int height) { - // Swap first and last row and mirror the content. Uses a temporary row. 
- align_buffer_64(row, width * 4); - const uint8* src_bot = src + src_stride * (height - 1); - uint8* dst_bot = dst + dst_stride * (height - 1); - int half_height = (height + 1) >> 1; - int y; - void (*ARGBMirrorRow)(const uint8* src, uint8* dst, int width) = - ARGBMirrorRow_C; - void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C; -#if defined(HAS_ARGBMIRRORROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4) && - IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) && - IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) { - ARGBMirrorRow = ARGBMirrorRow_SSSE3; - } -#endif -#if defined(HAS_ARGBMIRRORROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 8)) { - ARGBMirrorRow = ARGBMirrorRow_AVX2; - } -#endif -#if defined(HAS_ARGBMIRRORROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 4)) { - ARGBMirrorRow = ARGBMirrorRow_NEON; - } -#endif -#if defined(HAS_COPYROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width * 4, 32)) { - CopyRow = CopyRow_NEON; - } -#endif -#if defined(HAS_COPYROW_X86) - if (TestCpuFlag(kCpuHasX86)) { - CopyRow = CopyRow_X86; - } -#endif -#if defined(HAS_COPYROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width * 4, 32) && - IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) && - IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) { - CopyRow = CopyRow_SSE2; - } -#endif -#if defined(HAS_COPYROW_ERMS) - if (TestCpuFlag(kCpuHasERMS)) { - CopyRow = CopyRow_ERMS; - } -#endif -#if defined(HAS_COPYROW_MIPS) - if (TestCpuFlag(kCpuHasMIPS)) { - CopyRow = CopyRow_MIPS; - } -#endif - - // Odd height will harmlessly mirror the middle row twice. 
- for (y = 0; y < half_height; ++y) { - ARGBMirrorRow(src, row, width); // Mirror first row into a buffer - ARGBMirrorRow(src_bot, dst, width); // Mirror last row into first row - CopyRow(row, dst_bot, width * 4); // Copy first mirrored row into last - src += src_stride; - dst += dst_stride; - src_bot -= src_stride; - dst_bot -= dst_stride; - } - free_aligned_buffer_64(row); -} - -LIBYUV_API -int ARGBRotate(const uint8* src_argb, int src_stride_argb, - uint8* dst_argb, int dst_stride_argb, - int width, int height, - enum RotationMode mode) { - if (!src_argb || width <= 0 || height == 0 || !dst_argb) { - return -1; - } - - // Negative height means invert the image. - if (height < 0) { - height = -height; - src_argb = src_argb + (height - 1) * src_stride_argb; - src_stride_argb = -src_stride_argb; - } - - switch (mode) { - case kRotate0: - // copy frame - return ARGBCopy(src_argb, src_stride_argb, - dst_argb, dst_stride_argb, - width, height); - case kRotate90: - ARGBRotate90(src_argb, src_stride_argb, - dst_argb, dst_stride_argb, - width, height); - return 0; - case kRotate270: - ARGBRotate270(src_argb, src_stride_argb, - dst_argb, dst_stride_argb, - width, height); - return 0; - case kRotate180: - ARGBRotate180(src_argb, src_stride_argb, - dst_argb, dst_stride_argb, - width, height); - return 0; - default: - break; - } - return -1; -} - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/rotate_mips.cc b/drivers/theoraplayer/src/YUV/libyuv/src/rotate_mips.cc deleted file mode 100755 index 04d5a663f7..0000000000 --- a/drivers/theoraplayer/src/YUV/libyuv/src/rotate_mips.cc +++ /dev/null @@ -1,486 +0,0 @@ -/* - * Copyright 2011 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. 
An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "libyuv/row.h" - -#include "libyuv/basic_types.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -#if !defined(LIBYUV_DISABLE_MIPS) && \ - defined(__mips_dsp) && (__mips_dsp_rev >= 2) - -void TransposeWx8_MIPS_DSPR2(const uint8* src, int src_stride, - uint8* dst, int dst_stride, - int width) { - __asm__ __volatile__ ( - ".set push \n" - ".set noreorder \n" - "sll $t2, %[src_stride], 0x1 \n" // src_stride x 2 - "sll $t4, %[src_stride], 0x2 \n" // src_stride x 4 - "sll $t9, %[src_stride], 0x3 \n" // src_stride x 8 - "addu $t3, $t2, %[src_stride] \n" - "addu $t5, $t4, %[src_stride] \n" - "addu $t6, $t2, $t4 \n" - "andi $t0, %[dst], 0x3 \n" - "andi $t1, %[dst_stride], 0x3 \n" - "or $t0, $t0, $t1 \n" - "bnez $t0, 11f \n" - " subu $t7, $t9, %[src_stride] \n" -//dst + dst_stride word aligned - "1: \n" - "lbu $t0, 0(%[src]) \n" - "lbux $t1, %[src_stride](%[src]) \n" - "lbux $t8, $t2(%[src]) \n" - "lbux $t9, $t3(%[src]) \n" - "sll $t1, $t1, 16 \n" - "sll $t9, $t9, 16 \n" - "or $t0, $t0, $t1 \n" - "or $t8, $t8, $t9 \n" - "precr.qb.ph $s0, $t8, $t0 \n" - "lbux $t0, $t4(%[src]) \n" - "lbux $t1, $t5(%[src]) \n" - "lbux $t8, $t6(%[src]) \n" - "lbux $t9, $t7(%[src]) \n" - "sll $t1, $t1, 16 \n" - "sll $t9, $t9, 16 \n" - "or $t0, $t0, $t1 \n" - "or $t8, $t8, $t9 \n" - "precr.qb.ph $s1, $t8, $t0 \n" - "sw $s0, 0(%[dst]) \n" - "addiu %[width], -1 \n" - "addiu %[src], 1 \n" - "sw $s1, 4(%[dst]) \n" - "bnez %[width], 1b \n" - " addu %[dst], %[dst], %[dst_stride] \n" - "b 2f \n" -//dst + dst_stride unaligned - "11: \n" - "lbu $t0, 0(%[src]) \n" - "lbux $t1, %[src_stride](%[src]) \n" - "lbux $t8, $t2(%[src]) \n" - "lbux $t9, $t3(%[src]) \n" - "sll $t1, $t1, 16 \n" - "sll $t9, $t9, 16 \n" - "or $t0, $t0, $t1 \n" - "or $t8, $t8, $t9 \n" - "precr.qb.ph $s0, $t8, $t0 \n" 
- "lbux $t0, $t4(%[src]) \n" - "lbux $t1, $t5(%[src]) \n" - "lbux $t8, $t6(%[src]) \n" - "lbux $t9, $t7(%[src]) \n" - "sll $t1, $t1, 16 \n" - "sll $t9, $t9, 16 \n" - "or $t0, $t0, $t1 \n" - "or $t8, $t8, $t9 \n" - "precr.qb.ph $s1, $t8, $t0 \n" - "swr $s0, 0(%[dst]) \n" - "swl $s0, 3(%[dst]) \n" - "addiu %[width], -1 \n" - "addiu %[src], 1 \n" - "swr $s1, 4(%[dst]) \n" - "swl $s1, 7(%[dst]) \n" - "bnez %[width], 11b \n" - "addu %[dst], %[dst], %[dst_stride] \n" - "2: \n" - ".set pop \n" - :[src] "+r" (src), - [dst] "+r" (dst), - [width] "+r" (width) - :[src_stride] "r" (src_stride), - [dst_stride] "r" (dst_stride) - : "t0", "t1", "t2", "t3", "t4", "t5", - "t6", "t7", "t8", "t9", - "s0", "s1" - ); -} - -void TransposeWx8_FAST_MIPS_DSPR2(const uint8* src, int src_stride, - uint8* dst, int dst_stride, - int width) { - __asm__ __volatile__ ( - ".set noat \n" - ".set push \n" - ".set noreorder \n" - "beqz %[width], 2f \n" - " sll $t2, %[src_stride], 0x1 \n" // src_stride x 2 - "sll $t4, %[src_stride], 0x2 \n" // src_stride x 4 - "sll $t9, %[src_stride], 0x3 \n" // src_stride x 8 - "addu $t3, $t2, %[src_stride] \n" - "addu $t5, $t4, %[src_stride] \n" - "addu $t6, $t2, $t4 \n" - - "srl $AT, %[width], 0x2 \n" - "andi $t0, %[dst], 0x3 \n" - "andi $t1, %[dst_stride], 0x3 \n" - "or $t0, $t0, $t1 \n" - "bnez $t0, 11f \n" - " subu $t7, $t9, %[src_stride] \n" -//dst + dst_stride word aligned - "1: \n" - "lw $t0, 0(%[src]) \n" - "lwx $t1, %[src_stride](%[src]) \n" - "lwx $t8, $t2(%[src]) \n" - "lwx $t9, $t3(%[src]) \n" - -// t0 = | 30 | 20 | 10 | 00 | -// t1 = | 31 | 21 | 11 | 01 | -// t8 = | 32 | 22 | 12 | 02 | -// t9 = | 33 | 23 | 13 | 03 | - - "precr.qb.ph $s0, $t1, $t0 \n" - "precr.qb.ph $s1, $t9, $t8 \n" - "precrq.qb.ph $s2, $t1, $t0 \n" - "precrq.qb.ph $s3, $t9, $t8 \n" - - // s0 = | 21 | 01 | 20 | 00 | - // s1 = | 23 | 03 | 22 | 02 | - // s2 = | 31 | 11 | 30 | 10 | - // s3 = | 33 | 13 | 32 | 12 | - - "precr.qb.ph $s4, $s1, $s0 \n" - "precrq.qb.ph $s5, $s1, $s0 \n" - 
"precr.qb.ph $s6, $s3, $s2 \n" - "precrq.qb.ph $s7, $s3, $s2 \n" - - // s4 = | 03 | 02 | 01 | 00 | - // s5 = | 23 | 22 | 21 | 20 | - // s6 = | 13 | 12 | 11 | 10 | - // s7 = | 33 | 32 | 31 | 30 | - - "lwx $t0, $t4(%[src]) \n" - "lwx $t1, $t5(%[src]) \n" - "lwx $t8, $t6(%[src]) \n" - "lwx $t9, $t7(%[src]) \n" - -// t0 = | 34 | 24 | 14 | 04 | -// t1 = | 35 | 25 | 15 | 05 | -// t8 = | 36 | 26 | 16 | 06 | -// t9 = | 37 | 27 | 17 | 07 | - - "precr.qb.ph $s0, $t1, $t0 \n" - "precr.qb.ph $s1, $t9, $t8 \n" - "precrq.qb.ph $s2, $t1, $t0 \n" - "precrq.qb.ph $s3, $t9, $t8 \n" - - // s0 = | 25 | 05 | 24 | 04 | - // s1 = | 27 | 07 | 26 | 06 | - // s2 = | 35 | 15 | 34 | 14 | - // s3 = | 37 | 17 | 36 | 16 | - - "precr.qb.ph $t0, $s1, $s0 \n" - "precrq.qb.ph $t1, $s1, $s0 \n" - "precr.qb.ph $t8, $s3, $s2 \n" - "precrq.qb.ph $t9, $s3, $s2 \n" - - // t0 = | 07 | 06 | 05 | 04 | - // t1 = | 27 | 26 | 25 | 24 | - // t8 = | 17 | 16 | 15 | 14 | - // t9 = | 37 | 36 | 35 | 34 | - - "addu $s0, %[dst], %[dst_stride] \n" - "addu $s1, $s0, %[dst_stride] \n" - "addu $s2, $s1, %[dst_stride] \n" - - "sw $s4, 0(%[dst]) \n" - "sw $t0, 4(%[dst]) \n" - "sw $s6, 0($s0) \n" - "sw $t8, 4($s0) \n" - "sw $s5, 0($s1) \n" - "sw $t1, 4($s1) \n" - "sw $s7, 0($s2) \n" - "sw $t9, 4($s2) \n" - - "addiu $AT, -1 \n" - "addiu %[src], 4 \n" - - "bnez $AT, 1b \n" - " addu %[dst], $s2, %[dst_stride] \n" - "b 2f \n" -//dst + dst_stride unaligned - "11: \n" - "lw $t0, 0(%[src]) \n" - "lwx $t1, %[src_stride](%[src]) \n" - "lwx $t8, $t2(%[src]) \n" - "lwx $t9, $t3(%[src]) \n" - -// t0 = | 30 | 20 | 10 | 00 | -// t1 = | 31 | 21 | 11 | 01 | -// t8 = | 32 | 22 | 12 | 02 | -// t9 = | 33 | 23 | 13 | 03 | - - "precr.qb.ph $s0, $t1, $t0 \n" - "precr.qb.ph $s1, $t9, $t8 \n" - "precrq.qb.ph $s2, $t1, $t0 \n" - "precrq.qb.ph $s3, $t9, $t8 \n" - - // s0 = | 21 | 01 | 20 | 00 | - // s1 = | 23 | 03 | 22 | 02 | - // s2 = | 31 | 11 | 30 | 10 | - // s3 = | 33 | 13 | 32 | 12 | - - "precr.qb.ph $s4, $s1, $s0 \n" - "precrq.qb.ph $s5, $s1, 
$s0 \n" - "precr.qb.ph $s6, $s3, $s2 \n" - "precrq.qb.ph $s7, $s3, $s2 \n" - - // s4 = | 03 | 02 | 01 | 00 | - // s5 = | 23 | 22 | 21 | 20 | - // s6 = | 13 | 12 | 11 | 10 | - // s7 = | 33 | 32 | 31 | 30 | - - "lwx $t0, $t4(%[src]) \n" - "lwx $t1, $t5(%[src]) \n" - "lwx $t8, $t6(%[src]) \n" - "lwx $t9, $t7(%[src]) \n" - -// t0 = | 34 | 24 | 14 | 04 | -// t1 = | 35 | 25 | 15 | 05 | -// t8 = | 36 | 26 | 16 | 06 | -// t9 = | 37 | 27 | 17 | 07 | - - "precr.qb.ph $s0, $t1, $t0 \n" - "precr.qb.ph $s1, $t9, $t8 \n" - "precrq.qb.ph $s2, $t1, $t0 \n" - "precrq.qb.ph $s3, $t9, $t8 \n" - - // s0 = | 25 | 05 | 24 | 04 | - // s1 = | 27 | 07 | 26 | 06 | - // s2 = | 35 | 15 | 34 | 14 | - // s3 = | 37 | 17 | 36 | 16 | - - "precr.qb.ph $t0, $s1, $s0 \n" - "precrq.qb.ph $t1, $s1, $s0 \n" - "precr.qb.ph $t8, $s3, $s2 \n" - "precrq.qb.ph $t9, $s3, $s2 \n" - - // t0 = | 07 | 06 | 05 | 04 | - // t1 = | 27 | 26 | 25 | 24 | - // t8 = | 17 | 16 | 15 | 14 | - // t9 = | 37 | 36 | 35 | 34 | - - "addu $s0, %[dst], %[dst_stride] \n" - "addu $s1, $s0, %[dst_stride] \n" - "addu $s2, $s1, %[dst_stride] \n" - - "swr $s4, 0(%[dst]) \n" - "swl $s4, 3(%[dst]) \n" - "swr $t0, 4(%[dst]) \n" - "swl $t0, 7(%[dst]) \n" - "swr $s6, 0($s0) \n" - "swl $s6, 3($s0) \n" - "swr $t8, 4($s0) \n" - "swl $t8, 7($s0) \n" - "swr $s5, 0($s1) \n" - "swl $s5, 3($s1) \n" - "swr $t1, 4($s1) \n" - "swl $t1, 7($s1) \n" - "swr $s7, 0($s2) \n" - "swl $s7, 3($s2) \n" - "swr $t9, 4($s2) \n" - "swl $t9, 7($s2) \n" - - "addiu $AT, -1 \n" - "addiu %[src], 4 \n" - - "bnez $AT, 11b \n" - " addu %[dst], $s2, %[dst_stride] \n" - "2: \n" - ".set pop \n" - ".set at \n" - :[src] "+r" (src), - [dst] "+r" (dst), - [width] "+r" (width) - :[src_stride] "r" (src_stride), - [dst_stride] "r" (dst_stride) - : "t0", "t1", "t2", "t3", "t4", "t5", - "t6", "t7", "t8", "t9", - "s0", "s1", "s2", "s3", "s4", - "s5", "s6", "s7" - ); -} - -void TransposeUVWx8_MIPS_DSPR2(const uint8* src, int src_stride, - uint8* dst_a, int dst_stride_a, - uint8* dst_b, int 
dst_stride_b, - int width) { - __asm__ __volatile__ ( - ".set push \n" - ".set noreorder \n" - "beqz %[width], 2f \n" - " sll $t2, %[src_stride], 0x1 \n" // src_stride x 2 - "sll $t4, %[src_stride], 0x2 \n" // src_stride x 4 - "sll $t9, %[src_stride], 0x3 \n" // src_stride x 8 - "addu $t3, $t2, %[src_stride] \n" - "addu $t5, $t4, %[src_stride] \n" - "addu $t6, $t2, $t4 \n" - "subu $t7, $t9, %[src_stride] \n" - "srl $t1, %[width], 1 \n" - -// check word aligment for dst_a, dst_b, dst_stride_a and dst_stride_b - "andi $t0, %[dst_a], 0x3 \n" - "andi $t8, %[dst_b], 0x3 \n" - "or $t0, $t0, $t8 \n" - "andi $t8, %[dst_stride_a], 0x3 \n" - "andi $s5, %[dst_stride_b], 0x3 \n" - "or $t8, $t8, $s5 \n" - "or $t0, $t0, $t8 \n" - "bnez $t0, 11f \n" - " nop \n" -// dst + dst_stride word aligned (both, a & b dst addresses) - "1: \n" - "lw $t0, 0(%[src]) \n" // |B0|A0|b0|a0| - "lwx $t8, %[src_stride](%[src]) \n" // |B1|A1|b1|a1| - "addu $s5, %[dst_a], %[dst_stride_a] \n" - "lwx $t9, $t2(%[src]) \n" // |B2|A2|b2|a2| - "lwx $s0, $t3(%[src]) \n" // |B3|A3|b3|a3| - "addu $s6, %[dst_b], %[dst_stride_b] \n" - - "precrq.ph.w $s1, $t8, $t0 \n" // |B1|A1|B0|A0| - "precrq.ph.w $s2, $s0, $t9 \n" // |B3|A3|B2|A2| - "precr.qb.ph $s3, $s2, $s1 \n" // |A3|A2|A1|A0| - "precrq.qb.ph $s4, $s2, $s1 \n" // |B3|B2|B1|B0| - - "sll $t0, $t0, 16 \n" - "packrl.ph $s1, $t8, $t0 \n" // |b1|a1|b0|a0| - "sll $t9, $t9, 16 \n" - "packrl.ph $s2, $s0, $t9 \n" // |b3|a3|b2|a2| - - "sw $s3, 0($s5) \n" - "sw $s4, 0($s6) \n" - - "precr.qb.ph $s3, $s2, $s1 \n" // |a3|a2|a1|a0| - "precrq.qb.ph $s4, $s2, $s1 \n" // |b3|b2|b1|b0| - - "lwx $t0, $t4(%[src]) \n" // |B4|A4|b4|a4| - "lwx $t8, $t5(%[src]) \n" // |B5|A5|b5|a5| - "lwx $t9, $t6(%[src]) \n" // |B6|A6|b6|a6| - "lwx $s0, $t7(%[src]) \n" // |B7|A7|b7|a7| - "sw $s3, 0(%[dst_a]) \n" - "sw $s4, 0(%[dst_b]) \n" - - "precrq.ph.w $s1, $t8, $t0 \n" // |B5|A5|B4|A4| - "precrq.ph.w $s2, $s0, $t9 \n" // |B6|A6|B7|A7| - "precr.qb.ph $s3, $s2, $s1 \n" // |A7|A6|A5|A4| - 
"precrq.qb.ph $s4, $s2, $s1 \n" // |B7|B6|B5|B4| - - "sll $t0, $t0, 16 \n" - "packrl.ph $s1, $t8, $t0 \n" // |b5|a5|b4|a4| - "sll $t9, $t9, 16 \n" - "packrl.ph $s2, $s0, $t9 \n" // |b7|a7|b6|a6| - "sw $s3, 4($s5) \n" - "sw $s4, 4($s6) \n" - - "precr.qb.ph $s3, $s2, $s1 \n" // |a7|a6|a5|a4| - "precrq.qb.ph $s4, $s2, $s1 \n" // |b7|b6|b5|b4| - - "addiu %[src], 4 \n" - "addiu $t1, -1 \n" - "sll $t0, %[dst_stride_a], 1 \n" - "sll $t8, %[dst_stride_b], 1 \n" - "sw $s3, 4(%[dst_a]) \n" - "sw $s4, 4(%[dst_b]) \n" - "addu %[dst_a], %[dst_a], $t0 \n" - "bnez $t1, 1b \n" - " addu %[dst_b], %[dst_b], $t8 \n" - "b 2f \n" - " nop \n" - -// dst_a or dst_b or dst_stride_a or dst_stride_b not word aligned - "11: \n" - "lw $t0, 0(%[src]) \n" // |B0|A0|b0|a0| - "lwx $t8, %[src_stride](%[src]) \n" // |B1|A1|b1|a1| - "addu $s5, %[dst_a], %[dst_stride_a] \n" - "lwx $t9, $t2(%[src]) \n" // |B2|A2|b2|a2| - "lwx $s0, $t3(%[src]) \n" // |B3|A3|b3|a3| - "addu $s6, %[dst_b], %[dst_stride_b] \n" - - "precrq.ph.w $s1, $t8, $t0 \n" // |B1|A1|B0|A0| - "precrq.ph.w $s2, $s0, $t9 \n" // |B3|A3|B2|A2| - "precr.qb.ph $s3, $s2, $s1 \n" // |A3|A2|A1|A0| - "precrq.qb.ph $s4, $s2, $s1 \n" // |B3|B2|B1|B0| - - "sll $t0, $t0, 16 \n" - "packrl.ph $s1, $t8, $t0 \n" // |b1|a1|b0|a0| - "sll $t9, $t9, 16 \n" - "packrl.ph $s2, $s0, $t9 \n" // |b3|a3|b2|a2| - - "swr $s3, 0($s5) \n" - "swl $s3, 3($s5) \n" - "swr $s4, 0($s6) \n" - "swl $s4, 3($s6) \n" - - "precr.qb.ph $s3, $s2, $s1 \n" // |a3|a2|a1|a0| - "precrq.qb.ph $s4, $s2, $s1 \n" // |b3|b2|b1|b0| - - "lwx $t0, $t4(%[src]) \n" // |B4|A4|b4|a4| - "lwx $t8, $t5(%[src]) \n" // |B5|A5|b5|a5| - "lwx $t9, $t6(%[src]) \n" // |B6|A6|b6|a6| - "lwx $s0, $t7(%[src]) \n" // |B7|A7|b7|a7| - "swr $s3, 0(%[dst_a]) \n" - "swl $s3, 3(%[dst_a]) \n" - "swr $s4, 0(%[dst_b]) \n" - "swl $s4, 3(%[dst_b]) \n" - - "precrq.ph.w $s1, $t8, $t0 \n" // |B5|A5|B4|A4| - "precrq.ph.w $s2, $s0, $t9 \n" // |B6|A6|B7|A7| - "precr.qb.ph $s3, $s2, $s1 \n" // |A7|A6|A5|A4| - "precrq.qb.ph $s4, 
$s2, $s1 \n" // |B7|B6|B5|B4| - - "sll $t0, $t0, 16 \n" - "packrl.ph $s1, $t8, $t0 \n" // |b5|a5|b4|a4| - "sll $t9, $t9, 16 \n" - "packrl.ph $s2, $s0, $t9 \n" // |b7|a7|b6|a6| - - "swr $s3, 4($s5) \n" - "swl $s3, 7($s5) \n" - "swr $s4, 4($s6) \n" - "swl $s4, 7($s6) \n" - - "precr.qb.ph $s3, $s2, $s1 \n" // |a7|a6|a5|a4| - "precrq.qb.ph $s4, $s2, $s1 \n" // |b7|b6|b5|b4| - - "addiu %[src], 4 \n" - "addiu $t1, -1 \n" - "sll $t0, %[dst_stride_a], 1 \n" - "sll $t8, %[dst_stride_b], 1 \n" - "swr $s3, 4(%[dst_a]) \n" - "swl $s3, 7(%[dst_a]) \n" - "swr $s4, 4(%[dst_b]) \n" - "swl $s4, 7(%[dst_b]) \n" - "addu %[dst_a], %[dst_a], $t0 \n" - "bnez $t1, 11b \n" - " addu %[dst_b], %[dst_b], $t8 \n" - - "2: \n" - ".set pop \n" - : [src] "+r" (src), - [dst_a] "+r" (dst_a), - [dst_b] "+r" (dst_b), - [width] "+r" (width), - [src_stride] "+r" (src_stride) - : [dst_stride_a] "r" (dst_stride_a), - [dst_stride_b] "r" (dst_stride_b) - : "t0", "t1", "t2", "t3", "t4", "t5", - "t6", "t7", "t8", "t9", - "s0", "s1", "s2", "s3", - "s4", "s5", "s6" - ); -} - -#endif // defined(__mips_dsp) && (__mips_dsp_rev >= 2) - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/rotate_neon.cc b/drivers/theoraplayer/src/YUV/libyuv/src/rotate_neon.cc deleted file mode 100755 index 274c4109cd..0000000000 --- a/drivers/theoraplayer/src/YUV/libyuv/src/rotate_neon.cc +++ /dev/null @@ -1,412 +0,0 @@ -/* - * Copyright 2011 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include "libyuv/row.h" - -#include "libyuv/basic_types.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__) -static uvec8 kVTbl4x4Transpose = - { 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 }; - -void TransposeWx8_NEON(const uint8* src, int src_stride, - uint8* dst, int dst_stride, - int width) { - const uint8* src_temp = NULL; - asm volatile ( - // loops are on blocks of 8. loop will stop when - // counter gets to or below 0. starting the counter - // at w-8 allow for this -#ifdef _ANDROID - ".fpu neon\n" -#endif - "sub %5, #8 \n" - - // handle 8x8 blocks. this should be the majority of the plane - ".p2align 2 \n" - "1: \n" - "mov %0, %1 \n" - - "vld1.8 {d0}, [%0], %2 \n" - "vld1.8 {d1}, [%0], %2 \n" - "vld1.8 {d2}, [%0], %2 \n" - "vld1.8 {d3}, [%0], %2 \n" - "vld1.8 {d4}, [%0], %2 \n" - "vld1.8 {d5}, [%0], %2 \n" - "vld1.8 {d6}, [%0], %2 \n" - "vld1.8 {d7}, [%0] \n" - - "vtrn.8 d1, d0 \n" - "vtrn.8 d3, d2 \n" - "vtrn.8 d5, d4 \n" - "vtrn.8 d7, d6 \n" - - "vtrn.16 d1, d3 \n" - "vtrn.16 d0, d2 \n" - "vtrn.16 d5, d7 \n" - "vtrn.16 d4, d6 \n" - - "vtrn.32 d1, d5 \n" - "vtrn.32 d0, d4 \n" - "vtrn.32 d3, d7 \n" - "vtrn.32 d2, d6 \n" - - "vrev16.8 q0, q0 \n" - "vrev16.8 q1, q1 \n" - "vrev16.8 q2, q2 \n" - "vrev16.8 q3, q3 \n" - - "mov %0, %3 \n" - - "vst1.8 {d1}, [%0], %4 \n" - "vst1.8 {d0}, [%0], %4 \n" - "vst1.8 {d3}, [%0], %4 \n" - "vst1.8 {d2}, [%0], %4 \n" - "vst1.8 {d5}, [%0], %4 \n" - "vst1.8 {d4}, [%0], %4 \n" - "vst1.8 {d7}, [%0], %4 \n" - "vst1.8 {d6}, [%0] \n" - - "add %1, #8 \n" // src += 8 - "add %3, %3, %4, lsl #3 \n" // dst += 8 * dst_stride - "subs %5, #8 \n" // w -= 8 - "bge 1b \n" - - // add 8 back to counter. if the result is 0 there are - // no residuals. 
- "adds %5, #8 \n" - "beq 4f \n" - - // some residual, so between 1 and 7 lines left to transpose - "cmp %5, #2 \n" - "blt 3f \n" - - "cmp %5, #4 \n" - "blt 2f \n" - - // 4x8 block - "mov %0, %1 \n" - "vld1.32 {d0[0]}, [%0], %2 \n" - "vld1.32 {d0[1]}, [%0], %2 \n" - "vld1.32 {d1[0]}, [%0], %2 \n" - "vld1.32 {d1[1]}, [%0], %2 \n" - "vld1.32 {d2[0]}, [%0], %2 \n" - "vld1.32 {d2[1]}, [%0], %2 \n" - "vld1.32 {d3[0]}, [%0], %2 \n" - "vld1.32 {d3[1]}, [%0] \n" - - "mov %0, %3 \n" - - "vld1.8 {q3}, [%6] \n" - - "vtbl.8 d4, {d0, d1}, d6 \n" - "vtbl.8 d5, {d0, d1}, d7 \n" - "vtbl.8 d0, {d2, d3}, d6 \n" - "vtbl.8 d1, {d2, d3}, d7 \n" - - // TODO(frkoenig): Rework shuffle above to - // write out with 4 instead of 8 writes. - "vst1.32 {d4[0]}, [%0], %4 \n" - "vst1.32 {d4[1]}, [%0], %4 \n" - "vst1.32 {d5[0]}, [%0], %4 \n" - "vst1.32 {d5[1]}, [%0] \n" - - "add %0, %3, #4 \n" - "vst1.32 {d0[0]}, [%0], %4 \n" - "vst1.32 {d0[1]}, [%0], %4 \n" - "vst1.32 {d1[0]}, [%0], %4 \n" - "vst1.32 {d1[1]}, [%0] \n" - - "add %1, #4 \n" // src += 4 - "add %3, %3, %4, lsl #2 \n" // dst += 4 * dst_stride - "subs %5, #4 \n" // w -= 4 - "beq 4f \n" - - // some residual, check to see if it includes a 2x8 block, - // or less - "cmp %5, #2 \n" - "blt 3f \n" - - // 2x8 block - "2: \n" - "mov %0, %1 \n" - "vld1.16 {d0[0]}, [%0], %2 \n" - "vld1.16 {d1[0]}, [%0], %2 \n" - "vld1.16 {d0[1]}, [%0], %2 \n" - "vld1.16 {d1[1]}, [%0], %2 \n" - "vld1.16 {d0[2]}, [%0], %2 \n" - "vld1.16 {d1[2]}, [%0], %2 \n" - "vld1.16 {d0[3]}, [%0], %2 \n" - "vld1.16 {d1[3]}, [%0] \n" - - "vtrn.8 d0, d1 \n" - - "mov %0, %3 \n" - - "vst1.64 {d0}, [%0], %4 \n" - "vst1.64 {d1}, [%0] \n" - - "add %1, #2 \n" // src += 2 - "add %3, %3, %4, lsl #1 \n" // dst += 2 * dst_stride - "subs %5, #2 \n" // w -= 2 - "beq 4f \n" - - // 1x8 block - "3: \n" - "vld1.8 {d0[0]}, [%1], %2 \n" - "vld1.8 {d0[1]}, [%1], %2 \n" - "vld1.8 {d0[2]}, [%1], %2 \n" - "vld1.8 {d0[3]}, [%1], %2 \n" - "vld1.8 {d0[4]}, [%1], %2 \n" - "vld1.8 {d0[5]}, [%1], %2 \n" - 
"vld1.8 {d0[6]}, [%1], %2 \n" - "vld1.8 {d0[7]}, [%1] \n" - - "vst1.64 {d0}, [%3] \n" - - "4: \n" - - : "+r"(src_temp), // %0 - "+r"(src), // %1 - "+r"(src_stride), // %2 - "+r"(dst), // %3 - "+r"(dst_stride), // %4 - "+r"(width) // %5 - : "r"(&kVTbl4x4Transpose) // %6 - : "memory", "cc", "q0", "q1", "q2", "q3" - ); -} - -static uvec8 kVTbl4x4TransposeDi = - { 0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15 }; - -void TransposeUVWx8_NEON(const uint8* src, int src_stride, - uint8* dst_a, int dst_stride_a, - uint8* dst_b, int dst_stride_b, - int width) { - const uint8* src_temp = NULL; - asm volatile ( - // loops are on blocks of 8. loop will stop when - // counter gets to or below 0. starting the counter - // at w-8 allow for this - "sub %7, #8 \n" - - // handle 8x8 blocks. this should be the majority of the plane - ".p2align 2 \n" - "1: \n" - "mov %0, %1 \n" - - "vld2.8 {d0, d1}, [%0], %2 \n" - "vld2.8 {d2, d3}, [%0], %2 \n" - "vld2.8 {d4, d5}, [%0], %2 \n" - "vld2.8 {d6, d7}, [%0], %2 \n" - "vld2.8 {d16, d17}, [%0], %2 \n" - "vld2.8 {d18, d19}, [%0], %2 \n" - "vld2.8 {d20, d21}, [%0], %2 \n" - "vld2.8 {d22, d23}, [%0] \n" - - "vtrn.8 q1, q0 \n" - "vtrn.8 q3, q2 \n" - "vtrn.8 q9, q8 \n" - "vtrn.8 q11, q10 \n" - - "vtrn.16 q1, q3 \n" - "vtrn.16 q0, q2 \n" - "vtrn.16 q9, q11 \n" - "vtrn.16 q8, q10 \n" - - "vtrn.32 q1, q9 \n" - "vtrn.32 q0, q8 \n" - "vtrn.32 q3, q11 \n" - "vtrn.32 q2, q10 \n" - - "vrev16.8 q0, q0 \n" - "vrev16.8 q1, q1 \n" - "vrev16.8 q2, q2 \n" - "vrev16.8 q3, q3 \n" - "vrev16.8 q8, q8 \n" - "vrev16.8 q9, q9 \n" - "vrev16.8 q10, q10 \n" - "vrev16.8 q11, q11 \n" - - "mov %0, %3 \n" - - "vst1.8 {d2}, [%0], %4 \n" - "vst1.8 {d0}, [%0], %4 \n" - "vst1.8 {d6}, [%0], %4 \n" - "vst1.8 {d4}, [%0], %4 \n" - "vst1.8 {d18}, [%0], %4 \n" - "vst1.8 {d16}, [%0], %4 \n" - "vst1.8 {d22}, [%0], %4 \n" - "vst1.8 {d20}, [%0] \n" - - "mov %0, %5 \n" - - "vst1.8 {d3}, [%0], %6 \n" - "vst1.8 {d1}, [%0], %6 \n" - "vst1.8 {d7}, [%0], %6 \n" - "vst1.8 {d5}, [%0], %6 \n" 
- "vst1.8 {d19}, [%0], %6 \n" - "vst1.8 {d17}, [%0], %6 \n" - "vst1.8 {d23}, [%0], %6 \n" - "vst1.8 {d21}, [%0] \n" - - "add %1, #8*2 \n" // src += 8*2 - "add %3, %3, %4, lsl #3 \n" // dst_a += 8 * dst_stride_a - "add %5, %5, %6, lsl #3 \n" // dst_b += 8 * dst_stride_b - "subs %7, #8 \n" // w -= 8 - "bge 1b \n" - - // add 8 back to counter. if the result is 0 there are - // no residuals. - "adds %7, #8 \n" - "beq 4f \n" - - // some residual, so between 1 and 7 lines left to transpose - "cmp %7, #2 \n" - "blt 3f \n" - - "cmp %7, #4 \n" - "blt 2f \n" - - //TODO(frkoenig): Clean this up - // 4x8 block - "mov %0, %1 \n" - "vld1.64 {d0}, [%0], %2 \n" - "vld1.64 {d1}, [%0], %2 \n" - "vld1.64 {d2}, [%0], %2 \n" - "vld1.64 {d3}, [%0], %2 \n" - "vld1.64 {d4}, [%0], %2 \n" - "vld1.64 {d5}, [%0], %2 \n" - "vld1.64 {d6}, [%0], %2 \n" - "vld1.64 {d7}, [%0] \n" - - "vld1.8 {q15}, [%8] \n" - - "vtrn.8 q0, q1 \n" - "vtrn.8 q2, q3 \n" - - "vtbl.8 d16, {d0, d1}, d30 \n" - "vtbl.8 d17, {d0, d1}, d31 \n" - "vtbl.8 d18, {d2, d3}, d30 \n" - "vtbl.8 d19, {d2, d3}, d31 \n" - "vtbl.8 d20, {d4, d5}, d30 \n" - "vtbl.8 d21, {d4, d5}, d31 \n" - "vtbl.8 d22, {d6, d7}, d30 \n" - "vtbl.8 d23, {d6, d7}, d31 \n" - - "mov %0, %3 \n" - - "vst1.32 {d16[0]}, [%0], %4 \n" - "vst1.32 {d16[1]}, [%0], %4 \n" - "vst1.32 {d17[0]}, [%0], %4 \n" - "vst1.32 {d17[1]}, [%0], %4 \n" - - "add %0, %3, #4 \n" - "vst1.32 {d20[0]}, [%0], %4 \n" - "vst1.32 {d20[1]}, [%0], %4 \n" - "vst1.32 {d21[0]}, [%0], %4 \n" - "vst1.32 {d21[1]}, [%0] \n" - - "mov %0, %5 \n" - - "vst1.32 {d18[0]}, [%0], %6 \n" - "vst1.32 {d18[1]}, [%0], %6 \n" - "vst1.32 {d19[0]}, [%0], %6 \n" - "vst1.32 {d19[1]}, [%0], %6 \n" - - "add %0, %5, #4 \n" - "vst1.32 {d22[0]}, [%0], %6 \n" - "vst1.32 {d22[1]}, [%0], %6 \n" - "vst1.32 {d23[0]}, [%0], %6 \n" - "vst1.32 {d23[1]}, [%0] \n" - - "add %1, #4*2 \n" // src += 4 * 2 - "add %3, %3, %4, lsl #2 \n" // dst_a += 4 * dst_stride_a - "add %5, %5, %6, lsl #2 \n" // dst_b += 4 * dst_stride_b - "subs %7, #4 
\n" // w -= 4 - "beq 4f \n" - - // some residual, check to see if it includes a 2x8 block, - // or less - "cmp %7, #2 \n" - "blt 3f \n" - - // 2x8 block - "2: \n" - "mov %0, %1 \n" - "vld2.16 {d0[0], d2[0]}, [%0], %2 \n" - "vld2.16 {d1[0], d3[0]}, [%0], %2 \n" - "vld2.16 {d0[1], d2[1]}, [%0], %2 \n" - "vld2.16 {d1[1], d3[1]}, [%0], %2 \n" - "vld2.16 {d0[2], d2[2]}, [%0], %2 \n" - "vld2.16 {d1[2], d3[2]}, [%0], %2 \n" - "vld2.16 {d0[3], d2[3]}, [%0], %2 \n" - "vld2.16 {d1[3], d3[3]}, [%0] \n" - - "vtrn.8 d0, d1 \n" - "vtrn.8 d2, d3 \n" - - "mov %0, %3 \n" - - "vst1.64 {d0}, [%0], %4 \n" - "vst1.64 {d2}, [%0] \n" - - "mov %0, %5 \n" - - "vst1.64 {d1}, [%0], %6 \n" - "vst1.64 {d3}, [%0] \n" - - "add %1, #2*2 \n" // src += 2 * 2 - "add %3, %3, %4, lsl #1 \n" // dst_a += 2 * dst_stride_a - "add %5, %5, %6, lsl #1 \n" // dst_b += 2 * dst_stride_b - "subs %7, #2 \n" // w -= 2 - "beq 4f \n" - - // 1x8 block - "3: \n" - "vld2.8 {d0[0], d1[0]}, [%1], %2 \n" - "vld2.8 {d0[1], d1[1]}, [%1], %2 \n" - "vld2.8 {d0[2], d1[2]}, [%1], %2 \n" - "vld2.8 {d0[3], d1[3]}, [%1], %2 \n" - "vld2.8 {d0[4], d1[4]}, [%1], %2 \n" - "vld2.8 {d0[5], d1[5]}, [%1], %2 \n" - "vld2.8 {d0[6], d1[6]}, [%1], %2 \n" - "vld2.8 {d0[7], d1[7]}, [%1] \n" - - "vst1.64 {d0}, [%3] \n" - "vst1.64 {d1}, [%5] \n" - - "4: \n" - - : "+r"(src_temp), // %0 - "+r"(src), // %1 - "+r"(src_stride), // %2 - "+r"(dst_a), // %3 - "+r"(dst_stride_a), // %4 - "+r"(dst_b), // %5 - "+r"(dst_stride_b), // %6 - "+r"(width) // %7 - : "r"(&kVTbl4x4TransposeDi) // %8 - : "memory", "cc", - "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11" - ); -} -#endif - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/row_any.cc b/drivers/theoraplayer/src/YUV/libyuv/src/row_any.cc deleted file mode 100755 index 90c6a3ff5f..0000000000 --- a/drivers/theoraplayer/src/YUV/libyuv/src/row_any.cc +++ /dev/null @@ -1,542 +0,0 @@ -/* - * Copyright 2012 The LibYuv Project Authors. 
All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "libyuv/row.h" - -#include "libyuv/basic_types.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -// TODO(fbarchard): Consider 'any' functions handling any quantity of pixels. -// TODO(fbarchard): Consider 'any' functions handling odd alignment. -// YUV to RGB does multiple of 8 with SIMD and remainder with C. -#define YANY(NAMEANY, I420TORGB_SIMD, I420TORGB_C, UV_SHIFT, BPP, MASK) \ - void NAMEANY(const uint8* y_buf, \ - const uint8* u_buf, \ - const uint8* v_buf, \ - uint8* rgb_buf, \ - int width) { \ - int n = width & ~MASK; \ - I420TORGB_SIMD(y_buf, u_buf, v_buf, rgb_buf, n); \ - I420TORGB_C(y_buf + n, \ - u_buf + (n >> UV_SHIFT), \ - v_buf + (n >> UV_SHIFT), \ - rgb_buf + n * BPP, width & MASK); \ - } - -#ifdef HAS_I422TOARGBROW_SSSE3 -YANY(I444ToARGBRow_Any_SSSE3, I444ToARGBRow_Unaligned_SSSE3, I444ToARGBRow_C, - 0, 4, 7) -YANY(I422ToARGBRow_Any_SSSE3, I422ToARGBRow_Unaligned_SSSE3, I422ToARGBRow_C, - 1, 4, 7) -YANY(I411ToARGBRow_Any_SSSE3, I411ToARGBRow_Unaligned_SSSE3, I411ToARGBRow_C, - 2, 4, 7) -YANY(I422ToBGRARow_Any_SSSE3, I422ToBGRARow_Unaligned_SSSE3, I422ToBGRARow_C, - 1, 4, 7) -YANY(I422ToABGRRow_Any_SSSE3, I422ToABGRRow_Unaligned_SSSE3, I422ToABGRRow_C, - 1, 4, 7) -YANY(I422ToRGBARow_Any_SSSE3, I422ToRGBARow_Unaligned_SSSE3, I422ToRGBARow_C, - 1, 4, 7) -// I422ToRGB565Row_SSSE3 is unaligned. 
-YANY(I422ToARGB4444Row_Any_SSSE3, I422ToARGB4444Row_SSSE3, I422ToARGB4444Row_C, - 1, 2, 7) -YANY(I422ToARGB1555Row_Any_SSSE3, I422ToARGB1555Row_SSSE3, I422ToARGB1555Row_C, - 1, 2, 7) -YANY(I422ToRGB565Row_Any_SSSE3, I422ToRGB565Row_SSSE3, I422ToRGB565Row_C, - 1, 2, 7) -// I422ToRGB24Row_SSSE3 is unaligned. -YANY(I422ToRGB24Row_Any_SSSE3, I422ToRGB24Row_SSSE3, I422ToRGB24Row_C, 1, 3, 7) -YANY(I422ToRAWRow_Any_SSSE3, I422ToRAWRow_SSSE3, I422ToRAWRow_C, 1, 3, 7) -YANY(I422ToYUY2Row_Any_SSE2, I422ToYUY2Row_SSE2, I422ToYUY2Row_C, 1, 2, 15) -YANY(I422ToUYVYRow_Any_SSE2, I422ToUYVYRow_SSE2, I422ToUYVYRow_C, 1, 2, 15) -#endif // HAS_I422TOARGBROW_SSSE3 -#ifdef HAS_I422TOARGBROW_AVX2 -YANY(I422ToARGBRow_Any_AVX2, I422ToARGBRow_AVX2, I422ToARGBRow_C, 1, 4, 15) -#endif // HAS_I422TOARGBROW_AVX2 -#ifdef HAS_I422TOARGBROW_NEON -YANY(I444ToARGBRow_Any_NEON, I444ToARGBRow_NEON, I444ToARGBRow_C, 0, 4, 7) -YANY(I422ToARGBRow_Any_NEON, I422ToARGBRow_NEON, I422ToARGBRow_C, 1, 4, 7) -YANY(I411ToARGBRow_Any_NEON, I411ToARGBRow_NEON, I411ToARGBRow_C, 2, 4, 7) -YANY(I422ToBGRARow_Any_NEON, I422ToBGRARow_NEON, I422ToBGRARow_C, 1, 4, 7) -YANY(I422ToABGRRow_Any_NEON, I422ToABGRRow_NEON, I422ToABGRRow_C, 1, 4, 7) -YANY(I422ToRGBARow_Any_NEON, I422ToRGBARow_NEON, I422ToRGBARow_C, 1, 4, 7) -YANY(I422ToRGB24Row_Any_NEON, I422ToRGB24Row_NEON, I422ToRGB24Row_C, 1, 3, 7) -YANY(I422ToRAWRow_Any_NEON, I422ToRAWRow_NEON, I422ToRAWRow_C, 1, 3, 7) -YANY(I422ToARGB4444Row_Any_NEON, I422ToARGB4444Row_NEON, I422ToARGB4444Row_C, - 1, 2, 7) -YANY(I422ToARGB1555Row_Any_NEON, I422ToARGB1555Row_NEON, I422ToARGB1555Row_C, - 1, 2, 7) -YANY(I422ToRGB565Row_Any_NEON, I422ToRGB565Row_NEON, I422ToRGB565Row_C, 1, 2, 7) -YANY(I422ToYUY2Row_Any_NEON, I422ToYUY2Row_NEON, I422ToYUY2Row_C, 1, 2, 15) -YANY(I422ToUYVYRow_Any_NEON, I422ToUYVYRow_NEON, I422ToUYVYRow_C, 1, 2, 15) -#endif // HAS_I422TOARGBROW_NEON -#undef YANY - -// Wrappers to handle odd width -#define NV2NY(NAMEANY, NV12TORGB_SIMD, NV12TORGB_C, UV_SHIFT, 
BPP) \ - void NAMEANY(const uint8* y_buf, \ - const uint8* uv_buf, \ - uint8* rgb_buf, \ - int width) { \ - int n = width & ~7; \ - NV12TORGB_SIMD(y_buf, uv_buf, rgb_buf, n); \ - NV12TORGB_C(y_buf + n, \ - uv_buf + (n >> UV_SHIFT), \ - rgb_buf + n * BPP, width & 7); \ - } - -#ifdef HAS_NV12TOARGBROW_SSSE3 -NV2NY(NV12ToARGBRow_Any_SSSE3, NV12ToARGBRow_Unaligned_SSSE3, NV12ToARGBRow_C, - 0, 4) -NV2NY(NV21ToARGBRow_Any_SSSE3, NV21ToARGBRow_Unaligned_SSSE3, NV21ToARGBRow_C, - 0, 4) -#endif // HAS_NV12TOARGBROW_SSSE3 -#ifdef HAS_NV12TOARGBROW_NEON -NV2NY(NV12ToARGBRow_Any_NEON, NV12ToARGBRow_NEON, NV12ToARGBRow_C, 0, 4) -NV2NY(NV21ToARGBRow_Any_NEON, NV21ToARGBRow_NEON, NV21ToARGBRow_C, 0, 4) -#endif // HAS_NV12TOARGBROW_NEON -#ifdef HAS_NV12TORGB565ROW_SSSE3 -NV2NY(NV12ToRGB565Row_Any_SSSE3, NV12ToRGB565Row_SSSE3, NV12ToRGB565Row_C, - 0, 2) -NV2NY(NV21ToRGB565Row_Any_SSSE3, NV21ToRGB565Row_SSSE3, NV21ToRGB565Row_C, - 0, 2) -#endif // HAS_NV12TORGB565ROW_SSSE3 -#ifdef HAS_NV12TORGB565ROW_NEON -NV2NY(NV12ToRGB565Row_Any_NEON, NV12ToRGB565Row_NEON, NV12ToRGB565Row_C, 0, 2) -NV2NY(NV21ToRGB565Row_Any_NEON, NV21ToRGB565Row_NEON, NV21ToRGB565Row_C, 0, 2) -#endif // HAS_NV12TORGB565ROW_NEON -#undef NVANY - -#define RGBANY(NAMEANY, ARGBTORGB_SIMD, ARGBTORGB_C, MASK, SBPP, BPP) \ - void NAMEANY(const uint8* src, \ - uint8* dst, \ - int width) { \ - int n = width & ~MASK; \ - ARGBTORGB_SIMD(src, dst, n); \ - ARGBTORGB_C(src + n * SBPP, dst + n * BPP, width & MASK); \ - } - -#if defined(HAS_ARGBTORGB24ROW_SSSE3) -RGBANY(ARGBToRGB24Row_Any_SSSE3, ARGBToRGB24Row_SSSE3, ARGBToRGB24Row_C, - 15, 4, 3) -RGBANY(ARGBToRAWRow_Any_SSSE3, ARGBToRAWRow_SSSE3, ARGBToRAWRow_C, - 15, 4, 3) -RGBANY(ARGBToRGB565Row_Any_SSE2, ARGBToRGB565Row_SSE2, ARGBToRGB565Row_C, - 3, 4, 2) -RGBANY(ARGBToARGB1555Row_Any_SSE2, ARGBToARGB1555Row_SSE2, ARGBToARGB1555Row_C, - 3, 4, 2) -RGBANY(ARGBToARGB4444Row_Any_SSE2, ARGBToARGB4444Row_SSE2, ARGBToARGB4444Row_C, - 3, 4, 2) -#endif -#if 
defined(HAS_I400TOARGBROW_SSE2) -RGBANY(I400ToARGBRow_Any_SSE2, I400ToARGBRow_Unaligned_SSE2, I400ToARGBRow_C, - 7, 1, 4) -#endif -#if defined(HAS_YTOARGBROW_SSE2) -RGBANY(YToARGBRow_Any_SSE2, YToARGBRow_SSE2, YToARGBRow_C, - 7, 1, 4) -RGBANY(YUY2ToARGBRow_Any_SSSE3, YUY2ToARGBRow_Unaligned_SSSE3, YUY2ToARGBRow_C, - 15, 2, 4) -RGBANY(UYVYToARGBRow_Any_SSSE3, UYVYToARGBRow_Unaligned_SSSE3, UYVYToARGBRow_C, - 15, 2, 4) -// These require alignment on ARGB, so C is used for remainder. -RGBANY(RGB24ToARGBRow_Any_SSSE3, RGB24ToARGBRow_SSSE3, RGB24ToARGBRow_C, - 15, 3, 4) -RGBANY(RAWToARGBRow_Any_SSSE3, RAWToARGBRow_SSSE3, RAWToARGBRow_C, - 15, 3, 4) -RGBANY(RGB565ToARGBRow_Any_SSE2, RGB565ToARGBRow_SSE2, RGB565ToARGBRow_C, - 7, 2, 4) -RGBANY(ARGB1555ToARGBRow_Any_SSE2, ARGB1555ToARGBRow_SSE2, ARGB1555ToARGBRow_C, - 7, 2, 4) -RGBANY(ARGB4444ToARGBRow_Any_SSE2, ARGB4444ToARGBRow_SSE2, ARGB4444ToARGBRow_C, - 7, 2, 4) -#endif -#if defined(HAS_ARGBTORGB24ROW_NEON) -RGBANY(ARGBToRGB24Row_Any_NEON, ARGBToRGB24Row_NEON, ARGBToRGB24Row_C, 7, 4, 3) -RGBANY(ARGBToRAWRow_Any_NEON, ARGBToRAWRow_NEON, ARGBToRAWRow_C, 7, 4, 3) -RGBANY(ARGBToRGB565Row_Any_NEON, ARGBToRGB565Row_NEON, ARGBToRGB565Row_C, - 7, 4, 2) -RGBANY(ARGBToARGB1555Row_Any_NEON, ARGBToARGB1555Row_NEON, ARGBToARGB1555Row_C, - 7, 4, 2) -RGBANY(ARGBToARGB4444Row_Any_NEON, ARGBToARGB4444Row_NEON, ARGBToARGB4444Row_C, - 7, 4, 2) -RGBANY(I400ToARGBRow_Any_NEON, I400ToARGBRow_NEON, I400ToARGBRow_C, - 7, 1, 4) -RGBANY(YToARGBRow_Any_NEON, YToARGBRow_NEON, YToARGBRow_C, - 7, 1, 4) -RGBANY(YUY2ToARGBRow_Any_NEON, YUY2ToARGBRow_NEON, YUY2ToARGBRow_C, - 7, 2, 4) -RGBANY(UYVYToARGBRow_Any_NEON, UYVYToARGBRow_NEON, UYVYToARGBRow_C, - 7, 2, 4) -#endif -#undef RGBANY - -// ARGB to Bayer does multiple of 4 pixels, SSSE3 aligned src, unaligned dst. 
-#define BAYERANY(NAMEANY, ARGBTORGB_SIMD, ARGBTORGB_C, MASK, SBPP, BPP) \ - void NAMEANY(const uint8* src, \ - uint8* dst, uint32 selector, \ - int width) { \ - int n = width & ~MASK; \ - ARGBTORGB_SIMD(src, dst, selector, n); \ - ARGBTORGB_C(src + n * SBPP, dst + n * BPP, selector, width & MASK); \ - } - -#if defined(HAS_ARGBTOBAYERROW_SSSE3) -BAYERANY(ARGBToBayerRow_Any_SSSE3, ARGBToBayerRow_SSSE3, ARGBToBayerRow_C, - 7, 4, 1) -#endif -#if defined(HAS_ARGBTOBAYERROW_NEON) -BAYERANY(ARGBToBayerRow_Any_NEON, ARGBToBayerRow_NEON, ARGBToBayerRow_C, - 7, 4, 1) -#endif -#if defined(HAS_ARGBTOBAYERGGROW_SSE2) -BAYERANY(ARGBToBayerGGRow_Any_SSE2, ARGBToBayerGGRow_SSE2, ARGBToBayerGGRow_C, - 7, 4, 1) -#endif -#if defined(HAS_ARGBTOBAYERGGROW_NEON) -BAYERANY(ARGBToBayerGGRow_Any_NEON, ARGBToBayerGGRow_NEON, ARGBToBayerGGRow_C, - 7, 4, 1) -#endif - -#undef BAYERANY - -// RGB/YUV to Y does multiple of 16 with SIMD and last 16 with SIMD. -#define YANY(NAMEANY, ARGBTOY_SIMD, SBPP, BPP, NUM) \ - void NAMEANY(const uint8* src_argb, uint8* dst_y, int width) { \ - ARGBTOY_SIMD(src_argb, dst_y, width - NUM); \ - ARGBTOY_SIMD(src_argb + (width - NUM) * SBPP, \ - dst_y + (width - NUM) * BPP, NUM); \ - } - -#ifdef HAS_ARGBTOYROW_AVX2 -YANY(ARGBToYRow_Any_AVX2, ARGBToYRow_AVX2, 4, 1, 32) -YANY(ARGBToYJRow_Any_AVX2, ARGBToYJRow_AVX2, 4, 1, 32) -YANY(YUY2ToYRow_Any_AVX2, YUY2ToYRow_AVX2, 2, 1, 32) -YANY(UYVYToYRow_Any_AVX2, UYVYToYRow_AVX2, 2, 1, 32) -#endif -#ifdef HAS_ARGBTOYROW_SSSE3 -YANY(ARGBToYRow_Any_SSSE3, ARGBToYRow_Unaligned_SSSE3, 4, 1, 16) -#endif -#ifdef HAS_BGRATOYROW_SSSE3 -YANY(BGRAToYRow_Any_SSSE3, BGRAToYRow_Unaligned_SSSE3, 4, 1, 16) -YANY(ABGRToYRow_Any_SSSE3, ABGRToYRow_Unaligned_SSSE3, 4, 1, 16) -YANY(RGBAToYRow_Any_SSSE3, RGBAToYRow_Unaligned_SSSE3, 4, 1, 16) -YANY(YUY2ToYRow_Any_SSE2, YUY2ToYRow_Unaligned_SSE2, 2, 1, 16) -YANY(UYVYToYRow_Any_SSE2, UYVYToYRow_Unaligned_SSE2, 2, 1, 16) -#endif -#ifdef HAS_ARGBTOYJROW_SSSE3 -YANY(ARGBToYJRow_Any_SSSE3, 
ARGBToYJRow_Unaligned_SSSE3, 4, 1, 16) -#endif -#ifdef HAS_ARGBTOYROW_NEON -YANY(ARGBToYRow_Any_NEON, ARGBToYRow_NEON, 4, 1, 8) -YANY(ARGBToYJRow_Any_NEON, ARGBToYJRow_NEON, 4, 1, 8) -YANY(BGRAToYRow_Any_NEON, BGRAToYRow_NEON, 4, 1, 8) -YANY(ABGRToYRow_Any_NEON, ABGRToYRow_NEON, 4, 1, 8) -YANY(RGBAToYRow_Any_NEON, RGBAToYRow_NEON, 4, 1, 8) -YANY(RGB24ToYRow_Any_NEON, RGB24ToYRow_NEON, 3, 1, 8) -YANY(RAWToYRow_Any_NEON, RAWToYRow_NEON, 3, 1, 8) -YANY(RGB565ToYRow_Any_NEON, RGB565ToYRow_NEON, 2, 1, 8) -YANY(ARGB1555ToYRow_Any_NEON, ARGB1555ToYRow_NEON, 2, 1, 8) -YANY(ARGB4444ToYRow_Any_NEON, ARGB4444ToYRow_NEON, 2, 1, 8) -YANY(YUY2ToYRow_Any_NEON, YUY2ToYRow_NEON, 2, 1, 16) -YANY(UYVYToYRow_Any_NEON, UYVYToYRow_NEON, 2, 1, 16) -YANY(RGB24ToARGBRow_Any_NEON, RGB24ToARGBRow_NEON, 3, 4, 8) -YANY(RAWToARGBRow_Any_NEON, RAWToARGBRow_NEON, 3, 4, 8) -YANY(RGB565ToARGBRow_Any_NEON, RGB565ToARGBRow_NEON, 2, 4, 8) -YANY(ARGB1555ToARGBRow_Any_NEON, ARGB1555ToARGBRow_NEON, 2, 4, 8) -YANY(ARGB4444ToARGBRow_Any_NEON, ARGB4444ToARGBRow_NEON, 2, 4, 8) -#endif -#undef YANY - -#define YANY(NAMEANY, ARGBTOY_SIMD, ARGBTOY_C, SBPP, BPP, MASK) \ - void NAMEANY(const uint8* src_argb, uint8* dst_y, int width) { \ - int n = width & ~MASK; \ - ARGBTOY_SIMD(src_argb, dst_y, n); \ - ARGBTOY_C(src_argb + n * SBPP, \ - dst_y + n * BPP, width & MASK); \ - } - -// Attenuate is destructive so last16 method can not be used due to overlap. 
-#ifdef HAS_ARGBATTENUATEROW_SSSE3 -YANY(ARGBAttenuateRow_Any_SSSE3, ARGBAttenuateRow_SSSE3, ARGBAttenuateRow_C, - 4, 4, 3) -#endif -#ifdef HAS_ARGBATTENUATEROW_SSE2 -YANY(ARGBAttenuateRow_Any_SSE2, ARGBAttenuateRow_SSE2, ARGBAttenuateRow_C, - 4, 4, 3) -#endif -#ifdef HAS_ARGBUNATTENUATEROW_SSE2 -YANY(ARGBUnattenuateRow_Any_SSE2, ARGBUnattenuateRow_SSE2, ARGBUnattenuateRow_C, - 4, 4, 3) -#endif -#ifdef HAS_ARGBATTENUATEROW_AVX2 -YANY(ARGBAttenuateRow_Any_AVX2, ARGBAttenuateRow_AVX2, ARGBAttenuateRow_C, - 4, 4, 7) -#endif -#ifdef HAS_ARGBUNATTENUATEROW_AVX2 -YANY(ARGBUnattenuateRow_Any_AVX2, ARGBUnattenuateRow_AVX2, ARGBUnattenuateRow_C, - 4, 4, 7) -#endif -#ifdef HAS_ARGBATTENUATEROW_NEON -YANY(ARGBAttenuateRow_Any_NEON, ARGBAttenuateRow_NEON, ARGBAttenuateRow_C, - 4, 4, 7) -#endif -#undef YANY - -// RGB/YUV to UV does multiple of 16 with SIMD and remainder with C. -#define UVANY(NAMEANY, ANYTOUV_SIMD, ANYTOUV_C, BPP, MASK) \ - void NAMEANY(const uint8* src_argb, int src_stride_argb, \ - uint8* dst_u, uint8* dst_v, int width) { \ - int n = width & ~MASK; \ - ANYTOUV_SIMD(src_argb, src_stride_argb, dst_u, dst_v, n); \ - ANYTOUV_C(src_argb + n * BPP, src_stride_argb, \ - dst_u + (n >> 1), \ - dst_v + (n >> 1), \ - width & MASK); \ - } - -#ifdef HAS_ARGBTOUVROW_AVX2 -UVANY(ARGBToUVRow_Any_AVX2, ARGBToUVRow_AVX2, ARGBToUVRow_C, 4, 31) -UVANY(YUY2ToUVRow_Any_AVX2, YUY2ToUVRow_AVX2, YUY2ToUVRow_C, 2, 31) -UVANY(UYVYToUVRow_Any_AVX2, UYVYToUVRow_AVX2, UYVYToUVRow_C, 2, 31) -#endif -#ifdef HAS_ARGBTOUVROW_SSSE3 -UVANY(ARGBToUVRow_Any_SSSE3, ARGBToUVRow_Unaligned_SSSE3, ARGBToUVRow_C, 4, 15) -UVANY(ARGBToUVJRow_Any_SSSE3, ARGBToUVJRow_Unaligned_SSSE3, ARGBToUVJRow_C, - 4, 15) -UVANY(BGRAToUVRow_Any_SSSE3, BGRAToUVRow_Unaligned_SSSE3, BGRAToUVRow_C, 4, 15) -UVANY(ABGRToUVRow_Any_SSSE3, ABGRToUVRow_Unaligned_SSSE3, ABGRToUVRow_C, 4, 15) -UVANY(RGBAToUVRow_Any_SSSE3, RGBAToUVRow_Unaligned_SSSE3, RGBAToUVRow_C, 4, 15) -UVANY(YUY2ToUVRow_Any_SSE2, YUY2ToUVRow_Unaligned_SSE2, 
YUY2ToUVRow_C, 2, 15) -UVANY(UYVYToUVRow_Any_SSE2, UYVYToUVRow_Unaligned_SSE2, UYVYToUVRow_C, 2, 15) -#endif -#ifdef HAS_ARGBTOUVROW_NEON -UVANY(ARGBToUVRow_Any_NEON, ARGBToUVRow_NEON, ARGBToUVRow_C, 4, 15) -UVANY(ARGBToUVJRow_Any_NEON, ARGBToUVJRow_NEON, ARGBToUVJRow_C, 4, 15) -UVANY(BGRAToUVRow_Any_NEON, BGRAToUVRow_NEON, BGRAToUVRow_C, 4, 15) -UVANY(ABGRToUVRow_Any_NEON, ABGRToUVRow_NEON, ABGRToUVRow_C, 4, 15) -UVANY(RGBAToUVRow_Any_NEON, RGBAToUVRow_NEON, RGBAToUVRow_C, 4, 15) -UVANY(RGB24ToUVRow_Any_NEON, RGB24ToUVRow_NEON, RGB24ToUVRow_C, 3, 15) -UVANY(RAWToUVRow_Any_NEON, RAWToUVRow_NEON, RAWToUVRow_C, 3, 15) -UVANY(RGB565ToUVRow_Any_NEON, RGB565ToUVRow_NEON, RGB565ToUVRow_C, 2, 15) -UVANY(ARGB1555ToUVRow_Any_NEON, ARGB1555ToUVRow_NEON, ARGB1555ToUVRow_C, 2, 15) -UVANY(ARGB4444ToUVRow_Any_NEON, ARGB4444ToUVRow_NEON, ARGB4444ToUVRow_C, 2, 15) -UVANY(YUY2ToUVRow_Any_NEON, YUY2ToUVRow_NEON, YUY2ToUVRow_C, 2, 15) -UVANY(UYVYToUVRow_Any_NEON, UYVYToUVRow_NEON, UYVYToUVRow_C, 2, 15) -#endif -#undef UVANY - -#define UV422ANY(NAMEANY, ANYTOUV_SIMD, ANYTOUV_C, BPP, MASK, SHIFT) \ - void NAMEANY(const uint8* src_uv, \ - uint8* dst_u, uint8* dst_v, int width) { \ - int n = width & ~MASK; \ - ANYTOUV_SIMD(src_uv, dst_u, dst_v, n); \ - ANYTOUV_C(src_uv + n * BPP, \ - dst_u + (n >> SHIFT), \ - dst_v + (n >> SHIFT), \ - width & MASK); \ - } - -#ifdef HAS_ARGBTOUV444ROW_SSSE3 -UV422ANY(ARGBToUV444Row_Any_SSSE3, ARGBToUV444Row_Unaligned_SSSE3, - ARGBToUV444Row_C, 4, 15, 0) -#endif -#ifdef HAS_YUY2TOUV422ROW_AVX2 -UV422ANY(YUY2ToUV422Row_Any_AVX2, YUY2ToUV422Row_AVX2, - YUY2ToUV422Row_C, 2, 31, 1) -UV422ANY(UYVYToUV422Row_Any_AVX2, UYVYToUV422Row_AVX2, - UYVYToUV422Row_C, 2, 31, 1) -#endif -#ifdef HAS_ARGBTOUVROW_SSSE3 -UV422ANY(ARGBToUV422Row_Any_SSSE3, ARGBToUV422Row_Unaligned_SSSE3, - ARGBToUV422Row_C, 4, 15, 1) -UV422ANY(YUY2ToUV422Row_Any_SSE2, YUY2ToUV422Row_Unaligned_SSE2, - YUY2ToUV422Row_C, 2, 15, 1) -UV422ANY(UYVYToUV422Row_Any_SSE2, UYVYToUV422Row_Unaligned_SSE2, - 
UYVYToUV422Row_C, 2, 15, 1) -#endif -#ifdef HAS_YUY2TOUV422ROW_NEON -UV422ANY(ARGBToUV444Row_Any_NEON, ARGBToUV444Row_NEON, - ARGBToUV444Row_C, 4, 7, 0) -UV422ANY(ARGBToUV422Row_Any_NEON, ARGBToUV422Row_NEON, - ARGBToUV422Row_C, 4, 15, 1) -UV422ANY(ARGBToUV411Row_Any_NEON, ARGBToUV411Row_NEON, - ARGBToUV411Row_C, 4, 31, 2) -UV422ANY(YUY2ToUV422Row_Any_NEON, YUY2ToUV422Row_NEON, - YUY2ToUV422Row_C, 2, 15, 1) -UV422ANY(UYVYToUV422Row_Any_NEON, UYVYToUV422Row_NEON, - UYVYToUV422Row_C, 2, 15, 1) -#endif -#undef UV422ANY - -#define SPLITUVROWANY(NAMEANY, ANYTOUV_SIMD, ANYTOUV_C, MASK) \ - void NAMEANY(const uint8* src_uv, \ - uint8* dst_u, uint8* dst_v, int width) { \ - int n = width & ~MASK; \ - ANYTOUV_SIMD(src_uv, dst_u, dst_v, n); \ - ANYTOUV_C(src_uv + n * 2, \ - dst_u + n, \ - dst_v + n, \ - width & MASK); \ - } - -#ifdef HAS_SPLITUVROW_SSE2 -SPLITUVROWANY(SplitUVRow_Any_SSE2, SplitUVRow_Unaligned_SSE2, SplitUVRow_C, 15) -#endif -#ifdef HAS_SPLITUVROW_AVX2 -SPLITUVROWANY(SplitUVRow_Any_AVX2, SplitUVRow_AVX2, SplitUVRow_C, 31) -#endif -#ifdef HAS_SPLITUVROW_NEON -SPLITUVROWANY(SplitUVRow_Any_NEON, SplitUVRow_NEON, SplitUVRow_C, 15) -#endif -#ifdef HAS_SPLITUVROW_MIPS_DSPR2 -SPLITUVROWANY(SplitUVRow_Any_MIPS_DSPR2, SplitUVRow_Unaligned_MIPS_DSPR2, - SplitUVRow_C, 15) -#endif -#undef SPLITUVROWANY - -#define MERGEUVROW_ANY(NAMEANY, ANYTOUV_SIMD, ANYTOUV_C, MASK) \ - void NAMEANY(const uint8* src_u, const uint8* src_v, \ - uint8* dst_uv, int width) { \ - int n = width & ~MASK; \ - ANYTOUV_SIMD(src_u, src_v, dst_uv, n); \ - ANYTOUV_C(src_u + n, \ - src_v + n, \ - dst_uv + n * 2, \ - width & MASK); \ - } - -#ifdef HAS_MERGEUVROW_SSE2 -MERGEUVROW_ANY(MergeUVRow_Any_SSE2, MergeUVRow_Unaligned_SSE2, MergeUVRow_C, 15) -#endif -#ifdef HAS_MERGEUVROW_AVX2 -MERGEUVROW_ANY(MergeUVRow_Any_AVX2, MergeUVRow_AVX2, MergeUVRow_C, 31) -#endif -#ifdef HAS_MERGEUVROW_NEON -MERGEUVROW_ANY(MergeUVRow_Any_NEON, MergeUVRow_NEON, MergeUVRow_C, 15) -#endif -#undef MERGEUVROW_ANY - -#define 
MATHROW_ANY(NAMEANY, ARGBMATH_SIMD, ARGBMATH_C, MASK) \ - void NAMEANY(const uint8* src_argb0, const uint8* src_argb1, \ - uint8* dst_argb, int width) { \ - int n = width & ~MASK; \ - ARGBMATH_SIMD(src_argb0, src_argb1, dst_argb, n); \ - ARGBMATH_C(src_argb0 + n * 4, \ - src_argb1 + n * 4, \ - dst_argb + n * 4, \ - width & MASK); \ - } - -#ifdef HAS_ARGBMULTIPLYROW_SSE2 -MATHROW_ANY(ARGBMultiplyRow_Any_SSE2, ARGBMultiplyRow_SSE2, ARGBMultiplyRow_C, - 3) -#endif -#ifdef HAS_ARGBADDROW_SSE2 -MATHROW_ANY(ARGBAddRow_Any_SSE2, ARGBAddRow_SSE2, ARGBAddRow_C, 3) -#endif -#ifdef HAS_ARGBSUBTRACTROW_SSE2 -MATHROW_ANY(ARGBSubtractRow_Any_SSE2, ARGBSubtractRow_SSE2, ARGBSubtractRow_C, - 3) -#endif -#ifdef HAS_ARGBMULTIPLYROW_AVX2 -MATHROW_ANY(ARGBMultiplyRow_Any_AVX2, ARGBMultiplyRow_AVX2, ARGBMultiplyRow_C, - 7) -#endif -#ifdef HAS_ARGBADDROW_AVX2 -MATHROW_ANY(ARGBAddRow_Any_AVX2, ARGBAddRow_AVX2, ARGBAddRow_C, 7) -#endif -#ifdef HAS_ARGBSUBTRACTROW_AVX2 -MATHROW_ANY(ARGBSubtractRow_Any_AVX2, ARGBSubtractRow_AVX2, ARGBSubtractRow_C, - 7) -#endif -#ifdef HAS_ARGBMULTIPLYROW_NEON -MATHROW_ANY(ARGBMultiplyRow_Any_NEON, ARGBMultiplyRow_NEON, ARGBMultiplyRow_C, - 7) -#endif -#ifdef HAS_ARGBADDROW_NEON -MATHROW_ANY(ARGBAddRow_Any_NEON, ARGBAddRow_NEON, ARGBAddRow_C, 7) -#endif -#ifdef HAS_ARGBSUBTRACTROW_NEON -MATHROW_ANY(ARGBSubtractRow_Any_NEON, ARGBSubtractRow_NEON, ARGBSubtractRow_C, - 7) -#endif -#undef MATHROW_ANY - -// Shuffle may want to work in place, so last16 method can not be used. 
-#define YANY(NAMEANY, ARGBTOY_SIMD, ARGBTOY_C, SBPP, BPP, MASK) \ - void NAMEANY(const uint8* src_argb, uint8* dst_argb, \ - const uint8* shuffler, int width) { \ - int n = width & ~MASK; \ - ARGBTOY_SIMD(src_argb, dst_argb, shuffler, n); \ - ARGBTOY_C(src_argb + n * SBPP, \ - dst_argb + n * BPP, shuffler, width & MASK); \ - } - -#ifdef HAS_ARGBSHUFFLEROW_SSE2 -YANY(ARGBShuffleRow_Any_SSE2, ARGBShuffleRow_SSE2, - ARGBShuffleRow_C, 4, 4, 3) -#endif -#ifdef HAS_ARGBSHUFFLEROW_SSSE3 -YANY(ARGBShuffleRow_Any_SSSE3, ARGBShuffleRow_Unaligned_SSSE3, - ARGBShuffleRow_C, 4, 4, 7) -#endif -#ifdef HAS_ARGBSHUFFLEROW_AVX2 -YANY(ARGBShuffleRow_Any_AVX2, ARGBShuffleRow_AVX2, - ARGBShuffleRow_C, 4, 4, 15) -#endif -#ifdef HAS_ARGBSHUFFLEROW_NEON -YANY(ARGBShuffleRow_Any_NEON, ARGBShuffleRow_NEON, - ARGBShuffleRow_C, 4, 4, 3) -#endif -#undef YANY - -// Interpolate may want to work in place, so last16 method can not be used. -#define NANY(NAMEANY, TERP_SIMD, TERP_C, SBPP, BPP, MASK) \ - void NAMEANY(uint8* dst_ptr, const uint8* src_ptr, \ - ptrdiff_t src_stride_ptr, int width, \ - int source_y_fraction) { \ - int n = width & ~MASK; \ - TERP_SIMD(dst_ptr, src_ptr, src_stride_ptr, \ - n, source_y_fraction); \ - TERP_C(dst_ptr + n * BPP, \ - src_ptr + n * SBPP, src_stride_ptr, \ - width & MASK, source_y_fraction); \ - } - -#ifdef HAS_INTERPOLATEROW_AVX2 -NANY(InterpolateRow_Any_AVX2, InterpolateRow_AVX2, - InterpolateRow_C, 1, 1, 32) -#endif -#ifdef HAS_INTERPOLATEROW_SSSE3 -NANY(InterpolateRow_Any_SSSE3, InterpolateRow_Unaligned_SSSE3, - InterpolateRow_C, 1, 1, 15) -#endif -#ifdef HAS_INTERPOLATEROW_SSE2 -NANY(InterpolateRow_Any_SSE2, InterpolateRow_Unaligned_SSE2, - InterpolateRow_C, 1, 1, 15) -#endif -#ifdef HAS_INTERPOLATEROW_NEON -NANY(InterpolateRow_Any_NEON, InterpolateRow_NEON, - InterpolateRow_C, 1, 1, 15) -#endif -#ifdef HAS_INTERPOLATEROW_MIPS_DSPR2 -NANY(InterpolateRow_Any_MIPS_DSPR2, InterpolateRow_MIPS_DSPR2, - InterpolateRow_C, 1, 1, 3) -#endif -#undef NANY - -#ifdef 
__cplusplus -} // extern "C" -} // namespace libyuv -#endif diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/row_common.cc b/drivers/theoraplayer/src/YUV/libyuv/src/row_common.cc deleted file mode 100755 index 135bdc9084..0000000000 --- a/drivers/theoraplayer/src/YUV/libyuv/src/row_common.cc +++ /dev/null @@ -1,2247 +0,0 @@ -/* - * Copyright 2011 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "libyuv/row.h" - -#include <string.h> // For memcpy and memset. - -#include "libyuv/basic_types.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -// llvm x86 is poor at ternary operator, so use branchless min/max. - -#define USE_BRANCHLESS 1 -#if USE_BRANCHLESS -static __inline int32 clamp0(int32 v) { - return ((-(v) >> 31) & (v)); -} - -static __inline int32 clamp255(int32 v) { - return (((255 - (v)) >> 31) | (v)) & 255; -} - -static __inline uint32 Clamp(int32 val) { - int v = clamp0(val); - return (uint32)(clamp255(v)); -} - -static __inline uint32 Abs(int32 v) { - int m = v >> 31; - return (v + m) ^ m; -} -#else // USE_BRANCHLESS -static __inline int32 clamp0(int32 v) { - return (v < 0) ? 0 : v; -} - -static __inline int32 clamp255(int32 v) { - return (v > 255) ? 255 : v; -} - -static __inline uint32 Clamp(int32 val) { - int v = clamp0(val); - return (uint32)(clamp255(v)); -} - -static __inline uint32 Abs(int32 v) { - return (v < 0) ? 
-v : v; -} -#endif // USE_BRANCHLESS - -#ifdef LIBYUV_LITTLE_ENDIAN -#define WRITEWORD(p, v) *(uint32*)(p) = v -#else -static inline void WRITEWORD(uint8* p, uint32 v) { - p[0] = (uint8)(v & 255); - p[1] = (uint8)((v >> 8) & 255); - p[2] = (uint8)((v >> 16) & 255); - p[3] = (uint8)((v >> 24) & 255); -} -#endif - -void RGB24ToARGBRow_C(const uint8* src_rgb24, uint8* dst_argb, int width) { - int x; - for (x = 0; x < width; ++x) { - uint8 b = src_rgb24[0]; - uint8 g = src_rgb24[1]; - uint8 r = src_rgb24[2]; - dst_argb[0] = b; - dst_argb[1] = g; - dst_argb[2] = r; - dst_argb[3] = 255u; - dst_argb += 4; - src_rgb24 += 3; - } -} - -void RAWToARGBRow_C(const uint8* src_raw, uint8* dst_argb, int width) { - int x; - for (x = 0; x < width; ++x) { - uint8 r = src_raw[0]; - uint8 g = src_raw[1]; - uint8 b = src_raw[2]; - dst_argb[0] = b; - dst_argb[1] = g; - dst_argb[2] = r; - dst_argb[3] = 255u; - dst_argb += 4; - src_raw += 3; - } -} - -void RGB565ToARGBRow_C(const uint8* src_rgb565, uint8* dst_argb, int width) { - int x; - for (x = 0; x < width; ++x) { - uint8 b = src_rgb565[0] & 0x1f; - uint8 g = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3); - uint8 r = src_rgb565[1] >> 3; - dst_argb[0] = (b << 3) | (b >> 2); - dst_argb[1] = (g << 2) | (g >> 4); - dst_argb[2] = (r << 3) | (r >> 2); - dst_argb[3] = 255u; - dst_argb += 4; - src_rgb565 += 2; - } -} - -void ARGB1555ToARGBRow_C(const uint8* src_argb1555, uint8* dst_argb, - int width) { - int x; - for (x = 0; x < width; ++x) { - uint8 b = src_argb1555[0] & 0x1f; - uint8 g = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3); - uint8 r = (src_argb1555[1] & 0x7c) >> 2; - uint8 a = src_argb1555[1] >> 7; - dst_argb[0] = (b << 3) | (b >> 2); - dst_argb[1] = (g << 3) | (g >> 2); - dst_argb[2] = (r << 3) | (r >> 2); - dst_argb[3] = -a; - dst_argb += 4; - src_argb1555 += 2; - } -} - -void ARGB4444ToARGBRow_C(const uint8* src_argb4444, uint8* dst_argb, - int width) { - int x; - for (x = 0; x < width; ++x) { - uint8 b = 
src_argb4444[0] & 0x0f; - uint8 g = src_argb4444[0] >> 4; - uint8 r = src_argb4444[1] & 0x0f; - uint8 a = src_argb4444[1] >> 4; - dst_argb[0] = (b << 4) | b; - dst_argb[1] = (g << 4) | g; - dst_argb[2] = (r << 4) | r; - dst_argb[3] = (a << 4) | a; - dst_argb += 4; - src_argb4444 += 2; - } -} - -void ARGBToRGB24Row_C(const uint8* src_argb, uint8* dst_rgb, int width) { - int x; - for (x = 0; x < width; ++x) { - uint8 b = src_argb[0]; - uint8 g = src_argb[1]; - uint8 r = src_argb[2]; - dst_rgb[0] = b; - dst_rgb[1] = g; - dst_rgb[2] = r; - dst_rgb += 3; - src_argb += 4; - } -} - -void ARGBToRAWRow_C(const uint8* src_argb, uint8* dst_rgb, int width) { - int x; - for (x = 0; x < width; ++x) { - uint8 b = src_argb[0]; - uint8 g = src_argb[1]; - uint8 r = src_argb[2]; - dst_rgb[0] = r; - dst_rgb[1] = g; - dst_rgb[2] = b; - dst_rgb += 3; - src_argb += 4; - } -} - -void ARGBToRGB565Row_C(const uint8* src_argb, uint8* dst_rgb, int width) { - int x; - for (x = 0; x < width - 1; x += 2) { - uint8 b0 = src_argb[0] >> 3; - uint8 g0 = src_argb[1] >> 2; - uint8 r0 = src_argb[2] >> 3; - uint8 b1 = src_argb[4] >> 3; - uint8 g1 = src_argb[5] >> 2; - uint8 r1 = src_argb[6] >> 3; - WRITEWORD(dst_rgb, b0 | (g0 << 5) | (r0 << 11) | - (b1 << 16) | (g1 << 21) | (r1 << 27)); - dst_rgb += 4; - src_argb += 8; - } - if (width & 1) { - uint8 b0 = src_argb[0] >> 3; - uint8 g0 = src_argb[1] >> 2; - uint8 r0 = src_argb[2] >> 3; - *(uint16*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 11); - } -} - -void ARGBToARGB1555Row_C(const uint8* src_argb, uint8* dst_rgb, int width) { - int x; - for (x = 0; x < width - 1; x += 2) { - uint8 b0 = src_argb[0] >> 3; - uint8 g0 = src_argb[1] >> 3; - uint8 r0 = src_argb[2] >> 3; - uint8 a0 = src_argb[3] >> 7; - uint8 b1 = src_argb[4] >> 3; - uint8 g1 = src_argb[5] >> 3; - uint8 r1 = src_argb[6] >> 3; - uint8 a1 = src_argb[7] >> 7; - *(uint32*)(dst_rgb) = - b0 | (g0 << 5) | (r0 << 10) | (a0 << 15) | - (b1 << 16) | (g1 << 21) | (r1 << 26) | (a1 << 31); - dst_rgb += 4; - 
src_argb += 8; - } - if (width & 1) { - uint8 b0 = src_argb[0] >> 3; - uint8 g0 = src_argb[1] >> 3; - uint8 r0 = src_argb[2] >> 3; - uint8 a0 = src_argb[3] >> 7; - *(uint16*)(dst_rgb) = - b0 | (g0 << 5) | (r0 << 10) | (a0 << 15); - } -} - -void ARGBToARGB4444Row_C(const uint8* src_argb, uint8* dst_rgb, int width) { - int x; - for (x = 0; x < width - 1; x += 2) { - uint8 b0 = src_argb[0] >> 4; - uint8 g0 = src_argb[1] >> 4; - uint8 r0 = src_argb[2] >> 4; - uint8 a0 = src_argb[3] >> 4; - uint8 b1 = src_argb[4] >> 4; - uint8 g1 = src_argb[5] >> 4; - uint8 r1 = src_argb[6] >> 4; - uint8 a1 = src_argb[7] >> 4; - *(uint32*)(dst_rgb) = - b0 | (g0 << 4) | (r0 << 8) | (a0 << 12) | - (b1 << 16) | (g1 << 20) | (r1 << 24) | (a1 << 28); - dst_rgb += 4; - src_argb += 8; - } - if (width & 1) { - uint8 b0 = src_argb[0] >> 4; - uint8 g0 = src_argb[1] >> 4; - uint8 r0 = src_argb[2] >> 4; - uint8 a0 = src_argb[3] >> 4; - *(uint16*)(dst_rgb) = - b0 | (g0 << 4) | (r0 << 8) | (a0 << 12); - } -} - -static __inline int RGBToY(uint8 r, uint8 g, uint8 b) { - return (66 * r + 129 * g + 25 * b + 0x1080) >> 8; -} - -static __inline int RGBToU(uint8 r, uint8 g, uint8 b) { - return (112 * b - 74 * g - 38 * r + 0x8080) >> 8; -} -static __inline int RGBToV(uint8 r, uint8 g, uint8 b) { - return (112 * r - 94 * g - 18 * b + 0x8080) >> 8; -} - -#define MAKEROWY(NAME, R, G, B, BPP) \ -void NAME ## ToYRow_C(const uint8* src_argb0, uint8* dst_y, int width) { \ - int x; \ - for (x = 0; x < width; ++x) { \ - dst_y[0] = RGBToY(src_argb0[R], src_argb0[G], src_argb0[B]); \ - src_argb0 += BPP; \ - dst_y += 1; \ - } \ -} \ -void NAME ## ToUVRow_C(const uint8* src_rgb0, int src_stride_rgb, \ - uint8* dst_u, uint8* dst_v, int width) { \ - const uint8* src_rgb1 = src_rgb0 + src_stride_rgb; \ - int x; \ - for (x = 0; x < width - 1; x += 2) { \ - uint8 ab = (src_rgb0[B] + src_rgb0[B + BPP] + \ - src_rgb1[B] + src_rgb1[B + BPP]) >> 2; \ - uint8 ag = (src_rgb0[G] + src_rgb0[G + BPP] + \ - src_rgb1[G] + src_rgb1[G + 
BPP]) >> 2; \ - uint8 ar = (src_rgb0[R] + src_rgb0[R + BPP] + \ - src_rgb1[R] + src_rgb1[R + BPP]) >> 2; \ - dst_u[0] = RGBToU(ar, ag, ab); \ - dst_v[0] = RGBToV(ar, ag, ab); \ - src_rgb0 += BPP * 2; \ - src_rgb1 += BPP * 2; \ - dst_u += 1; \ - dst_v += 1; \ - } \ - if (width & 1) { \ - uint8 ab = (src_rgb0[B] + src_rgb1[B]) >> 1; \ - uint8 ag = (src_rgb0[G] + src_rgb1[G]) >> 1; \ - uint8 ar = (src_rgb0[R] + src_rgb1[R]) >> 1; \ - dst_u[0] = RGBToU(ar, ag, ab); \ - dst_v[0] = RGBToV(ar, ag, ab); \ - } \ -} - -MAKEROWY(ARGB, 2, 1, 0, 4) -MAKEROWY(BGRA, 1, 2, 3, 4) -MAKEROWY(ABGR, 0, 1, 2, 4) -MAKEROWY(RGBA, 3, 2, 1, 4) -MAKEROWY(RGB24, 2, 1, 0, 3) -MAKEROWY(RAW, 0, 1, 2, 3) -#undef MAKEROWY - -// JPeg uses a variation on BT.601-1 full range -// y = 0.29900 * r + 0.58700 * g + 0.11400 * b -// u = -0.16874 * r - 0.33126 * g + 0.50000 * b + center -// v = 0.50000 * r - 0.41869 * g - 0.08131 * b + center -// BT.601 Mpeg range uses: -// b 0.1016 * 255 = 25.908 = 25 -// g 0.5078 * 255 = 129.489 = 129 -// r 0.2578 * 255 = 65.739 = 66 -// JPeg 8 bit Y (not used): -// b 0.11400 * 256 = 29.184 = 29 -// g 0.58700 * 256 = 150.272 = 150 -// r 0.29900 * 256 = 76.544 = 77 -// JPeg 7 bit Y: -// b 0.11400 * 128 = 14.592 = 15 -// g 0.58700 * 128 = 75.136 = 75 -// r 0.29900 * 128 = 38.272 = 38 -// JPeg 8 bit U: -// b 0.50000 * 255 = 127.5 = 127 -// g -0.33126 * 255 = -84.4713 = -84 -// r -0.16874 * 255 = -43.0287 = -43 -// JPeg 8 bit V: -// b -0.08131 * 255 = -20.73405 = -20 -// g -0.41869 * 255 = -106.76595 = -107 -// r 0.50000 * 255 = 127.5 = 127 - -static __inline int RGBToYJ(uint8 r, uint8 g, uint8 b) { - return (38 * r + 75 * g + 15 * b + 64) >> 7; -} - -static __inline int RGBToUJ(uint8 r, uint8 g, uint8 b) { - return (127 * b - 84 * g - 43 * r + 0x8080) >> 8; -} -static __inline int RGBToVJ(uint8 r, uint8 g, uint8 b) { - return (127 * r - 107 * g - 20 * b + 0x8080) >> 8; -} - -#define AVGB(a, b) (((a) + (b) + 1) >> 1) - -#define MAKEROWYJ(NAME, R, G, B, BPP) \ -void NAME ## 
ToYJRow_C(const uint8* src_argb0, uint8* dst_y, int width) { \ - int x; \ - for (x = 0; x < width; ++x) { \ - dst_y[0] = RGBToYJ(src_argb0[R], src_argb0[G], src_argb0[B]); \ - src_argb0 += BPP; \ - dst_y += 1; \ - } \ -} \ -void NAME ## ToUVJRow_C(const uint8* src_rgb0, int src_stride_rgb, \ - uint8* dst_u, uint8* dst_v, int width) { \ - const uint8* src_rgb1 = src_rgb0 + src_stride_rgb; \ - int x; \ - for (x = 0; x < width - 1; x += 2) { \ - uint8 ab = AVGB(AVGB(src_rgb0[B], src_rgb1[B]), \ - AVGB(src_rgb0[B + BPP], src_rgb1[B + BPP])); \ - uint8 ag = AVGB(AVGB(src_rgb0[G], src_rgb1[G]), \ - AVGB(src_rgb0[G + BPP], src_rgb1[G + BPP])); \ - uint8 ar = AVGB(AVGB(src_rgb0[R], src_rgb1[R]), \ - AVGB(src_rgb0[R + BPP], src_rgb1[R + BPP])); \ - dst_u[0] = RGBToUJ(ar, ag, ab); \ - dst_v[0] = RGBToVJ(ar, ag, ab); \ - src_rgb0 += BPP * 2; \ - src_rgb1 += BPP * 2; \ - dst_u += 1; \ - dst_v += 1; \ - } \ - if (width & 1) { \ - uint8 ab = AVGB(src_rgb0[B], src_rgb1[B]); \ - uint8 ag = AVGB(src_rgb0[G], src_rgb1[G]); \ - uint8 ar = AVGB(src_rgb0[R], src_rgb1[R]); \ - dst_u[0] = RGBToUJ(ar, ag, ab); \ - dst_v[0] = RGBToVJ(ar, ag, ab); \ - } \ -} - -MAKEROWYJ(ARGB, 2, 1, 0, 4) -#undef MAKEROWYJ - -void RGB565ToYRow_C(const uint8* src_rgb565, uint8* dst_y, int width) { - int x; - for (x = 0; x < width; ++x) { - uint8 b = src_rgb565[0] & 0x1f; - uint8 g = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3); - uint8 r = src_rgb565[1] >> 3; - b = (b << 3) | (b >> 2); - g = (g << 2) | (g >> 4); - r = (r << 3) | (r >> 2); - dst_y[0] = RGBToY(r, g, b); - src_rgb565 += 2; - dst_y += 1; - } -} - -void ARGB1555ToYRow_C(const uint8* src_argb1555, uint8* dst_y, int width) { - int x; - for (x = 0; x < width; ++x) { - uint8 b = src_argb1555[0] & 0x1f; - uint8 g = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3); - uint8 r = (src_argb1555[1] & 0x7c) >> 2; - b = (b << 3) | (b >> 2); - g = (g << 3) | (g >> 2); - r = (r << 3) | (r >> 2); - dst_y[0] = RGBToY(r, g, b); - src_argb1555 += 2; 
- dst_y += 1; - } -} - -void ARGB4444ToYRow_C(const uint8* src_argb4444, uint8* dst_y, int width) { - int x; - for (x = 0; x < width; ++x) { - uint8 b = src_argb4444[0] & 0x0f; - uint8 g = src_argb4444[0] >> 4; - uint8 r = src_argb4444[1] & 0x0f; - b = (b << 4) | b; - g = (g << 4) | g; - r = (r << 4) | r; - dst_y[0] = RGBToY(r, g, b); - src_argb4444 += 2; - dst_y += 1; - } -} - -void RGB565ToUVRow_C(const uint8* src_rgb565, int src_stride_rgb565, - uint8* dst_u, uint8* dst_v, int width) { - const uint8* next_rgb565 = src_rgb565 + src_stride_rgb565; - int x; - for (x = 0; x < width - 1; x += 2) { - uint8 b0 = src_rgb565[0] & 0x1f; - uint8 g0 = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3); - uint8 r0 = src_rgb565[1] >> 3; - uint8 b1 = src_rgb565[2] & 0x1f; - uint8 g1 = (src_rgb565[2] >> 5) | ((src_rgb565[3] & 0x07) << 3); - uint8 r1 = src_rgb565[3] >> 3; - uint8 b2 = next_rgb565[0] & 0x1f; - uint8 g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3); - uint8 r2 = next_rgb565[1] >> 3; - uint8 b3 = next_rgb565[2] & 0x1f; - uint8 g3 = (next_rgb565[2] >> 5) | ((next_rgb565[3] & 0x07) << 3); - uint8 r3 = next_rgb565[3] >> 3; - uint8 b = (b0 + b1 + b2 + b3); // 565 * 4 = 787. - uint8 g = (g0 + g1 + g2 + g3); - uint8 r = (r0 + r1 + r2 + r3); - b = (b << 1) | (b >> 6); // 787 -> 888. - r = (r << 1) | (r >> 6); - dst_u[0] = RGBToU(r, g, b); - dst_v[0] = RGBToV(r, g, b); - src_rgb565 += 4; - next_rgb565 += 4; - dst_u += 1; - dst_v += 1; - } - if (width & 1) { - uint8 b0 = src_rgb565[0] & 0x1f; - uint8 g0 = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3); - uint8 r0 = src_rgb565[1] >> 3; - uint8 b2 = next_rgb565[0] & 0x1f; - uint8 g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3); - uint8 r2 = next_rgb565[1] >> 3; - uint8 b = (b0 + b2); // 565 * 2 = 676. 
- uint8 g = (g0 + g2); - uint8 r = (r0 + r2); - b = (b << 2) | (b >> 4); // 676 -> 888 - g = (g << 1) | (g >> 6); - r = (r << 2) | (r >> 4); - dst_u[0] = RGBToU(r, g, b); - dst_v[0] = RGBToV(r, g, b); - } -} - -void ARGB1555ToUVRow_C(const uint8* src_argb1555, int src_stride_argb1555, - uint8* dst_u, uint8* dst_v, int width) { - const uint8* next_argb1555 = src_argb1555 + src_stride_argb1555; - int x; - for (x = 0; x < width - 1; x += 2) { - uint8 b0 = src_argb1555[0] & 0x1f; - uint8 g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3); - uint8 r0 = (src_argb1555[1] & 0x7c) >> 2; - uint8 b1 = src_argb1555[2] & 0x1f; - uint8 g1 = (src_argb1555[2] >> 5) | ((src_argb1555[3] & 0x03) << 3); - uint8 r1 = (src_argb1555[3] & 0x7c) >> 2; - uint8 b2 = next_argb1555[0] & 0x1f; - uint8 g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3); - uint8 r2 = (next_argb1555[1] & 0x7c) >> 2; - uint8 b3 = next_argb1555[2] & 0x1f; - uint8 g3 = (next_argb1555[2] >> 5) | ((next_argb1555[3] & 0x03) << 3); - uint8 r3 = (next_argb1555[3] & 0x7c) >> 2; - uint8 b = (b0 + b1 + b2 + b3); // 555 * 4 = 777. - uint8 g = (g0 + g1 + g2 + g3); - uint8 r = (r0 + r1 + r2 + r3); - b = (b << 1) | (b >> 6); // 777 -> 888. - g = (g << 1) | (g >> 6); - r = (r << 1) | (r >> 6); - dst_u[0] = RGBToU(r, g, b); - dst_v[0] = RGBToV(r, g, b); - src_argb1555 += 4; - next_argb1555 += 4; - dst_u += 1; - dst_v += 1; - } - if (width & 1) { - uint8 b0 = src_argb1555[0] & 0x1f; - uint8 g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3); - uint8 r0 = (src_argb1555[1] & 0x7c) >> 2; - uint8 b2 = next_argb1555[0] & 0x1f; - uint8 g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3); - uint8 r2 = next_argb1555[1] >> 3; - uint8 b = (b0 + b2); // 555 * 2 = 666. - uint8 g = (g0 + g2); - uint8 r = (r0 + r2); - b = (b << 2) | (b >> 4); // 666 -> 888. 
- g = (g << 2) | (g >> 4); - r = (r << 2) | (r >> 4); - dst_u[0] = RGBToU(r, g, b); - dst_v[0] = RGBToV(r, g, b); - } -} - -void ARGB4444ToUVRow_C(const uint8* src_argb4444, int src_stride_argb4444, - uint8* dst_u, uint8* dst_v, int width) { - const uint8* next_argb4444 = src_argb4444 + src_stride_argb4444; - int x; - for (x = 0; x < width - 1; x += 2) { - uint8 b0 = src_argb4444[0] & 0x0f; - uint8 g0 = src_argb4444[0] >> 4; - uint8 r0 = src_argb4444[1] & 0x0f; - uint8 b1 = src_argb4444[2] & 0x0f; - uint8 g1 = src_argb4444[2] >> 4; - uint8 r1 = src_argb4444[3] & 0x0f; - uint8 b2 = next_argb4444[0] & 0x0f; - uint8 g2 = next_argb4444[0] >> 4; - uint8 r2 = next_argb4444[1] & 0x0f; - uint8 b3 = next_argb4444[2] & 0x0f; - uint8 g3 = next_argb4444[2] >> 4; - uint8 r3 = next_argb4444[3] & 0x0f; - uint8 b = (b0 + b1 + b2 + b3); // 444 * 4 = 666. - uint8 g = (g0 + g1 + g2 + g3); - uint8 r = (r0 + r1 + r2 + r3); - b = (b << 2) | (b >> 4); // 666 -> 888. - g = (g << 2) | (g >> 4); - r = (r << 2) | (r >> 4); - dst_u[0] = RGBToU(r, g, b); - dst_v[0] = RGBToV(r, g, b); - src_argb4444 += 4; - next_argb4444 += 4; - dst_u += 1; - dst_v += 1; - } - if (width & 1) { - uint8 b0 = src_argb4444[0] & 0x0f; - uint8 g0 = src_argb4444[0] >> 4; - uint8 r0 = src_argb4444[1] & 0x0f; - uint8 b2 = next_argb4444[0] & 0x0f; - uint8 g2 = next_argb4444[0] >> 4; - uint8 r2 = next_argb4444[1] & 0x0f; - uint8 b = (b0 + b2); // 444 * 2 = 555. - uint8 g = (g0 + g2); - uint8 r = (r0 + r2); - b = (b << 3) | (b >> 2); // 555 -> 888. 
- g = (g << 3) | (g >> 2); - r = (r << 3) | (r >> 2); - dst_u[0] = RGBToU(r, g, b); - dst_v[0] = RGBToV(r, g, b); - } -} - -void ARGBToUV444Row_C(const uint8* src_argb, - uint8* dst_u, uint8* dst_v, int width) { - int x; - for (x = 0; x < width; ++x) { - uint8 ab = src_argb[0]; - uint8 ag = src_argb[1]; - uint8 ar = src_argb[2]; - dst_u[0] = RGBToU(ar, ag, ab); - dst_v[0] = RGBToV(ar, ag, ab); - src_argb += 4; - dst_u += 1; - dst_v += 1; - } -} - -void ARGBToUV422Row_C(const uint8* src_argb, - uint8* dst_u, uint8* dst_v, int width) { - int x; - for (x = 0; x < width - 1; x += 2) { - uint8 ab = (src_argb[0] + src_argb[4]) >> 1; - uint8 ag = (src_argb[1] + src_argb[5]) >> 1; - uint8 ar = (src_argb[2] + src_argb[6]) >> 1; - dst_u[0] = RGBToU(ar, ag, ab); - dst_v[0] = RGBToV(ar, ag, ab); - src_argb += 8; - dst_u += 1; - dst_v += 1; - } - if (width & 1) { - uint8 ab = src_argb[0]; - uint8 ag = src_argb[1]; - uint8 ar = src_argb[2]; - dst_u[0] = RGBToU(ar, ag, ab); - dst_v[0] = RGBToV(ar, ag, ab); - } -} - -void ARGBToUV411Row_C(const uint8* src_argb, - uint8* dst_u, uint8* dst_v, int width) { - int x; - for (x = 0; x < width - 3; x += 4) { - uint8 ab = (src_argb[0] + src_argb[4] + src_argb[8] + src_argb[12]) >> 2; - uint8 ag = (src_argb[1] + src_argb[5] + src_argb[9] + src_argb[13]) >> 2; - uint8 ar = (src_argb[2] + src_argb[6] + src_argb[10] + src_argb[14]) >> 2; - dst_u[0] = RGBToU(ar, ag, ab); - dst_v[0] = RGBToV(ar, ag, ab); - src_argb += 16; - dst_u += 1; - dst_v += 1; - } - if ((width & 3) == 3) { - uint8 ab = (src_argb[0] + src_argb[4] + src_argb[8]) / 3; - uint8 ag = (src_argb[1] + src_argb[5] + src_argb[9]) / 3; - uint8 ar = (src_argb[2] + src_argb[6] + src_argb[10]) / 3; - dst_u[0] = RGBToU(ar, ag, ab); - dst_v[0] = RGBToV(ar, ag, ab); - } else if ((width & 3) == 2) { - uint8 ab = (src_argb[0] + src_argb[4]) >> 1; - uint8 ag = (src_argb[1] + src_argb[5]) >> 1; - uint8 ar = (src_argb[2] + src_argb[6]) >> 1; - dst_u[0] = RGBToU(ar, ag, ab); - dst_v[0] = 
RGBToV(ar, ag, ab); - } else if ((width & 3) == 1) { - uint8 ab = src_argb[0]; - uint8 ag = src_argb[1]; - uint8 ar = src_argb[2]; - dst_u[0] = RGBToU(ar, ag, ab); - dst_v[0] = RGBToV(ar, ag, ab); - } -} - -void ARGBGrayRow_C(const uint8* src_argb, uint8* dst_argb, int width) { - int x; - for (x = 0; x < width; ++x) { - uint8 y = RGBToYJ(src_argb[2], src_argb[1], src_argb[0]); - dst_argb[2] = dst_argb[1] = dst_argb[0] = y; - dst_argb[3] = src_argb[3]; - dst_argb += 4; - src_argb += 4; - } -} - -// Convert a row of image to Sepia tone. -void ARGBSepiaRow_C(uint8* dst_argb, int width) { - int x; - for (x = 0; x < width; ++x) { - int b = dst_argb[0]; - int g = dst_argb[1]; - int r = dst_argb[2]; - int sb = (b * 17 + g * 68 + r * 35) >> 7; - int sg = (b * 22 + g * 88 + r * 45) >> 7; - int sr = (b * 24 + g * 98 + r * 50) >> 7; - // b does not over flow. a is preserved from original. - dst_argb[0] = sb; - dst_argb[1] = clamp255(sg); - dst_argb[2] = clamp255(sr); - dst_argb += 4; - } -} - -// Apply color matrix to a row of image. Matrix is signed. -// TODO(fbarchard): Consider adding rounding (+32). -void ARGBColorMatrixRow_C(const uint8* src_argb, uint8* dst_argb, - const int8* matrix_argb, int width) { - int x; - for (x = 0; x < width; ++x) { - int b = src_argb[0]; - int g = src_argb[1]; - int r = src_argb[2]; - int a = src_argb[3]; - int sb = (b * matrix_argb[0] + g * matrix_argb[1] + - r * matrix_argb[2] + a * matrix_argb[3]) >> 6; - int sg = (b * matrix_argb[4] + g * matrix_argb[5] + - r * matrix_argb[6] + a * matrix_argb[7]) >> 6; - int sr = (b * matrix_argb[8] + g * matrix_argb[9] + - r * matrix_argb[10] + a * matrix_argb[11]) >> 6; - int sa = (b * matrix_argb[12] + g * matrix_argb[13] + - r * matrix_argb[14] + a * matrix_argb[15]) >> 6; - dst_argb[0] = Clamp(sb); - dst_argb[1] = Clamp(sg); - dst_argb[2] = Clamp(sr); - dst_argb[3] = Clamp(sa); - src_argb += 4; - dst_argb += 4; - } -} - -// Apply color table to a row of image. 
-void ARGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width) { - int x; - for (x = 0; x < width; ++x) { - int b = dst_argb[0]; - int g = dst_argb[1]; - int r = dst_argb[2]; - int a = dst_argb[3]; - dst_argb[0] = table_argb[b * 4 + 0]; - dst_argb[1] = table_argb[g * 4 + 1]; - dst_argb[2] = table_argb[r * 4 + 2]; - dst_argb[3] = table_argb[a * 4 + 3]; - dst_argb += 4; - } -} - -// Apply color table to a row of image. -void RGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width) { - int x; - for (x = 0; x < width; ++x) { - int b = dst_argb[0]; - int g = dst_argb[1]; - int r = dst_argb[2]; - dst_argb[0] = table_argb[b * 4 + 0]; - dst_argb[1] = table_argb[g * 4 + 1]; - dst_argb[2] = table_argb[r * 4 + 2]; - dst_argb += 4; - } -} - -void ARGBQuantizeRow_C(uint8* dst_argb, int scale, int interval_size, - int interval_offset, int width) { - int x; - for (x = 0; x < width; ++x) { - int b = dst_argb[0]; - int g = dst_argb[1]; - int r = dst_argb[2]; - dst_argb[0] = (b * scale >> 16) * interval_size + interval_offset; - dst_argb[1] = (g * scale >> 16) * interval_size + interval_offset; - dst_argb[2] = (r * scale >> 16) * interval_size + interval_offset; - dst_argb += 4; - } -} - -#define REPEAT8(v) (v) | ((v) << 8) -#define SHADE(f, v) v * f >> 24 - -void ARGBShadeRow_C(const uint8* src_argb, uint8* dst_argb, int width, - uint32 value) { - const uint32 b_scale = REPEAT8(value & 0xff); - const uint32 g_scale = REPEAT8((value >> 8) & 0xff); - const uint32 r_scale = REPEAT8((value >> 16) & 0xff); - const uint32 a_scale = REPEAT8(value >> 24); - - int i; - for (i = 0; i < width; ++i) { - const uint32 b = REPEAT8(src_argb[0]); - const uint32 g = REPEAT8(src_argb[1]); - const uint32 r = REPEAT8(src_argb[2]); - const uint32 a = REPEAT8(src_argb[3]); - dst_argb[0] = SHADE(b, b_scale); - dst_argb[1] = SHADE(g, g_scale); - dst_argb[2] = SHADE(r, r_scale); - dst_argb[3] = SHADE(a, a_scale); - src_argb += 4; - dst_argb += 4; - } -} -#undef REPEAT8 -#undef 
SHADE - -#define REPEAT8(v) (v) | ((v) << 8) -#define SHADE(f, v) v * f >> 16 - -void ARGBMultiplyRow_C(const uint8* src_argb0, const uint8* src_argb1, - uint8* dst_argb, int width) { - int i; - for (i = 0; i < width; ++i) { - const uint32 b = REPEAT8(src_argb0[0]); - const uint32 g = REPEAT8(src_argb0[1]); - const uint32 r = REPEAT8(src_argb0[2]); - const uint32 a = REPEAT8(src_argb0[3]); - const uint32 b_scale = src_argb1[0]; - const uint32 g_scale = src_argb1[1]; - const uint32 r_scale = src_argb1[2]; - const uint32 a_scale = src_argb1[3]; - dst_argb[0] = SHADE(b, b_scale); - dst_argb[1] = SHADE(g, g_scale); - dst_argb[2] = SHADE(r, r_scale); - dst_argb[3] = SHADE(a, a_scale); - src_argb0 += 4; - src_argb1 += 4; - dst_argb += 4; - } -} -#undef REPEAT8 -#undef SHADE - -#define SHADE(f, v) clamp255(v + f) - -void ARGBAddRow_C(const uint8* src_argb0, const uint8* src_argb1, - uint8* dst_argb, int width) { - int i; - for (i = 0; i < width; ++i) { - const int b = src_argb0[0]; - const int g = src_argb0[1]; - const int r = src_argb0[2]; - const int a = src_argb0[3]; - const int b_add = src_argb1[0]; - const int g_add = src_argb1[1]; - const int r_add = src_argb1[2]; - const int a_add = src_argb1[3]; - dst_argb[0] = SHADE(b, b_add); - dst_argb[1] = SHADE(g, g_add); - dst_argb[2] = SHADE(r, r_add); - dst_argb[3] = SHADE(a, a_add); - src_argb0 += 4; - src_argb1 += 4; - dst_argb += 4; - } -} -#undef SHADE - -#define SHADE(f, v) clamp0(f - v) - -void ARGBSubtractRow_C(const uint8* src_argb0, const uint8* src_argb1, - uint8* dst_argb, int width) { - int i; - for (i = 0; i < width; ++i) { - const int b = src_argb0[0]; - const int g = src_argb0[1]; - const int r = src_argb0[2]; - const int a = src_argb0[3]; - const int b_sub = src_argb1[0]; - const int g_sub = src_argb1[1]; - const int r_sub = src_argb1[2]; - const int a_sub = src_argb1[3]; - dst_argb[0] = SHADE(b, b_sub); - dst_argb[1] = SHADE(g, g_sub); - dst_argb[2] = SHADE(r, r_sub); - dst_argb[3] = SHADE(a, a_sub); - 
src_argb0 += 4; - src_argb1 += 4; - dst_argb += 4; - } -} -#undef SHADE - -// Sobel functions which mimics SSSE3. -void SobelXRow_C(const uint8* src_y0, const uint8* src_y1, const uint8* src_y2, - uint8* dst_sobelx, int width) { - int i; - for (i = 0; i < width; ++i) { - int a = src_y0[i]; - int b = src_y1[i]; - int c = src_y2[i]; - int a_sub = src_y0[i + 2]; - int b_sub = src_y1[i + 2]; - int c_sub = src_y2[i + 2]; - int a_diff = a - a_sub; - int b_diff = b - b_sub; - int c_diff = c - c_sub; - int sobel = Abs(a_diff + b_diff * 2 + c_diff); - dst_sobelx[i] = (uint8)(clamp255(sobel)); - } -} - -void SobelYRow_C(const uint8* src_y0, const uint8* src_y1, - uint8* dst_sobely, int width) { - int i; - for (i = 0; i < width; ++i) { - int a = src_y0[i + 0]; - int b = src_y0[i + 1]; - int c = src_y0[i + 2]; - int a_sub = src_y1[i + 0]; - int b_sub = src_y1[i + 1]; - int c_sub = src_y1[i + 2]; - int a_diff = a - a_sub; - int b_diff = b - b_sub; - int c_diff = c - c_sub; - int sobel = Abs(a_diff + b_diff * 2 + c_diff); - dst_sobely[i] = (uint8)(clamp255(sobel)); - } -} - -void SobelRow_C(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_argb, int width) { - int i; - for (i = 0; i < width; ++i) { - int r = src_sobelx[i]; - int b = src_sobely[i]; - int s = clamp255(r + b); - dst_argb[0] = (uint8)(s); - dst_argb[1] = (uint8)(s); - dst_argb[2] = (uint8)(s); - dst_argb[3] = (uint8)(255u); - dst_argb += 4; - } -} - -void SobelToPlaneRow_C(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_y, int width) { - int i; - for (i = 0; i < width; ++i) { - int r = src_sobelx[i]; - int b = src_sobely[i]; - int s = clamp255(r + b); - dst_y[i] = (uint8)(s); - } -} - -void SobelXYRow_C(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_argb, int width) { - int i; - for (i = 0; i < width; ++i) { - int r = src_sobelx[i]; - int b = src_sobely[i]; - int g = clamp255(r + b); - dst_argb[0] = (uint8)(b); - dst_argb[1] = (uint8)(g); - dst_argb[2] = (uint8)(r); - 
dst_argb[3] = (uint8)(255u); - dst_argb += 4; - } -} - -void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int width) { - // Copy a Y to RGB. - int x; - for (x = 0; x < width; ++x) { - uint8 y = src_y[0]; - dst_argb[2] = dst_argb[1] = dst_argb[0] = y; - dst_argb[3] = 255u; - dst_argb += 4; - ++src_y; - } -} - -// C reference code that mimics the YUV assembly. - -#define YG 74 /* (int8)(1.164 * 64 + 0.5) */ - -#define UB 127 /* min(63,(int8)(2.018 * 64)) */ -#define UG -25 /* (int8)(-0.391 * 64 - 0.5) */ -#define UR 0 - -#define VB 0 -#define VG -52 /* (int8)(-0.813 * 64 - 0.5) */ -#define VR 102 /* (int8)(1.596 * 64 + 0.5) */ - -// Bias -#define BB UB * 128 + VB * 128 -#define BG UG * 128 + VG * 128 -#define BR UR * 128 + VR * 128 - -static __inline void YuvPixel(uint8 y, uint8 u, uint8 v, - uint8* b, uint8* g, uint8* r) { - int32 y1 = ((int32)(y) - 16) * YG; - *b = Clamp((int32)((u * UB + v * VB) - (BB) + y1) >> 6); - *g = Clamp((int32)((u * UG + v * VG) - (BG) + y1) >> 6); - *r = Clamp((int32)((u * UR + v * VR) - (BR) + y1) >> 6); -} - -#if !defined(LIBYUV_DISABLE_NEON) && \ - (defined(__ARM_NEON__) || defined(LIBYUV_NEON)) -// C mimic assembly. -// TODO(fbarchard): Remove subsampling from Neon. -void I444ToARGBRow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* rgb_buf, - int width) { - int x; - for (x = 0; x < width - 1; x += 2) { - uint8 u = (src_u[0] + src_u[1] + 1) >> 1; - uint8 v = (src_v[0] + src_v[1] + 1) >> 1; - YuvPixel(src_y[0], u, v, rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); - rgb_buf[3] = 255; - YuvPixel(src_y[1], u, v, rgb_buf + 4, rgb_buf + 5, rgb_buf + 6); - rgb_buf[7] = 255; - src_y += 2; - src_u += 2; - src_v += 2; - rgb_buf += 8; // Advance 2 pixels. 
- } - if (width & 1) { - YuvPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); - } -} -#else -void I444ToARGBRow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* rgb_buf, - int width) { - int x; - for (x = 0; x < width; ++x) { - YuvPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); - rgb_buf[3] = 255; - src_y += 1; - src_u += 1; - src_v += 1; - rgb_buf += 4; // Advance 1 pixel. - } -} -#endif -// Also used for 420 -void I422ToARGBRow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* rgb_buf, - int width) { - int x; - for (x = 0; x < width - 1; x += 2) { - YuvPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); - rgb_buf[3] = 255; - YuvPixel(src_y[1], src_u[0], src_v[0], - rgb_buf + 4, rgb_buf + 5, rgb_buf + 6); - rgb_buf[7] = 255; - src_y += 2; - src_u += 1; - src_v += 1; - rgb_buf += 8; // Advance 2 pixels. - } - if (width & 1) { - YuvPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); - rgb_buf[3] = 255; - } -} - -void I422ToRGB24Row_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* rgb_buf, - int width) { - int x; - for (x = 0; x < width - 1; x += 2) { - YuvPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); - YuvPixel(src_y[1], src_u[0], src_v[0], - rgb_buf + 3, rgb_buf + 4, rgb_buf + 5); - src_y += 2; - src_u += 1; - src_v += 1; - rgb_buf += 6; // Advance 2 pixels. 
- } - if (width & 1) { - YuvPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); - } -} - -void I422ToRAWRow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* rgb_buf, - int width) { - int x; - for (x = 0; x < width - 1; x += 2) { - YuvPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 2, rgb_buf + 1, rgb_buf + 0); - YuvPixel(src_y[1], src_u[0], src_v[0], - rgb_buf + 5, rgb_buf + 4, rgb_buf + 3); - src_y += 2; - src_u += 1; - src_v += 1; - rgb_buf += 6; // Advance 2 pixels. - } - if (width & 1) { - YuvPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 2, rgb_buf + 1, rgb_buf + 0); - } -} - -void I422ToARGB4444Row_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb4444, - int width) { - uint8 b0; - uint8 g0; - uint8 r0; - uint8 b1; - uint8 g1; - uint8 r1; - int x; - for (x = 0; x < width - 1; x += 2) { - YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0); - YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1); - b0 = b0 >> 4; - g0 = g0 >> 4; - r0 = r0 >> 4; - b1 = b1 >> 4; - g1 = g1 >> 4; - r1 = r1 >> 4; - *(uint32*)(dst_argb4444) = b0 | (g0 << 4) | (r0 << 8) | - (b1 << 16) | (g1 << 20) | (r1 << 24) | 0xf000f000; - src_y += 2; - src_u += 1; - src_v += 1; - dst_argb4444 += 4; // Advance 2 pixels. 
- } - if (width & 1) { - YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0); - b0 = b0 >> 4; - g0 = g0 >> 4; - r0 = r0 >> 4; - *(uint16*)(dst_argb4444) = b0 | (g0 << 4) | (r0 << 8) | - 0xf000; - } -} - -void I422ToARGB1555Row_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb1555, - int width) { - uint8 b0; - uint8 g0; - uint8 r0; - uint8 b1; - uint8 g1; - uint8 r1; - int x; - for (x = 0; x < width - 1; x += 2) { - YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0); - YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1); - b0 = b0 >> 3; - g0 = g0 >> 3; - r0 = r0 >> 3; - b1 = b1 >> 3; - g1 = g1 >> 3; - r1 = r1 >> 3; - *(uint32*)(dst_argb1555) = b0 | (g0 << 5) | (r0 << 10) | - (b1 << 16) | (g1 << 21) | (r1 << 26) | 0x80008000; - src_y += 2; - src_u += 1; - src_v += 1; - dst_argb1555 += 4; // Advance 2 pixels. - } - if (width & 1) { - YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0); - b0 = b0 >> 3; - g0 = g0 >> 3; - r0 = r0 >> 3; - *(uint16*)(dst_argb1555) = b0 | (g0 << 5) | (r0 << 10) | - 0x8000; - } -} - -void I422ToRGB565Row_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgb565, - int width) { - uint8 b0; - uint8 g0; - uint8 r0; - uint8 b1; - uint8 g1; - uint8 r1; - int x; - for (x = 0; x < width - 1; x += 2) { - YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0); - YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1); - b0 = b0 >> 3; - g0 = g0 >> 2; - r0 = r0 >> 3; - b1 = b1 >> 3; - g1 = g1 >> 2; - r1 = r1 >> 3; - *(uint32*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11) | - (b1 << 16) | (g1 << 21) | (r1 << 27); - src_y += 2; - src_u += 1; - src_v += 1; - dst_rgb565 += 4; // Advance 2 pixels. 
- } - if (width & 1) { - YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0); - b0 = b0 >> 3; - g0 = g0 >> 2; - r0 = r0 >> 3; - *(uint16*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11); - } -} - -void I411ToARGBRow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* rgb_buf, - int width) { - int x; - for (x = 0; x < width - 3; x += 4) { - YuvPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); - rgb_buf[3] = 255; - YuvPixel(src_y[1], src_u[0], src_v[0], - rgb_buf + 4, rgb_buf + 5, rgb_buf + 6); - rgb_buf[7] = 255; - YuvPixel(src_y[2], src_u[0], src_v[0], - rgb_buf + 8, rgb_buf + 9, rgb_buf + 10); - rgb_buf[11] = 255; - YuvPixel(src_y[3], src_u[0], src_v[0], - rgb_buf + 12, rgb_buf + 13, rgb_buf + 14); - rgb_buf[15] = 255; - src_y += 4; - src_u += 1; - src_v += 1; - rgb_buf += 16; // Advance 4 pixels. - } - if (width & 2) { - YuvPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); - rgb_buf[3] = 255; - YuvPixel(src_y[1], src_u[0], src_v[0], - rgb_buf + 4, rgb_buf + 5, rgb_buf + 6); - rgb_buf[7] = 255; - src_y += 2; - rgb_buf += 8; // Advance 2 pixels. - } - if (width & 1) { - YuvPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); - rgb_buf[3] = 255; - } -} - -void NV12ToARGBRow_C(const uint8* src_y, - const uint8* usrc_v, - uint8* rgb_buf, - int width) { - int x; - for (x = 0; x < width - 1; x += 2) { - YuvPixel(src_y[0], usrc_v[0], usrc_v[1], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); - rgb_buf[3] = 255; - YuvPixel(src_y[1], usrc_v[0], usrc_v[1], - rgb_buf + 4, rgb_buf + 5, rgb_buf + 6); - rgb_buf[7] = 255; - src_y += 2; - usrc_v += 2; - rgb_buf += 8; // Advance 2 pixels. 
- } - if (width & 1) { - YuvPixel(src_y[0], usrc_v[0], usrc_v[1], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); - rgb_buf[3] = 255; - } -} - -void NV21ToARGBRow_C(const uint8* src_y, - const uint8* src_vu, - uint8* rgb_buf, - int width) { - int x; - for (x = 0; x < width - 1; x += 2) { - YuvPixel(src_y[0], src_vu[1], src_vu[0], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); - rgb_buf[3] = 255; - - YuvPixel(src_y[1], src_vu[1], src_vu[0], - rgb_buf + 4, rgb_buf + 5, rgb_buf + 6); - rgb_buf[7] = 255; - - src_y += 2; - src_vu += 2; - rgb_buf += 8; // Advance 2 pixels. - } - if (width & 1) { - YuvPixel(src_y[0], src_vu[1], src_vu[0], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); - rgb_buf[3] = 255; - } -} - -void NV12ToRGB565Row_C(const uint8* src_y, - const uint8* usrc_v, - uint8* dst_rgb565, - int width) { - uint8 b0; - uint8 g0; - uint8 r0; - uint8 b1; - uint8 g1; - uint8 r1; - int x; - for (x = 0; x < width - 1; x += 2) { - YuvPixel(src_y[0], usrc_v[0], usrc_v[1], &b0, &g0, &r0); - YuvPixel(src_y[1], usrc_v[0], usrc_v[1], &b1, &g1, &r1); - b0 = b0 >> 3; - g0 = g0 >> 2; - r0 = r0 >> 3; - b1 = b1 >> 3; - g1 = g1 >> 2; - r1 = r1 >> 3; - *(uint32*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11) | - (b1 << 16) | (g1 << 21) | (r1 << 27); - src_y += 2; - usrc_v += 2; - dst_rgb565 += 4; // Advance 2 pixels. 
- } - if (width & 1) { - YuvPixel(src_y[0], usrc_v[0], usrc_v[1], &b0, &g0, &r0); - b0 = b0 >> 3; - g0 = g0 >> 2; - r0 = r0 >> 3; - *(uint16*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11); - } -} - -void NV21ToRGB565Row_C(const uint8* src_y, - const uint8* vsrc_u, - uint8* dst_rgb565, - int width) { - uint8 b0; - uint8 g0; - uint8 r0; - uint8 b1; - uint8 g1; - uint8 r1; - int x; - for (x = 0; x < width - 1; x += 2) { - YuvPixel(src_y[0], vsrc_u[1], vsrc_u[0], &b0, &g0, &r0); - YuvPixel(src_y[1], vsrc_u[1], vsrc_u[0], &b1, &g1, &r1); - b0 = b0 >> 3; - g0 = g0 >> 2; - r0 = r0 >> 3; - b1 = b1 >> 3; - g1 = g1 >> 2; - r1 = r1 >> 3; - *(uint32*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11) | - (b1 << 16) | (g1 << 21) | (r1 << 27); - src_y += 2; - vsrc_u += 2; - dst_rgb565 += 4; // Advance 2 pixels. - } - if (width & 1) { - YuvPixel(src_y[0], vsrc_u[1], vsrc_u[0], &b0, &g0, &r0); - b0 = b0 >> 3; - g0 = g0 >> 2; - r0 = r0 >> 3; - *(uint16*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11); - } -} - -void YUY2ToARGBRow_C(const uint8* src_yuy2, - uint8* rgb_buf, - int width) { - int x; - for (x = 0; x < width - 1; x += 2) { - YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); - rgb_buf[3] = 255; - YuvPixel(src_yuy2[2], src_yuy2[1], src_yuy2[3], - rgb_buf + 4, rgb_buf + 5, rgb_buf + 6); - rgb_buf[7] = 255; - src_yuy2 += 4; - rgb_buf += 8; // Advance 2 pixels. - } - if (width & 1) { - YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); - rgb_buf[3] = 255; - } -} - -void UYVYToARGBRow_C(const uint8* src_uyvy, - uint8* rgb_buf, - int width) { - int x; - for (x = 0; x < width - 1; x += 2) { - YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); - rgb_buf[3] = 255; - YuvPixel(src_uyvy[3], src_uyvy[0], src_uyvy[2], - rgb_buf + 4, rgb_buf + 5, rgb_buf + 6); - rgb_buf[7] = 255; - src_uyvy += 4; - rgb_buf += 8; // Advance 2 pixels. 
- } - if (width & 1) { - YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2], - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); - rgb_buf[3] = 255; - } -} - -void I422ToBGRARow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* rgb_buf, - int width) { - int x; - for (x = 0; x < width - 1; x += 2) { - YuvPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 3, rgb_buf + 2, rgb_buf + 1); - rgb_buf[0] = 255; - YuvPixel(src_y[1], src_u[0], src_v[0], - rgb_buf + 7, rgb_buf + 6, rgb_buf + 5); - rgb_buf[4] = 255; - src_y += 2; - src_u += 1; - src_v += 1; - rgb_buf += 8; // Advance 2 pixels. - } - if (width & 1) { - YuvPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 3, rgb_buf + 2, rgb_buf + 1); - rgb_buf[0] = 255; - } -} - -void I422ToABGRRow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* rgb_buf, - int width) { - int x; - for (x = 0; x < width - 1; x += 2) { - YuvPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 2, rgb_buf + 1, rgb_buf + 0); - rgb_buf[3] = 255; - YuvPixel(src_y[1], src_u[0], src_v[0], - rgb_buf + 6, rgb_buf + 5, rgb_buf + 4); - rgb_buf[7] = 255; - src_y += 2; - src_u += 1; - src_v += 1; - rgb_buf += 8; // Advance 2 pixels. - } - if (width & 1) { - YuvPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 2, rgb_buf + 1, rgb_buf + 0); - rgb_buf[3] = 255; - } -} - -void I422ToRGBARow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* rgb_buf, - int width) { - int x; - for (x = 0; x < width - 1; x += 2) { - YuvPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 1, rgb_buf + 2, rgb_buf + 3); - rgb_buf[0] = 255; - YuvPixel(src_y[1], src_u[0], src_v[0], - rgb_buf + 5, rgb_buf + 6, rgb_buf + 7); - rgb_buf[4] = 255; - src_y += 2; - src_u += 1; - src_v += 1; - rgb_buf += 8; // Advance 2 pixels. 
- } - if (width & 1) { - YuvPixel(src_y[0], src_u[0], src_v[0], - rgb_buf + 1, rgb_buf + 2, rgb_buf + 3); - rgb_buf[0] = 255; - } -} - -void YToARGBRow_C(const uint8* src_y, uint8* rgb_buf, int width) { - int x; - for (x = 0; x < width - 1; x += 2) { - YuvPixel(src_y[0], 128, 128, - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); - rgb_buf[3] = 255; - YuvPixel(src_y[1], 128, 128, - rgb_buf + 4, rgb_buf + 5, rgb_buf + 6); - rgb_buf[7] = 255; - src_y += 2; - rgb_buf += 8; // Advance 2 pixels. - } - if (width & 1) { - YuvPixel(src_y[0], 128, 128, - rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); - rgb_buf[3] = 255; - } -} - -void MirrorRow_C(const uint8* src, uint8* dst, int width) { - int x; - src += width - 1; - for (x = 0; x < width - 1; x += 2) { - dst[x] = src[0]; - dst[x + 1] = src[-1]; - src -= 2; - } - if (width & 1) { - dst[width - 1] = src[0]; - } -} - -void MirrorUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) { - int x; - src_uv += (width - 1) << 1; - for (x = 0; x < width - 1; x += 2) { - dst_u[x] = src_uv[0]; - dst_u[x + 1] = src_uv[-2]; - dst_v[x] = src_uv[1]; - dst_v[x + 1] = src_uv[-2 + 1]; - src_uv -= 4; - } - if (width & 1) { - dst_u[width - 1] = src_uv[0]; - dst_v[width - 1] = src_uv[1]; - } -} - -void ARGBMirrorRow_C(const uint8* src, uint8* dst, int width) { - int x; - const uint32* src32 = (const uint32*)(src); - uint32* dst32 = (uint32*)(dst); - src32 += width - 1; - for (x = 0; x < width - 1; x += 2) { - dst32[x] = src32[0]; - dst32[x + 1] = src32[-1]; - src32 -= 2; - } - if (width & 1) { - dst32[width - 1] = src32[0]; - } -} - -void SplitUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) { - int x; - for (x = 0; x < width - 1; x += 2) { - dst_u[x] = src_uv[0]; - dst_u[x + 1] = src_uv[2]; - dst_v[x] = src_uv[1]; - dst_v[x + 1] = src_uv[3]; - src_uv += 4; - } - if (width & 1) { - dst_u[width - 1] = src_uv[0]; - dst_v[width - 1] = src_uv[1]; - } -} - -void MergeUVRow_C(const uint8* src_u, const uint8* src_v, uint8* dst_uv, - 
int width) { - int x; - for (x = 0; x < width - 1; x += 2) { - dst_uv[0] = src_u[x]; - dst_uv[1] = src_v[x]; - dst_uv[2] = src_u[x + 1]; - dst_uv[3] = src_v[x + 1]; - dst_uv += 4; - } - if (width & 1) { - dst_uv[0] = src_u[width - 1]; - dst_uv[1] = src_v[width - 1]; - } -} - -void CopyRow_C(const uint8* src, uint8* dst, int count) { - memcpy(dst, src, count); -} - -void SetRow_C(uint8* dst, uint32 v8, int count) { -#ifdef _MSC_VER - // VC will generate rep stosb. - int x; - for (x = 0; x < count; ++x) { - dst[x] = v8; - } -#else - memset(dst, v8, count); -#endif -} - -void ARGBSetRows_C(uint8* dst, uint32 v32, int width, - int dst_stride, int height) { - int y; - for (y = 0; y < height; ++y) { - uint32* d = (uint32*)(dst); - int x; - for (x = 0; x < width; ++x) { - d[x] = v32; - } - dst += dst_stride; - } -} - -// Filter 2 rows of YUY2 UV's (422) into U and V (420). -void YUY2ToUVRow_C(const uint8* src_yuy2, int src_stride_yuy2, - uint8* dst_u, uint8* dst_v, int width) { - // Output a row of UV values, filtering 2 rows of YUY2. - int x; - for (x = 0; x < width; x += 2) { - dst_u[0] = (src_yuy2[1] + src_yuy2[src_stride_yuy2 + 1] + 1) >> 1; - dst_v[0] = (src_yuy2[3] + src_yuy2[src_stride_yuy2 + 3] + 1) >> 1; - src_yuy2 += 4; - dst_u += 1; - dst_v += 1; - } -} - -// Copy row of YUY2 UV's (422) into U and V (422). -void YUY2ToUV422Row_C(const uint8* src_yuy2, - uint8* dst_u, uint8* dst_v, int width) { - // Output a row of UV values. - int x; - for (x = 0; x < width; x += 2) { - dst_u[0] = src_yuy2[1]; - dst_v[0] = src_yuy2[3]; - src_yuy2 += 4; - dst_u += 1; - dst_v += 1; - } -} - -// Copy row of YUY2 Y's (422) into Y (420/422). -void YUY2ToYRow_C(const uint8* src_yuy2, uint8* dst_y, int width) { - // Output a row of Y values. - int x; - for (x = 0; x < width - 1; x += 2) { - dst_y[x] = src_yuy2[0]; - dst_y[x + 1] = src_yuy2[2]; - src_yuy2 += 4; - } - if (width & 1) { - dst_y[width - 1] = src_yuy2[0]; - } -} - -// Filter 2 rows of UYVY UV's (422) into U and V (420). 
-void UYVYToUVRow_C(const uint8* src_uyvy, int src_stride_uyvy, - uint8* dst_u, uint8* dst_v, int width) { - // Output a row of UV values. - int x; - for (x = 0; x < width; x += 2) { - dst_u[0] = (src_uyvy[0] + src_uyvy[src_stride_uyvy + 0] + 1) >> 1; - dst_v[0] = (src_uyvy[2] + src_uyvy[src_stride_uyvy + 2] + 1) >> 1; - src_uyvy += 4; - dst_u += 1; - dst_v += 1; - } -} - -// Copy row of UYVY UV's (422) into U and V (422). -void UYVYToUV422Row_C(const uint8* src_uyvy, - uint8* dst_u, uint8* dst_v, int width) { - // Output a row of UV values. - int x; - for (x = 0; x < width; x += 2) { - dst_u[0] = src_uyvy[0]; - dst_v[0] = src_uyvy[2]; - src_uyvy += 4; - dst_u += 1; - dst_v += 1; - } -} - -// Copy row of UYVY Y's (422) into Y (420/422). -void UYVYToYRow_C(const uint8* src_uyvy, uint8* dst_y, int width) { - // Output a row of Y values. - int x; - for (x = 0; x < width - 1; x += 2) { - dst_y[x] = src_uyvy[1]; - dst_y[x + 1] = src_uyvy[3]; - src_uyvy += 4; - } - if (width & 1) { - dst_y[width - 1] = src_uyvy[1]; - } -} - -#define BLEND(f, b, a) (((256 - a) * b) >> 8) + f - -// Blend src_argb0 over src_argb1 and store to dst_argb. -// dst_argb may be src_argb0 or src_argb1. -// This code mimics the SSSE3 version for better testability. 
-void ARGBBlendRow_C(const uint8* src_argb0, const uint8* src_argb1, - uint8* dst_argb, int width) { - int x; - for (x = 0; x < width - 1; x += 2) { - uint32 fb = src_argb0[0]; - uint32 fg = src_argb0[1]; - uint32 fr = src_argb0[2]; - uint32 a = src_argb0[3]; - uint32 bb = src_argb1[0]; - uint32 bg = src_argb1[1]; - uint32 br = src_argb1[2]; - dst_argb[0] = BLEND(fb, bb, a); - dst_argb[1] = BLEND(fg, bg, a); - dst_argb[2] = BLEND(fr, br, a); - dst_argb[3] = 255u; - - fb = src_argb0[4 + 0]; - fg = src_argb0[4 + 1]; - fr = src_argb0[4 + 2]; - a = src_argb0[4 + 3]; - bb = src_argb1[4 + 0]; - bg = src_argb1[4 + 1]; - br = src_argb1[4 + 2]; - dst_argb[4 + 0] = BLEND(fb, bb, a); - dst_argb[4 + 1] = BLEND(fg, bg, a); - dst_argb[4 + 2] = BLEND(fr, br, a); - dst_argb[4 + 3] = 255u; - src_argb0 += 8; - src_argb1 += 8; - dst_argb += 8; - } - - if (width & 1) { - uint32 fb = src_argb0[0]; - uint32 fg = src_argb0[1]; - uint32 fr = src_argb0[2]; - uint32 a = src_argb0[3]; - uint32 bb = src_argb1[0]; - uint32 bg = src_argb1[1]; - uint32 br = src_argb1[2]; - dst_argb[0] = BLEND(fb, bb, a); - dst_argb[1] = BLEND(fg, bg, a); - dst_argb[2] = BLEND(fr, br, a); - dst_argb[3] = 255u; - } -} -#undef BLEND -#define ATTENUATE(f, a) (a | (a << 8)) * (f | (f << 8)) >> 24 - -// Multiply source RGB by alpha and store to destination. -// This code mimics the SSSE3 version for better testability. 
-void ARGBAttenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width) { - int i; - for (i = 0; i < width - 1; i += 2) { - uint32 b = src_argb[0]; - uint32 g = src_argb[1]; - uint32 r = src_argb[2]; - uint32 a = src_argb[3]; - dst_argb[0] = ATTENUATE(b, a); - dst_argb[1] = ATTENUATE(g, a); - dst_argb[2] = ATTENUATE(r, a); - dst_argb[3] = a; - b = src_argb[4]; - g = src_argb[5]; - r = src_argb[6]; - a = src_argb[7]; - dst_argb[4] = ATTENUATE(b, a); - dst_argb[5] = ATTENUATE(g, a); - dst_argb[6] = ATTENUATE(r, a); - dst_argb[7] = a; - src_argb += 8; - dst_argb += 8; - } - - if (width & 1) { - const uint32 b = src_argb[0]; - const uint32 g = src_argb[1]; - const uint32 r = src_argb[2]; - const uint32 a = src_argb[3]; - dst_argb[0] = ATTENUATE(b, a); - dst_argb[1] = ATTENUATE(g, a); - dst_argb[2] = ATTENUATE(r, a); - dst_argb[3] = a; - } -} -#undef ATTENUATE - -// Divide source RGB by alpha and store to destination. -// b = (b * 255 + (a / 2)) / a; -// g = (g * 255 + (a / 2)) / a; -// r = (r * 255 + (a / 2)) / a; -// Reciprocal method is off by 1 on some values. ie 125 -// 8.8 fixed point inverse table with 1.0 in upper short and 1 / a in lower. 
-#define T(a) 0x01000000 + (0x10000 / a) -const uint32 fixed_invtbl8[256] = { - 0x01000000, 0x0100ffff, T(0x02), T(0x03), T(0x04), T(0x05), T(0x06), T(0x07), - T(0x08), T(0x09), T(0x0a), T(0x0b), T(0x0c), T(0x0d), T(0x0e), T(0x0f), - T(0x10), T(0x11), T(0x12), T(0x13), T(0x14), T(0x15), T(0x16), T(0x17), - T(0x18), T(0x19), T(0x1a), T(0x1b), T(0x1c), T(0x1d), T(0x1e), T(0x1f), - T(0x20), T(0x21), T(0x22), T(0x23), T(0x24), T(0x25), T(0x26), T(0x27), - T(0x28), T(0x29), T(0x2a), T(0x2b), T(0x2c), T(0x2d), T(0x2e), T(0x2f), - T(0x30), T(0x31), T(0x32), T(0x33), T(0x34), T(0x35), T(0x36), T(0x37), - T(0x38), T(0x39), T(0x3a), T(0x3b), T(0x3c), T(0x3d), T(0x3e), T(0x3f), - T(0x40), T(0x41), T(0x42), T(0x43), T(0x44), T(0x45), T(0x46), T(0x47), - T(0x48), T(0x49), T(0x4a), T(0x4b), T(0x4c), T(0x4d), T(0x4e), T(0x4f), - T(0x50), T(0x51), T(0x52), T(0x53), T(0x54), T(0x55), T(0x56), T(0x57), - T(0x58), T(0x59), T(0x5a), T(0x5b), T(0x5c), T(0x5d), T(0x5e), T(0x5f), - T(0x60), T(0x61), T(0x62), T(0x63), T(0x64), T(0x65), T(0x66), T(0x67), - T(0x68), T(0x69), T(0x6a), T(0x6b), T(0x6c), T(0x6d), T(0x6e), T(0x6f), - T(0x70), T(0x71), T(0x72), T(0x73), T(0x74), T(0x75), T(0x76), T(0x77), - T(0x78), T(0x79), T(0x7a), T(0x7b), T(0x7c), T(0x7d), T(0x7e), T(0x7f), - T(0x80), T(0x81), T(0x82), T(0x83), T(0x84), T(0x85), T(0x86), T(0x87), - T(0x88), T(0x89), T(0x8a), T(0x8b), T(0x8c), T(0x8d), T(0x8e), T(0x8f), - T(0x90), T(0x91), T(0x92), T(0x93), T(0x94), T(0x95), T(0x96), T(0x97), - T(0x98), T(0x99), T(0x9a), T(0x9b), T(0x9c), T(0x9d), T(0x9e), T(0x9f), - T(0xa0), T(0xa1), T(0xa2), T(0xa3), T(0xa4), T(0xa5), T(0xa6), T(0xa7), - T(0xa8), T(0xa9), T(0xaa), T(0xab), T(0xac), T(0xad), T(0xae), T(0xaf), - T(0xb0), T(0xb1), T(0xb2), T(0xb3), T(0xb4), T(0xb5), T(0xb6), T(0xb7), - T(0xb8), T(0xb9), T(0xba), T(0xbb), T(0xbc), T(0xbd), T(0xbe), T(0xbf), - T(0xc0), T(0xc1), T(0xc2), T(0xc3), T(0xc4), T(0xc5), T(0xc6), T(0xc7), - T(0xc8), T(0xc9), T(0xca), T(0xcb), T(0xcc), T(0xcd), T(0xce), 
T(0xcf), - T(0xd0), T(0xd1), T(0xd2), T(0xd3), T(0xd4), T(0xd5), T(0xd6), T(0xd7), - T(0xd8), T(0xd9), T(0xda), T(0xdb), T(0xdc), T(0xdd), T(0xde), T(0xdf), - T(0xe0), T(0xe1), T(0xe2), T(0xe3), T(0xe4), T(0xe5), T(0xe6), T(0xe7), - T(0xe8), T(0xe9), T(0xea), T(0xeb), T(0xec), T(0xed), T(0xee), T(0xef), - T(0xf0), T(0xf1), T(0xf2), T(0xf3), T(0xf4), T(0xf5), T(0xf6), T(0xf7), - T(0xf8), T(0xf9), T(0xfa), T(0xfb), T(0xfc), T(0xfd), T(0xfe), 0x01000100 }; -#undef T - -void ARGBUnattenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width) { - int i; - for (i = 0; i < width; ++i) { - uint32 b = src_argb[0]; - uint32 g = src_argb[1]; - uint32 r = src_argb[2]; - const uint32 a = src_argb[3]; - const uint32 ia = fixed_invtbl8[a] & 0xffff; // 8.8 fixed point - b = (b * ia) >> 8; - g = (g * ia) >> 8; - r = (r * ia) >> 8; - // Clamping should not be necessary but is free in assembly. - dst_argb[0] = clamp255(b); - dst_argb[1] = clamp255(g); - dst_argb[2] = clamp255(r); - dst_argb[3] = a; - src_argb += 4; - dst_argb += 4; - } -} - -void ComputeCumulativeSumRow_C(const uint8* row, int32* cumsum, - const int32* previous_cumsum, int width) { - int32 row_sum[4] = {0, 0, 0, 0}; - int x; - for (x = 0; x < width; ++x) { - row_sum[0] += row[x * 4 + 0]; - row_sum[1] += row[x * 4 + 1]; - row_sum[2] += row[x * 4 + 2]; - row_sum[3] += row[x * 4 + 3]; - cumsum[x * 4 + 0] = row_sum[0] + previous_cumsum[x * 4 + 0]; - cumsum[x * 4 + 1] = row_sum[1] + previous_cumsum[x * 4 + 1]; - cumsum[x * 4 + 2] = row_sum[2] + previous_cumsum[x * 4 + 2]; - cumsum[x * 4 + 3] = row_sum[3] + previous_cumsum[x * 4 + 3]; - } -} - -void CumulativeSumToAverageRow_C(const int32* tl, const int32* bl, - int w, int area, uint8* dst, int count) { - float ooa = 1.0f / area; - int i; - for (i = 0; i < count; ++i) { - dst[0] = (uint8)((bl[w + 0] + tl[0] - bl[0] - tl[w + 0]) * ooa); - dst[1] = (uint8)((bl[w + 1] + tl[1] - bl[1] - tl[w + 1]) * ooa); - dst[2] = (uint8)((bl[w + 2] + tl[2] - bl[2] - tl[w + 2]) * ooa); - 
dst[3] = (uint8)((bl[w + 3] + tl[3] - bl[3] - tl[w + 3]) * ooa); - dst += 4; - tl += 4; - bl += 4; - } -} - -// Copy pixels from rotated source to destination row with a slope. -LIBYUV_API -void ARGBAffineRow_C(const uint8* src_argb, int src_argb_stride, - uint8* dst_argb, const float* uv_dudv, int width) { - int i; - // Render a row of pixels from source into a buffer. - float uv[2]; - uv[0] = uv_dudv[0]; - uv[1] = uv_dudv[1]; - for (i = 0; i < width; ++i) { - int x = (int)(uv[0]); - int y = (int)(uv[1]); - *(uint32*)(dst_argb) = - *(const uint32*)(src_argb + y * src_argb_stride + - x * 4); - dst_argb += 4; - uv[0] += uv_dudv[2]; - uv[1] += uv_dudv[3]; - } -} - -// Blend 2 rows into 1 for conversions such as I422ToI420. -void HalfRow_C(const uint8* src_uv, int src_uv_stride, - uint8* dst_uv, int pix) { - int x; - for (x = 0; x < pix; ++x) { - dst_uv[x] = (src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1; - } -} - -// C version 2x2 -> 2x1. -void InterpolateRow_C(uint8* dst_ptr, const uint8* src_ptr, - ptrdiff_t src_stride, - int width, int source_y_fraction) { - int y1_fraction = source_y_fraction; - int y0_fraction = 256 - y1_fraction; - const uint8* src_ptr1 = src_ptr + src_stride; - int x; - if (source_y_fraction == 0) { - memcpy(dst_ptr, src_ptr, width); - return; - } - if (source_y_fraction == 128) { - HalfRow_C(src_ptr, (int)(src_stride), dst_ptr, width); - return; - } - for (x = 0; x < width - 1; x += 2) { - dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8; - dst_ptr[1] = (src_ptr[1] * y0_fraction + src_ptr1[1] * y1_fraction) >> 8; - src_ptr += 2; - src_ptr1 += 2; - dst_ptr += 2; - } - if (width & 1) { - dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8; - } -} - -// Select 2 channels from ARGB on alternating pixels. e.g. 
BGBGBGBG -void ARGBToBayerRow_C(const uint8* src_argb, - uint8* dst_bayer, uint32 selector, int pix) { - int index0 = selector & 0xff; - int index1 = (selector >> 8) & 0xff; - // Copy a row of Bayer. - int x; - for (x = 0; x < pix - 1; x += 2) { - dst_bayer[0] = src_argb[index0]; - dst_bayer[1] = src_argb[index1]; - src_argb += 8; - dst_bayer += 2; - } - if (pix & 1) { - dst_bayer[0] = src_argb[index0]; - } -} - -// Select G channel from ARGB. e.g. GGGGGGGG -void ARGBToBayerGGRow_C(const uint8* src_argb, - uint8* dst_bayer, uint32 selector, int pix) { - // Copy a row of G. - int x; - for (x = 0; x < pix - 1; x += 2) { - dst_bayer[0] = src_argb[1]; - dst_bayer[1] = src_argb[5]; - src_argb += 8; - dst_bayer += 2; - } - if (pix & 1) { - dst_bayer[0] = src_argb[1]; - } -} - -// Use first 4 shuffler values to reorder ARGB channels. -void ARGBShuffleRow_C(const uint8* src_argb, uint8* dst_argb, - const uint8* shuffler, int pix) { - int index0 = shuffler[0]; - int index1 = shuffler[1]; - int index2 = shuffler[2]; - int index3 = shuffler[3]; - // Shuffle a row of ARGB. - int x; - for (x = 0; x < pix; ++x) { - // To support in-place conversion. 
- uint8 b = src_argb[index0]; - uint8 g = src_argb[index1]; - uint8 r = src_argb[index2]; - uint8 a = src_argb[index3]; - dst_argb[0] = b; - dst_argb[1] = g; - dst_argb[2] = r; - dst_argb[3] = a; - src_argb += 4; - dst_argb += 4; - } -} - -void I422ToYUY2Row_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_frame, int width) { - int x; - for (x = 0; x < width - 1; x += 2) { - dst_frame[0] = src_y[0]; - dst_frame[1] = src_u[0]; - dst_frame[2] = src_y[1]; - dst_frame[3] = src_v[0]; - dst_frame += 4; - src_y += 2; - src_u += 1; - src_v += 1; - } - if (width & 1) { - dst_frame[0] = src_y[0]; - dst_frame[1] = src_u[0]; - dst_frame[2] = src_y[0]; // duplicate last y - dst_frame[3] = src_v[0]; - } -} - -void I422ToUYVYRow_C(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_frame, int width) { - int x; - for (x = 0; x < width - 1; x += 2) { - dst_frame[0] = src_u[0]; - dst_frame[1] = src_y[0]; - dst_frame[2] = src_v[0]; - dst_frame[3] = src_y[1]; - dst_frame += 4; - src_y += 2; - src_u += 1; - src_v += 1; - } - if (width & 1) { - dst_frame[0] = src_u[0]; - dst_frame[1] = src_y[0]; - dst_frame[2] = src_v[0]; - dst_frame[3] = src_y[0]; // duplicate last y - } -} - -#if !defined(LIBYUV_DISABLE_X86) && defined(HAS_I422TOARGBROW_SSSE3) -// row_win.cc has asm version, but GCC uses 2 step wrapper. -#if !defined(_MSC_VER) && (defined(__x86_64__) || defined(__i386__)) -void I422ToRGB565Row_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* rgb_buf, - int width) { - // Allocate a row of ARGB. 
- align_buffer_64(row, width * 4); - I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, width); - ARGBToRGB565Row_SSE2(row, rgb_buf, width); - free_aligned_buffer_64(row); -} -#endif // !defined(_MSC_VER) && (defined(__x86_64__) || defined(__i386__)) - -#if defined(_M_IX86) || defined(__x86_64__) || defined(__i386__) -void I422ToARGB1555Row_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* rgb_buf, - int width) { - // Allocate a row of ARGB. - align_buffer_64(row, width * 4); - I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, width); - ARGBToARGB1555Row_SSE2(row, rgb_buf, width); - free_aligned_buffer_64(row); -} - -void I422ToARGB4444Row_SSSE3(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* rgb_buf, - int width) { - // Allocate a row of ARGB. - align_buffer_64(row, width * 4); - I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, width); - ARGBToARGB4444Row_SSE2(row, rgb_buf, width); - free_aligned_buffer_64(row); -} - -void NV12ToRGB565Row_SSSE3(const uint8* src_y, - const uint8* src_uv, - uint8* dst_rgb565, - int width) { - // Allocate a row of ARGB. - align_buffer_64(row, width * 4); - NV12ToARGBRow_SSSE3(src_y, src_uv, row, width); - ARGBToRGB565Row_SSE2(row, dst_rgb565, width); - free_aligned_buffer_64(row); -} - -void NV21ToRGB565Row_SSSE3(const uint8* src_y, - const uint8* src_vu, - uint8* dst_rgb565, - int width) { - // Allocate a row of ARGB. - align_buffer_64(row, width * 4); - NV21ToARGBRow_SSSE3(src_y, src_vu, row, width); - ARGBToRGB565Row_SSE2(row, dst_rgb565, width); - free_aligned_buffer_64(row); -} - -void YUY2ToARGBRow_SSSE3(const uint8* src_yuy2, - uint8* dst_argb, - int width) { - // Allocate a rows of yuv. 
- align_buffer_64(row_y, ((width + 63) & ~63) * 2); - uint8* row_u = row_y + ((width + 63) & ~63); - uint8* row_v = row_u + ((width + 63) & ~63) / 2; - YUY2ToUV422Row_SSE2(src_yuy2, row_u, row_v, width); - YUY2ToYRow_SSE2(src_yuy2, row_y, width); - I422ToARGBRow_SSSE3(row_y, row_u, row_v, dst_argb, width); - free_aligned_buffer_64(row_y); -} - -void YUY2ToARGBRow_Unaligned_SSSE3(const uint8* src_yuy2, - uint8* dst_argb, - int width) { - // Allocate a rows of yuv. - align_buffer_64(row_y, ((width + 63) & ~63) * 2); - uint8* row_u = row_y + ((width + 63) & ~63); - uint8* row_v = row_u + ((width + 63) & ~63) / 2; - YUY2ToUV422Row_Unaligned_SSE2(src_yuy2, row_u, row_v, width); - YUY2ToYRow_Unaligned_SSE2(src_yuy2, row_y, width); - I422ToARGBRow_Unaligned_SSSE3(row_y, row_u, row_v, dst_argb, width); - free_aligned_buffer_64(row_y); -} - -void UYVYToARGBRow_SSSE3(const uint8* src_uyvy, - uint8* dst_argb, - int width) { - // Allocate a rows of yuv. - align_buffer_64(row_y, ((width + 63) & ~63) * 2); - uint8* row_u = row_y + ((width + 63) & ~63); - uint8* row_v = row_u + ((width + 63) & ~63) / 2; - UYVYToUV422Row_SSE2(src_uyvy, row_u, row_v, width); - UYVYToYRow_SSE2(src_uyvy, row_y, width); - I422ToARGBRow_SSSE3(row_y, row_u, row_v, dst_argb, width); - free_aligned_buffer_64(row_y); -} - -void UYVYToARGBRow_Unaligned_SSSE3(const uint8* src_uyvy, - uint8* dst_argb, - int width) { - // Allocate a rows of yuv. 
- align_buffer_64(row_y, ((width + 63) & ~63) * 2); - uint8* row_u = row_y + ((width + 63) & ~63); - uint8* row_v = row_u + ((width + 63) & ~63) / 2; - UYVYToUV422Row_Unaligned_SSE2(src_uyvy, row_u, row_v, width); - UYVYToYRow_Unaligned_SSE2(src_uyvy, row_y, width); - I422ToARGBRow_Unaligned_SSSE3(row_y, row_u, row_v, dst_argb, width); - free_aligned_buffer_64(row_y); -} - -#endif // defined(_M_IX86) || defined(__x86_64__) || defined(__i386__) -#endif // !defined(LIBYUV_DISABLE_X86) - -void ARGBPolynomialRow_C(const uint8* src_argb, - uint8* dst_argb, const float* poly, - int width) { - int i; - for (i = 0; i < width; ++i) { - float b = (float)(src_argb[0]); - float g = (float)(src_argb[1]); - float r = (float)(src_argb[2]); - float a = (float)(src_argb[3]); - float b2 = b * b; - float g2 = g * g; - float r2 = r * r; - float a2 = a * a; - float db = poly[0] + poly[4] * b; - float dg = poly[1] + poly[5] * g; - float dr = poly[2] + poly[6] * r; - float da = poly[3] + poly[7] * a; - float b3 = b2 * b; - float g3 = g2 * g; - float r3 = r2 * r; - float a3 = a2 * a; - db += poly[8] * b2; - dg += poly[9] * g2; - dr += poly[10] * r2; - da += poly[11] * a2; - db += poly[12] * b3; - dg += poly[13] * g3; - dr += poly[14] * r3; - da += poly[15] * a3; - - dst_argb[0] = Clamp((int32)(db)); - dst_argb[1] = Clamp((int32)(dg)); - dst_argb[2] = Clamp((int32)(dr)); - dst_argb[3] = Clamp((int32)(da)); - src_argb += 4; - dst_argb += 4; - } -} - -void ARGBLumaColorTableRow_C(const uint8* src_argb, uint8* dst_argb, int width, - const uint8* luma, uint32 lumacoeff) { - uint32 bc = lumacoeff & 0xff; - uint32 gc = (lumacoeff >> 8) & 0xff; - uint32 rc = (lumacoeff >> 16) & 0xff; - - int i; - for (i = 0; i < width - 1; i += 2) { - // Luminance in rows, color values in columns. 
- const uint8* luma0 = ((src_argb[0] * bc + src_argb[1] * gc + - src_argb[2] * rc) & 0x7F00u) + luma; - const uint8* luma1; - dst_argb[0] = luma0[src_argb[0]]; - dst_argb[1] = luma0[src_argb[1]]; - dst_argb[2] = luma0[src_argb[2]]; - dst_argb[3] = src_argb[3]; - luma1 = ((src_argb[4] * bc + src_argb[5] * gc + - src_argb[6] * rc) & 0x7F00u) + luma; - dst_argb[4] = luma1[src_argb[4]]; - dst_argb[5] = luma1[src_argb[5]]; - dst_argb[6] = luma1[src_argb[6]]; - dst_argb[7] = src_argb[7]; - src_argb += 8; - dst_argb += 8; - } - if (width & 1) { - // Luminance in rows, color values in columns. - const uint8* luma0 = ((src_argb[0] * bc + src_argb[1] * gc + - src_argb[2] * rc) & 0x7F00u) + luma; - dst_argb[0] = luma0[src_argb[0]]; - dst_argb[1] = luma0[src_argb[1]]; - dst_argb[2] = luma0[src_argb[2]]; - dst_argb[3] = src_argb[3]; - } -} - -void ARGBCopyAlphaRow_C(const uint8* src, uint8* dst, int width) { - int i; - for (i = 0; i < width - 1; i += 2) { - dst[3] = src[3]; - dst[7] = src[7]; - dst += 8; - src += 8; - } - if (width & 1) { - dst[3] = src[3]; - } -} - -void ARGBCopyYToAlphaRow_C(const uint8* src, uint8* dst, int width) { - int i; - for (i = 0; i < width - 1; i += 2) { - dst[3] = src[0]; - dst[7] = src[1]; - dst += 8; - src += 2; - } - if (width & 1) { - dst[3] = src[0]; - } -} - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/row_mips.cc b/drivers/theoraplayer/src/YUV/libyuv/src/row_mips.cc deleted file mode 100755 index 4435c55c5c..0000000000 --- a/drivers/theoraplayer/src/YUV/libyuv/src/row_mips.cc +++ /dev/null @@ -1,991 +0,0 @@ -/* - * Copyright (c) 2012 The LibYuv project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. 
All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "libyuv/row.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -// The following are available on Mips platforms: -#if !defined(LIBYUV_DISABLE_MIPS) && defined(__mips__) - -#ifdef HAS_COPYROW_MIPS -void CopyRow_MIPS(const uint8* src, uint8* dst, int count) { - __asm__ __volatile__ ( - ".set noreorder \n" - ".set noat \n" - "slti $at, %[count], 8 \n" - "bne $at ,$zero, $last8 \n" - "xor $t8, %[src], %[dst] \n" - "andi $t8, $t8, 0x3 \n" - - "bne $t8, $zero, unaligned \n" - "negu $a3, %[dst] \n" - // make dst/src aligned - "andi $a3, $a3, 0x3 \n" - "beq $a3, $zero, $chk16w \n" - // word-aligned now count is the remining bytes count - "subu %[count], %[count], $a3 \n" - - "lwr $t8, 0(%[src]) \n" - "addu %[src], %[src], $a3 \n" - "swr $t8, 0(%[dst]) \n" - "addu %[dst], %[dst], $a3 \n" - - // Now the dst/src are mutually word-aligned with word-aligned addresses - "$chk16w: \n" - "andi $t8, %[count], 0x3f \n" // whole 64-B chunks? 
- // t8 is the byte count after 64-byte chunks - "beq %[count], $t8, chk8w \n" - // There will be at most 1 32-byte chunk after it - "subu $a3, %[count], $t8 \n" // the reminder - // Here a3 counts bytes in 16w chunks - "addu $a3, %[dst], $a3 \n" - // Now a3 is the final dst after 64-byte chunks - "addu $t0, %[dst], %[count] \n" - // t0 is the "past the end" address - - // When in the loop we exercise "pref 30,x(a1)", the a1+x should not be past - // the "t0-32" address - // This means: for x=128 the last "safe" a1 address is "t0-160" - // Alternatively, for x=64 the last "safe" a1 address is "t0-96" - // we will use "pref 30,128(a1)", so "t0-160" is the limit - "subu $t9, $t0, 160 \n" - // t9 is the "last safe pref 30,128(a1)" address - "pref 0, 0(%[src]) \n" // first line of src - "pref 0, 32(%[src]) \n" // second line of src - "pref 0, 64(%[src]) \n" - "pref 30, 32(%[dst]) \n" - // In case the a1 > t9 don't use "pref 30" at all - "sgtu $v1, %[dst], $t9 \n" - "bgtz $v1, $loop16w \n" - "nop \n" - // otherwise, start with using pref30 - "pref 30, 64(%[dst]) \n" - "$loop16w: \n" - "pref 0, 96(%[src]) \n" - "lw $t0, 0(%[src]) \n" - "bgtz $v1, $skip_pref30_96 \n" // skip - "lw $t1, 4(%[src]) \n" - "pref 30, 96(%[dst]) \n" // continue - "$skip_pref30_96: \n" - "lw $t2, 8(%[src]) \n" - "lw $t3, 12(%[src]) \n" - "lw $t4, 16(%[src]) \n" - "lw $t5, 20(%[src]) \n" - "lw $t6, 24(%[src]) \n" - "lw $t7, 28(%[src]) \n" - "pref 0, 128(%[src]) \n" - // bring the next lines of src, addr 128 - "sw $t0, 0(%[dst]) \n" - "sw $t1, 4(%[dst]) \n" - "sw $t2, 8(%[dst]) \n" - "sw $t3, 12(%[dst]) \n" - "sw $t4, 16(%[dst]) \n" - "sw $t5, 20(%[dst]) \n" - "sw $t6, 24(%[dst]) \n" - "sw $t7, 28(%[dst]) \n" - "lw $t0, 32(%[src]) \n" - "bgtz $v1, $skip_pref30_128 \n" // skip pref 30,128(a1) - "lw $t1, 36(%[src]) \n" - "pref 30, 128(%[dst]) \n" // set dest, addr 128 - "$skip_pref30_128: \n" - "lw $t2, 40(%[src]) \n" - "lw $t3, 44(%[src]) \n" - "lw $t4, 48(%[src]) \n" - "lw $t5, 52(%[src]) \n" - "lw 
$t6, 56(%[src]) \n" - "lw $t7, 60(%[src]) \n" - "pref 0, 160(%[src]) \n" - // bring the next lines of src, addr 160 - "sw $t0, 32(%[dst]) \n" - "sw $t1, 36(%[dst]) \n" - "sw $t2, 40(%[dst]) \n" - "sw $t3, 44(%[dst]) \n" - "sw $t4, 48(%[dst]) \n" - "sw $t5, 52(%[dst]) \n" - "sw $t6, 56(%[dst]) \n" - "sw $t7, 60(%[dst]) \n" - - "addiu %[dst], %[dst], 64 \n" // adding 64 to dest - "sgtu $v1, %[dst], $t9 \n" - "bne %[dst], $a3, $loop16w \n" - " addiu %[src], %[src], 64 \n" // adding 64 to src - "move %[count], $t8 \n" - - // Here we have src and dest word-aligned but less than 64-bytes to go - - "chk8w: \n" - "pref 0, 0x0(%[src]) \n" - "andi $t8, %[count], 0x1f \n" // 32-byte chunk? - // the t8 is the reminder count past 32-bytes - "beq %[count], $t8, chk1w \n" - // count=t8,no 32-byte chunk - " nop \n" - - "lw $t0, 0(%[src]) \n" - "lw $t1, 4(%[src]) \n" - "lw $t2, 8(%[src]) \n" - "lw $t3, 12(%[src]) \n" - "lw $t4, 16(%[src]) \n" - "lw $t5, 20(%[src]) \n" - "lw $t6, 24(%[src]) \n" - "lw $t7, 28(%[src]) \n" - "addiu %[src], %[src], 32 \n" - - "sw $t0, 0(%[dst]) \n" - "sw $t1, 4(%[dst]) \n" - "sw $t2, 8(%[dst]) \n" - "sw $t3, 12(%[dst]) \n" - "sw $t4, 16(%[dst]) \n" - "sw $t5, 20(%[dst]) \n" - "sw $t6, 24(%[dst]) \n" - "sw $t7, 28(%[dst]) \n" - "addiu %[dst], %[dst], 32 \n" - - "chk1w: \n" - "andi %[count], $t8, 0x3 \n" - // now count is the reminder past 1w chunks - "beq %[count], $t8, $last8 \n" - " subu $a3, $t8, %[count] \n" - // a3 is count of bytes in 1w chunks - "addu $a3, %[dst], $a3 \n" - // now a3 is the dst address past the 1w chunks - // copying in words (4-byte chunks) - "$wordCopy_loop: \n" - "lw $t3, 0(%[src]) \n" - // the first t3 may be equal t0 ... optimize? 
- "addiu %[src], %[src],4 \n" - "addiu %[dst], %[dst],4 \n" - "bne %[dst], $a3,$wordCopy_loop \n" - " sw $t3, -4(%[dst]) \n" - - // For the last (<8) bytes - "$last8: \n" - "blez %[count], leave \n" - " addu $a3, %[dst], %[count] \n" // a3 -last dst address - "$last8loop: \n" - "lb $v1, 0(%[src]) \n" - "addiu %[src], %[src], 1 \n" - "addiu %[dst], %[dst], 1 \n" - "bne %[dst], $a3, $last8loop \n" - " sb $v1, -1(%[dst]) \n" - - "leave: \n" - " j $ra \n" - " nop \n" - - // - // UNALIGNED case - // - - "unaligned: \n" - // got here with a3="negu a1" - "andi $a3, $a3, 0x3 \n" // a1 is word aligned? - "beqz $a3, $ua_chk16w \n" - " subu %[count], %[count], $a3 \n" - // bytes left after initial a3 bytes - "lwr $v1, 0(%[src]) \n" - "lwl $v1, 3(%[src]) \n" - "addu %[src], %[src], $a3 \n" // a3 may be 1, 2 or 3 - "swr $v1, 0(%[dst]) \n" - "addu %[dst], %[dst], $a3 \n" - // below the dst will be word aligned (NOTE1) - "$ua_chk16w: \n" - "andi $t8, %[count], 0x3f \n" // whole 64-B chunks? - // t8 is the byte count after 64-byte chunks - "beq %[count], $t8, ua_chk8w \n" - // if a2==t8, no 64-byte chunks - // There will be at most 1 32-byte chunk after it - "subu $a3, %[count], $t8 \n" // the reminder - // Here a3 counts bytes in 16w chunks - "addu $a3, %[dst], $a3 \n" - // Now a3 is the final dst after 64-byte chunks - "addu $t0, %[dst], %[count] \n" // t0 "past the end" - "subu $t9, $t0, 160 \n" - // t9 is the "last safe pref 30,128(a1)" address - "pref 0, 0(%[src]) \n" // first line of src - "pref 0, 32(%[src]) \n" // second line addr 32 - "pref 0, 64(%[src]) \n" - "pref 30, 32(%[dst]) \n" - // safe, as we have at least 64 bytes ahead - // In case the a1 > t9 don't use "pref 30" at all - "sgtu $v1, %[dst], $t9 \n" - "bgtz $v1, $ua_loop16w \n" - // skip "pref 30,64(a1)" for too short arrays - " nop \n" - // otherwise, start with using pref30 - "pref 30, 64(%[dst]) \n" - "$ua_loop16w: \n" - "pref 0, 96(%[src]) \n" - "lwr $t0, 0(%[src]) \n" - "lwl $t0, 3(%[src]) \n" - "lwr $t1, 
4(%[src]) \n" - "bgtz $v1, $ua_skip_pref30_96 \n" - " lwl $t1, 7(%[src]) \n" - "pref 30, 96(%[dst]) \n" - // continue setting up the dest, addr 96 - "$ua_skip_pref30_96: \n" - "lwr $t2, 8(%[src]) \n" - "lwl $t2, 11(%[src]) \n" - "lwr $t3, 12(%[src]) \n" - "lwl $t3, 15(%[src]) \n" - "lwr $t4, 16(%[src]) \n" - "lwl $t4, 19(%[src]) \n" - "lwr $t5, 20(%[src]) \n" - "lwl $t5, 23(%[src]) \n" - "lwr $t6, 24(%[src]) \n" - "lwl $t6, 27(%[src]) \n" - "lwr $t7, 28(%[src]) \n" - "lwl $t7, 31(%[src]) \n" - "pref 0, 128(%[src]) \n" - // bring the next lines of src, addr 128 - "sw $t0, 0(%[dst]) \n" - "sw $t1, 4(%[dst]) \n" - "sw $t2, 8(%[dst]) \n" - "sw $t3, 12(%[dst]) \n" - "sw $t4, 16(%[dst]) \n" - "sw $t5, 20(%[dst]) \n" - "sw $t6, 24(%[dst]) \n" - "sw $t7, 28(%[dst]) \n" - "lwr $t0, 32(%[src]) \n" - "lwl $t0, 35(%[src]) \n" - "lwr $t1, 36(%[src]) \n" - "bgtz $v1, ua_skip_pref30_128 \n" - " lwl $t1, 39(%[src]) \n" - "pref 30, 128(%[dst]) \n" - // continue setting up the dest, addr 128 - "ua_skip_pref30_128: \n" - - "lwr $t2, 40(%[src]) \n" - "lwl $t2, 43(%[src]) \n" - "lwr $t3, 44(%[src]) \n" - "lwl $t3, 47(%[src]) \n" - "lwr $t4, 48(%[src]) \n" - "lwl $t4, 51(%[src]) \n" - "lwr $t5, 52(%[src]) \n" - "lwl $t5, 55(%[src]) \n" - "lwr $t6, 56(%[src]) \n" - "lwl $t6, 59(%[src]) \n" - "lwr $t7, 60(%[src]) \n" - "lwl $t7, 63(%[src]) \n" - "pref 0, 160(%[src]) \n" - // bring the next lines of src, addr 160 - "sw $t0, 32(%[dst]) \n" - "sw $t1, 36(%[dst]) \n" - "sw $t2, 40(%[dst]) \n" - "sw $t3, 44(%[dst]) \n" - "sw $t4, 48(%[dst]) \n" - "sw $t5, 52(%[dst]) \n" - "sw $t6, 56(%[dst]) \n" - "sw $t7, 60(%[dst]) \n" - - "addiu %[dst],%[dst],64 \n" // adding 64 to dest - "sgtu $v1,%[dst],$t9 \n" - "bne %[dst],$a3,$ua_loop16w \n" - " addiu %[src],%[src],64 \n" // adding 64 to src - "move %[count],$t8 \n" - - // Here we have src and dest word-aligned but less than 64-bytes to go - - "ua_chk8w: \n" - "pref 0, 0x0(%[src]) \n" - "andi $t8, %[count], 0x1f \n" // 32-byte chunk? 
- // the t8 is the reminder count - "beq %[count], $t8, $ua_chk1w \n" - // when count==t8, no 32-byte chunk - - "lwr $t0, 0(%[src]) \n" - "lwl $t0, 3(%[src]) \n" - "lwr $t1, 4(%[src]) \n" - "lwl $t1, 7(%[src]) \n" - "lwr $t2, 8(%[src]) \n" - "lwl $t2, 11(%[src]) \n" - "lwr $t3, 12(%[src]) \n" - "lwl $t3, 15(%[src]) \n" - "lwr $t4, 16(%[src]) \n" - "lwl $t4, 19(%[src]) \n" - "lwr $t5, 20(%[src]) \n" - "lwl $t5, 23(%[src]) \n" - "lwr $t6, 24(%[src]) \n" - "lwl $t6, 27(%[src]) \n" - "lwr $t7, 28(%[src]) \n" - "lwl $t7, 31(%[src]) \n" - "addiu %[src], %[src], 32 \n" - - "sw $t0, 0(%[dst]) \n" - "sw $t1, 4(%[dst]) \n" - "sw $t2, 8(%[dst]) \n" - "sw $t3, 12(%[dst]) \n" - "sw $t4, 16(%[dst]) \n" - "sw $t5, 20(%[dst]) \n" - "sw $t6, 24(%[dst]) \n" - "sw $t7, 28(%[dst]) \n" - "addiu %[dst], %[dst], 32 \n" - - "$ua_chk1w: \n" - "andi %[count], $t8, 0x3 \n" - // now count is the reminder past 1w chunks - "beq %[count], $t8, ua_smallCopy \n" - "subu $a3, $t8, %[count] \n" - // a3 is count of bytes in 1w chunks - "addu $a3, %[dst], $a3 \n" - // now a3 is the dst address past the 1w chunks - - // copying in words (4-byte chunks) - "$ua_wordCopy_loop: \n" - "lwr $v1, 0(%[src]) \n" - "lwl $v1, 3(%[src]) \n" - "addiu %[src], %[src], 4 \n" - "addiu %[dst], %[dst], 4 \n" - // note: dst=a1 is word aligned here, see NOTE1 - "bne %[dst], $a3, $ua_wordCopy_loop \n" - " sw $v1,-4(%[dst]) \n" - - // Now less than 4 bytes (value in count) left to copy - "ua_smallCopy: \n" - "beqz %[count], leave \n" - " addu $a3, %[dst], %[count] \n" // a3 = last dst address - "$ua_smallCopy_loop: \n" - "lb $v1, 0(%[src]) \n" - "addiu %[src], %[src], 1 \n" - "addiu %[dst], %[dst], 1 \n" - "bne %[dst],$a3,$ua_smallCopy_loop \n" - " sb $v1, -1(%[dst]) \n" - - "j $ra \n" - " nop \n" - ".set at \n" - ".set reorder \n" - : [dst] "+r" (dst), [src] "+r" (src) - : [count] "r" (count) - : "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", - "t8", "t9", "a3", "v1", "at" - ); -} -#endif // HAS_COPYROW_MIPS - -// MIPS 
DSPR2 functions -#if !defined(LIBYUV_DISABLE_MIPS) && defined(__mips_dsp) && \ - (__mips_dsp_rev >= 2) -void SplitUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, - int width) { - __asm__ __volatile__ ( - ".set push \n" - ".set noreorder \n" - "srl $t4, %[width], 4 \n" // multiplies of 16 - "blez $t4, 2f \n" - " andi %[width], %[width], 0xf \n" // residual - - ".p2align 2 \n" - "1: \n" - "addiu $t4, $t4, -1 \n" - "lw $t0, 0(%[src_uv]) \n" // V1 | U1 | V0 | U0 - "lw $t1, 4(%[src_uv]) \n" // V3 | U3 | V2 | U2 - "lw $t2, 8(%[src_uv]) \n" // V5 | U5 | V4 | U4 - "lw $t3, 12(%[src_uv]) \n" // V7 | U7 | V6 | U6 - "lw $t5, 16(%[src_uv]) \n" // V9 | U9 | V8 | U8 - "lw $t6, 20(%[src_uv]) \n" // V11 | U11 | V10 | U10 - "lw $t7, 24(%[src_uv]) \n" // V13 | U13 | V12 | U12 - "lw $t8, 28(%[src_uv]) \n" // V15 | U15 | V14 | U14 - "addiu %[src_uv], %[src_uv], 32 \n" - "precrq.qb.ph $t9, $t1, $t0 \n" // V3 | V2 | V1 | V0 - "precr.qb.ph $t0, $t1, $t0 \n" // U3 | U2 | U1 | U0 - "precrq.qb.ph $t1, $t3, $t2 \n" // V7 | V6 | V5 | V4 - "precr.qb.ph $t2, $t3, $t2 \n" // U7 | U6 | U5 | U4 - "precrq.qb.ph $t3, $t6, $t5 \n" // V11 | V10 | V9 | V8 - "precr.qb.ph $t5, $t6, $t5 \n" // U11 | U10 | U9 | U8 - "precrq.qb.ph $t6, $t8, $t7 \n" // V15 | V14 | V13 | V12 - "precr.qb.ph $t7, $t8, $t7 \n" // U15 | U14 | U13 | U12 - "sw $t9, 0(%[dst_v]) \n" - "sw $t0, 0(%[dst_u]) \n" - "sw $t1, 4(%[dst_v]) \n" - "sw $t2, 4(%[dst_u]) \n" - "sw $t3, 8(%[dst_v]) \n" - "sw $t5, 8(%[dst_u]) \n" - "sw $t6, 12(%[dst_v]) \n" - "sw $t7, 12(%[dst_u]) \n" - "addiu %[dst_v], %[dst_v], 16 \n" - "bgtz $t4, 1b \n" - " addiu %[dst_u], %[dst_u], 16 \n" - - "beqz %[width], 3f \n" - " nop \n" - - "2: \n" - "lbu $t0, 0(%[src_uv]) \n" - "lbu $t1, 1(%[src_uv]) \n" - "addiu %[src_uv], %[src_uv], 2 \n" - "addiu %[width], %[width], -1 \n" - "sb $t0, 0(%[dst_u]) \n" - "sb $t1, 0(%[dst_v]) \n" - "addiu %[dst_u], %[dst_u], 1 \n" - "bgtz %[width], 2b \n" - " addiu %[dst_v], %[dst_v], 1 \n" - - "3: \n" - ".set pop \n" - 
: [src_uv] "+r" (src_uv), - [width] "+r" (width), - [dst_u] "+r" (dst_u), - [dst_v] "+r" (dst_v) - : - : "t0", "t1", "t2", "t3", - "t4", "t5", "t6", "t7", "t8", "t9" - ); -} - -void SplitUVRow_Unaligned_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, - uint8* dst_v, int width) { - __asm__ __volatile__ ( - ".set push \n" - ".set noreorder \n" - "srl $t4, %[width], 4 \n" // multiplies of 16 - "blez $t4, 2f \n" - " andi %[width], %[width], 0xf \n" // residual - - ".p2align 2 \n" - "1: \n" - "addiu $t4, $t4, -1 \n" - "lwr $t0, 0(%[src_uv]) \n" - "lwl $t0, 3(%[src_uv]) \n" // V1 | U1 | V0 | U0 - "lwr $t1, 4(%[src_uv]) \n" - "lwl $t1, 7(%[src_uv]) \n" // V3 | U3 | V2 | U2 - "lwr $t2, 8(%[src_uv]) \n" - "lwl $t2, 11(%[src_uv]) \n" // V5 | U5 | V4 | U4 - "lwr $t3, 12(%[src_uv]) \n" - "lwl $t3, 15(%[src_uv]) \n" // V7 | U7 | V6 | U6 - "lwr $t5, 16(%[src_uv]) \n" - "lwl $t5, 19(%[src_uv]) \n" // V9 | U9 | V8 | U8 - "lwr $t6, 20(%[src_uv]) \n" - "lwl $t6, 23(%[src_uv]) \n" // V11 | U11 | V10 | U10 - "lwr $t7, 24(%[src_uv]) \n" - "lwl $t7, 27(%[src_uv]) \n" // V13 | U13 | V12 | U12 - "lwr $t8, 28(%[src_uv]) \n" - "lwl $t8, 31(%[src_uv]) \n" // V15 | U15 | V14 | U14 - "precrq.qb.ph $t9, $t1, $t0 \n" // V3 | V2 | V1 | V0 - "precr.qb.ph $t0, $t1, $t0 \n" // U3 | U2 | U1 | U0 - "precrq.qb.ph $t1, $t3, $t2 \n" // V7 | V6 | V5 | V4 - "precr.qb.ph $t2, $t3, $t2 \n" // U7 | U6 | U5 | U4 - "precrq.qb.ph $t3, $t6, $t5 \n" // V11 | V10 | V9 | V8 - "precr.qb.ph $t5, $t6, $t5 \n" // U11 | U10 | U9 | U8 - "precrq.qb.ph $t6, $t8, $t7 \n" // V15 | V14 | V13 | V12 - "precr.qb.ph $t7, $t8, $t7 \n" // U15 | U14 | U13 | U12 - "addiu %[src_uv], %[src_uv], 32 \n" - "swr $t9, 0(%[dst_v]) \n" - "swl $t9, 3(%[dst_v]) \n" - "swr $t0, 0(%[dst_u]) \n" - "swl $t0, 3(%[dst_u]) \n" - "swr $t1, 4(%[dst_v]) \n" - "swl $t1, 7(%[dst_v]) \n" - "swr $t2, 4(%[dst_u]) \n" - "swl $t2, 7(%[dst_u]) \n" - "swr $t3, 8(%[dst_v]) \n" - "swl $t3, 11(%[dst_v]) \n" - "swr $t5, 8(%[dst_u]) \n" - "swl $t5, 11(%[dst_u]) \n" - 
"swr $t6, 12(%[dst_v]) \n" - "swl $t6, 15(%[dst_v]) \n" - "swr $t7, 12(%[dst_u]) \n" - "swl $t7, 15(%[dst_u]) \n" - "addiu %[dst_u], %[dst_u], 16 \n" - "bgtz $t4, 1b \n" - " addiu %[dst_v], %[dst_v], 16 \n" - - "beqz %[width], 3f \n" - " nop \n" - - "2: \n" - "lbu $t0, 0(%[src_uv]) \n" - "lbu $t1, 1(%[src_uv]) \n" - "addiu %[src_uv], %[src_uv], 2 \n" - "addiu %[width], %[width], -1 \n" - "sb $t0, 0(%[dst_u]) \n" - "sb $t1, 0(%[dst_v]) \n" - "addiu %[dst_u], %[dst_u], 1 \n" - "bgtz %[width], 2b \n" - " addiu %[dst_v], %[dst_v], 1 \n" - - "3: \n" - ".set pop \n" - : [src_uv] "+r" (src_uv), - [width] "+r" (width), - [dst_u] "+r" (dst_u), - [dst_v] "+r" (dst_v) - : - : "t0", "t1", "t2", "t3", - "t4", "t5", "t6", "t7", "t8", "t9" - ); -} - -void MirrorRow_MIPS_DSPR2(const uint8* src, uint8* dst, int width) { - __asm__ __volatile__ ( - ".set push \n" - ".set noreorder \n" - - "srl $t4, %[width], 4 \n" // multiplies of 16 - "andi $t5, %[width], 0xf \n" - "blez $t4, 2f \n" - " addu %[src], %[src], %[width] \n" // src += width - - ".p2align 2 \n" - "1: \n" - "lw $t0, -16(%[src]) \n" // |3|2|1|0| - "lw $t1, -12(%[src]) \n" // |7|6|5|4| - "lw $t2, -8(%[src]) \n" // |11|10|9|8| - "lw $t3, -4(%[src]) \n" // |15|14|13|12| - "wsbh $t0, $t0 \n" // |2|3|0|1| - "wsbh $t1, $t1 \n" // |6|7|4|5| - "wsbh $t2, $t2 \n" // |10|11|8|9| - "wsbh $t3, $t3 \n" // |14|15|12|13| - "rotr $t0, $t0, 16 \n" // |0|1|2|3| - "rotr $t1, $t1, 16 \n" // |4|5|6|7| - "rotr $t2, $t2, 16 \n" // |8|9|10|11| - "rotr $t3, $t3, 16 \n" // |12|13|14|15| - "addiu %[src], %[src], -16 \n" - "addiu $t4, $t4, -1 \n" - "sw $t3, 0(%[dst]) \n" // |15|14|13|12| - "sw $t2, 4(%[dst]) \n" // |11|10|9|8| - "sw $t1, 8(%[dst]) \n" // |7|6|5|4| - "sw $t0, 12(%[dst]) \n" // |3|2|1|0| - "bgtz $t4, 1b \n" - " addiu %[dst], %[dst], 16 \n" - "beqz $t5, 3f \n" - " nop \n" - - "2: \n" - "lbu $t0, -1(%[src]) \n" - "addiu $t5, $t5, -1 \n" - "addiu %[src], %[src], -1 \n" - "sb $t0, 0(%[dst]) \n" - "bgez $t5, 2b \n" - " addiu %[dst], %[dst], 
1 \n" - - "3: \n" - ".set pop \n" - : [src] "+r" (src), [dst] "+r" (dst) - : [width] "r" (width) - : "t0", "t1", "t2", "t3", "t4", "t5" - ); -} - -void MirrorUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, - int width) { - int x = 0; - int y = 0; - __asm__ __volatile__ ( - ".set push \n" - ".set noreorder \n" - - "addu $t4, %[width], %[width] \n" - "srl %[x], %[width], 4 \n" - "andi %[y], %[width], 0xf \n" - "blez %[x], 2f \n" - " addu %[src_uv], %[src_uv], $t4 \n" - - ".p2align 2 \n" - "1: \n" - "lw $t0, -32(%[src_uv]) \n" // |3|2|1|0| - "lw $t1, -28(%[src_uv]) \n" // |7|6|5|4| - "lw $t2, -24(%[src_uv]) \n" // |11|10|9|8| - "lw $t3, -20(%[src_uv]) \n" // |15|14|13|12| - "lw $t4, -16(%[src_uv]) \n" // |19|18|17|16| - "lw $t6, -12(%[src_uv]) \n" // |23|22|21|20| - "lw $t7, -8(%[src_uv]) \n" // |27|26|25|24| - "lw $t8, -4(%[src_uv]) \n" // |31|30|29|28| - - "rotr $t0, $t0, 16 \n" // |1|0|3|2| - "rotr $t1, $t1, 16 \n" // |5|4|7|6| - "rotr $t2, $t2, 16 \n" // |9|8|11|10| - "rotr $t3, $t3, 16 \n" // |13|12|15|14| - "rotr $t4, $t4, 16 \n" // |17|16|19|18| - "rotr $t6, $t6, 16 \n" // |21|20|23|22| - "rotr $t7, $t7, 16 \n" // |25|24|27|26| - "rotr $t8, $t8, 16 \n" // |29|28|31|30| - "precr.qb.ph $t9, $t0, $t1 \n" // |0|2|4|6| - "precrq.qb.ph $t5, $t0, $t1 \n" // |1|3|5|7| - "precr.qb.ph $t0, $t2, $t3 \n" // |8|10|12|14| - "precrq.qb.ph $t1, $t2, $t3 \n" // |9|11|13|15| - "precr.qb.ph $t2, $t4, $t6 \n" // |16|18|20|22| - "precrq.qb.ph $t3, $t4, $t6 \n" // |17|19|21|23| - "precr.qb.ph $t4, $t7, $t8 \n" // |24|26|28|30| - "precrq.qb.ph $t6, $t7, $t8 \n" // |25|27|29|31| - "addiu %[src_uv], %[src_uv], -32 \n" - "addiu %[x], %[x], -1 \n" - "swr $t4, 0(%[dst_u]) \n" - "swl $t4, 3(%[dst_u]) \n" // |30|28|26|24| - "swr $t6, 0(%[dst_v]) \n" - "swl $t6, 3(%[dst_v]) \n" // |31|29|27|25| - "swr $t2, 4(%[dst_u]) \n" - "swl $t2, 7(%[dst_u]) \n" // |22|20|18|16| - "swr $t3, 4(%[dst_v]) \n" - "swl $t3, 7(%[dst_v]) \n" // |23|21|19|17| - "swr $t0, 8(%[dst_u]) \n" - "swl 
$t0, 11(%[dst_u]) \n" // |14|12|10|8| - "swr $t1, 8(%[dst_v]) \n" - "swl $t1, 11(%[dst_v]) \n" // |15|13|11|9| - "swr $t9, 12(%[dst_u]) \n" - "swl $t9, 15(%[dst_u]) \n" // |6|4|2|0| - "swr $t5, 12(%[dst_v]) \n" - "swl $t5, 15(%[dst_v]) \n" // |7|5|3|1| - "addiu %[dst_v], %[dst_v], 16 \n" - "bgtz %[x], 1b \n" - " addiu %[dst_u], %[dst_u], 16 \n" - "beqz %[y], 3f \n" - " nop \n" - "b 2f \n" - " nop \n" - - "2: \n" - "lbu $t0, -2(%[src_uv]) \n" - "lbu $t1, -1(%[src_uv]) \n" - "addiu %[src_uv], %[src_uv], -2 \n" - "addiu %[y], %[y], -1 \n" - "sb $t0, 0(%[dst_u]) \n" - "sb $t1, 0(%[dst_v]) \n" - "addiu %[dst_u], %[dst_u], 1 \n" - "bgtz %[y], 2b \n" - " addiu %[dst_v], %[dst_v], 1 \n" - - "3: \n" - ".set pop \n" - : [src_uv] "+r" (src_uv), - [dst_u] "+r" (dst_u), - [dst_v] "+r" (dst_v), - [x] "=&r" (x), - [y] "+r" (y) - : [width] "r" (width) - : "t0", "t1", "t2", "t3", "t4", - "t5", "t7", "t8", "t9" - ); -} - -// Convert (4 Y and 2 VU) I422 and arrange RGB values into -// t5 = | 0 | B0 | 0 | b0 | -// t4 = | 0 | B1 | 0 | b1 | -// t9 = | 0 | G0 | 0 | g0 | -// t8 = | 0 | G1 | 0 | g1 | -// t2 = | 0 | R0 | 0 | r0 | -// t1 = | 0 | R1 | 0 | r1 | -#define I422ToTransientMipsRGB \ - "lw $t0, 0(%[y_buf]) \n" \ - "lhu $t1, 0(%[u_buf]) \n" \ - "lhu $t2, 0(%[v_buf]) \n" \ - "preceu.ph.qbr $t1, $t1 \n" \ - "preceu.ph.qbr $t2, $t2 \n" \ - "preceu.ph.qbra $t3, $t0 \n" \ - "preceu.ph.qbla $t0, $t0 \n" \ - "subu.ph $t1, $t1, $s5 \n" \ - "subu.ph $t2, $t2, $s5 \n" \ - "subu.ph $t3, $t3, $s4 \n" \ - "subu.ph $t0, $t0, $s4 \n" \ - "mul.ph $t3, $t3, $s0 \n" \ - "mul.ph $t0, $t0, $s0 \n" \ - "shll.ph $t4, $t1, 0x7 \n" \ - "subu.ph $t4, $t4, $t1 \n" \ - "mul.ph $t6, $t1, $s1 \n" \ - "mul.ph $t1, $t2, $s2 \n" \ - "addq_s.ph $t5, $t4, $t3 \n" \ - "addq_s.ph $t4, $t4, $t0 \n" \ - "shra.ph $t5, $t5, 6 \n" \ - "shra.ph $t4, $t4, 6 \n" \ - "addiu %[u_buf], 2 \n" \ - "addiu %[v_buf], 2 \n" \ - "addu.ph $t6, $t6, $t1 \n" \ - "mul.ph $t1, $t2, $s3 \n" \ - "addu.ph $t9, $t6, $t3 \n" \ - "addu.ph $t8, 
$t6, $t0 \n" \ - "shra.ph $t9, $t9, 6 \n" \ - "shra.ph $t8, $t8, 6 \n" \ - "addu.ph $t2, $t1, $t3 \n" \ - "addu.ph $t1, $t1, $t0 \n" \ - "shra.ph $t2, $t2, 6 \n" \ - "shra.ph $t1, $t1, 6 \n" \ - "subu.ph $t5, $t5, $s5 \n" \ - "subu.ph $t4, $t4, $s5 \n" \ - "subu.ph $t9, $t9, $s5 \n" \ - "subu.ph $t8, $t8, $s5 \n" \ - "subu.ph $t2, $t2, $s5 \n" \ - "subu.ph $t1, $t1, $s5 \n" \ - "shll_s.ph $t5, $t5, 8 \n" \ - "shll_s.ph $t4, $t4, 8 \n" \ - "shll_s.ph $t9, $t9, 8 \n" \ - "shll_s.ph $t8, $t8, 8 \n" \ - "shll_s.ph $t2, $t2, 8 \n" \ - "shll_s.ph $t1, $t1, 8 \n" \ - "shra.ph $t5, $t5, 8 \n" \ - "shra.ph $t4, $t4, 8 \n" \ - "shra.ph $t9, $t9, 8 \n" \ - "shra.ph $t8, $t8, 8 \n" \ - "shra.ph $t2, $t2, 8 \n" \ - "shra.ph $t1, $t1, 8 \n" \ - "addu.ph $t5, $t5, $s5 \n" \ - "addu.ph $t4, $t4, $s5 \n" \ - "addu.ph $t9, $t9, $s5 \n" \ - "addu.ph $t8, $t8, $s5 \n" \ - "addu.ph $t2, $t2, $s5 \n" \ - "addu.ph $t1, $t1, $s5 \n" - -void I422ToARGBRow_MIPS_DSPR2(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width) { - __asm__ __volatile__ ( - ".set push \n" - ".set noreorder \n" - "beqz %[width], 2f \n" - " repl.ph $s0, 74 \n" // |YG|YG| = |74|74| - "repl.ph $s1, -25 \n" // |UG|UG| = |-25|-25| - "repl.ph $s2, -52 \n" // |VG|VG| = |-52|-52| - "repl.ph $s3, 102 \n" // |VR|VR| = |102|102| - "repl.ph $s4, 16 \n" // |0|16|0|16| - "repl.ph $s5, 128 \n" // |128|128| // clipping - "lui $s6, 0xff00 \n" - "ori $s6, 0xff00 \n" // |ff|00|ff|00|ff| - - ".p2align 2 \n" - "1: \n" - I422ToTransientMipsRGB -// Arranging into argb format - "precr.qb.ph $t4, $t8, $t4 \n" // |G1|g1|B1|b1| - "precr.qb.ph $t5, $t9, $t5 \n" // |G0|g0|B0|b0| - "addiu %[width], -4 \n" - "precrq.qb.ph $t8, $t4, $t5 \n" // |G1|B1|G0|B0| - "precr.qb.ph $t9, $t4, $t5 \n" // |g1|b1|g0|b0| - "precr.qb.ph $t2, $t1, $t2 \n" // |R1|r1|R0|r0| - - "addiu %[y_buf], 4 \n" - "preceu.ph.qbla $t1, $t2 \n" // |0 |R1|0 |R0| - "preceu.ph.qbra $t2, $t2 \n" // |0 |r1|0 |r0| - "or $t1, $t1, $s6 \n" // 
|ff|R1|ff|R0| - "or $t2, $t2, $s6 \n" // |ff|r1|ff|r0| - "precrq.ph.w $t0, $t2, $t9 \n" // |ff|r1|g1|b1| - "precrq.ph.w $t3, $t1, $t8 \n" // |ff|R1|G1|B1| - "sll $t9, $t9, 16 \n" - "sll $t8, $t8, 16 \n" - "packrl.ph $t2, $t2, $t9 \n" // |ff|r0|g0|b0| - "packrl.ph $t1, $t1, $t8 \n" // |ff|R0|G0|B0| -// Store results. - "sw $t2, 0(%[rgb_buf]) \n" - "sw $t0, 4(%[rgb_buf]) \n" - "sw $t1, 8(%[rgb_buf]) \n" - "sw $t3, 12(%[rgb_buf]) \n" - "bnez %[width], 1b \n" - " addiu %[rgb_buf], 16 \n" - "2: \n" - ".set pop \n" - :[y_buf] "+r" (y_buf), - [u_buf] "+r" (u_buf), - [v_buf] "+r" (v_buf), - [width] "+r" (width), - [rgb_buf] "+r" (rgb_buf) - : - : "t0", "t1", "t2", "t3", "t4", "t5", - "t6", "t7", "t8", "t9", - "s0", "s1", "s2", "s3", - "s4", "s5", "s6" - ); -} - -void I422ToABGRRow_MIPS_DSPR2(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width) { - __asm__ __volatile__ ( - ".set push \n" - ".set noreorder \n" - "beqz %[width], 2f \n" - " repl.ph $s0, 74 \n" // |YG|YG| = |74|74| - "repl.ph $s1, -25 \n" // |UG|UG| = |-25|-25| - "repl.ph $s2, -52 \n" // |VG|VG| = |-52|-52| - "repl.ph $s3, 102 \n" // |VR|VR| = |102|102| - "repl.ph $s4, 16 \n" // |0|16|0|16| - "repl.ph $s5, 128 \n" // |128|128| - "lui $s6, 0xff00 \n" - "ori $s6, 0xff00 \n" // |ff|00|ff|00| - - ".p2align 2 \n" - "1: \n" - I422ToTransientMipsRGB -// Arranging into abgr format - "precr.qb.ph $t0, $t8, $t1 \n" // |G1|g1|R1|r1| - "precr.qb.ph $t3, $t9, $t2 \n" // |G0|g0|R0|r0| - "precrq.qb.ph $t8, $t0, $t3 \n" // |G1|R1|G0|R0| - "precr.qb.ph $t9, $t0, $t3 \n" // |g1|r1|g0|r0| - - "precr.qb.ph $t2, $t4, $t5 \n" // |B1|b1|B0|b0| - "addiu %[width], -4 \n" - "addiu %[y_buf], 4 \n" - "preceu.ph.qbla $t1, $t2 \n" // |0 |B1|0 |B0| - "preceu.ph.qbra $t2, $t2 \n" // |0 |b1|0 |b0| - "or $t1, $t1, $s6 \n" // |ff|B1|ff|B0| - "or $t2, $t2, $s6 \n" // |ff|b1|ff|b0| - "precrq.ph.w $t0, $t2, $t9 \n" // |ff|b1|g1|r1| - "precrq.ph.w $t3, $t1, $t8 \n" // |ff|B1|G1|R1| - "sll $t9, $t9, 16 \n" - 
"sll $t8, $t8, 16 \n" - "packrl.ph $t2, $t2, $t9 \n" // |ff|b0|g0|r0| - "packrl.ph $t1, $t1, $t8 \n" // |ff|B0|G0|R0| -// Store results. - "sw $t2, 0(%[rgb_buf]) \n" - "sw $t0, 4(%[rgb_buf]) \n" - "sw $t1, 8(%[rgb_buf]) \n" - "sw $t3, 12(%[rgb_buf]) \n" - "bnez %[width], 1b \n" - " addiu %[rgb_buf], 16 \n" - "2: \n" - ".set pop \n" - :[y_buf] "+r" (y_buf), - [u_buf] "+r" (u_buf), - [v_buf] "+r" (v_buf), - [width] "+r" (width), - [rgb_buf] "+r" (rgb_buf) - : - : "t0", "t1", "t2", "t3", "t4", "t5", - "t6", "t7", "t8", "t9", - "s0", "s1", "s2", "s3", - "s4", "s5", "s6" - ); -} - -void I422ToBGRARow_MIPS_DSPR2(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width) { - __asm__ __volatile__ ( - ".set push \n" - ".set noreorder \n" - "beqz %[width], 2f \n" - " repl.ph $s0, 74 \n" // |YG|YG| = |74 |74 | - "repl.ph $s1, -25 \n" // |UG|UG| = |-25|-25| - "repl.ph $s2, -52 \n" // |VG|VG| = |-52|-52| - "repl.ph $s3, 102 \n" // |VR|VR| = |102|102| - "repl.ph $s4, 16 \n" // |0|16|0|16| - "repl.ph $s5, 128 \n" // |128|128| - "lui $s6, 0xff \n" - "ori $s6, 0xff \n" // |00|ff|00|ff| - - ".p2align 2 \n" - "1: \n" - I422ToTransientMipsRGB - // Arranging into bgra format - "precr.qb.ph $t4, $t4, $t8 \n" // |B1|b1|G1|g1| - "precr.qb.ph $t5, $t5, $t9 \n" // |B0|b0|G0|g0| - "precrq.qb.ph $t8, $t4, $t5 \n" // |B1|G1|B0|G0| - "precr.qb.ph $t9, $t4, $t5 \n" // |b1|g1|b0|g0| - - "precr.qb.ph $t2, $t1, $t2 \n" // |R1|r1|R0|r0| - "addiu %[width], -4 \n" - "addiu %[y_buf], 4 \n" - "preceu.ph.qbla $t1, $t2 \n" // |0 |R1|0 |R0| - "preceu.ph.qbra $t2, $t2 \n" // |0 |r1|0 |r0| - "sll $t1, $t1, 8 \n" // |R1|0 |R0|0 | - "sll $t2, $t2, 8 \n" // |r1|0 |r0|0 | - "or $t1, $t1, $s6 \n" // |R1|ff|R0|ff| - "or $t2, $t2, $s6 \n" // |r1|ff|r0|ff| - "precrq.ph.w $t0, $t9, $t2 \n" // |b1|g1|r1|ff| - "precrq.ph.w $t3, $t8, $t1 \n" // |B1|G1|R1|ff| - "sll $t1, $t1, 16 \n" - "sll $t2, $t2, 16 \n" - "packrl.ph $t2, $t9, $t2 \n" // |b0|g0|r0|ff| - "packrl.ph $t1, $t8, $t1 \n" 
// |B0|G0|R0|ff| -// Store results. - "sw $t2, 0(%[rgb_buf]) \n" - "sw $t0, 4(%[rgb_buf]) \n" - "sw $t1, 8(%[rgb_buf]) \n" - "sw $t3, 12(%[rgb_buf]) \n" - "bnez %[width], 1b \n" - " addiu %[rgb_buf], 16 \n" - "2: \n" - ".set pop \n" - :[y_buf] "+r" (y_buf), - [u_buf] "+r" (u_buf), - [v_buf] "+r" (v_buf), - [width] "+r" (width), - [rgb_buf] "+r" (rgb_buf) - : - : "t0", "t1", "t2", "t3", "t4", "t5", - "t6", "t7", "t8", "t9", - "s0", "s1", "s2", "s3", - "s4", "s5", "s6" - ); -} - -// Bilinear filter 8x2 -> 8x1 -void InterpolateRows_MIPS_DSPR2(uint8* dst_ptr, const uint8* src_ptr, - ptrdiff_t src_stride, int dst_width, - int source_y_fraction) { - int y0_fraction = 256 - source_y_fraction; - const uint8* src_ptr1 = src_ptr + src_stride; - - __asm__ __volatile__ ( - ".set push \n" - ".set noreorder \n" - - "replv.ph $t0, %[y0_fraction] \n" - "replv.ph $t1, %[source_y_fraction] \n" - - ".p2align 2 \n" - "1: \n" - "lw $t2, 0(%[src_ptr]) \n" - "lw $t3, 0(%[src_ptr1]) \n" - "lw $t4, 4(%[src_ptr]) \n" - "lw $t5, 4(%[src_ptr1]) \n" - "muleu_s.ph.qbl $t6, $t2, $t0 \n" - "muleu_s.ph.qbr $t7, $t2, $t0 \n" - "muleu_s.ph.qbl $t8, $t3, $t1 \n" - "muleu_s.ph.qbr $t9, $t3, $t1 \n" - "muleu_s.ph.qbl $t2, $t4, $t0 \n" - "muleu_s.ph.qbr $t3, $t4, $t0 \n" - "muleu_s.ph.qbl $t4, $t5, $t1 \n" - "muleu_s.ph.qbr $t5, $t5, $t1 \n" - "addq.ph $t6, $t6, $t8 \n" - "addq.ph $t7, $t7, $t9 \n" - "addq.ph $t2, $t2, $t4 \n" - "addq.ph $t3, $t3, $t5 \n" - "shra.ph $t6, $t6, 8 \n" - "shra.ph $t7, $t7, 8 \n" - "shra.ph $t2, $t2, 8 \n" - "shra.ph $t3, $t3, 8 \n" - "precr.qb.ph $t6, $t6, $t7 \n" - "precr.qb.ph $t2, $t2, $t3 \n" - "addiu %[src_ptr], %[src_ptr], 8 \n" - "addiu %[src_ptr1], %[src_ptr1], 8 \n" - "addiu %[dst_width], %[dst_width], -8 \n" - "sw $t6, 0(%[dst_ptr]) \n" - "sw $t2, 4(%[dst_ptr]) \n" - "bgtz %[dst_width], 1b \n" - " addiu %[dst_ptr], %[dst_ptr], 8 \n" - - ".set pop \n" - : [dst_ptr] "+r" (dst_ptr), - [src_ptr1] "+r" (src_ptr1), - [src_ptr] "+r" (src_ptr), - [dst_width] "+r" 
(dst_width) - : [source_y_fraction] "r" (source_y_fraction), - [y0_fraction] "r" (y0_fraction), - [src_stride] "r" (src_stride) - : "t0", "t1", "t2", "t3", "t4", "t5", - "t6", "t7", "t8", "t9" - ); -} -#endif // __mips_dsp_rev >= 2 - -#endif // defined(__mips__) - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/row_neon.cc b/drivers/theoraplayer/src/YUV/libyuv/src/row_neon.cc deleted file mode 100755 index 68e380051b..0000000000 --- a/drivers/theoraplayer/src/YUV/libyuv/src/row_neon.cc +++ /dev/null @@ -1,2847 +0,0 @@ -/* - * Copyright 2011 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "libyuv/row.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -// This module is for GCC Neon -#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__) - -// Read 8 Y, 4 U and 4 V from 422 -#define READYUV422 \ - "vld1.8 {d0}, [%0]! \n" \ - "vld1.32 {d2[0]}, [%1]! \n" \ - "vld1.32 {d2[1]}, [%2]! \n" - -// Read 8 Y, 2 U and 2 V from 422 -#define READYUV411 \ - "vld1.8 {d0}, [%0]! \n" \ - "vld1.16 {d2[0]}, [%1]! \n" \ - "vld1.16 {d2[1]}, [%2]! \n" \ - "vmov.u8 d3, d2 \n" \ - "vzip.u8 d2, d3 \n" - -// Read 8 Y, 8 U and 8 V from 444 -#define READYUV444 \ - "vld1.8 {d0}, [%0]! \n" \ - "vld1.8 {d2}, [%1]! \n" \ - "vld1.8 {d3}, [%2]! \n" \ - "vpaddl.u8 q1, q1 \n" \ - "vrshrn.u16 d2, q1, #1 \n" - -// Read 8 Y, and set 4 U and 4 V to 128 -#define READYUV400 \ - "vld1.8 {d0}, [%0]! \n" \ - "vmov.u8 d2, #128 \n" - -// Read 8 Y and 4 UV from NV12 -#define READNV12 \ - "vld1.8 {d0}, [%0]! \n" \ - "vld1.8 {d2}, [%1]! 
\n" \ - "vmov.u8 d3, d2 \n"/* split odd/even uv apart */\ - "vuzp.u8 d2, d3 \n" \ - "vtrn.u32 d2, d3 \n" - -// Read 8 Y and 4 VU from NV21 -#define READNV21 \ - "vld1.8 {d0}, [%0]! \n" \ - "vld1.8 {d2}, [%1]! \n" \ - "vmov.u8 d3, d2 \n"/* split odd/even uv apart */\ - "vuzp.u8 d3, d2 \n" \ - "vtrn.u32 d2, d3 \n" - -// Read 8 YUY2 -#define READYUY2 \ - "vld2.8 {d0, d2}, [%0]! \n" \ - "vmov.u8 d3, d2 \n" \ - "vuzp.u8 d2, d3 \n" \ - "vtrn.u32 d2, d3 \n" - -// Read 8 UYVY -#define READUYVY \ - "vld2.8 {d2, d3}, [%0]! \n" \ - "vmov.u8 d0, d3 \n" \ - "vmov.u8 d3, d2 \n" \ - "vuzp.u8 d2, d3 \n" \ - "vtrn.u32 d2, d3 \n" - -#define YUV422TORGB \ - "veor.u8 d2, d26 \n"/*subtract 128 from u and v*/\ - "vmull.s8 q8, d2, d24 \n"/* u/v B/R component */\ - "vmull.s8 q9, d2, d25 \n"/* u/v G component */\ - "vmov.u8 d1, #0 \n"/* split odd/even y apart */\ - "vtrn.u8 d0, d1 \n" \ - "vsub.s16 q0, q0, q15 \n"/* offset y */\ - "vmul.s16 q0, q0, q14 \n" \ - "vadd.s16 d18, d19 \n" \ - "vqadd.s16 d20, d0, d16 \n" /* B */ \ - "vqadd.s16 d21, d1, d16 \n" \ - "vqadd.s16 d22, d0, d17 \n" /* R */ \ - "vqadd.s16 d23, d1, d17 \n" \ - "vqadd.s16 d16, d0, d18 \n" /* G */ \ - "vqadd.s16 d17, d1, d18 \n" \ - "vqshrun.s16 d0, q10, #6 \n" /* B */ \ - "vqshrun.s16 d1, q11, #6 \n" /* G */ \ - "vqshrun.s16 d2, q8, #6 \n" /* R */ \ - "vmovl.u8 q10, d0 \n"/* set up for reinterleave*/\ - "vmovl.u8 q11, d1 \n" \ - "vmovl.u8 q8, d2 \n" \ - "vtrn.u8 d20, d21 \n" \ - "vtrn.u8 d22, d23 \n" \ - "vtrn.u8 d16, d17 \n" \ - "vmov.u8 d21, d16 \n" - -static vec8 kUVToRB = { 127, 127, 127, 127, 102, 102, 102, 102, - 0, 0, 0, 0, 0, 0, 0, 0 }; -static vec8 kUVToG = { -25, -25, -25, -25, -52, -52, -52, -52, - 0, 0, 0, 0, 0, 0, 0, 0 }; - -void I444ToARGBRow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width) { - asm volatile ( -#ifdef _ANDROID - ".fpu neon\n" -#endif - "vld1.8 {d24}, [%5] \n" - "vld1.8 {d25}, [%6] \n" - "vmov.u8 d26, #128 \n" - "vmov.u16 q14, #74 \n" - 
"vmov.u16 q15, #16 \n" - ".p2align 2 \n" - "1: \n" - READYUV444 - YUV422TORGB - "subs %4, %4, #8 \n" - "vmov.u8 d23, #255 \n" - "vst4.8 {d20, d21, d22, d23}, [%3]! \n" - "bgt 1b \n" - : "+r"(src_y), // %0 - "+r"(src_u), // %1 - "+r"(src_v), // %2 - "+r"(dst_argb), // %3 - "+r"(width) // %4 - : "r"(&kUVToRB), // %5 - "r"(&kUVToG) // %6 - : "cc", "memory", "q0", "q1", "q2", "q3", - "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" - ); -} - -void I422ToARGBRow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width) { - asm volatile ( - "vld1.8 {d24}, [%5] \n" - "vld1.8 {d25}, [%6] \n" - "vmov.u8 d26, #128 \n" - "vmov.u16 q14, #74 \n" - "vmov.u16 q15, #16 \n" - ".p2align 2 \n" - "1: \n" - READYUV422 - YUV422TORGB - "subs %4, %4, #8 \n" - "vmov.u8 d23, #255 \n" - "vst4.8 {d20, d21, d22, d23}, [%3]! \n" - "bgt 1b \n" - : "+r"(src_y), // %0 - "+r"(src_u), // %1 - "+r"(src_v), // %2 - "+r"(dst_argb), // %3 - "+r"(width) // %4 - : "r"(&kUVToRB), // %5 - "r"(&kUVToG) // %6 - : "cc", "memory", "q0", "q1", "q2", "q3", - "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" - ); -} - -void I411ToARGBRow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int width) { - asm volatile ( - "vld1.8 {d24}, [%5] \n" - "vld1.8 {d25}, [%6] \n" - "vmov.u8 d26, #128 \n" - "vmov.u16 q14, #74 \n" - "vmov.u16 q15, #16 \n" - ".p2align 2 \n" - "1: \n" - READYUV411 - YUV422TORGB - "subs %4, %4, #8 \n" - "vmov.u8 d23, #255 \n" - "vst4.8 {d20, d21, d22, d23}, [%3]! 
\n" - "bgt 1b \n" - : "+r"(src_y), // %0 - "+r"(src_u), // %1 - "+r"(src_v), // %2 - "+r"(dst_argb), // %3 - "+r"(width) // %4 - : "r"(&kUVToRB), // %5 - "r"(&kUVToG) // %6 - : "cc", "memory", "q0", "q1", "q2", "q3", - "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" - ); -} - -void I422ToBGRARow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_bgra, - int width) { - asm volatile ( - "vld1.8 {d24}, [%5] \n" - "vld1.8 {d25}, [%6] \n" - "vmov.u8 d26, #128 \n" - "vmov.u16 q14, #74 \n" - "vmov.u16 q15, #16 \n" - ".p2align 2 \n" - "1: \n" - READYUV422 - YUV422TORGB - "subs %4, %4, #8 \n" - "vswp.u8 d20, d22 \n" - "vmov.u8 d19, #255 \n" - "vst4.8 {d19, d20, d21, d22}, [%3]! \n" - "bgt 1b \n" - : "+r"(src_y), // %0 - "+r"(src_u), // %1 - "+r"(src_v), // %2 - "+r"(dst_bgra), // %3 - "+r"(width) // %4 - : "r"(&kUVToRB), // %5 - "r"(&kUVToG) // %6 - : "cc", "memory", "q0", "q1", "q2", "q3", - "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" - ); -} - -void I422ToABGRRow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_abgr, - int width) { - asm volatile ( - "vld1.8 {d24}, [%5] \n" - "vld1.8 {d25}, [%6] \n" - "vmov.u8 d26, #128 \n" - "vmov.u16 q14, #74 \n" - "vmov.u16 q15, #16 \n" - ".p2align 2 \n" - "1: \n" - READYUV422 - YUV422TORGB - "subs %4, %4, #8 \n" - "vswp.u8 d20, d22 \n" - "vmov.u8 d23, #255 \n" - "vst4.8 {d20, d21, d22, d23}, [%3]! 
\n" - "bgt 1b \n" - : "+r"(src_y), // %0 - "+r"(src_u), // %1 - "+r"(src_v), // %2 - "+r"(dst_abgr), // %3 - "+r"(width) // %4 - : "r"(&kUVToRB), // %5 - "r"(&kUVToG) // %6 - : "cc", "memory", "q0", "q1", "q2", "q3", - "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" - ); -} - -void I422ToRGBARow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgba, - int width) { - asm volatile ( - "vld1.8 {d24}, [%5] \n" - "vld1.8 {d25}, [%6] \n" - "vmov.u8 d26, #128 \n" - "vmov.u16 q14, #74 \n" - "vmov.u16 q15, #16 \n" - ".p2align 2 \n" - "1: \n" - READYUV422 - YUV422TORGB - "subs %4, %4, #8 \n" - "vmov.u8 d19, #255 \n" - "vst4.8 {d19, d20, d21, d22}, [%3]! \n" - "bgt 1b \n" - : "+r"(src_y), // %0 - "+r"(src_u), // %1 - "+r"(src_v), // %2 - "+r"(dst_rgba), // %3 - "+r"(width) // %4 - : "r"(&kUVToRB), // %5 - "r"(&kUVToG) // %6 - : "cc", "memory", "q0", "q1", "q2", "q3", - "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" - ); -} - -void I422ToRGB24Row_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgb24, - int width) { - asm volatile ( - "vld1.8 {d24}, [%5] \n" - "vld1.8 {d25}, [%6] \n" - "vmov.u8 d26, #128 \n" - "vmov.u16 q14, #74 \n" - "vmov.u16 q15, #16 \n" - ".p2align 2 \n" - "1: \n" - READYUV422 - YUV422TORGB - "subs %4, %4, #8 \n" - "vst3.8 {d20, d21, d22}, [%3]! 
\n" - "bgt 1b \n" - : "+r"(src_y), // %0 - "+r"(src_u), // %1 - "+r"(src_v), // %2 - "+r"(dst_rgb24), // %3 - "+r"(width) // %4 - : "r"(&kUVToRB), // %5 - "r"(&kUVToG) // %6 - : "cc", "memory", "q0", "q1", "q2", "q3", - "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" - ); -} - -void I422ToRAWRow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_raw, - int width) { - asm volatile ( - "vld1.8 {d24}, [%5] \n" - "vld1.8 {d25}, [%6] \n" - "vmov.u8 d26, #128 \n" - "vmov.u16 q14, #74 \n" - "vmov.u16 q15, #16 \n" - ".p2align 2 \n" - "1: \n" - READYUV422 - YUV422TORGB - "subs %4, %4, #8 \n" - "vswp.u8 d20, d22 \n" - "vst3.8 {d20, d21, d22}, [%3]! \n" - "bgt 1b \n" - : "+r"(src_y), // %0 - "+r"(src_u), // %1 - "+r"(src_v), // %2 - "+r"(dst_raw), // %3 - "+r"(width) // %4 - : "r"(&kUVToRB), // %5 - "r"(&kUVToG) // %6 - : "cc", "memory", "q0", "q1", "q2", "q3", - "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" - ); -} - -#define ARGBTORGB565 \ - "vshr.u8 d20, d20, #3 \n" /* B */ \ - "vshr.u8 d21, d21, #2 \n" /* G */ \ - "vshr.u8 d22, d22, #3 \n" /* R */ \ - "vmovl.u8 q8, d20 \n" /* B */ \ - "vmovl.u8 q9, d21 \n" /* G */ \ - "vmovl.u8 q10, d22 \n" /* R */ \ - "vshl.u16 q9, q9, #5 \n" /* G */ \ - "vshl.u16 q10, q10, #11 \n" /* R */ \ - "vorr q0, q8, q9 \n" /* BG */ \ - "vorr q0, q0, q10 \n" /* BGR */ - -void I422ToRGB565Row_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_rgb565, - int width) { - asm volatile ( - "vld1.8 {d24}, [%5] \n" - "vld1.8 {d25}, [%6] \n" - "vmov.u8 d26, #128 \n" - "vmov.u16 q14, #74 \n" - "vmov.u16 q15, #16 \n" - ".p2align 2 \n" - "1: \n" - READYUV422 - YUV422TORGB - "subs %4, %4, #8 \n" - ARGBTORGB565 - "vst1.8 {q0}, [%3]! \n" // store 8 pixels RGB565. 
- "bgt 1b \n" - : "+r"(src_y), // %0 - "+r"(src_u), // %1 - "+r"(src_v), // %2 - "+r"(dst_rgb565), // %3 - "+r"(width) // %4 - : "r"(&kUVToRB), // %5 - "r"(&kUVToG) // %6 - : "cc", "memory", "q0", "q1", "q2", "q3", - "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" - ); -} - -#define ARGBTOARGB1555 \ - "vshr.u8 q10, q10, #3 \n" /* B */ \ - "vshr.u8 d22, d22, #3 \n" /* R */ \ - "vshr.u8 d23, d23, #7 \n" /* A */ \ - "vmovl.u8 q8, d20 \n" /* B */ \ - "vmovl.u8 q9, d21 \n" /* G */ \ - "vmovl.u8 q10, d22 \n" /* R */ \ - "vmovl.u8 q11, d23 \n" /* A */ \ - "vshl.u16 q9, q9, #5 \n" /* G */ \ - "vshl.u16 q10, q10, #10 \n" /* R */ \ - "vshl.u16 q11, q11, #15 \n" /* A */ \ - "vorr q0, q8, q9 \n" /* BG */ \ - "vorr q1, q10, q11 \n" /* RA */ \ - "vorr q0, q0, q1 \n" /* BGRA */ - -void I422ToARGB1555Row_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb1555, - int width) { - asm volatile ( - "vld1.8 {d24}, [%5] \n" - "vld1.8 {d25}, [%6] \n" - "vmov.u8 d26, #128 \n" - "vmov.u16 q14, #74 \n" - "vmov.u16 q15, #16 \n" - ".p2align 2 \n" - "1: \n" - READYUV422 - YUV422TORGB - "subs %4, %4, #8 \n" - "vmov.u8 d23, #255 \n" - ARGBTOARGB1555 - "vst1.8 {q0}, [%3]! \n" // store 8 pixels ARGB1555. 
- "bgt 1b \n" - : "+r"(src_y), // %0 - "+r"(src_u), // %1 - "+r"(src_v), // %2 - "+r"(dst_argb1555), // %3 - "+r"(width) // %4 - : "r"(&kUVToRB), // %5 - "r"(&kUVToG) // %6 - : "cc", "memory", "q0", "q1", "q2", "q3", - "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" - ); -} - -#define ARGBTOARGB4444 \ - "vshr.u8 d20, d20, #4 \n" /* B */ \ - "vbic.32 d21, d21, d4 \n" /* G */ \ - "vshr.u8 d22, d22, #4 \n" /* R */ \ - "vbic.32 d23, d23, d4 \n" /* A */ \ - "vorr d0, d20, d21 \n" /* BG */ \ - "vorr d1, d22, d23 \n" /* RA */ \ - "vzip.u8 d0, d1 \n" /* BGRA */ - -void I422ToARGB4444Row_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb4444, - int width) { - asm volatile ( - "vld1.8 {d24}, [%5] \n" - "vld1.8 {d25}, [%6] \n" - "vmov.u8 d26, #128 \n" - "vmov.u16 q14, #74 \n" - "vmov.u16 q15, #16 \n" - "vmov.u8 d4, #0x0f \n" // bits to clear with vbic. - ".p2align 2 \n" - "1: \n" - READYUV422 - YUV422TORGB - "subs %4, %4, #8 \n" - "vmov.u8 d23, #255 \n" - ARGBTOARGB4444 - "vst1.8 {q0}, [%3]! \n" // store 8 pixels ARGB4444. - "bgt 1b \n" - : "+r"(src_y), // %0 - "+r"(src_u), // %1 - "+r"(src_v), // %2 - "+r"(dst_argb4444), // %3 - "+r"(width) // %4 - : "r"(&kUVToRB), // %5 - "r"(&kUVToG) // %6 - : "cc", "memory", "q0", "q1", "q2", "q3", - "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" - ); -} - -void YToARGBRow_NEON(const uint8* src_y, - uint8* dst_argb, - int width) { - asm volatile ( - "vld1.8 {d24}, [%3] \n" - "vld1.8 {d25}, [%4] \n" - "vmov.u8 d26, #128 \n" - "vmov.u16 q14, #74 \n" - "vmov.u16 q15, #16 \n" - ".p2align 2 \n" - "1: \n" - READYUV400 - YUV422TORGB - "subs %2, %2, #8 \n" - "vmov.u8 d23, #255 \n" - "vst4.8 {d20, d21, d22, d23}, [%1]! 
\n" - "bgt 1b \n" - : "+r"(src_y), // %0 - "+r"(dst_argb), // %1 - "+r"(width) // %2 - : "r"(&kUVToRB), // %3 - "r"(&kUVToG) // %4 - : "cc", "memory", "q0", "q1", "q2", "q3", - "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" - ); -} - -void I400ToARGBRow_NEON(const uint8* src_y, - uint8* dst_argb, - int width) { - asm volatile ( - ".p2align 2 \n" - "vmov.u8 d23, #255 \n" - "1: \n" - "vld1.8 {d20}, [%0]! \n" - "vmov d21, d20 \n" - "vmov d22, d20 \n" - "subs %2, %2, #8 \n" - "vst4.8 {d20, d21, d22, d23}, [%1]! \n" - "bgt 1b \n" - : "+r"(src_y), // %0 - "+r"(dst_argb), // %1 - "+r"(width) // %2 - : - : "cc", "memory", "d20", "d21", "d22", "d23" - ); -} - -void NV12ToARGBRow_NEON(const uint8* src_y, - const uint8* src_uv, - uint8* dst_argb, - int width) { - asm volatile ( - "vld1.8 {d24}, [%4] \n" - "vld1.8 {d25}, [%5] \n" - "vmov.u8 d26, #128 \n" - "vmov.u16 q14, #74 \n" - "vmov.u16 q15, #16 \n" - ".p2align 2 \n" - "1: \n" - READNV12 - YUV422TORGB - "subs %3, %3, #8 \n" - "vmov.u8 d23, #255 \n" - "vst4.8 {d20, d21, d22, d23}, [%2]! \n" - "bgt 1b \n" - : "+r"(src_y), // %0 - "+r"(src_uv), // %1 - "+r"(dst_argb), // %2 - "+r"(width) // %3 - : "r"(&kUVToRB), // %4 - "r"(&kUVToG) // %5 - : "cc", "memory", "q0", "q1", "q2", "q3", - "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" - ); -} - -void NV21ToARGBRow_NEON(const uint8* src_y, - const uint8* src_uv, - uint8* dst_argb, - int width) { - asm volatile ( - "vld1.8 {d24}, [%4] \n" - "vld1.8 {d25}, [%5] \n" - "vmov.u8 d26, #128 \n" - "vmov.u16 q14, #74 \n" - "vmov.u16 q15, #16 \n" - ".p2align 2 \n" - "1: \n" - READNV21 - YUV422TORGB - "subs %3, %3, #8 \n" - "vmov.u8 d23, #255 \n" - "vst4.8 {d20, d21, d22, d23}, [%2]! 
\n" - "bgt 1b \n" - : "+r"(src_y), // %0 - "+r"(src_uv), // %1 - "+r"(dst_argb), // %2 - "+r"(width) // %3 - : "r"(&kUVToRB), // %4 - "r"(&kUVToG) // %5 - : "cc", "memory", "q0", "q1", "q2", "q3", - "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" - ); -} - -void NV12ToRGB565Row_NEON(const uint8* src_y, - const uint8* src_uv, - uint8* dst_rgb565, - int width) { - asm volatile ( - "vld1.8 {d24}, [%4] \n" - "vld1.8 {d25}, [%5] \n" - "vmov.u8 d26, #128 \n" - "vmov.u16 q14, #74 \n" - "vmov.u16 q15, #16 \n" - ".p2align 2 \n" - "1: \n" - READNV12 - YUV422TORGB - "subs %3, %3, #8 \n" - ARGBTORGB565 - "vst1.8 {q0}, [%2]! \n" // store 8 pixels RGB565. - "bgt 1b \n" - : "+r"(src_y), // %0 - "+r"(src_uv), // %1 - "+r"(dst_rgb565), // %2 - "+r"(width) // %3 - : "r"(&kUVToRB), // %4 - "r"(&kUVToG) // %5 - : "cc", "memory", "q0", "q1", "q2", "q3", - "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" - ); -} - -void NV21ToRGB565Row_NEON(const uint8* src_y, - const uint8* src_uv, - uint8* dst_rgb565, - int width) { - asm volatile ( - "vld1.8 {d24}, [%4] \n" - "vld1.8 {d25}, [%5] \n" - "vmov.u8 d26, #128 \n" - "vmov.u16 q14, #74 \n" - "vmov.u16 q15, #16 \n" - ".p2align 2 \n" - "1: \n" - READNV21 - YUV422TORGB - "subs %3, %3, #8 \n" - ARGBTORGB565 - "vst1.8 {q0}, [%2]! \n" // store 8 pixels RGB565. - "bgt 1b \n" - : "+r"(src_y), // %0 - "+r"(src_uv), // %1 - "+r"(dst_rgb565), // %2 - "+r"(width) // %3 - : "r"(&kUVToRB), // %4 - "r"(&kUVToG) // %5 - : "cc", "memory", "q0", "q1", "q2", "q3", - "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" - ); -} - -void YUY2ToARGBRow_NEON(const uint8* src_yuy2, - uint8* dst_argb, - int width) { - asm volatile ( - "vld1.8 {d24}, [%3] \n" - "vld1.8 {d25}, [%4] \n" - "vmov.u8 d26, #128 \n" - "vmov.u16 q14, #74 \n" - "vmov.u16 q15, #16 \n" - ".p2align 2 \n" - "1: \n" - READYUY2 - YUV422TORGB - "subs %2, %2, #8 \n" - "vmov.u8 d23, #255 \n" - "vst4.8 {d20, d21, d22, d23}, [%1]! 
\n" - "bgt 1b \n" - : "+r"(src_yuy2), // %0 - "+r"(dst_argb), // %1 - "+r"(width) // %2 - : "r"(&kUVToRB), // %3 - "r"(&kUVToG) // %4 - : "cc", "memory", "q0", "q1", "q2", "q3", - "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" - ); -} - -void UYVYToARGBRow_NEON(const uint8* src_uyvy, - uint8* dst_argb, - int width) { - asm volatile ( - "vld1.8 {d24}, [%3] \n" - "vld1.8 {d25}, [%4] \n" - "vmov.u8 d26, #128 \n" - "vmov.u16 q14, #74 \n" - "vmov.u16 q15, #16 \n" - ".p2align 2 \n" - "1: \n" - READUYVY - YUV422TORGB - "subs %2, %2, #8 \n" - "vmov.u8 d23, #255 \n" - "vst4.8 {d20, d21, d22, d23}, [%1]! \n" - "bgt 1b \n" - : "+r"(src_uyvy), // %0 - "+r"(dst_argb), // %1 - "+r"(width) // %2 - : "r"(&kUVToRB), // %3 - "r"(&kUVToG) // %4 - : "cc", "memory", "q0", "q1", "q2", "q3", - "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" - ); -} - -// Reads 16 pairs of UV and write even values to dst_u and odd to dst_v. -void SplitUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, - int width) { - asm volatile ( - ".p2align 2 \n" - "1: \n" - "vld2.8 {q0, q1}, [%0]! \n" // load 16 pairs of UV - "subs %3, %3, #16 \n" // 16 processed per loop - "vst1.8 {q0}, [%1]! \n" // store U - "vst1.8 {q1}, [%2]! \n" // store V - "bgt 1b \n" - : "+r"(src_uv), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+r"(width) // %3 // Output registers - : // Input registers - : "cc", "memory", "q0", "q1" // Clobber List - ); -} - -// Reads 16 U's and V's and writes out 16 pairs of UV. -void MergeUVRow_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv, - int width) { - asm volatile ( - ".p2align 2 \n" - "1: \n" - "vld1.8 {q0}, [%0]! \n" // load U - "vld1.8 {q1}, [%1]! \n" // load V - "subs %3, %3, #16 \n" // 16 processed per loop - "vst2.u8 {q0, q1}, [%2]! 
\n" // store 16 pairs of UV - "bgt 1b \n" - : - "+r"(src_u), // %0 - "+r"(src_v), // %1 - "+r"(dst_uv), // %2 - "+r"(width) // %3 // Output registers - : // Input registers - : "cc", "memory", "q0", "q1" // Clobber List - ); -} - -// Copy multiple of 32. vld4.8 allow unaligned and is fastest on a15. -void CopyRow_NEON(const uint8* src, uint8* dst, int count) { - asm volatile ( - ".p2align 2 \n" - "1: \n" - "vld1.8 {d0, d1, d2, d3}, [%0]! \n" // load 32 - "subs %2, %2, #32 \n" // 32 processed per loop - "vst1.8 {d0, d1, d2, d3}, [%1]! \n" // store 32 - "bgt 1b \n" - : "+r"(src), // %0 - "+r"(dst), // %1 - "+r"(count) // %2 // Output registers - : // Input registers - : "cc", "memory", "q0", "q1" // Clobber List - ); -} - -// SetRow8 writes 'count' bytes using a 32 bit value repeated. -void SetRow_NEON(uint8* dst, uint32 v32, int count) { - asm volatile ( - "vdup.u32 q0, %2 \n" // duplicate 4 ints - "1: \n" - "subs %1, %1, #16 \n" // 16 bytes per loop - "vst1.8 {q0}, [%0]! \n" // store - "bgt 1b \n" - : "+r"(dst), // %0 - "+r"(count) // %1 - : "r"(v32) // %2 - : "cc", "memory", "q0" - ); -} - -// TODO(fbarchard): Make fully assembler -// SetRow32 writes 'count' words using a 32 bit value repeated. -void ARGBSetRows_NEON(uint8* dst, uint32 v32, int width, - int dst_stride, int height) { - for (int y = 0; y < height; ++y) { - SetRow_NEON(dst, v32, width << 2); - dst += dst_stride; - } -} - -void MirrorRow_NEON(const uint8* src, uint8* dst, int width) { - asm volatile ( - // Start at end of source row. - "mov r3, #-16 \n" - "add %0, %0, %2 \n" - "sub %0, #16 \n" - - ".p2align 2 \n" - "1: \n" - "vld1.8 {q0}, [%0], r3 \n" // src -= 16 - "subs %2, #16 \n" // 16 pixels per loop. - "vrev64.8 q0, q0 \n" - "vst1.8 {d1}, [%1]! \n" // dst += 16 - "vst1.8 {d0}, [%1]! 
\n" - "bgt 1b \n" - : "+r"(src), // %0 - "+r"(dst), // %1 - "+r"(width) // %2 - : - : "cc", "memory", "r3", "q0" - ); -} - -void MirrorUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, - int width) { - asm volatile ( - // Start at end of source row. - "mov r12, #-16 \n" - "add %0, %0, %3, lsl #1 \n" - "sub %0, #16 \n" - - ".p2align 2 \n" - "1: \n" - "vld2.8 {d0, d1}, [%0], r12 \n" // src -= 16 - "subs %3, #8 \n" // 8 pixels per loop. - "vrev64.8 q0, q0 \n" - "vst1.8 {d0}, [%1]! \n" // dst += 8 - "vst1.8 {d1}, [%2]! \n" - "bgt 1b \n" - : "+r"(src_uv), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+r"(width) // %3 - : - : "cc", "memory", "r12", "q0" - ); -} - -void ARGBMirrorRow_NEON(const uint8* src, uint8* dst, int width) { - asm volatile ( - // Start at end of source row. - "mov r3, #-16 \n" - "add %0, %0, %2, lsl #2 \n" - "sub %0, #16 \n" - - ".p2align 2 \n" - "1: \n" - "vld1.8 {q0}, [%0], r3 \n" // src -= 16 - "subs %2, #4 \n" // 4 pixels per loop. - "vrev64.32 q0, q0 \n" - "vst1.8 {d1}, [%1]! \n" // dst += 16 - "vst1.8 {d0}, [%1]! \n" - "bgt 1b \n" - : "+r"(src), // %0 - "+r"(dst), // %1 - "+r"(width) // %2 - : - : "cc", "memory", "r3", "q0" - ); -} - -void RGB24ToARGBRow_NEON(const uint8* src_rgb24, uint8* dst_argb, int pix) { - asm volatile ( - "vmov.u8 d4, #255 \n" // Alpha - ".p2align 2 \n" - "1: \n" - "vld3.8 {d1, d2, d3}, [%0]! \n" // load 8 pixels of RGB24. - "subs %2, %2, #8 \n" // 8 processed per loop. - "vst4.8 {d1, d2, d3, d4}, [%1]! \n" // store 8 pixels of ARGB. - "bgt 1b \n" - : "+r"(src_rgb24), // %0 - "+r"(dst_argb), // %1 - "+r"(pix) // %2 - : - : "cc", "memory", "d1", "d2", "d3", "d4" // Clobber List - ); -} - -void RAWToARGBRow_NEON(const uint8* src_raw, uint8* dst_argb, int pix) { - asm volatile ( - "vmov.u8 d4, #255 \n" // Alpha - ".p2align 2 \n" - "1: \n" - "vld3.8 {d1, d2, d3}, [%0]! \n" // load 8 pixels of RAW. - "subs %2, %2, #8 \n" // 8 processed per loop. 
- "vswp.u8 d1, d3 \n" // swap R, B - "vst4.8 {d1, d2, d3, d4}, [%1]! \n" // store 8 pixels of ARGB. - "bgt 1b \n" - : "+r"(src_raw), // %0 - "+r"(dst_argb), // %1 - "+r"(pix) // %2 - : - : "cc", "memory", "d1", "d2", "d3", "d4" // Clobber List - ); -} - -#define RGB565TOARGB \ - "vshrn.u16 d6, q0, #5 \n" /* G xxGGGGGG */ \ - "vuzp.u8 d0, d1 \n" /* d0 xxxBBBBB RRRRRxxx */ \ - "vshl.u8 d6, d6, #2 \n" /* G GGGGGG00 upper 6 */ \ - "vshr.u8 d1, d1, #3 \n" /* R 000RRRRR lower 5 */ \ - "vshl.u8 q0, q0, #3 \n" /* B,R BBBBB000 upper 5 */ \ - "vshr.u8 q2, q0, #5 \n" /* B,R 00000BBB lower 3 */ \ - "vorr.u8 d0, d0, d4 \n" /* B */ \ - "vshr.u8 d4, d6, #6 \n" /* G 000000GG lower 2 */ \ - "vorr.u8 d2, d1, d5 \n" /* R */ \ - "vorr.u8 d1, d4, d6 \n" /* G */ - -void RGB565ToARGBRow_NEON(const uint8* src_rgb565, uint8* dst_argb, int pix) { - asm volatile ( - "vmov.u8 d3, #255 \n" // Alpha - ".p2align 2 \n" - "1: \n" - "vld1.8 {q0}, [%0]! \n" // load 8 RGB565 pixels. - "subs %2, %2, #8 \n" // 8 processed per loop. - RGB565TOARGB - "vst4.8 {d0, d1, d2, d3}, [%1]! \n" // store 8 pixels of ARGB. - "bgt 1b \n" - : "+r"(src_rgb565), // %0 - "+r"(dst_argb), // %1 - "+r"(pix) // %2 - : - : "cc", "memory", "q0", "q1", "q2", "q3" // Clobber List - ); -} - -#define ARGB1555TOARGB \ - "vshrn.u16 d7, q0, #8 \n" /* A Arrrrrxx */ \ - "vshr.u8 d6, d7, #2 \n" /* R xxxRRRRR */ \ - "vshrn.u16 d5, q0, #5 \n" /* G xxxGGGGG */ \ - "vmovn.u16 d4, q0 \n" /* B xxxBBBBB */ \ - "vshr.u8 d7, d7, #7 \n" /* A 0000000A */ \ - "vneg.s8 d7, d7 \n" /* A AAAAAAAA upper 8 */ \ - "vshl.u8 d6, d6, #3 \n" /* R RRRRR000 upper 5 */ \ - "vshr.u8 q1, q3, #5 \n" /* R,A 00000RRR lower 3 */ \ - "vshl.u8 q0, q2, #3 \n" /* B,G BBBBB000 upper 5 */ \ - "vshr.u8 q2, q0, #5 \n" /* B,G 00000BBB lower 3 */ \ - "vorr.u8 q1, q1, q3 \n" /* R,A */ \ - "vorr.u8 q0, q0, q2 \n" /* B,G */ \ - -// RGB555TOARGB is same as ARGB1555TOARGB but ignores alpha. 
-#define RGB555TOARGB \ - "vshrn.u16 d6, q0, #5 \n" /* G xxxGGGGG */ \ - "vuzp.u8 d0, d1 \n" /* d0 xxxBBBBB xRRRRRxx */ \ - "vshl.u8 d6, d6, #3 \n" /* G GGGGG000 upper 5 */ \ - "vshr.u8 d1, d1, #2 \n" /* R 00xRRRRR lower 5 */ \ - "vshl.u8 q0, q0, #3 \n" /* B,R BBBBB000 upper 5 */ \ - "vshr.u8 q2, q0, #5 \n" /* B,R 00000BBB lower 3 */ \ - "vorr.u8 d0, d0, d4 \n" /* B */ \ - "vshr.u8 d4, d6, #5 \n" /* G 00000GGG lower 3 */ \ - "vorr.u8 d2, d1, d5 \n" /* R */ \ - "vorr.u8 d1, d4, d6 \n" /* G */ - -void ARGB1555ToARGBRow_NEON(const uint8* src_argb1555, uint8* dst_argb, - int pix) { - asm volatile ( - "vmov.u8 d3, #255 \n" // Alpha - ".p2align 2 \n" - "1: \n" - "vld1.8 {q0}, [%0]! \n" // load 8 ARGB1555 pixels. - "subs %2, %2, #8 \n" // 8 processed per loop. - ARGB1555TOARGB - "vst4.8 {d0, d1, d2, d3}, [%1]! \n" // store 8 pixels of ARGB. - "bgt 1b \n" - : "+r"(src_argb1555), // %0 - "+r"(dst_argb), // %1 - "+r"(pix) // %2 - : - : "cc", "memory", "q0", "q1", "q2", "q3" // Clobber List - ); -} - -#define ARGB4444TOARGB \ - "vuzp.u8 d0, d1 \n" /* d0 BG, d1 RA */ \ - "vshl.u8 q2, q0, #4 \n" /* B,R BBBB0000 */ \ - "vshr.u8 q1, q0, #4 \n" /* G,A 0000GGGG */ \ - "vshr.u8 q0, q2, #4 \n" /* B,R 0000BBBB */ \ - "vorr.u8 q0, q0, q2 \n" /* B,R BBBBBBBB */ \ - "vshl.u8 q2, q1, #4 \n" /* G,A GGGG0000 */ \ - "vorr.u8 q1, q1, q2 \n" /* G,A GGGGGGGG */ \ - "vswp.u8 d1, d2 \n" /* B,R,G,A -> B,G,R,A */ - -void ARGB4444ToARGBRow_NEON(const uint8* src_argb4444, uint8* dst_argb, - int pix) { - asm volatile ( - "vmov.u8 d3, #255 \n" // Alpha - ".p2align 2 \n" - "1: \n" - "vld1.8 {q0}, [%0]! \n" // load 8 ARGB4444 pixels. - "subs %2, %2, #8 \n" // 8 processed per loop. - ARGB4444TOARGB - "vst4.8 {d0, d1, d2, d3}, [%1]! \n" // store 8 pixels of ARGB. 
- "bgt 1b \n" - : "+r"(src_argb4444), // %0 - "+r"(dst_argb), // %1 - "+r"(pix) // %2 - : - : "cc", "memory", "q0", "q1", "q2" // Clobber List - ); -} - -void ARGBToRGB24Row_NEON(const uint8* src_argb, uint8* dst_rgb24, int pix) { - asm volatile ( - ".p2align 2 \n" - "1: \n" - "vld4.8 {d1, d2, d3, d4}, [%0]! \n" // load 8 pixels of ARGB. - "subs %2, %2, #8 \n" // 8 processed per loop. - "vst3.8 {d1, d2, d3}, [%1]! \n" // store 8 pixels of RGB24. - "bgt 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_rgb24), // %1 - "+r"(pix) // %2 - : - : "cc", "memory", "d1", "d2", "d3", "d4" // Clobber List - ); -} - -void ARGBToRAWRow_NEON(const uint8* src_argb, uint8* dst_raw, int pix) { - asm volatile ( - ".p2align 2 \n" - "1: \n" - "vld4.8 {d1, d2, d3, d4}, [%0]! \n" // load 8 pixels of ARGB. - "subs %2, %2, #8 \n" // 8 processed per loop. - "vswp.u8 d1, d3 \n" // swap R, B - "vst3.8 {d1, d2, d3}, [%1]! \n" // store 8 pixels of RAW. - "bgt 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_raw), // %1 - "+r"(pix) // %2 - : - : "cc", "memory", "d1", "d2", "d3", "d4" // Clobber List - ); -} - -void YUY2ToYRow_NEON(const uint8* src_yuy2, uint8* dst_y, int pix) { - asm volatile ( - ".p2align 2 \n" - "1: \n" - "vld2.8 {q0, q1}, [%0]! \n" // load 16 pixels of YUY2. - "subs %2, %2, #16 \n" // 16 processed per loop. - "vst1.8 {q0}, [%1]! \n" // store 16 pixels of Y. - "bgt 1b \n" - : "+r"(src_yuy2), // %0 - "+r"(dst_y), // %1 - "+r"(pix) // %2 - : - : "cc", "memory", "q0", "q1" // Clobber List - ); -} - -void UYVYToYRow_NEON(const uint8* src_uyvy, uint8* dst_y, int pix) { - asm volatile ( - ".p2align 2 \n" - "1: \n" - "vld2.8 {q0, q1}, [%0]! \n" // load 16 pixels of UYVY. - "subs %2, %2, #16 \n" // 16 processed per loop. - "vst1.8 {q1}, [%1]! \n" // store 16 pixels of Y. 
- "bgt 1b \n" - : "+r"(src_uyvy), // %0 - "+r"(dst_y), // %1 - "+r"(pix) // %2 - : - : "cc", "memory", "q0", "q1" // Clobber List - ); -} - -void YUY2ToUV422Row_NEON(const uint8* src_yuy2, uint8* dst_u, uint8* dst_v, - int pix) { - asm volatile ( - ".p2align 2 \n" - "1: \n" - "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 16 pixels of YUY2. - "subs %3, %3, #16 \n" // 16 pixels = 8 UVs. - "vst1.8 {d1}, [%1]! \n" // store 8 U. - "vst1.8 {d3}, [%2]! \n" // store 8 V. - "bgt 1b \n" - : "+r"(src_yuy2), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+r"(pix) // %3 - : - : "cc", "memory", "d0", "d1", "d2", "d3" // Clobber List - ); -} - -void UYVYToUV422Row_NEON(const uint8* src_uyvy, uint8* dst_u, uint8* dst_v, - int pix) { - asm volatile ( - ".p2align 2 \n" - "1: \n" - "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 16 pixels of UYVY. - "subs %3, %3, #16 \n" // 16 pixels = 8 UVs. - "vst1.8 {d0}, [%1]! \n" // store 8 U. - "vst1.8 {d2}, [%2]! \n" // store 8 V. - "bgt 1b \n" - : "+r"(src_uyvy), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+r"(pix) // %3 - : - : "cc", "memory", "d0", "d1", "d2", "d3" // Clobber List - ); -} - -void YUY2ToUVRow_NEON(const uint8* src_yuy2, int stride_yuy2, - uint8* dst_u, uint8* dst_v, int pix) { - asm volatile ( - "add %1, %0, %1 \n" // stride + src_yuy2 - ".p2align 2 \n" - "1: \n" - "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 16 pixels of YUY2. - "subs %4, %4, #16 \n" // 16 pixels = 8 UVs. - "vld4.8 {d4, d5, d6, d7}, [%1]! \n" // load next row YUY2. - "vrhadd.u8 d1, d1, d5 \n" // average rows of U - "vrhadd.u8 d3, d3, d7 \n" // average rows of V - "vst1.8 {d1}, [%2]! \n" // store 8 U. - "vst1.8 {d3}, [%3]! \n" // store 8 V. 
- "bgt 1b \n" - : "+r"(src_yuy2), // %0 - "+r"(stride_yuy2), // %1 - "+r"(dst_u), // %2 - "+r"(dst_v), // %3 - "+r"(pix) // %4 - : - : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7" // Clobber List - ); -} - -void UYVYToUVRow_NEON(const uint8* src_uyvy, int stride_uyvy, - uint8* dst_u, uint8* dst_v, int pix) { - asm volatile ( - "add %1, %0, %1 \n" // stride + src_uyvy - ".p2align 2 \n" - "1: \n" - "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 16 pixels of UYVY. - "subs %4, %4, #16 \n" // 16 pixels = 8 UVs. - "vld4.8 {d4, d5, d6, d7}, [%1]! \n" // load next row UYVY. - "vrhadd.u8 d0, d0, d4 \n" // average rows of U - "vrhadd.u8 d2, d2, d6 \n" // average rows of V - "vst1.8 {d0}, [%2]! \n" // store 8 U. - "vst1.8 {d2}, [%3]! \n" // store 8 V. - "bgt 1b \n" - : "+r"(src_uyvy), // %0 - "+r"(stride_uyvy), // %1 - "+r"(dst_u), // %2 - "+r"(dst_v), // %3 - "+r"(pix) // %4 - : - : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7" // Clobber List - ); -} - -void HalfRow_NEON(const uint8* src_uv, int src_uv_stride, - uint8* dst_uv, int pix) { - asm volatile ( - // change the stride to row 2 pointer - "add %1, %0 \n" - "1: \n" - "vld1.8 {q0}, [%0]! \n" // load row 1 16 pixels. - "subs %3, %3, #16 \n" // 16 processed per loop - "vld1.8 {q1}, [%1]! \n" // load row 2 16 pixels. - "vrhadd.u8 q0, q1 \n" // average row 1 and 2 - "vst1.8 {q0}, [%2]! \n" - "bgt 1b \n" - : "+r"(src_uv), // %0 - "+r"(src_uv_stride), // %1 - "+r"(dst_uv), // %2 - "+r"(pix) // %3 - : - : "cc", "memory", "q0", "q1" // Clobber List - ); -} - -// Select 2 channels from ARGB on alternating pixels. e.g. BGBGBGBG -void ARGBToBayerRow_NEON(const uint8* src_argb, uint8* dst_bayer, - uint32 selector, int pix) { - asm volatile ( - "vmov.u32 d6[0], %3 \n" // selector - "1: \n" - "vld1.8 {q0, q1}, [%0]! \n" // load row 8 pixels. 
- "subs %2, %2, #8 \n" // 8 processed per loop - "vtbl.8 d4, {d0, d1}, d6 \n" // look up 4 pixels - "vtbl.8 d5, {d2, d3}, d6 \n" // look up 4 pixels - "vtrn.u32 d4, d5 \n" // combine 8 pixels - "vst1.8 {d4}, [%1]! \n" // store 8. - "bgt 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_bayer), // %1 - "+r"(pix) // %2 - : "r"(selector) // %3 - : "cc", "memory", "q0", "q1", "q2", "q3" // Clobber List - ); -} - -// Select G channels from ARGB. e.g. GGGGGGGG -void ARGBToBayerGGRow_NEON(const uint8* src_argb, uint8* dst_bayer, - uint32 /*selector*/, int pix) { - asm volatile ( - "1: \n" - "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load row 8 pixels. - "subs %2, %2, #8 \n" // 8 processed per loop - "vst1.8 {d1}, [%1]! \n" // store 8 G's. - "bgt 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_bayer), // %1 - "+r"(pix) // %2 - : - : "cc", "memory", "q0", "q1" // Clobber List - ); -} - -// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA. -void ARGBShuffleRow_NEON(const uint8* src_argb, uint8* dst_argb, - const uint8* shuffler, int pix) { - asm volatile ( - "vld1.8 {q2}, [%3] \n" // shuffler - "1: \n" - "vld1.8 {q0}, [%0]! \n" // load 4 pixels. - "subs %2, %2, #4 \n" // 4 processed per loop - "vtbl.8 d2, {d0, d1}, d4 \n" // look up 2 first pixels - "vtbl.8 d3, {d0, d1}, d5 \n" // look up 2 next pixels - "vst1.8 {q1}, [%1]! \n" // store 4. - "bgt 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_argb), // %1 - "+r"(pix) // %2 - : "r"(shuffler) // %3 - : "cc", "memory", "q0", "q1", "q2" // Clobber List - ); -} - -void I422ToYUY2Row_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_yuy2, int width) { - asm volatile ( - ".p2align 2 \n" - "1: \n" - "vld2.8 {d0, d2}, [%0]! \n" // load 16 Ys - "vld1.8 {d1}, [%1]! \n" // load 8 Us - "vld1.8 {d3}, [%2]! \n" // load 8 Vs - "subs %4, %4, #16 \n" // 16 pixels - "vst4.8 {d0, d1, d2, d3}, [%3]! \n" // Store 8 YUY2/16 pixels. 
- "bgt 1b \n" - : "+r"(src_y), // %0 - "+r"(src_u), // %1 - "+r"(src_v), // %2 - "+r"(dst_yuy2), // %3 - "+r"(width) // %4 - : - : "cc", "memory", "d0", "d1", "d2", "d3" - ); -} - -void I422ToUYVYRow_NEON(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_uyvy, int width) { - asm volatile ( - ".p2align 2 \n" - "1: \n" - "vld2.8 {d1, d3}, [%0]! \n" // load 16 Ys - "vld1.8 {d0}, [%1]! \n" // load 8 Us - "vld1.8 {d2}, [%2]! \n" // load 8 Vs - "subs %4, %4, #16 \n" // 16 pixels - "vst4.8 {d0, d1, d2, d3}, [%3]! \n" // Store 8 UYVY/16 pixels. - "bgt 1b \n" - : "+r"(src_y), // %0 - "+r"(src_u), // %1 - "+r"(src_v), // %2 - "+r"(dst_uyvy), // %3 - "+r"(width) // %4 - : - : "cc", "memory", "d0", "d1", "d2", "d3" - ); -} - -void ARGBToRGB565Row_NEON(const uint8* src_argb, uint8* dst_rgb565, int pix) { - asm volatile ( - ".p2align 2 \n" - "1: \n" - "vld4.8 {d20, d21, d22, d23}, [%0]! \n" // load 8 pixels of ARGB. - "subs %2, %2, #8 \n" // 8 processed per loop. - ARGBTORGB565 - "vst1.8 {q0}, [%1]! \n" // store 8 pixels RGB565. - "bgt 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_rgb565), // %1 - "+r"(pix) // %2 - : - : "cc", "memory", "q0", "q8", "q9", "q10", "q11" - ); -} - -void ARGBToARGB1555Row_NEON(const uint8* src_argb, uint8* dst_argb1555, - int pix) { - asm volatile ( - ".p2align 2 \n" - "1: \n" - "vld4.8 {d20, d21, d22, d23}, [%0]! \n" // load 8 pixels of ARGB. - "subs %2, %2, #8 \n" // 8 processed per loop. - ARGBTOARGB1555 - "vst1.8 {q0}, [%1]! \n" // store 8 pixels ARGB1555. - "bgt 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_argb1555), // %1 - "+r"(pix) // %2 - : - : "cc", "memory", "q0", "q8", "q9", "q10", "q11" - ); -} - -void ARGBToARGB4444Row_NEON(const uint8* src_argb, uint8* dst_argb4444, - int pix) { - asm volatile ( - "vmov.u8 d4, #0x0f \n" // bits to clear with vbic. - ".p2align 2 \n" - "1: \n" - "vld4.8 {d20, d21, d22, d23}, [%0]! \n" // load 8 pixels of ARGB. - "subs %2, %2, #8 \n" // 8 processed per loop. 
- ARGBTOARGB4444 - "vst1.8 {q0}, [%1]! \n" // store 8 pixels ARGB4444. - "bgt 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_argb4444), // %1 - "+r"(pix) // %2 - : - : "cc", "memory", "q0", "q8", "q9", "q10", "q11" - ); -} - -void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int pix) { - asm volatile ( - "vmov.u8 d24, #13 \n" // B * 0.1016 coefficient - "vmov.u8 d25, #65 \n" // G * 0.5078 coefficient - "vmov.u8 d26, #33 \n" // R * 0.2578 coefficient - "vmov.u8 d27, #16 \n" // Add 16 constant - ".p2align 2 \n" - "1: \n" - "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels. - "subs %2, %2, #8 \n" // 8 processed per loop. - "vmull.u8 q2, d0, d24 \n" // B - "vmlal.u8 q2, d1, d25 \n" // G - "vmlal.u8 q2, d2, d26 \n" // R - "vqrshrun.s16 d0, q2, #7 \n" // 16 bit to 8 bit Y - "vqadd.u8 d0, d27 \n" - "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. - "bgt 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_y), // %1 - "+r"(pix) // %2 - : - : "cc", "memory", "q0", "q1", "q2", "q12", "q13" - ); -} - -void ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int pix) { - asm volatile ( - "vmov.u8 d24, #15 \n" // B * 0.11400 coefficient - "vmov.u8 d25, #75 \n" // G * 0.58700 coefficient - "vmov.u8 d26, #38 \n" // R * 0.29900 coefficient - ".p2align 2 \n" - "1: \n" - "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels. - "subs %2, %2, #8 \n" // 8 processed per loop. - "vmull.u8 q2, d0, d24 \n" // B - "vmlal.u8 q2, d1, d25 \n" // G - "vmlal.u8 q2, d2, d26 \n" // R - "vqrshrun.s16 d0, q2, #7 \n" // 15 bit to 8 bit Y - "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. - "bgt 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_y), // %1 - "+r"(pix) // %2 - : - : "cc", "memory", "q0", "q1", "q2", "q12", "q13" - ); -} - -// 8x1 pixels. 
-void ARGBToUV444Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, - int pix) { - asm volatile ( - "vmov.u8 d24, #112 \n" // UB / VR 0.875 coefficient - "vmov.u8 d25, #74 \n" // UG -0.5781 coefficient - "vmov.u8 d26, #38 \n" // UR -0.2969 coefficient - "vmov.u8 d27, #18 \n" // VB -0.1406 coefficient - "vmov.u8 d28, #94 \n" // VG -0.7344 coefficient - "vmov.u16 q15, #0x8080 \n" // 128.5 - ".p2align 2 \n" - "1: \n" - "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels. - "subs %3, %3, #8 \n" // 8 processed per loop. - "vmull.u8 q2, d0, d24 \n" // B - "vmlsl.u8 q2, d1, d25 \n" // G - "vmlsl.u8 q2, d2, d26 \n" // R - "vadd.u16 q2, q2, q15 \n" // +128 -> unsigned - - "vmull.u8 q3, d2, d24 \n" // R - "vmlsl.u8 q3, d1, d28 \n" // G - "vmlsl.u8 q3, d0, d27 \n" // B - "vadd.u16 q3, q3, q15 \n" // +128 -> unsigned - - "vqshrn.u16 d0, q2, #8 \n" // 16 bit to 8 bit U - "vqshrn.u16 d1, q3, #8 \n" // 16 bit to 8 bit V - - "vst1.8 {d0}, [%1]! \n" // store 8 pixels U. - "vst1.8 {d1}, [%2]! \n" // store 8 pixels V. - "bgt 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+r"(pix) // %3 - : - : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q12", "q13", "q14", "q15" - ); -} - -// 16x1 pixels -> 8x1. pix is number of argb pixels. e.g. 16. -void ARGBToUV422Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, - int pix) { - asm volatile ( - "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient - "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient - "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient - "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient - "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient - "vmov.u16 q15, #0x8080 \n" // 128.5 - ".p2align 2 \n" - "1: \n" - "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels. - "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB pixels. - - "vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts. - "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts. 
- "vpaddl.u8 q2, q2 \n" // R 16 bytes -> 8 shorts. - - "subs %3, %3, #16 \n" // 16 processed per loop. - "vmul.s16 q8, q0, q10 \n" // B - "vmls.s16 q8, q1, q11 \n" // G - "vmls.s16 q8, q2, q12 \n" // R - "vadd.u16 q8, q8, q15 \n" // +128 -> unsigned - - "vmul.s16 q9, q2, q10 \n" // R - "vmls.s16 q9, q1, q14 \n" // G - "vmls.s16 q9, q0, q13 \n" // B - "vadd.u16 q9, q9, q15 \n" // +128 -> unsigned - - "vqshrn.u16 d0, q8, #8 \n" // 16 bit to 8 bit U - "vqshrn.u16 d1, q9, #8 \n" // 16 bit to 8 bit V - - "vst1.8 {d0}, [%1]! \n" // store 8 pixels U. - "vst1.8 {d1}, [%2]! \n" // store 8 pixels V. - "bgt 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+r"(pix) // %3 - : - : "cc", "memory", "q0", "q1", "q2", "q3", - "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" - ); -} - -// 32x1 pixels -> 8x1. pix is number of argb pixels. e.g. 32. -void ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, - int pix) { - asm volatile ( - "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient - "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient - "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient - "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient - "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient - "vmov.u16 q15, #0x8080 \n" // 128.5 - ".p2align 2 \n" - "1: \n" - "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels. - "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB pixels. - "vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts. - "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts. - "vpaddl.u8 q2, q2 \n" // R 16 bytes -> 8 shorts. - "vld4.8 {d8, d10, d12, d14}, [%0]! \n" // load 8 more ARGB pixels. - "vld4.8 {d9, d11, d13, d15}, [%0]! \n" // load last 8 ARGB pixels. - "vpaddl.u8 q4, q4 \n" // B 16 bytes -> 8 shorts. - "vpaddl.u8 q5, q5 \n" // G 16 bytes -> 8 shorts. - "vpaddl.u8 q6, q6 \n" // R 16 bytes -> 8 shorts. - - "vpadd.u16 d0, d0, d1 \n" // B 16 shorts -> 8 shorts. 
- "vpadd.u16 d1, d8, d9 \n" // B - "vpadd.u16 d2, d2, d3 \n" // G 16 shorts -> 8 shorts. - "vpadd.u16 d3, d10, d11 \n" // G - "vpadd.u16 d4, d4, d5 \n" // R 16 shorts -> 8 shorts. - "vpadd.u16 d5, d12, d13 \n" // R - - "vrshr.u16 q0, q0, #1 \n" // 2x average - "vrshr.u16 q1, q1, #1 \n" - "vrshr.u16 q2, q2, #1 \n" - - "subs %3, %3, #32 \n" // 32 processed per loop. - "vmul.s16 q8, q0, q10 \n" // B - "vmls.s16 q8, q1, q11 \n" // G - "vmls.s16 q8, q2, q12 \n" // R - "vadd.u16 q8, q8, q15 \n" // +128 -> unsigned - "vmul.s16 q9, q2, q10 \n" // R - "vmls.s16 q9, q1, q14 \n" // G - "vmls.s16 q9, q0, q13 \n" // B - "vadd.u16 q9, q9, q15 \n" // +128 -> unsigned - "vqshrn.u16 d0, q8, #8 \n" // 16 bit to 8 bit U - "vqshrn.u16 d1, q9, #8 \n" // 16 bit to 8 bit V - "vst1.8 {d0}, [%1]! \n" // store 8 pixels U. - "vst1.8 {d1}, [%2]! \n" // store 8 pixels V. - "bgt 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+r"(pix) // %3 - : - : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", - "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" - ); -} - -// 16x2 pixels -> 8x1. pix is number of argb pixels. e.g. 16. -#define RGBTOUV(QB, QG, QR) \ - "vmul.s16 q8, " #QB ", q10 \n" /* B */ \ - "vmls.s16 q8, " #QG ", q11 \n" /* G */ \ - "vmls.s16 q8, " #QR ", q12 \n" /* R */ \ - "vadd.u16 q8, q8, q15 \n" /* +128 -> unsigned */ \ - "vmul.s16 q9, " #QR ", q10 \n" /* R */ \ - "vmls.s16 q9, " #QG ", q14 \n" /* G */ \ - "vmls.s16 q9, " #QB ", q13 \n" /* B */ \ - "vadd.u16 q9, q9, q15 \n" /* +128 -> unsigned */ \ - "vqshrn.u16 d0, q8, #8 \n" /* 16 bit to 8 bit U */ \ - "vqshrn.u16 d1, q9, #8 \n" /* 16 bit to 8 bit V */ - -// TODO(fbarchard): Consider vhadd vertical, then vpaddl horizontal, avoid shr. 
-void ARGBToUVRow_NEON(const uint8* src_argb, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int pix) { - asm volatile ( - "add %1, %0, %1 \n" // src_stride + src_argb - "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient - "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient - "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient - "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient - "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient - "vmov.u16 q15, #0x8080 \n" // 128.5 - ".p2align 2 \n" - "1: \n" - "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels. - "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB pixels. - "vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts. - "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts. - "vpaddl.u8 q2, q2 \n" // R 16 bytes -> 8 shorts. - "vld4.8 {d8, d10, d12, d14}, [%1]! \n" // load 8 more ARGB pixels. - "vld4.8 {d9, d11, d13, d15}, [%1]! \n" // load last 8 ARGB pixels. - "vpadal.u8 q0, q4 \n" // B 16 bytes -> 8 shorts. - "vpadal.u8 q1, q5 \n" // G 16 bytes -> 8 shorts. - "vpadal.u8 q2, q6 \n" // R 16 bytes -> 8 shorts. - - "vrshr.u16 q0, q0, #1 \n" // 2x average - "vrshr.u16 q1, q1, #1 \n" - "vrshr.u16 q2, q2, #1 \n" - - "subs %4, %4, #16 \n" // 32 processed per loop. - RGBTOUV(q0, q1, q2) - "vst1.8 {d0}, [%2]! \n" // store 8 pixels U. - "vst1.8 {d1}, [%3]! \n" // store 8 pixels V. - "bgt 1b \n" - : "+r"(src_argb), // %0 - "+r"(src_stride_argb), // %1 - "+r"(dst_u), // %2 - "+r"(dst_v), // %3 - "+r"(pix) // %4 - : - : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", - "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" - ); -} - -// TODO(fbarchard): Subsample match C code. 
-void ARGBToUVJRow_NEON(const uint8* src_argb, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int pix) { - asm volatile ( - "add %1, %0, %1 \n" // src_stride + src_argb - "vmov.s16 q10, #127 / 2 \n" // UB / VR 0.500 coefficient - "vmov.s16 q11, #84 / 2 \n" // UG -0.33126 coefficient - "vmov.s16 q12, #43 / 2 \n" // UR -0.16874 coefficient - "vmov.s16 q13, #20 / 2 \n" // VB -0.08131 coefficient - "vmov.s16 q14, #107 / 2 \n" // VG -0.41869 coefficient - "vmov.u16 q15, #0x8080 \n" // 128.5 - ".p2align 2 \n" - "1: \n" - "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels. - "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB pixels. - "vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts. - "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts. - "vpaddl.u8 q2, q2 \n" // R 16 bytes -> 8 shorts. - "vld4.8 {d8, d10, d12, d14}, [%1]! \n" // load 8 more ARGB pixels. - "vld4.8 {d9, d11, d13, d15}, [%1]! \n" // load last 8 ARGB pixels. - "vpadal.u8 q0, q4 \n" // B 16 bytes -> 8 shorts. - "vpadal.u8 q1, q5 \n" // G 16 bytes -> 8 shorts. - "vpadal.u8 q2, q6 \n" // R 16 bytes -> 8 shorts. - - "vrshr.u16 q0, q0, #1 \n" // 2x average - "vrshr.u16 q1, q1, #1 \n" - "vrshr.u16 q2, q2, #1 \n" - - "subs %4, %4, #16 \n" // 32 processed per loop. - RGBTOUV(q0, q1, q2) - "vst1.8 {d0}, [%2]! \n" // store 8 pixels U. - "vst1.8 {d1}, [%3]! \n" // store 8 pixels V. 
- "bgt 1b \n" - : "+r"(src_argb), // %0 - "+r"(src_stride_argb), // %1 - "+r"(dst_u), // %2 - "+r"(dst_v), // %3 - "+r"(pix) // %4 - : - : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", - "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" - ); -} - -void BGRAToUVRow_NEON(const uint8* src_bgra, int src_stride_bgra, - uint8* dst_u, uint8* dst_v, int pix) { - asm volatile ( - "add %1, %0, %1 \n" // src_stride + src_bgra - "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient - "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient - "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient - "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient - "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient - "vmov.u16 q15, #0x8080 \n" // 128.5 - ".p2align 2 \n" - "1: \n" - "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 BGRA pixels. - "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 BGRA pixels. - "vpaddl.u8 q3, q3 \n" // B 16 bytes -> 8 shorts. - "vpaddl.u8 q2, q2 \n" // G 16 bytes -> 8 shorts. - "vpaddl.u8 q1, q1 \n" // R 16 bytes -> 8 shorts. - "vld4.8 {d8, d10, d12, d14}, [%1]! \n" // load 8 more BGRA pixels. - "vld4.8 {d9, d11, d13, d15}, [%1]! \n" // load last 8 BGRA pixels. - "vpadal.u8 q3, q7 \n" // B 16 bytes -> 8 shorts. - "vpadal.u8 q2, q6 \n" // G 16 bytes -> 8 shorts. - "vpadal.u8 q1, q5 \n" // R 16 bytes -> 8 shorts. - - "vrshr.u16 q1, q1, #1 \n" // 2x average - "vrshr.u16 q2, q2, #1 \n" - "vrshr.u16 q3, q3, #1 \n" - - "subs %4, %4, #16 \n" // 32 processed per loop. - RGBTOUV(q3, q2, q1) - "vst1.8 {d0}, [%2]! \n" // store 8 pixels U. - "vst1.8 {d1}, [%3]! \n" // store 8 pixels V. 
- "bgt 1b \n" - : "+r"(src_bgra), // %0 - "+r"(src_stride_bgra), // %1 - "+r"(dst_u), // %2 - "+r"(dst_v), // %3 - "+r"(pix) // %4 - : - : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", - "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" - ); -} - -void ABGRToUVRow_NEON(const uint8* src_abgr, int src_stride_abgr, - uint8* dst_u, uint8* dst_v, int pix) { - asm volatile ( - "add %1, %0, %1 \n" // src_stride + src_abgr - "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient - "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient - "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient - "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient - "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient - "vmov.u16 q15, #0x8080 \n" // 128.5 - ".p2align 2 \n" - "1: \n" - "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ABGR pixels. - "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ABGR pixels. - "vpaddl.u8 q2, q2 \n" // B 16 bytes -> 8 shorts. - "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts. - "vpaddl.u8 q0, q0 \n" // R 16 bytes -> 8 shorts. - "vld4.8 {d8, d10, d12, d14}, [%1]! \n" // load 8 more ABGR pixels. - "vld4.8 {d9, d11, d13, d15}, [%1]! \n" // load last 8 ABGR pixels. - "vpadal.u8 q2, q6 \n" // B 16 bytes -> 8 shorts. - "vpadal.u8 q1, q5 \n" // G 16 bytes -> 8 shorts. - "vpadal.u8 q0, q4 \n" // R 16 bytes -> 8 shorts. - - "vrshr.u16 q0, q0, #1 \n" // 2x average - "vrshr.u16 q1, q1, #1 \n" - "vrshr.u16 q2, q2, #1 \n" - - "subs %4, %4, #16 \n" // 32 processed per loop. - RGBTOUV(q2, q1, q0) - "vst1.8 {d0}, [%2]! \n" // store 8 pixels U. - "vst1.8 {d1}, [%3]! \n" // store 8 pixels V. 
- "bgt 1b \n" - : "+r"(src_abgr), // %0 - "+r"(src_stride_abgr), // %1 - "+r"(dst_u), // %2 - "+r"(dst_v), // %3 - "+r"(pix) // %4 - : - : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", - "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" - ); -} - -void RGBAToUVRow_NEON(const uint8* src_rgba, int src_stride_rgba, - uint8* dst_u, uint8* dst_v, int pix) { - asm volatile ( - "add %1, %0, %1 \n" // src_stride + src_rgba - "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient - "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient - "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient - "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient - "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient - "vmov.u16 q15, #0x8080 \n" // 128.5 - ".p2align 2 \n" - "1: \n" - "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 RGBA pixels. - "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 RGBA pixels. - "vpaddl.u8 q0, q1 \n" // B 16 bytes -> 8 shorts. - "vpaddl.u8 q1, q2 \n" // G 16 bytes -> 8 shorts. - "vpaddl.u8 q2, q3 \n" // R 16 bytes -> 8 shorts. - "vld4.8 {d8, d10, d12, d14}, [%1]! \n" // load 8 more RGBA pixels. - "vld4.8 {d9, d11, d13, d15}, [%1]! \n" // load last 8 RGBA pixels. - "vpadal.u8 q0, q5 \n" // B 16 bytes -> 8 shorts. - "vpadal.u8 q1, q6 \n" // G 16 bytes -> 8 shorts. - "vpadal.u8 q2, q7 \n" // R 16 bytes -> 8 shorts. - - "vrshr.u16 q0, q0, #1 \n" // 2x average - "vrshr.u16 q1, q1, #1 \n" - "vrshr.u16 q2, q2, #1 \n" - - "subs %4, %4, #16 \n" // 32 processed per loop. - RGBTOUV(q0, q1, q2) - "vst1.8 {d0}, [%2]! \n" // store 8 pixels U. - "vst1.8 {d1}, [%3]! \n" // store 8 pixels V. 
- "bgt 1b \n" - : "+r"(src_rgba), // %0 - "+r"(src_stride_rgba), // %1 - "+r"(dst_u), // %2 - "+r"(dst_v), // %3 - "+r"(pix) // %4 - : - : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", - "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" - ); -} - -void RGB24ToUVRow_NEON(const uint8* src_rgb24, int src_stride_rgb24, - uint8* dst_u, uint8* dst_v, int pix) { - asm volatile ( - "add %1, %0, %1 \n" // src_stride + src_rgb24 - "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient - "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient - "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient - "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient - "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient - "vmov.u16 q15, #0x8080 \n" // 128.5 - ".p2align 2 \n" - "1: \n" - "vld3.8 {d0, d2, d4}, [%0]! \n" // load 8 RGB24 pixels. - "vld3.8 {d1, d3, d5}, [%0]! \n" // load next 8 RGB24 pixels. - "vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts. - "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts. - "vpaddl.u8 q2, q2 \n" // R 16 bytes -> 8 shorts. - "vld3.8 {d8, d10, d12}, [%1]! \n" // load 8 more RGB24 pixels. - "vld3.8 {d9, d11, d13}, [%1]! \n" // load last 8 RGB24 pixels. - "vpadal.u8 q0, q4 \n" // B 16 bytes -> 8 shorts. - "vpadal.u8 q1, q5 \n" // G 16 bytes -> 8 shorts. - "vpadal.u8 q2, q6 \n" // R 16 bytes -> 8 shorts. - - "vrshr.u16 q0, q0, #1 \n" // 2x average - "vrshr.u16 q1, q1, #1 \n" - "vrshr.u16 q2, q2, #1 \n" - - "subs %4, %4, #16 \n" // 32 processed per loop. - RGBTOUV(q0, q1, q2) - "vst1.8 {d0}, [%2]! \n" // store 8 pixels U. - "vst1.8 {d1}, [%3]! \n" // store 8 pixels V. 
- "bgt 1b \n" - : "+r"(src_rgb24), // %0 - "+r"(src_stride_rgb24), // %1 - "+r"(dst_u), // %2 - "+r"(dst_v), // %3 - "+r"(pix) // %4 - : - : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", - "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" - ); -} - -void RAWToUVRow_NEON(const uint8* src_raw, int src_stride_raw, - uint8* dst_u, uint8* dst_v, int pix) { - asm volatile ( - "add %1, %0, %1 \n" // src_stride + src_raw - "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient - "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient - "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient - "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient - "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient - "vmov.u16 q15, #0x8080 \n" // 128.5 - ".p2align 2 \n" - "1: \n" - "vld3.8 {d0, d2, d4}, [%0]! \n" // load 8 RAW pixels. - "vld3.8 {d1, d3, d5}, [%0]! \n" // load next 8 RAW pixels. - "vpaddl.u8 q2, q2 \n" // B 16 bytes -> 8 shorts. - "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts. - "vpaddl.u8 q0, q0 \n" // R 16 bytes -> 8 shorts. - "vld3.8 {d8, d10, d12}, [%1]! \n" // load 8 more RAW pixels. - "vld3.8 {d9, d11, d13}, [%1]! \n" // load last 8 RAW pixels. - "vpadal.u8 q2, q6 \n" // B 16 bytes -> 8 shorts. - "vpadal.u8 q1, q5 \n" // G 16 bytes -> 8 shorts. - "vpadal.u8 q0, q4 \n" // R 16 bytes -> 8 shorts. - - "vrshr.u16 q0, q0, #1 \n" // 2x average - "vrshr.u16 q1, q1, #1 \n" - "vrshr.u16 q2, q2, #1 \n" - - "subs %4, %4, #16 \n" // 32 processed per loop. - RGBTOUV(q2, q1, q0) - "vst1.8 {d0}, [%2]! \n" // store 8 pixels U. - "vst1.8 {d1}, [%3]! \n" // store 8 pixels V. - "bgt 1b \n" - : "+r"(src_raw), // %0 - "+r"(src_stride_raw), // %1 - "+r"(dst_u), // %2 - "+r"(dst_v), // %3 - "+r"(pix) // %4 - : - : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", - "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" - ); -} - -// 16x2 pixels -> 8x1. pix is number of argb pixels. e.g. 16. 
-void RGB565ToUVRow_NEON(const uint8* src_rgb565, int src_stride_rgb565, - uint8* dst_u, uint8* dst_v, int pix) { - asm volatile ( - "add %1, %0, %1 \n" // src_stride + src_argb - "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient - "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient - "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient - "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient - "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient - "vmov.u16 q15, #0x8080 \n" // 128.5 - ".p2align 2 \n" - "1: \n" - "vld1.8 {q0}, [%0]! \n" // load 8 RGB565 pixels. - RGB565TOARGB - "vpaddl.u8 d8, d0 \n" // B 8 bytes -> 4 shorts. - "vpaddl.u8 d10, d1 \n" // G 8 bytes -> 4 shorts. - "vpaddl.u8 d12, d2 \n" // R 8 bytes -> 4 shorts. - "vld1.8 {q0}, [%0]! \n" // next 8 RGB565 pixels. - RGB565TOARGB - "vpaddl.u8 d9, d0 \n" // B 8 bytes -> 4 shorts. - "vpaddl.u8 d11, d1 \n" // G 8 bytes -> 4 shorts. - "vpaddl.u8 d13, d2 \n" // R 8 bytes -> 4 shorts. - - "vld1.8 {q0}, [%1]! \n" // load 8 RGB565 pixels. - RGB565TOARGB - "vpadal.u8 d8, d0 \n" // B 8 bytes -> 4 shorts. - "vpadal.u8 d10, d1 \n" // G 8 bytes -> 4 shorts. - "vpadal.u8 d12, d2 \n" // R 8 bytes -> 4 shorts. - "vld1.8 {q0}, [%1]! \n" // next 8 RGB565 pixels. - RGB565TOARGB - "vpadal.u8 d9, d0 \n" // B 8 bytes -> 4 shorts. - "vpadal.u8 d11, d1 \n" // G 8 bytes -> 4 shorts. - "vpadal.u8 d13, d2 \n" // R 8 bytes -> 4 shorts. - - "vrshr.u16 q4, q4, #1 \n" // 2x average - "vrshr.u16 q5, q5, #1 \n" - "vrshr.u16 q6, q6, #1 \n" - - "subs %4, %4, #16 \n" // 16 processed per loop. - "vmul.s16 q8, q4, q10 \n" // B - "vmls.s16 q8, q5, q11 \n" // G - "vmls.s16 q8, q6, q12 \n" // R - "vadd.u16 q8, q8, q15 \n" // +128 -> unsigned - "vmul.s16 q9, q6, q10 \n" // R - "vmls.s16 q9, q5, q14 \n" // G - "vmls.s16 q9, q4, q13 \n" // B - "vadd.u16 q9, q9, q15 \n" // +128 -> unsigned - "vqshrn.u16 d0, q8, #8 \n" // 16 bit to 8 bit U - "vqshrn.u16 d1, q9, #8 \n" // 16 bit to 8 bit V - "vst1.8 {d0}, [%2]! \n" // store 8 pixels U. 
- "vst1.8 {d1}, [%3]! \n" // store 8 pixels V. - "bgt 1b \n" - : "+r"(src_rgb565), // %0 - "+r"(src_stride_rgb565), // %1 - "+r"(dst_u), // %2 - "+r"(dst_v), // %3 - "+r"(pix) // %4 - : - : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", - "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" - ); -} - -// 16x2 pixels -> 8x1. pix is number of argb pixels. e.g. 16. -void ARGB1555ToUVRow_NEON(const uint8* src_argb1555, int src_stride_argb1555, - uint8* dst_u, uint8* dst_v, int pix) { - asm volatile ( - "add %1, %0, %1 \n" // src_stride + src_argb - "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient - "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient - "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient - "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient - "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient - "vmov.u16 q15, #0x8080 \n" // 128.5 - ".p2align 2 \n" - "1: \n" - "vld1.8 {q0}, [%0]! \n" // load 8 ARGB1555 pixels. - RGB555TOARGB - "vpaddl.u8 d8, d0 \n" // B 8 bytes -> 4 shorts. - "vpaddl.u8 d10, d1 \n" // G 8 bytes -> 4 shorts. - "vpaddl.u8 d12, d2 \n" // R 8 bytes -> 4 shorts. - "vld1.8 {q0}, [%0]! \n" // next 8 ARGB1555 pixels. - RGB555TOARGB - "vpaddl.u8 d9, d0 \n" // B 8 bytes -> 4 shorts. - "vpaddl.u8 d11, d1 \n" // G 8 bytes -> 4 shorts. - "vpaddl.u8 d13, d2 \n" // R 8 bytes -> 4 shorts. - - "vld1.8 {q0}, [%1]! \n" // load 8 ARGB1555 pixels. - RGB555TOARGB - "vpadal.u8 d8, d0 \n" // B 8 bytes -> 4 shorts. - "vpadal.u8 d10, d1 \n" // G 8 bytes -> 4 shorts. - "vpadal.u8 d12, d2 \n" // R 8 bytes -> 4 shorts. - "vld1.8 {q0}, [%1]! \n" // next 8 ARGB1555 pixels. - RGB555TOARGB - "vpadal.u8 d9, d0 \n" // B 8 bytes -> 4 shorts. - "vpadal.u8 d11, d1 \n" // G 8 bytes -> 4 shorts. - "vpadal.u8 d13, d2 \n" // R 8 bytes -> 4 shorts. - - "vrshr.u16 q4, q4, #1 \n" // 2x average - "vrshr.u16 q5, q5, #1 \n" - "vrshr.u16 q6, q6, #1 \n" - - "subs %4, %4, #16 \n" // 16 processed per loop. 
- "vmul.s16 q8, q4, q10 \n" // B - "vmls.s16 q8, q5, q11 \n" // G - "vmls.s16 q8, q6, q12 \n" // R - "vadd.u16 q8, q8, q15 \n" // +128 -> unsigned - "vmul.s16 q9, q6, q10 \n" // R - "vmls.s16 q9, q5, q14 \n" // G - "vmls.s16 q9, q4, q13 \n" // B - "vadd.u16 q9, q9, q15 \n" // +128 -> unsigned - "vqshrn.u16 d0, q8, #8 \n" // 16 bit to 8 bit U - "vqshrn.u16 d1, q9, #8 \n" // 16 bit to 8 bit V - "vst1.8 {d0}, [%2]! \n" // store 8 pixels U. - "vst1.8 {d1}, [%3]! \n" // store 8 pixels V. - "bgt 1b \n" - : "+r"(src_argb1555), // %0 - "+r"(src_stride_argb1555), // %1 - "+r"(dst_u), // %2 - "+r"(dst_v), // %3 - "+r"(pix) // %4 - : - : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", - "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" - ); -} - -// 16x2 pixels -> 8x1. pix is number of argb pixels. e.g. 16. -void ARGB4444ToUVRow_NEON(const uint8* src_argb4444, int src_stride_argb4444, - uint8* dst_u, uint8* dst_v, int pix) { - asm volatile ( - "add %1, %0, %1 \n" // src_stride + src_argb - "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient - "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient - "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient - "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient - "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient - "vmov.u16 q15, #0x8080 \n" // 128.5 - ".p2align 2 \n" - "1: \n" - "vld1.8 {q0}, [%0]! \n" // load 8 ARGB4444 pixels. - ARGB4444TOARGB - "vpaddl.u8 d8, d0 \n" // B 8 bytes -> 4 shorts. - "vpaddl.u8 d10, d1 \n" // G 8 bytes -> 4 shorts. - "vpaddl.u8 d12, d2 \n" // R 8 bytes -> 4 shorts. - "vld1.8 {q0}, [%0]! \n" // next 8 ARGB4444 pixels. - ARGB4444TOARGB - "vpaddl.u8 d9, d0 \n" // B 8 bytes -> 4 shorts. - "vpaddl.u8 d11, d1 \n" // G 8 bytes -> 4 shorts. - "vpaddl.u8 d13, d2 \n" // R 8 bytes -> 4 shorts. - - "vld1.8 {q0}, [%1]! \n" // load 8 ARGB4444 pixels. - ARGB4444TOARGB - "vpadal.u8 d8, d0 \n" // B 8 bytes -> 4 shorts. - "vpadal.u8 d10, d1 \n" // G 8 bytes -> 4 shorts. 
- "vpadal.u8 d12, d2 \n" // R 8 bytes -> 4 shorts. - "vld1.8 {q0}, [%1]! \n" // next 8 ARGB4444 pixels. - ARGB4444TOARGB - "vpadal.u8 d9, d0 \n" // B 8 bytes -> 4 shorts. - "vpadal.u8 d11, d1 \n" // G 8 bytes -> 4 shorts. - "vpadal.u8 d13, d2 \n" // R 8 bytes -> 4 shorts. - - "vrshr.u16 q4, q4, #1 \n" // 2x average - "vrshr.u16 q5, q5, #1 \n" - "vrshr.u16 q6, q6, #1 \n" - - "subs %4, %4, #16 \n" // 16 processed per loop. - "vmul.s16 q8, q4, q10 \n" // B - "vmls.s16 q8, q5, q11 \n" // G - "vmls.s16 q8, q6, q12 \n" // R - "vadd.u16 q8, q8, q15 \n" // +128 -> unsigned - "vmul.s16 q9, q6, q10 \n" // R - "vmls.s16 q9, q5, q14 \n" // G - "vmls.s16 q9, q4, q13 \n" // B - "vadd.u16 q9, q9, q15 \n" // +128 -> unsigned - "vqshrn.u16 d0, q8, #8 \n" // 16 bit to 8 bit U - "vqshrn.u16 d1, q9, #8 \n" // 16 bit to 8 bit V - "vst1.8 {d0}, [%2]! \n" // store 8 pixels U. - "vst1.8 {d1}, [%3]! \n" // store 8 pixels V. - "bgt 1b \n" - : "+r"(src_argb4444), // %0 - "+r"(src_stride_argb4444), // %1 - "+r"(dst_u), // %2 - "+r"(dst_v), // %3 - "+r"(pix) // %4 - : - : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", - "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" - ); -} - -void RGB565ToYRow_NEON(const uint8* src_rgb565, uint8* dst_y, int pix) { - asm volatile ( - "vmov.u8 d24, #13 \n" // B * 0.1016 coefficient - "vmov.u8 d25, #65 \n" // G * 0.5078 coefficient - "vmov.u8 d26, #33 \n" // R * 0.2578 coefficient - "vmov.u8 d27, #16 \n" // Add 16 constant - ".p2align 2 \n" - "1: \n" - "vld1.8 {q0}, [%0]! \n" // load 8 RGB565 pixels. - "subs %2, %2, #8 \n" // 8 processed per loop. - RGB565TOARGB - "vmull.u8 q2, d0, d24 \n" // B - "vmlal.u8 q2, d1, d25 \n" // G - "vmlal.u8 q2, d2, d26 \n" // R - "vqrshrun.s16 d0, q2, #7 \n" // 16 bit to 8 bit Y - "vqadd.u8 d0, d27 \n" - "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. 
- "bgt 1b \n" - : "+r"(src_rgb565), // %0 - "+r"(dst_y), // %1 - "+r"(pix) // %2 - : - : "cc", "memory", "q0", "q1", "q2", "q3", "q12", "q13" - ); -} - -void ARGB1555ToYRow_NEON(const uint8* src_argb1555, uint8* dst_y, int pix) { - asm volatile ( - "vmov.u8 d24, #13 \n" // B * 0.1016 coefficient - "vmov.u8 d25, #65 \n" // G * 0.5078 coefficient - "vmov.u8 d26, #33 \n" // R * 0.2578 coefficient - "vmov.u8 d27, #16 \n" // Add 16 constant - ".p2align 2 \n" - "1: \n" - "vld1.8 {q0}, [%0]! \n" // load 8 ARGB1555 pixels. - "subs %2, %2, #8 \n" // 8 processed per loop. - ARGB1555TOARGB - "vmull.u8 q2, d0, d24 \n" // B - "vmlal.u8 q2, d1, d25 \n" // G - "vmlal.u8 q2, d2, d26 \n" // R - "vqrshrun.s16 d0, q2, #7 \n" // 16 bit to 8 bit Y - "vqadd.u8 d0, d27 \n" - "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. - "bgt 1b \n" - : "+r"(src_argb1555), // %0 - "+r"(dst_y), // %1 - "+r"(pix) // %2 - : - : "cc", "memory", "q0", "q1", "q2", "q3", "q12", "q13" - ); -} - -void ARGB4444ToYRow_NEON(const uint8* src_argb4444, uint8* dst_y, int pix) { - asm volatile ( - "vmov.u8 d24, #13 \n" // B * 0.1016 coefficient - "vmov.u8 d25, #65 \n" // G * 0.5078 coefficient - "vmov.u8 d26, #33 \n" // R * 0.2578 coefficient - "vmov.u8 d27, #16 \n" // Add 16 constant - ".p2align 2 \n" - "1: \n" - "vld1.8 {q0}, [%0]! \n" // load 8 ARGB4444 pixels. - "subs %2, %2, #8 \n" // 8 processed per loop. - ARGB4444TOARGB - "vmull.u8 q2, d0, d24 \n" // B - "vmlal.u8 q2, d1, d25 \n" // G - "vmlal.u8 q2, d2, d26 \n" // R - "vqrshrun.s16 d0, q2, #7 \n" // 16 bit to 8 bit Y - "vqadd.u8 d0, d27 \n" - "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. 
- "bgt 1b \n" - : "+r"(src_argb4444), // %0 - "+r"(dst_y), // %1 - "+r"(pix) // %2 - : - : "cc", "memory", "q0", "q1", "q2", "q3", "q12", "q13" - ); -} - -void BGRAToYRow_NEON(const uint8* src_bgra, uint8* dst_y, int pix) { - asm volatile ( - "vmov.u8 d4, #33 \n" // R * 0.2578 coefficient - "vmov.u8 d5, #65 \n" // G * 0.5078 coefficient - "vmov.u8 d6, #13 \n" // B * 0.1016 coefficient - "vmov.u8 d7, #16 \n" // Add 16 constant - ".p2align 2 \n" - "1: \n" - "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 pixels of BGRA. - "subs %2, %2, #8 \n" // 8 processed per loop. - "vmull.u8 q8, d1, d4 \n" // R - "vmlal.u8 q8, d2, d5 \n" // G - "vmlal.u8 q8, d3, d6 \n" // B - "vqrshrun.s16 d0, q8, #7 \n" // 16 bit to 8 bit Y - "vqadd.u8 d0, d7 \n" - "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. - "bgt 1b \n" - : "+r"(src_bgra), // %0 - "+r"(dst_y), // %1 - "+r"(pix) // %2 - : - : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8" - ); -} - -void ABGRToYRow_NEON(const uint8* src_abgr, uint8* dst_y, int pix) { - asm volatile ( - "vmov.u8 d4, #33 \n" // R * 0.2578 coefficient - "vmov.u8 d5, #65 \n" // G * 0.5078 coefficient - "vmov.u8 d6, #13 \n" // B * 0.1016 coefficient - "vmov.u8 d7, #16 \n" // Add 16 constant - ".p2align 2 \n" - "1: \n" - "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 pixels of ABGR. - "subs %2, %2, #8 \n" // 8 processed per loop. - "vmull.u8 q8, d0, d4 \n" // R - "vmlal.u8 q8, d1, d5 \n" // G - "vmlal.u8 q8, d2, d6 \n" // B - "vqrshrun.s16 d0, q8, #7 \n" // 16 bit to 8 bit Y - "vqadd.u8 d0, d7 \n" - "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. 
- "bgt 1b \n" - : "+r"(src_abgr), // %0 - "+r"(dst_y), // %1 - "+r"(pix) // %2 - : - : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8" - ); -} - -void RGBAToYRow_NEON(const uint8* src_rgba, uint8* dst_y, int pix) { - asm volatile ( - "vmov.u8 d4, #13 \n" // B * 0.1016 coefficient - "vmov.u8 d5, #65 \n" // G * 0.5078 coefficient - "vmov.u8 d6, #33 \n" // R * 0.2578 coefficient - "vmov.u8 d7, #16 \n" // Add 16 constant - ".p2align 2 \n" - "1: \n" - "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 pixels of RGBA. - "subs %2, %2, #8 \n" // 8 processed per loop. - "vmull.u8 q8, d1, d4 \n" // B - "vmlal.u8 q8, d2, d5 \n" // G - "vmlal.u8 q8, d3, d6 \n" // R - "vqrshrun.s16 d0, q8, #7 \n" // 16 bit to 8 bit Y - "vqadd.u8 d0, d7 \n" - "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. - "bgt 1b \n" - : "+r"(src_rgba), // %0 - "+r"(dst_y), // %1 - "+r"(pix) // %2 - : - : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8" - ); -} - -void RGB24ToYRow_NEON(const uint8* src_rgb24, uint8* dst_y, int pix) { - asm volatile ( - "vmov.u8 d4, #13 \n" // B * 0.1016 coefficient - "vmov.u8 d5, #65 \n" // G * 0.5078 coefficient - "vmov.u8 d6, #33 \n" // R * 0.2578 coefficient - "vmov.u8 d7, #16 \n" // Add 16 constant - ".p2align 2 \n" - "1: \n" - "vld3.8 {d0, d1, d2}, [%0]! \n" // load 8 pixels of RGB24. - "subs %2, %2, #8 \n" // 8 processed per loop. - "vmull.u8 q8, d0, d4 \n" // B - "vmlal.u8 q8, d1, d5 \n" // G - "vmlal.u8 q8, d2, d6 \n" // R - "vqrshrun.s16 d0, q8, #7 \n" // 16 bit to 8 bit Y - "vqadd.u8 d0, d7 \n" - "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. 
- "bgt 1b \n" - : "+r"(src_rgb24), // %0 - "+r"(dst_y), // %1 - "+r"(pix) // %2 - : - : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8" - ); -} - -void RAWToYRow_NEON(const uint8* src_raw, uint8* dst_y, int pix) { - asm volatile ( - "vmov.u8 d4, #33 \n" // R * 0.2578 coefficient - "vmov.u8 d5, #65 \n" // G * 0.5078 coefficient - "vmov.u8 d6, #13 \n" // B * 0.1016 coefficient - "vmov.u8 d7, #16 \n" // Add 16 constant - ".p2align 2 \n" - "1: \n" - "vld3.8 {d0, d1, d2}, [%0]! \n" // load 8 pixels of RAW. - "subs %2, %2, #8 \n" // 8 processed per loop. - "vmull.u8 q8, d0, d4 \n" // B - "vmlal.u8 q8, d1, d5 \n" // G - "vmlal.u8 q8, d2, d6 \n" // R - "vqrshrun.s16 d0, q8, #7 \n" // 16 bit to 8 bit Y - "vqadd.u8 d0, d7 \n" - "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. - "bgt 1b \n" - : "+r"(src_raw), // %0 - "+r"(dst_y), // %1 - "+r"(pix) // %2 - : - : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8" - ); -} - -// Bilinear filter 16x2 -> 16x1 -void InterpolateRow_NEON(uint8* dst_ptr, - const uint8* src_ptr, ptrdiff_t src_stride, - int dst_width, int source_y_fraction) { - asm volatile ( - "cmp %4, #0 \n" - "beq 100f \n" - "add %2, %1 \n" - "cmp %4, #64 \n" - "beq 75f \n" - "cmp %4, #128 \n" - "beq 50f \n" - "cmp %4, #192 \n" - "beq 25f \n" - - "vdup.8 d5, %4 \n" - "rsb %4, #256 \n" - "vdup.8 d4, %4 \n" - // General purpose row blend. - "1: \n" - "vld1.8 {q0}, [%1]! \n" - "vld1.8 {q1}, [%2]! \n" - "subs %3, %3, #16 \n" - "vmull.u8 q13, d0, d4 \n" - "vmull.u8 q14, d1, d4 \n" - "vmlal.u8 q13, d2, d5 \n" - "vmlal.u8 q14, d3, d5 \n" - "vrshrn.u16 d0, q13, #8 \n" - "vrshrn.u16 d1, q14, #8 \n" - "vst1.8 {q0}, [%0]! \n" - "bgt 1b \n" - "b 99f \n" - - // Blend 25 / 75. - "25: \n" - "vld1.8 {q0}, [%1]! \n" - "vld1.8 {q1}, [%2]! \n" - "subs %3, %3, #16 \n" - "vrhadd.u8 q0, q1 \n" - "vrhadd.u8 q0, q1 \n" - "vst1.8 {q0}, [%0]! \n" - "bgt 25b \n" - "b 99f \n" - - // Blend 50 / 50. - "50: \n" - "vld1.8 {q0}, [%1]! \n" - "vld1.8 {q1}, [%2]! 
\n" - "subs %3, %3, #16 \n" - "vrhadd.u8 q0, q1 \n" - "vst1.8 {q0}, [%0]! \n" - "bgt 50b \n" - "b 99f \n" - - // Blend 75 / 25. - "75: \n" - "vld1.8 {q1}, [%1]! \n" - "vld1.8 {q0}, [%2]! \n" - "subs %3, %3, #16 \n" - "vrhadd.u8 q0, q1 \n" - "vrhadd.u8 q0, q1 \n" - "vst1.8 {q0}, [%0]! \n" - "bgt 75b \n" - "b 99f \n" - - // Blend 100 / 0 - Copy row unchanged. - "100: \n" - "vld1.8 {q0}, [%1]! \n" - "subs %3, %3, #16 \n" - "vst1.8 {q0}, [%0]! \n" - "bgt 100b \n" - - "99: \n" - : "+r"(dst_ptr), // %0 - "+r"(src_ptr), // %1 - "+r"(src_stride), // %2 - "+r"(dst_width), // %3 - "+r"(source_y_fraction) // %4 - : - : "cc", "memory", "q0", "q1", "d4", "d5", "q13", "q14" - ); -} - -// dr * (256 - sa) / 256 + sr = dr - dr * sa / 256 + sr -void ARGBBlendRow_NEON(const uint8* src_argb0, const uint8* src_argb1, - uint8* dst_argb, int width) { - asm volatile ( - "subs %3, #8 \n" - "blt 89f \n" - // Blend 8 pixels. - "8: \n" - "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 pixels of ARGB0. - "vld4.8 {d4, d5, d6, d7}, [%1]! \n" // load 8 pixels of ARGB1. - "subs %3, %3, #8 \n" // 8 processed per loop. - "vmull.u8 q10, d4, d3 \n" // db * a - "vmull.u8 q11, d5, d3 \n" // dg * a - "vmull.u8 q12, d6, d3 \n" // dr * a - "vqrshrn.u16 d20, q10, #8 \n" // db >>= 8 - "vqrshrn.u16 d21, q11, #8 \n" // dg >>= 8 - "vqrshrn.u16 d22, q12, #8 \n" // dr >>= 8 - "vqsub.u8 q2, q2, q10 \n" // dbg - dbg * a / 256 - "vqsub.u8 d6, d6, d22 \n" // dr - dr * a / 256 - "vqadd.u8 q0, q0, q2 \n" // + sbg - "vqadd.u8 d2, d2, d6 \n" // + sr - "vmov.u8 d3, #255 \n" // a = 255 - "vst4.8 {d0, d1, d2, d3}, [%2]! \n" // store 8 pixels of ARGB. - "bge 8b \n" - - "89: \n" - "adds %3, #8-1 \n" - "blt 99f \n" - - // Blend 1 pixels. - "1: \n" - "vld4.8 {d0[0],d1[0],d2[0],d3[0]}, [%0]! \n" // load 1 pixel ARGB0. - "vld4.8 {d4[0],d5[0],d6[0],d7[0]}, [%1]! \n" // load 1 pixel ARGB1. - "subs %3, %3, #1 \n" // 1 processed per loop. 
- "vmull.u8 q10, d4, d3 \n" // db * a - "vmull.u8 q11, d5, d3 \n" // dg * a - "vmull.u8 q12, d6, d3 \n" // dr * a - "vqrshrn.u16 d20, q10, #8 \n" // db >>= 8 - "vqrshrn.u16 d21, q11, #8 \n" // dg >>= 8 - "vqrshrn.u16 d22, q12, #8 \n" // dr >>= 8 - "vqsub.u8 q2, q2, q10 \n" // dbg - dbg * a / 256 - "vqsub.u8 d6, d6, d22 \n" // dr - dr * a / 256 - "vqadd.u8 q0, q0, q2 \n" // + sbg - "vqadd.u8 d2, d2, d6 \n" // + sr - "vmov.u8 d3, #255 \n" // a = 255 - "vst4.8 {d0[0],d1[0],d2[0],d3[0]}, [%2]! \n" // store 1 pixel. - "bge 1b \n" - - "99: \n" - - : "+r"(src_argb0), // %0 - "+r"(src_argb1), // %1 - "+r"(dst_argb), // %2 - "+r"(width) // %3 - : - : "cc", "memory", "q0", "q1", "q2", "q3", "q10", "q11", "q12" - ); -} - -// Attenuate 8 pixels at a time. -void ARGBAttenuateRow_NEON(const uint8* src_argb, uint8* dst_argb, int width) { - asm volatile ( - // Attenuate 8 pixels. - "1: \n" - "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 pixels of ARGB. - "subs %2, %2, #8 \n" // 8 processed per loop. - "vmull.u8 q10, d0, d3 \n" // b * a - "vmull.u8 q11, d1, d3 \n" // g * a - "vmull.u8 q12, d2, d3 \n" // r * a - "vqrshrn.u16 d0, q10, #8 \n" // b >>= 8 - "vqrshrn.u16 d1, q11, #8 \n" // g >>= 8 - "vqrshrn.u16 d2, q12, #8 \n" // r >>= 8 - "vst4.8 {d0, d1, d2, d3}, [%1]! \n" // store 8 pixels of ARGB. - "bgt 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_argb), // %1 - "+r"(width) // %2 - : - : "cc", "memory", "q0", "q1", "q10", "q11", "q12" - ); -} - -// Quantize 8 ARGB pixels (32 bytes). -// dst = (dst * scale >> 16) * interval_size + interval_offset; -void ARGBQuantizeRow_NEON(uint8* dst_argb, int scale, int interval_size, - int interval_offset, int width) { - asm volatile ( - "vdup.u16 q8, %2 \n" - "vshr.u16 q8, q8, #1 \n" // scale >>= 1 - "vdup.u16 q9, %3 \n" // interval multiply. - "vdup.u16 q10, %4 \n" // interval add - - // 8 pixel loop. - ".p2align 2 \n" - "1: \n" - "vld4.8 {d0, d2, d4, d6}, [%0] \n" // load 8 pixels of ARGB. - "subs %1, %1, #8 \n" // 8 processed per loop. 
- "vmovl.u8 q0, d0 \n" // b (0 .. 255) - "vmovl.u8 q1, d2 \n" - "vmovl.u8 q2, d4 \n" - "vqdmulh.s16 q0, q0, q8 \n" // b * scale - "vqdmulh.s16 q1, q1, q8 \n" // g - "vqdmulh.s16 q2, q2, q8 \n" // r - "vmul.u16 q0, q0, q9 \n" // b * interval_size - "vmul.u16 q1, q1, q9 \n" // g - "vmul.u16 q2, q2, q9 \n" // r - "vadd.u16 q0, q0, q10 \n" // b + interval_offset - "vadd.u16 q1, q1, q10 \n" // g - "vadd.u16 q2, q2, q10 \n" // r - "vqmovn.u16 d0, q0 \n" - "vqmovn.u16 d2, q1 \n" - "vqmovn.u16 d4, q2 \n" - "vst4.8 {d0, d2, d4, d6}, [%0]! \n" // store 8 pixels of ARGB. - "bgt 1b \n" - : "+r"(dst_argb), // %0 - "+r"(width) // %1 - : "r"(scale), // %2 - "r"(interval_size), // %3 - "r"(interval_offset) // %4 - : "cc", "memory", "q0", "q1", "q2", "q3", "q8", "q9", "q10" - ); -} - -// Shade 8 pixels at a time by specified value. -// NOTE vqrdmulh.s16 q10, q10, d0[0] must use a scaler register from 0 to 8. -// Rounding in vqrdmulh does +1 to high if high bit of low s16 is set. -void ARGBShadeRow_NEON(const uint8* src_argb, uint8* dst_argb, int width, - uint32 value) { - asm volatile ( - "vdup.u32 q0, %3 \n" // duplicate scale value. - "vzip.u8 d0, d1 \n" // d0 aarrggbb. - "vshr.u16 q0, q0, #1 \n" // scale / 2. - - // 8 pixel loop. - ".p2align 2 \n" - "1: \n" - "vld4.8 {d20, d22, d24, d26}, [%0]! \n" // load 8 pixels of ARGB. - "subs %2, %2, #8 \n" // 8 processed per loop. - "vmovl.u8 q10, d20 \n" // b (0 .. 255) - "vmovl.u8 q11, d22 \n" - "vmovl.u8 q12, d24 \n" - "vmovl.u8 q13, d26 \n" - "vqrdmulh.s16 q10, q10, d0[0] \n" // b * scale * 2 - "vqrdmulh.s16 q11, q11, d0[1] \n" // g - "vqrdmulh.s16 q12, q12, d0[2] \n" // r - "vqrdmulh.s16 q13, q13, d0[3] \n" // a - "vqmovn.u16 d20, q10 \n" - "vqmovn.u16 d22, q11 \n" - "vqmovn.u16 d24, q12 \n" - "vqmovn.u16 d26, q13 \n" - "vst4.8 {d20, d22, d24, d26}, [%1]! \n" // store 8 pixels of ARGB. 
- "bgt 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_argb), // %1 - "+r"(width) // %2 - : "r"(value) // %3 - : "cc", "memory", "q0", "q10", "q11", "q12", "q13" - ); -} - -// Convert 8 ARGB pixels (64 bytes) to 8 Gray ARGB pixels -// Similar to ARGBToYJ but stores ARGB. -// C code is (15 * b + 75 * g + 38 * r + 64) >> 7; -void ARGBGrayRow_NEON(const uint8* src_argb, uint8* dst_argb, int width) { - asm volatile ( - "vmov.u8 d24, #15 \n" // B * 0.11400 coefficient - "vmov.u8 d25, #75 \n" // G * 0.58700 coefficient - "vmov.u8 d26, #38 \n" // R * 0.29900 coefficient - ".p2align 2 \n" - "1: \n" - "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels. - "subs %2, %2, #8 \n" // 8 processed per loop. - "vmull.u8 q2, d0, d24 \n" // B - "vmlal.u8 q2, d1, d25 \n" // G - "vmlal.u8 q2, d2, d26 \n" // R - "vqrshrun.s16 d0, q2, #7 \n" // 15 bit to 8 bit B - "vmov d1, d0 \n" // G - "vmov d2, d0 \n" // R - "vst4.8 {d0, d1, d2, d3}, [%1]! \n" // store 8 ARGB pixels. - "bgt 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_argb), // %1 - "+r"(width) // %2 - : - : "cc", "memory", "q0", "q1", "q2", "q12", "q13" - ); -} - -// Convert 8 ARGB pixels (32 bytes) to 8 Sepia ARGB pixels. -// b = (r * 35 + g * 68 + b * 17) >> 7 -// g = (r * 45 + g * 88 + b * 22) >> 7 -// r = (r * 50 + g * 98 + b * 24) >> 7 -void ARGBSepiaRow_NEON(uint8* dst_argb, int width) { - asm volatile ( - "vmov.u8 d20, #17 \n" // BB coefficient - "vmov.u8 d21, #68 \n" // BG coefficient - "vmov.u8 d22, #35 \n" // BR coefficient - "vmov.u8 d24, #22 \n" // GB coefficient - "vmov.u8 d25, #88 \n" // GG coefficient - "vmov.u8 d26, #45 \n" // GR coefficient - "vmov.u8 d28, #24 \n" // BB coefficient - "vmov.u8 d29, #98 \n" // BG coefficient - "vmov.u8 d30, #50 \n" // BR coefficient - ".p2align 2 \n" - "1: \n" - "vld4.8 {d0, d1, d2, d3}, [%0] \n" // load 8 ARGB pixels. - "subs %1, %1, #8 \n" // 8 processed per loop. 
- "vmull.u8 q2, d0, d20 \n" // B to Sepia B - "vmlal.u8 q2, d1, d21 \n" // G - "vmlal.u8 q2, d2, d22 \n" // R - "vmull.u8 q3, d0, d24 \n" // B to Sepia G - "vmlal.u8 q3, d1, d25 \n" // G - "vmlal.u8 q3, d2, d26 \n" // R - "vmull.u8 q8, d0, d28 \n" // B to Sepia R - "vmlal.u8 q8, d1, d29 \n" // G - "vmlal.u8 q8, d2, d30 \n" // R - "vqshrn.u16 d0, q2, #7 \n" // 16 bit to 8 bit B - "vqshrn.u16 d1, q3, #7 \n" // 16 bit to 8 bit G - "vqshrn.u16 d2, q8, #7 \n" // 16 bit to 8 bit R - "vst4.8 {d0, d1, d2, d3}, [%0]! \n" // store 8 ARGB pixels. - "bgt 1b \n" - : "+r"(dst_argb), // %0 - "+r"(width) // %1 - : - : "cc", "memory", "q0", "q1", "q2", "q3", - "q10", "q11", "q12", "q13", "q14", "q15" - ); -} - -// Tranform 8 ARGB pixels (32 bytes) with color matrix. -// TODO(fbarchard): Was same as Sepia except matrix is provided. This function -// needs to saturate. Consider doing a non-saturating version. -void ARGBColorMatrixRow_NEON(const uint8* src_argb, uint8* dst_argb, - const int8* matrix_argb, int width) { - asm volatile ( - "vld1.8 {q2}, [%3] \n" // load 3 ARGB vectors. - "vmovl.s8 q0, d4 \n" // B,G coefficients s16. - "vmovl.s8 q1, d5 \n" // R,A coefficients s16. - - ".p2align 2 \n" - "1: \n" - "vld4.8 {d16, d18, d20, d22}, [%0]! \n" // load 8 ARGB pixels. - "subs %2, %2, #8 \n" // 8 processed per loop. - "vmovl.u8 q8, d16 \n" // b (0 .. 
255) 16 bit - "vmovl.u8 q9, d18 \n" // g - "vmovl.u8 q10, d20 \n" // r - "vmovl.u8 q15, d22 \n" // a - "vmul.s16 q12, q8, d0[0] \n" // B = B * Matrix B - "vmul.s16 q13, q8, d1[0] \n" // G = B * Matrix G - "vmul.s16 q14, q8, d2[0] \n" // R = B * Matrix R - "vmul.s16 q15, q8, d3[0] \n" // A = B * Matrix A - "vmul.s16 q4, q9, d0[1] \n" // B += G * Matrix B - "vmul.s16 q5, q9, d1[1] \n" // G += G * Matrix G - "vmul.s16 q6, q9, d2[1] \n" // R += G * Matrix R - "vmul.s16 q7, q9, d3[1] \n" // A += G * Matrix A - "vqadd.s16 q12, q12, q4 \n" // Accumulate B - "vqadd.s16 q13, q13, q5 \n" // Accumulate G - "vqadd.s16 q14, q14, q6 \n" // Accumulate R - "vqadd.s16 q15, q15, q7 \n" // Accumulate A - "vmul.s16 q4, q10, d0[2] \n" // B += R * Matrix B - "vmul.s16 q5, q10, d1[2] \n" // G += R * Matrix G - "vmul.s16 q6, q10, d2[2] \n" // R += R * Matrix R - "vmul.s16 q7, q10, d3[2] \n" // A += R * Matrix A - "vqadd.s16 q12, q12, q4 \n" // Accumulate B - "vqadd.s16 q13, q13, q5 \n" // Accumulate G - "vqadd.s16 q14, q14, q6 \n" // Accumulate R - "vqadd.s16 q15, q15, q7 \n" // Accumulate A - "vmul.s16 q4, q15, d0[3] \n" // B += A * Matrix B - "vmul.s16 q5, q15, d1[3] \n" // G += A * Matrix G - "vmul.s16 q6, q15, d2[3] \n" // R += A * Matrix R - "vmul.s16 q7, q15, d3[3] \n" // A += A * Matrix A - "vqadd.s16 q12, q12, q4 \n" // Accumulate B - "vqadd.s16 q13, q13, q5 \n" // Accumulate G - "vqadd.s16 q14, q14, q6 \n" // Accumulate R - "vqadd.s16 q15, q15, q7 \n" // Accumulate A - "vqshrun.s16 d16, q12, #6 \n" // 16 bit to 8 bit B - "vqshrun.s16 d18, q13, #6 \n" // 16 bit to 8 bit G - "vqshrun.s16 d20, q14, #6 \n" // 16 bit to 8 bit R - "vqshrun.s16 d22, q15, #6 \n" // 16 bit to 8 bit A - "vst4.8 {d16, d18, d20, d22}, [%1]! \n" // store 8 ARGB pixels. 
- "bgt 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_argb), // %1 - "+r"(width) // %2 - : "r"(matrix_argb) // %3 - : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9", - "q10", "q11", "q12", "q13", "q14", "q15" - ); -} - -// TODO(fbarchard): fix vqshrun in ARGBMultiplyRow_NEON and reenable. -#ifdef HAS_ARGBMULTIPLYROW_NEON -// Multiply 2 rows of ARGB pixels together, 8 pixels at a time. -void ARGBMultiplyRow_NEON(const uint8* src_argb0, const uint8* src_argb1, - uint8* dst_argb, int width) { - asm volatile ( - // 8 pixel loop. - ".p2align 2 \n" - "1: \n" - "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels. - "vld4.8 {d1, d3, d5, d7}, [%1]! \n" // load 8 more ARGB pixels. - "subs %3, %3, #8 \n" // 8 processed per loop. - "vmull.u8 q0, d0, d1 \n" // multiply B - "vmull.u8 q1, d2, d3 \n" // multiply G - "vmull.u8 q2, d4, d5 \n" // multiply R - "vmull.u8 q3, d6, d7 \n" // multiply A - "vrshrn.u16 d0, q0, #8 \n" // 16 bit to 8 bit B - "vrshrn.u16 d1, q1, #8 \n" // 16 bit to 8 bit G - "vrshrn.u16 d2, q2, #8 \n" // 16 bit to 8 bit R - "vrshrn.u16 d3, q3, #8 \n" // 16 bit to 8 bit A - "vst4.8 {d0, d1, d2, d3}, [%2]! \n" // store 8 ARGB pixels. - "bgt 1b \n" - - : "+r"(src_argb0), // %0 - "+r"(src_argb1), // %1 - "+r"(dst_argb), // %2 - "+r"(width) // %3 - : - : "cc", "memory", "q0", "q1", "q2", "q3" - ); -} -#endif // HAS_ARGBMULTIPLYROW_NEON - -// Add 2 rows of ARGB pixels together, 8 pixels at a time. -void ARGBAddRow_NEON(const uint8* src_argb0, const uint8* src_argb1, - uint8* dst_argb, int width) { - asm volatile ( - // 8 pixel loop. - ".p2align 2 \n" - "1: \n" - "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels. - "vld4.8 {d4, d5, d6, d7}, [%1]! \n" // load 8 more ARGB pixels. - "subs %3, %3, #8 \n" // 8 processed per loop. - "vqadd.u8 q0, q0, q2 \n" // add B, G - "vqadd.u8 q1, q1, q3 \n" // add R, A - "vst4.8 {d0, d1, d2, d3}, [%2]! \n" // store 8 ARGB pixels. 
- "bgt 1b \n" - - : "+r"(src_argb0), // %0 - "+r"(src_argb1), // %1 - "+r"(dst_argb), // %2 - "+r"(width) // %3 - : - : "cc", "memory", "q0", "q1", "q2", "q3" - ); -} - -// Subtract 2 rows of ARGB pixels, 8 pixels at a time. -void ARGBSubtractRow_NEON(const uint8* src_argb0, const uint8* src_argb1, - uint8* dst_argb, int width) { - asm volatile ( - // 8 pixel loop. - ".p2align 2 \n" - "1: \n" - "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels. - "vld4.8 {d4, d5, d6, d7}, [%1]! \n" // load 8 more ARGB pixels. - "subs %3, %3, #8 \n" // 8 processed per loop. - "vqsub.u8 q0, q0, q2 \n" // subtract B, G - "vqsub.u8 q1, q1, q3 \n" // subtract R, A - "vst4.8 {d0, d1, d2, d3}, [%2]! \n" // store 8 ARGB pixels. - "bgt 1b \n" - - : "+r"(src_argb0), // %0 - "+r"(src_argb1), // %1 - "+r"(dst_argb), // %2 - "+r"(width) // %3 - : - : "cc", "memory", "q0", "q1", "q2", "q3" - ); -} - -// Adds Sobel X and Sobel Y and stores Sobel into ARGB. -// A = 255 -// R = Sobel -// G = Sobel -// B = Sobel -void SobelRow_NEON(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_argb, int width) { - asm volatile ( - "vmov.u8 d3, #255 \n" // alpha - // 8 pixel loop. - ".p2align 2 \n" - "1: \n" - "vld1.8 {d0}, [%0]! \n" // load 8 sobelx. - "vld1.8 {d1}, [%1]! \n" // load 8 sobely. - "subs %3, %3, #8 \n" // 8 processed per loop. - "vqadd.u8 d0, d0, d1 \n" // add - "vmov.u8 d1, d0 \n" - "vmov.u8 d2, d0 \n" - "vst4.8 {d0, d1, d2, d3}, [%2]! \n" // store 8 ARGB pixels. - "bgt 1b \n" - : "+r"(src_sobelx), // %0 - "+r"(src_sobely), // %1 - "+r"(dst_argb), // %2 - "+r"(width) // %3 - : - : "cc", "memory", "q0", "q1" - ); -} - -// Adds Sobel X and Sobel Y and stores Sobel into plane. -void SobelToPlaneRow_NEON(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_y, int width) { - asm volatile ( - // 16 pixel loop. - ".p2align 2 \n" - "1: \n" - "vld1.8 {q0}, [%0]! \n" // load 16 sobelx. - "vld1.8 {q1}, [%1]! \n" // load 16 sobely. 
- "subs %3, %3, #16 \n" // 16 processed per loop. - "vqadd.u8 q0, q0, q1 \n" // add - "vst1.8 {q0}, [%2]! \n" // store 16 pixels. - "bgt 1b \n" - : "+r"(src_sobelx), // %0 - "+r"(src_sobely), // %1 - "+r"(dst_y), // %2 - "+r"(width) // %3 - : - : "cc", "memory", "q0", "q1" - ); -} - -// Mixes Sobel X, Sobel Y and Sobel into ARGB. -// A = 255 -// R = Sobel X -// G = Sobel -// B = Sobel Y -void SobelXYRow_NEON(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_argb, int width) { - asm volatile ( - "vmov.u8 d3, #255 \n" // alpha - // 8 pixel loop. - ".p2align 2 \n" - "1: \n" - "vld1.8 {d2}, [%0]! \n" // load 8 sobelx. - "vld1.8 {d0}, [%1]! \n" // load 8 sobely. - "subs %3, %3, #8 \n" // 8 processed per loop. - "vqadd.u8 d1, d0, d2 \n" // add - "vst4.8 {d0, d1, d2, d3}, [%2]! \n" // store 8 ARGB pixels. - "bgt 1b \n" - : "+r"(src_sobelx), // %0 - "+r"(src_sobely), // %1 - "+r"(dst_argb), // %2 - "+r"(width) // %3 - : - : "cc", "memory", "q0", "q1" - ); -} - -// SobelX as a matrix is -// -1 0 1 -// -2 0 2 -// -1 0 1 -void SobelXRow_NEON(const uint8* src_y0, const uint8* src_y1, - const uint8* src_y2, uint8* dst_sobelx, int width) { - asm volatile ( - ".p2align 2 \n" - "1: \n" - "vld1.8 {d0}, [%0],%5 \n" // top - "vld1.8 {d1}, [%0],%6 \n" - "vsubl.u8 q0, d0, d1 \n" - "vld1.8 {d2}, [%1],%5 \n" // center * 2 - "vld1.8 {d3}, [%1],%6 \n" - "vsubl.u8 q1, d2, d3 \n" - "vadd.s16 q0, q0, q1 \n" - "vadd.s16 q0, q0, q1 \n" - "vld1.8 {d2}, [%2],%5 \n" // bottom - "vld1.8 {d3}, [%2],%6 \n" - "subs %4, %4, #8 \n" // 8 pixels - "vsubl.u8 q1, d2, d3 \n" - "vadd.s16 q0, q0, q1 \n" - "vabs.s16 q0, q0 \n" - "vqmovn.u16 d0, q0 \n" - "vst1.8 {d0}, [%3]! 
\n" // store 8 sobelx - "bgt 1b \n" - : "+r"(src_y0), // %0 - "+r"(src_y1), // %1 - "+r"(src_y2), // %2 - "+r"(dst_sobelx), // %3 - "+r"(width) // %4 - : "r"(2), // %5 - "r"(6) // %6 - : "cc", "memory", "q0", "q1" // Clobber List - ); -} - -// SobelY as a matrix is -// -1 -2 -1 -// 0 0 0 -// 1 2 1 -void SobelYRow_NEON(const uint8* src_y0, const uint8* src_y1, - uint8* dst_sobely, int width) { - asm volatile ( - ".p2align 2 \n" - "1: \n" - "vld1.8 {d0}, [%0],%4 \n" // left - "vld1.8 {d1}, [%1],%4 \n" - "vsubl.u8 q0, d0, d1 \n" - "vld1.8 {d2}, [%0],%4 \n" // center * 2 - "vld1.8 {d3}, [%1],%4 \n" - "vsubl.u8 q1, d2, d3 \n" - "vadd.s16 q0, q0, q1 \n" - "vadd.s16 q0, q0, q1 \n" - "vld1.8 {d2}, [%0],%5 \n" // right - "vld1.8 {d3}, [%1],%5 \n" - "subs %3, %3, #8 \n" // 8 pixels - "vsubl.u8 q1, d2, d3 \n" - "vadd.s16 q0, q0, q1 \n" - "vabs.s16 q0, q0 \n" - "vqmovn.u16 d0, q0 \n" - "vst1.8 {d0}, [%2]! \n" // store 8 sobely - "bgt 1b \n" - : "+r"(src_y0), // %0 - "+r"(src_y1), // %1 - "+r"(dst_sobely), // %2 - "+r"(width) // %3 - : "r"(1), // %4 - "r"(6) // %5 - : "cc", "memory", "q0", "q1" // Clobber List - ); -} -#endif // __ARM_NEON__ - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/row_posix.cc b/drivers/theoraplayer/src/YUV/libyuv/src/row_posix.cc deleted file mode 100755 index 106fda5689..0000000000 --- a/drivers/theoraplayer/src/YUV/libyuv/src/row_posix.cc +++ /dev/null @@ -1,6443 +0,0 @@ -/* - * Copyright 2011 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include "libyuv/row.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -// This module is for GCC x86 and x64. -#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__)) - -#if defined(HAS_ARGBTOYROW_SSSE3) || defined(HAS_ARGBGRAYROW_SSSE3) - -// Constants for ARGB -static vec8 kARGBToY = { - 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0 -}; - -// JPeg full range. -static vec8 kARGBToYJ = { - 15, 75, 38, 0, 15, 75, 38, 0, 15, 75, 38, 0, 15, 75, 38, 0 -}; -#endif // defined(HAS_ARGBTOYROW_SSSE3) || defined(HAS_ARGBGRAYROW_SSSE3) - -#if defined(HAS_ARGBTOYROW_SSSE3) || defined(HAS_I422TOARGBROW_SSSE3) - -static vec8 kARGBToU = { - 112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0 -}; - -static vec8 kARGBToUJ = { - 127, -84, -43, 0, 127, -84, -43, 0, 127, -84, -43, 0, 127, -84, -43, 0 -}; - -static vec8 kARGBToV = { - -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0, -}; - -static vec8 kARGBToVJ = { - -20, -107, 127, 0, -20, -107, 127, 0, -20, -107, 127, 0, -20, -107, 127, 0 -}; - -// Constants for BGRA -static vec8 kBGRAToY = { - 0, 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13 -}; - -static vec8 kBGRAToU = { - 0, -38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112 -}; - -static vec8 kBGRAToV = { - 0, 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18 -}; - -// Constants for ABGR -static vec8 kABGRToY = { - 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13, 0 -}; - -static vec8 kABGRToU = { - -38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112, 0 -}; - -static vec8 kABGRToV = { - 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18, 0 -}; - -// Constants for RGBA. 
-static vec8 kRGBAToY = { - 0, 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33 -}; - -static vec8 kRGBAToU = { - 0, 112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38 -}; - -static vec8 kRGBAToV = { - 0, -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112 -}; - -static uvec8 kAddY16 = { - 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u -}; - -static vec16 kAddYJ64 = { - 64, 64, 64, 64, 64, 64, 64, 64 -}; - -static uvec8 kAddUV128 = { - 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, - 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u -}; - -static uvec16 kAddUVJ128 = { - 0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u -}; -#endif // defined(HAS_ARGBTOYROW_SSSE3) || defined(HAS_I422TOARGBROW_SSSE3) - -#ifdef HAS_RGB24TOARGBROW_SSSE3 - -// Shuffle table for converting RGB24 to ARGB. -static uvec8 kShuffleMaskRGB24ToARGB = { - 0u, 1u, 2u, 12u, 3u, 4u, 5u, 13u, 6u, 7u, 8u, 14u, 9u, 10u, 11u, 15u -}; - -// Shuffle table for converting RAW to ARGB. -static uvec8 kShuffleMaskRAWToARGB = { - 2u, 1u, 0u, 12u, 5u, 4u, 3u, 13u, 8u, 7u, 6u, 14u, 11u, 10u, 9u, 15u -}; - -// Shuffle table for converting ARGB to RGB24. -static uvec8 kShuffleMaskARGBToRGB24 = { - 0u, 1u, 2u, 4u, 5u, 6u, 8u, 9u, 10u, 12u, 13u, 14u, 128u, 128u, 128u, 128u -}; - -// Shuffle table for converting ARGB to RAW. -static uvec8 kShuffleMaskARGBToRAW = { - 2u, 1u, 0u, 6u, 5u, 4u, 10u, 9u, 8u, 14u, 13u, 12u, 128u, 128u, 128u, 128u -}; - -// Shuffle table for converting ARGBToRGB24 for I422ToRGB24. First 8 + next 4 -static uvec8 kShuffleMaskARGBToRGB24_0 = { - 0u, 1u, 2u, 4u, 5u, 6u, 8u, 9u, 128u, 128u, 128u, 128u, 10u, 12u, 13u, 14u -}; - -// Shuffle table for converting ARGB to RAW. 
-static uvec8 kShuffleMaskARGBToRAW_0 = { - 2u, 1u, 0u, 6u, 5u, 4u, 10u, 9u, 128u, 128u, 128u, 128u, 8u, 14u, 13u, 12u -}; -#endif // HAS_RGB24TOARGBROW_SSSE3 - -#if defined(TESTING) && defined(__x86_64__) -void TestRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix) { - asm volatile ( - ".p2align 5 \n" - "mov %%eax,%%eax \n" - "mov %%ebx,%%ebx \n" - "mov %%ecx,%%ecx \n" - "mov %%edx,%%edx \n" - "mov %%esi,%%esi \n" - "mov %%edi,%%edi \n" - "mov %%ebp,%%ebp \n" - "mov %%esp,%%esp \n" - ".p2align 5 \n" - "mov %%r8d,%%r8d \n" - "mov %%r9d,%%r9d \n" - "mov %%r10d,%%r10d \n" - "mov %%r11d,%%r11d \n" - "mov %%r12d,%%r12d \n" - "mov %%r13d,%%r13d \n" - "mov %%r14d,%%r14d \n" - "mov %%r15d,%%r15d \n" - ".p2align 5 \n" - "lea (%%rax),%%eax \n" - "lea (%%rbx),%%ebx \n" - "lea (%%rcx),%%ecx \n" - "lea (%%rdx),%%edx \n" - "lea (%%rsi),%%esi \n" - "lea (%%rdi),%%edi \n" - "lea (%%rbp),%%ebp \n" - "lea (%%rsp),%%esp \n" - ".p2align 5 \n" - "lea (%%r8),%%r8d \n" - "lea (%%r9),%%r9d \n" - "lea (%%r10),%%r10d \n" - "lea (%%r11),%%r11d \n" - "lea (%%r12),%%r12d \n" - "lea (%%r13),%%r13d \n" - "lea (%%r14),%%r14d \n" - "lea (%%r15),%%r15d \n" - - ".p2align 5 \n" - "lea 0x10(%%rax),%%eax \n" - "lea 0x10(%%rbx),%%ebx \n" - "lea 0x10(%%rcx),%%ecx \n" - "lea 0x10(%%rdx),%%edx \n" - "lea 0x10(%%rsi),%%esi \n" - "lea 0x10(%%rdi),%%edi \n" - "lea 0x10(%%rbp),%%ebp \n" - "lea 0x10(%%rsp),%%esp \n" - ".p2align 5 \n" - "lea 0x10(%%r8),%%r8d \n" - "lea 0x10(%%r9),%%r9d \n" - "lea 0x10(%%r10),%%r10d \n" - "lea 0x10(%%r11),%%r11d \n" - "lea 0x10(%%r12),%%r12d \n" - "lea 0x10(%%r13),%%r13d \n" - "lea 0x10(%%r14),%%r14d \n" - "lea 0x10(%%r15),%%r15d \n" - - ".p2align 5 \n" - "add 0x10,%%eax \n" - "add 0x10,%%ebx \n" - "add 0x10,%%ecx \n" - "add 0x10,%%edx \n" - "add 0x10,%%esi \n" - "add 0x10,%%edi \n" - "add 0x10,%%ebp \n" - "add 0x10,%%esp \n" - ".p2align 5 \n" - "add 0x10,%%r8d \n" - "add 0x10,%%r9d \n" - "add 0x10,%%r10d \n" - "add 0x10,%%r11d \n" - "add 0x10,%%r12d \n" - "add 0x10,%%r13d 
\n" - "add 0x10,%%r14d \n" - "add 0x10,%%r15d \n" - - ".p2align 2 \n" - "1: \n" - "movq " MEMACCESS(0) ",%%xmm0 \n" - "lea " MEMLEA(0x8,0) ",%0 \n" - "movdqa %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x20,1) ",%1 \n" - "sub $0x8,%2 \n" - "jg 1b \n" - : "+r"(src_y), // %0 - "+r"(dst_argb), // %1 - "+r"(pix) // %2 - : - : "memory", "cc" -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm5" -#endif - ); -} -#endif // TESTING - -#ifdef HAS_I400TOARGBROW_SSE2 -void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix) { - asm volatile ( - "pcmpeqb %%xmm5,%%xmm5 \n" - "pslld $0x18,%%xmm5 \n" - LABELALIGN - "1: \n" - "movq " MEMACCESS(0) ",%%xmm0 \n" - "lea " MEMLEA(0x8,0) ",%0 \n" - "punpcklbw %%xmm0,%%xmm0 \n" - "movdqa %%xmm0,%%xmm1 \n" - "punpcklwd %%xmm0,%%xmm0 \n" - "punpckhwd %%xmm1,%%xmm1 \n" - "por %%xmm5,%%xmm0 \n" - "por %%xmm5,%%xmm1 \n" - "movdqa %%xmm0," MEMACCESS(1) " \n" - "movdqa %%xmm1," MEMACCESS2(0x10,1) " \n" - "lea " MEMLEA(0x20,1) ",%1 \n" - "sub $0x8,%2 \n" - "jg 1b \n" - : "+r"(src_y), // %0 - "+r"(dst_argb), // %1 - "+r"(pix) // %2 - : - : "memory", "cc" -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm5" -#endif - ); -} - -void I400ToARGBRow_Unaligned_SSE2(const uint8* src_y, uint8* dst_argb, - int pix) { - asm volatile ( - "pcmpeqb %%xmm5,%%xmm5 \n" - "pslld $0x18,%%xmm5 \n" - LABELALIGN - "1: \n" - "movq " MEMACCESS(0) ",%%xmm0 \n" - "lea " MEMLEA(0x8,0) ",%0 \n" - "punpcklbw %%xmm0,%%xmm0 \n" - "movdqa %%xmm0,%%xmm1 \n" - "punpcklwd %%xmm0,%%xmm0 \n" - "punpckhwd %%xmm1,%%xmm1 \n" - "por %%xmm5,%%xmm0 \n" - "por %%xmm5,%%xmm1 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "movdqu %%xmm1," MEMACCESS2(0x10,1) " \n" - "lea " MEMLEA(0x20,1) ",%1 \n" - "sub $0x8,%2 \n" - "jg 1b \n" - : "+r"(src_y), // %0 - "+r"(dst_argb), // %1 - "+r"(pix) // %2 - : - : "memory", "cc" -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm5" -#endif - ); -} -#endif // HAS_I400TOARGBROW_SSE2 - -#ifdef HAS_RGB24TOARGBROW_SSSE3 -void RGB24ToARGBRow_SSSE3(const uint8* 
src_rgb24, uint8* dst_argb, int pix) { - asm volatile ( - "pcmpeqb %%xmm5,%%xmm5 \n" // generate mask 0xff000000 - "pslld $0x18,%%xmm5 \n" - "movdqa %3,%%xmm4 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "movdqu " MEMACCESS2(0x20,0) ",%%xmm3 \n" - "lea " MEMLEA(0x30,0) ",%0 \n" - "movdqa %%xmm3,%%xmm2 \n" - "palignr $0x8,%%xmm1,%%xmm2 \n" - "pshufb %%xmm4,%%xmm2 \n" - "por %%xmm5,%%xmm2 \n" - "palignr $0xc,%%xmm0,%%xmm1 \n" - "pshufb %%xmm4,%%xmm0 \n" - "movdqa %%xmm2," MEMACCESS2(0x20,1) " \n" - "por %%xmm5,%%xmm0 \n" - "pshufb %%xmm4,%%xmm1 \n" - "movdqa %%xmm0," MEMACCESS(1) " \n" - "por %%xmm5,%%xmm1 \n" - "palignr $0x4,%%xmm3,%%xmm3 \n" - "pshufb %%xmm4,%%xmm3 \n" - "movdqa %%xmm1," MEMACCESS2(0x10,1) " \n" - "por %%xmm5,%%xmm3 \n" - "sub $0x10,%2 \n" - "movdqa %%xmm3," MEMACCESS2(0x30,1) " \n" - "lea " MEMLEA(0x40,1) ",%1 \n" - "jg 1b \n" - : "+r"(src_rgb24), // %0 - "+r"(dst_argb), // %1 - "+r"(pix) // %2 - : "m"(kShuffleMaskRGB24ToARGB) // %3 - : "memory", "cc" -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" -#endif - ); -} - -void RAWToARGBRow_SSSE3(const uint8* src_raw, uint8* dst_argb, int pix) { - asm volatile ( - "pcmpeqb %%xmm5,%%xmm5 \n" // generate mask 0xff000000 - "pslld $0x18,%%xmm5 \n" - "movdqa %3,%%xmm4 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "movdqu " MEMACCESS2(0x20,0) ",%%xmm3 \n" - "lea " MEMLEA(0x30,0) ",%0 \n" - "movdqa %%xmm3,%%xmm2 \n" - "palignr $0x8,%%xmm1,%%xmm2 \n" - "pshufb %%xmm4,%%xmm2 \n" - "por %%xmm5,%%xmm2 \n" - "palignr $0xc,%%xmm0,%%xmm1 \n" - "pshufb %%xmm4,%%xmm0 \n" - "movdqa %%xmm2," MEMACCESS2(0x20,1) " \n" - "por %%xmm5,%%xmm0 \n" - "pshufb %%xmm4,%%xmm1 \n" - "movdqa %%xmm0," MEMACCESS(1) " \n" - "por %%xmm5,%%xmm1 \n" - "palignr $0x4,%%xmm3,%%xmm3 \n" - "pshufb %%xmm4,%%xmm3 \n" - "movdqa %%xmm1," MEMACCESS2(0x10,1) " \n" - "por %%xmm5,%%xmm3 \n" - "sub 
$0x10,%2 \n" - "movdqa %%xmm3," MEMACCESS2(0x30,1) " \n" - "lea " MEMLEA(0x40,1) ",%1 \n" - "jg 1b \n" - : "+r"(src_raw), // %0 - "+r"(dst_argb), // %1 - "+r"(pix) // %2 - : "m"(kShuffleMaskRAWToARGB) // %3 - : "memory", "cc" -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" -#endif - ); -} - -void RGB565ToARGBRow_SSE2(const uint8* src, uint8* dst, int pix) { - asm volatile ( - "mov $0x1080108,%%eax \n" - "movd %%eax,%%xmm5 \n" - "pshufd $0x0,%%xmm5,%%xmm5 \n" - "mov $0x20802080,%%eax \n" - "movd %%eax,%%xmm6 \n" - "pshufd $0x0,%%xmm6,%%xmm6 \n" - "pcmpeqb %%xmm3,%%xmm3 \n" - "psllw $0xb,%%xmm3 \n" - "pcmpeqb %%xmm4,%%xmm4 \n" - "psllw $0xa,%%xmm4 \n" - "psrlw $0x5,%%xmm4 \n" - "pcmpeqb %%xmm7,%%xmm7 \n" - "psllw $0x8,%%xmm7 \n" - "sub %0,%1 \n" - "sub %0,%1 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqa %%xmm0,%%xmm1 \n" - "movdqa %%xmm0,%%xmm2 \n" - "pand %%xmm3,%%xmm1 \n" - "psllw $0xb,%%xmm2 \n" - "pmulhuw %%xmm5,%%xmm1 \n" - "pmulhuw %%xmm5,%%xmm2 \n" - "psllw $0x8,%%xmm1 \n" - "por %%xmm2,%%xmm1 \n" - "pand %%xmm4,%%xmm0 \n" - "pmulhuw %%xmm6,%%xmm0 \n" - "por %%xmm7,%%xmm0 \n" - "movdqa %%xmm1,%%xmm2 \n" - "punpcklbw %%xmm0,%%xmm1 \n" - "punpckhbw %%xmm0,%%xmm2 \n" - BUNDLEALIGN - MEMOPMEM(movdqa,xmm1,0x00,1,0,2) // movdqa %%xmm1,(%1,%0,2) - MEMOPMEM(movdqa,xmm2,0x10,1,0,2) // movdqa %%xmm2,0x10(%1,%0,2) - "lea " MEMLEA(0x10,0) ",%0 \n" - "sub $0x8,%2 \n" - "jg 1b \n" - : "+r"(src), // %0 - "+r"(dst), // %1 - "+r"(pix) // %2 - : - : "memory", "cc", "eax" -#if defined(__native_client__) && defined(__x86_64__) - , "r14" -#endif -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" -#endif - ); -} - -void ARGB1555ToARGBRow_SSE2(const uint8* src, uint8* dst, int pix) { - asm volatile ( - "mov $0x1080108,%%eax \n" - "movd %%eax,%%xmm5 \n" - "pshufd $0x0,%%xmm5,%%xmm5 \n" - "mov $0x42004200,%%eax \n" - "movd %%eax,%%xmm6 \n" - "pshufd $0x0,%%xmm6,%%xmm6 \n" - "pcmpeqb 
%%xmm3,%%xmm3 \n" - "psllw $0xb,%%xmm3 \n" - "movdqa %%xmm3,%%xmm4 \n" - "psrlw $0x6,%%xmm4 \n" - "pcmpeqb %%xmm7,%%xmm7 \n" - "psllw $0x8,%%xmm7 \n" - "sub %0,%1 \n" - "sub %0,%1 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqa %%xmm0,%%xmm1 \n" - "movdqa %%xmm0,%%xmm2 \n" - "psllw $0x1,%%xmm1 \n" - "psllw $0xb,%%xmm2 \n" - "pand %%xmm3,%%xmm1 \n" - "pmulhuw %%xmm5,%%xmm2 \n" - "pmulhuw %%xmm5,%%xmm1 \n" - "psllw $0x8,%%xmm1 \n" - "por %%xmm2,%%xmm1 \n" - "movdqa %%xmm0,%%xmm2 \n" - "pand %%xmm4,%%xmm0 \n" - "psraw $0x8,%%xmm2 \n" - "pmulhuw %%xmm6,%%xmm0 \n" - "pand %%xmm7,%%xmm2 \n" - "por %%xmm2,%%xmm0 \n" - "movdqa %%xmm1,%%xmm2 \n" - "punpcklbw %%xmm0,%%xmm1 \n" - "punpckhbw %%xmm0,%%xmm2 \n" - BUNDLEALIGN - MEMOPMEM(movdqa,xmm1,0x00,1,0,2) // movdqa %%xmm1,(%1,%0,2) - MEMOPMEM(movdqa,xmm2,0x10,1,0,2) // movdqa %%xmm2,0x10(%1,%0,2) - "lea " MEMLEA(0x10,0) ",%0 \n" - "sub $0x8,%2 \n" - "jg 1b \n" - : "+r"(src), // %0 - "+r"(dst), // %1 - "+r"(pix) // %2 - : - : "memory", "cc", "eax" -#if defined(__native_client__) && defined(__x86_64__) - , "r14" -#endif -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" -#endif - ); -} - -void ARGB4444ToARGBRow_SSE2(const uint8* src, uint8* dst, int pix) { - asm volatile ( - "mov $0xf0f0f0f,%%eax \n" - "movd %%eax,%%xmm4 \n" - "pshufd $0x0,%%xmm4,%%xmm4 \n" - "movdqa %%xmm4,%%xmm5 \n" - "pslld $0x4,%%xmm5 \n" - "sub %0,%1 \n" - "sub %0,%1 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqa %%xmm0,%%xmm2 \n" - "pand %%xmm4,%%xmm0 \n" - "pand %%xmm5,%%xmm2 \n" - "movdqa %%xmm0,%%xmm1 \n" - "movdqa %%xmm2,%%xmm3 \n" - "psllw $0x4,%%xmm1 \n" - "psrlw $0x4,%%xmm3 \n" - "por %%xmm1,%%xmm0 \n" - "por %%xmm3,%%xmm2 \n" - "movdqa %%xmm0,%%xmm1 \n" - "punpcklbw %%xmm2,%%xmm0 \n" - "punpckhbw %%xmm2,%%xmm1 \n" - BUNDLEALIGN - MEMOPMEM(movdqa,xmm0,0x00,1,0,2) // movdqa %%xmm0,(%1,%0,2) - MEMOPMEM(movdqa,xmm1,0x10,1,0,2) // movdqa %%xmm1,0x10(%1,%0,2) 
- "lea " MEMLEA(0x10,0) ",%0 \n" - "sub $0x8,%2 \n" - "jg 1b \n" - : "+r"(src), // %0 - "+r"(dst), // %1 - "+r"(pix) // %2 - : - : "memory", "cc", "eax" -#if defined(__native_client__) && defined(__x86_64__) - , "r14" -#endif -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" -#endif - ); -} - -void ARGBToRGB24Row_SSSE3(const uint8* src, uint8* dst, int pix) { - asm volatile ( - "movdqa %3,%%xmm6 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n" - "movdqu " MEMACCESS2(0x30,0) ",%%xmm3 \n" - "lea " MEMLEA(0x40,0) ",%0 \n" - "pshufb %%xmm6,%%xmm0 \n" - "pshufb %%xmm6,%%xmm1 \n" - "pshufb %%xmm6,%%xmm2 \n" - "pshufb %%xmm6,%%xmm3 \n" - "movdqa %%xmm1,%%xmm4 \n" - "psrldq $0x4,%%xmm1 \n" - "pslldq $0xc,%%xmm4 \n" - "movdqa %%xmm2,%%xmm5 \n" - "por %%xmm4,%%xmm0 \n" - "pslldq $0x8,%%xmm5 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "por %%xmm5,%%xmm1 \n" - "psrldq $0x8,%%xmm2 \n" - "pslldq $0x4,%%xmm3 \n" - "por %%xmm3,%%xmm2 \n" - "movdqu %%xmm1," MEMACCESS2(0x10,1) " \n" - "movdqu %%xmm2," MEMACCESS2(0x20,1) " \n" - "lea " MEMLEA(0x30,1) ",%1 \n" - "sub $0x10,%2 \n" - "jg 1b \n" - : "+r"(src), // %0 - "+r"(dst), // %1 - "+r"(pix) // %2 - : "m"(kShuffleMaskARGBToRGB24) // %3 - : "memory", "cc" -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6" -#endif - ); -} - -void ARGBToRAWRow_SSSE3(const uint8* src, uint8* dst, int pix) { - asm volatile ( - "movdqa %3,%%xmm6 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n" - "movdqu " MEMACCESS2(0x30,0) ",%%xmm3 \n" - "lea " MEMLEA(0x40,0) ",%0 \n" - "pshufb %%xmm6,%%xmm0 \n" - "pshufb %%xmm6,%%xmm1 \n" - "pshufb %%xmm6,%%xmm2 \n" - "pshufb %%xmm6,%%xmm3 \n" - "movdqa %%xmm1,%%xmm4 \n" - "psrldq $0x4,%%xmm1 \n" - "pslldq $0xc,%%xmm4 \n" - "movdqa %%xmm2,%%xmm5 \n" - "por 
%%xmm4,%%xmm0 \n" - "pslldq $0x8,%%xmm5 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "por %%xmm5,%%xmm1 \n" - "psrldq $0x8,%%xmm2 \n" - "pslldq $0x4,%%xmm3 \n" - "por %%xmm3,%%xmm2 \n" - "movdqu %%xmm1," MEMACCESS2(0x10,1) " \n" - "movdqu %%xmm2," MEMACCESS2(0x20,1) " \n" - "lea " MEMLEA(0x30,1) ",%1 \n" - "sub $0x10,%2 \n" - "jg 1b \n" - : "+r"(src), // %0 - "+r"(dst), // %1 - "+r"(pix) // %2 - : "m"(kShuffleMaskARGBToRAW) // %3 - : "memory", "cc" -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6" -#endif - ); -} - -void ARGBToRGB565Row_SSE2(const uint8* src, uint8* dst, int pix) { - asm volatile ( - "pcmpeqb %%xmm3,%%xmm3 \n" - "psrld $0x1b,%%xmm3 \n" - "pcmpeqb %%xmm4,%%xmm4 \n" - "psrld $0x1a,%%xmm4 \n" - "pslld $0x5,%%xmm4 \n" - "pcmpeqb %%xmm5,%%xmm5 \n" - "pslld $0xb,%%xmm5 \n" - LABELALIGN - "1: \n" - "movdqa " MEMACCESS(0) ",%%xmm0 \n" - "movdqa %%xmm0,%%xmm1 \n" - "movdqa %%xmm0,%%xmm2 \n" - "pslld $0x8,%%xmm0 \n" - "psrld $0x3,%%xmm1 \n" - "psrld $0x5,%%xmm2 \n" - "psrad $0x10,%%xmm0 \n" - "pand %%xmm3,%%xmm1 \n" - "pand %%xmm4,%%xmm2 \n" - "pand %%xmm5,%%xmm0 \n" - "por %%xmm2,%%xmm1 \n" - "por %%xmm1,%%xmm0 \n" - "packssdw %%xmm0,%%xmm0 \n" - "lea " MEMLEA(0x10,0) ",%0 \n" - "movq %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x8,1) ",%1 \n" - "sub $0x4,%2 \n" - "jg 1b \n" - : "+r"(src), // %0 - "+r"(dst), // %1 - "+r"(pix) // %2 - : - : "memory", "cc" -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" -#endif - ); -} - -void ARGBToARGB1555Row_SSE2(const uint8* src, uint8* dst, int pix) { - asm volatile ( - "pcmpeqb %%xmm4,%%xmm4 \n" - "psrld $0x1b,%%xmm4 \n" - "movdqa %%xmm4,%%xmm5 \n" - "pslld $0x5,%%xmm5 \n" - "movdqa %%xmm4,%%xmm6 \n" - "pslld $0xa,%%xmm6 \n" - "pcmpeqb %%xmm7,%%xmm7 \n" - "pslld $0xf,%%xmm7 \n" - LABELALIGN - "1: \n" - "movdqa " MEMACCESS(0) ",%%xmm0 \n" - "movdqa %%xmm0,%%xmm1 \n" - "movdqa %%xmm0,%%xmm2 \n" - "movdqa %%xmm0,%%xmm3 \n" - "psrad $0x10,%%xmm0 \n" - "psrld 
$0x3,%%xmm1 \n" - "psrld $0x6,%%xmm2 \n" - "psrld $0x9,%%xmm3 \n" - "pand %%xmm7,%%xmm0 \n" - "pand %%xmm4,%%xmm1 \n" - "pand %%xmm5,%%xmm2 \n" - "pand %%xmm6,%%xmm3 \n" - "por %%xmm1,%%xmm0 \n" - "por %%xmm3,%%xmm2 \n" - "por %%xmm2,%%xmm0 \n" - "packssdw %%xmm0,%%xmm0 \n" - "lea " MEMLEA(0x10,0) ",%0 \n" - "movq %%xmm0," MEMACCESS(1) " \n" - "lea " MEMACCESS2(0x8,1) ",%1 \n" - "sub $0x4,%2 \n" - "jg 1b \n" - : "+r"(src), // %0 - "+r"(dst), // %1 - "+r"(pix) // %2 - : - : "memory", "cc" -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" -#endif - ); -} - -void ARGBToARGB4444Row_SSE2(const uint8* src, uint8* dst, int pix) { - asm volatile ( - "pcmpeqb %%xmm4,%%xmm4 \n" - "psllw $0xc,%%xmm4 \n" - "movdqa %%xmm4,%%xmm3 \n" - "psrlw $0x8,%%xmm3 \n" - LABELALIGN - "1: \n" - "movdqa " MEMACCESS(0) ",%%xmm0 \n" - "movdqa %%xmm0,%%xmm1 \n" - "pand %%xmm3,%%xmm0 \n" - "pand %%xmm4,%%xmm1 \n" - "psrlq $0x4,%%xmm0 \n" - "psrlq $0x8,%%xmm1 \n" - "por %%xmm1,%%xmm0 \n" - "packuswb %%xmm0,%%xmm0 \n" - "lea " MEMLEA(0x10,0) ",%0 \n" - "movq %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x8,1) ",%1 \n" - "sub $0x4,%2 \n" - "jg 1b \n" - : "+r"(src), // %0 - "+r"(dst), // %1 - "+r"(pix) // %2 - : - : "memory", "cc" -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4" -#endif - ); -} -#endif // HAS_RGB24TOARGBROW_SSSE3 - -#ifdef HAS_ARGBTOYROW_SSSE3 -void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) { - asm volatile ( - "movdqa %4,%%xmm5 \n" - "movdqa %3,%%xmm4 \n" - LABELALIGN - "1: \n" - "movdqa " MEMACCESS(0) ",%%xmm0 \n" - "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "movdqa " MEMACCESS2(0x20,0) ",%%xmm2 \n" - "movdqa " MEMACCESS2(0x30,0) ",%%xmm3 \n" - "pmaddubsw %%xmm4,%%xmm0 \n" - "pmaddubsw %%xmm4,%%xmm1 \n" - "pmaddubsw %%xmm4,%%xmm2 \n" - "pmaddubsw %%xmm4,%%xmm3 \n" - "lea " MEMLEA(0x40,0) ",%0 \n" - "phaddw %%xmm1,%%xmm0 \n" - "phaddw %%xmm3,%%xmm2 \n" - "psrlw $0x7,%%xmm0 \n" - "psrlw $0x7,%%xmm2 \n" - 
"packuswb %%xmm2,%%xmm0 \n" - "paddb %%xmm5,%%xmm0 \n" - "sub $0x10,%2 \n" - "movdqa %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "jg 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_y), // %1 - "+r"(pix) // %2 - : "m"(kARGBToY), // %3 - "m"(kAddY16) // %4 - : "memory", "cc" -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" -#endif - ); -} - -void ARGBToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) { - asm volatile ( - "movdqa %4,%%xmm5 \n" - "movdqa %3,%%xmm4 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n" - "movdqu " MEMACCESS2(0x30,0) ",%%xmm3 \n" - "pmaddubsw %%xmm4,%%xmm0 \n" - "pmaddubsw %%xmm4,%%xmm1 \n" - "pmaddubsw %%xmm4,%%xmm2 \n" - "pmaddubsw %%xmm4,%%xmm3 \n" - "lea " MEMLEA(0x40,0) ",%0 \n" - "phaddw %%xmm1,%%xmm0 \n" - "phaddw %%xmm3,%%xmm2 \n" - "psrlw $0x7,%%xmm0 \n" - "psrlw $0x7,%%xmm2 \n" - "packuswb %%xmm2,%%xmm0 \n" - "paddb %%xmm5,%%xmm0 \n" - "sub $0x10,%2 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "jg 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_y), // %1 - "+r"(pix) // %2 - : "m"(kARGBToY), // %3 - "m"(kAddY16) // %4 - : "memory", "cc" -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" -#endif - ); -} -#endif // HAS_ARGBTOYROW_SSSE3 - -#ifdef HAS_ARGBTOYJROW_SSSE3 -void ARGBToYJRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) { - asm volatile ( - "movdqa %3,%%xmm4 \n" - "movdqa %4,%%xmm5 \n" - LABELALIGN - "1: \n" - "movdqa " MEMACCESS(0) ",%%xmm0 \n" - "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "movdqa " MEMACCESS2(0x20,0) ",%%xmm2 \n" - "movdqa " MEMACCESS2(0x30,0) ",%%xmm3 \n" - "pmaddubsw %%xmm4,%%xmm0 \n" - "pmaddubsw %%xmm4,%%xmm1 \n" - "pmaddubsw %%xmm4,%%xmm2 \n" - "pmaddubsw %%xmm4,%%xmm3 \n" - "lea " MEMLEA(0x40,0) ",%0 \n" - "phaddw %%xmm1,%%xmm0 \n" - "phaddw %%xmm3,%%xmm2 \n" - "paddw %%xmm5,%%xmm0 \n" - 
"paddw %%xmm5,%%xmm2 \n" - "psrlw $0x7,%%xmm0 \n" - "psrlw $0x7,%%xmm2 \n" - "packuswb %%xmm2,%%xmm0 \n" - "sub $0x10,%2 \n" - "movdqa %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "jg 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_y), // %1 - "+r"(pix) // %2 - : "m"(kARGBToYJ), // %3 - "m"(kAddYJ64) // %4 - : "memory", "cc" -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" -#endif - ); -} - -void ARGBToYJRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) { - asm volatile ( - "movdqa %3,%%xmm4 \n" - "movdqa %4,%%xmm5 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n" - "movdqu " MEMACCESS2(0x30,0) ",%%xmm3 \n" - "pmaddubsw %%xmm4,%%xmm0 \n" - "pmaddubsw %%xmm4,%%xmm1 \n" - "pmaddubsw %%xmm4,%%xmm2 \n" - "pmaddubsw %%xmm4,%%xmm3 \n" - "lea " MEMLEA(0x40,0) ",%0 \n" - "phaddw %%xmm1,%%xmm0 \n" - "phaddw %%xmm3,%%xmm2 \n" - "paddw %%xmm5,%%xmm0 \n" - "paddw %%xmm5,%%xmm2 \n" - "psrlw $0x7,%%xmm0 \n" - "psrlw $0x7,%%xmm2 \n" - "packuswb %%xmm2,%%xmm0 \n" - "sub $0x10,%2 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "jg 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_y), // %1 - "+r"(pix) // %2 - : "m"(kARGBToYJ), // %3 - "m"(kAddYJ64) // %4 - : "memory", "cc" -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" -#endif - ); -} -#endif // HAS_ARGBTOYJROW_SSSE3 - -#ifdef HAS_ARGBTOUVROW_SSSE3 -// TODO(fbarchard): pass xmm constants to single block of assembly. -// fpic on GCC 4.2 for OSX runs out of GPR registers. "m" effectively takes -// 3 registers - ebx, ebp and eax. "m" can be passed with 3 normal registers, -// or 4 if stack frame is disabled. Doing 2 assembly blocks is a work around -// and considered unsafe. 
-void ARGBToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width) { - asm volatile ( - "movdqa %0,%%xmm4 \n" - "movdqa %1,%%xmm3 \n" - "movdqa %2,%%xmm5 \n" - : - : "m"(kARGBToU), // %0 - "m"(kARGBToV), // %1 - "m"(kAddUV128) // %2 - ); - asm volatile ( - "sub %1,%2 \n" - LABELALIGN - "1: \n" - "movdqa " MEMACCESS(0) ",%%xmm0 \n" - "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "movdqa " MEMACCESS2(0x20,0) ",%%xmm2 \n" - "movdqa " MEMACCESS2(0x30,0) ",%%xmm6 \n" - BUNDLEALIGN - MEMOPREG(pavgb,0x00,0,4,1,xmm0) // pavgb (%0,%4,1),%%xmm0 - MEMOPREG(pavgb,0x10,0,4,1,xmm1) // pavgb 0x10(%0,%4,1),%%xmm1 - MEMOPREG(pavgb,0x20,0,4,1,xmm2) // pavgb 0x20(%0,%4,1),%%xmm2 - MEMOPREG(pavgb,0x30,0,4,1,xmm6) // pavgb 0x30(%0,%4,1),%%xmm6 - "lea " MEMLEA(0x40,0) ",%0 \n" - "movdqa %%xmm0,%%xmm7 \n" - "shufps $0x88,%%xmm1,%%xmm0 \n" - "shufps $0xdd,%%xmm1,%%xmm7 \n" - "pavgb %%xmm7,%%xmm0 \n" - "movdqa %%xmm2,%%xmm7 \n" - "shufps $0x88,%%xmm6,%%xmm2 \n" - "shufps $0xdd,%%xmm6,%%xmm7 \n" - "pavgb %%xmm7,%%xmm2 \n" - "movdqa %%xmm0,%%xmm1 \n" - "movdqa %%xmm2,%%xmm6 \n" - "pmaddubsw %%xmm4,%%xmm0 \n" - "pmaddubsw %%xmm4,%%xmm2 \n" - "pmaddubsw %%xmm3,%%xmm1 \n" - "pmaddubsw %%xmm3,%%xmm6 \n" - "phaddw %%xmm2,%%xmm0 \n" - "phaddw %%xmm6,%%xmm1 \n" - "psraw $0x8,%%xmm0 \n" - "psraw $0x8,%%xmm1 \n" - "packsswb %%xmm1,%%xmm0 \n" - "paddb %%xmm5,%%xmm0 \n" - "sub $0x10,%3 \n" - "movlps %%xmm0," MEMACCESS(1) " \n" - BUNDLEALIGN - MEMOPMEM(movhps,xmm0,0x00,1,2,1) // movhps %%xmm0,(%1,%2,1) - "lea " MEMLEA(0x8,1) ",%1 \n" - "jg 1b \n" - : "+r"(src_argb0), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+rm"(width) // %3 - : "r"((intptr_t)(src_stride_argb)) // %4 - : "memory", "cc" -#if defined(__native_client__) && defined(__x86_64__) - , "r14" -#endif -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm6", "xmm7" -#endif - ); -} - -// TODO(fbarchard): Share code with ARGBToUVRow_SSSE3. 
-void ARGBToUVJRow_SSSE3(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width) { - asm volatile ( - "movdqa %0,%%xmm4 \n" - "movdqa %1,%%xmm3 \n" - "movdqa %2,%%xmm5 \n" - : - : "m"(kARGBToUJ), // %0 - "m"(kARGBToVJ), // %1 - "m"(kAddUVJ128) // %2 - ); - asm volatile ( - "sub %1,%2 \n" - LABELALIGN - "1: \n" - "movdqa " MEMACCESS(0) ",%%xmm0 \n" - "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "movdqa " MEMACCESS2(0x20,0) ",%%xmm2 \n" - "movdqa " MEMACCESS2(0x30,0) ",%%xmm6 \n" - BUNDLEALIGN - MEMOPREG(pavgb,0x00,0,4,1,xmm0) // pavgb (%0,%4,1),%%xmm0 - MEMOPREG(pavgb,0x10,0,4,1,xmm1) // pavgb 0x10(%0,%4,1),%%xmm1 - MEMOPREG(pavgb,0x20,0,4,1,xmm2) // pavgb 0x20(%0,%4,1),%%xmm2 - MEMOPREG(pavgb,0x30,0,4,1,xmm6) // pavgb 0x30(%0,%4,1),%%xmm6 - "lea " MEMLEA(0x40,0) ",%0 \n" - "movdqa %%xmm0,%%xmm7 \n" - "shufps $0x88,%%xmm1,%%xmm0 \n" - "shufps $0xdd,%%xmm1,%%xmm7 \n" - "pavgb %%xmm7,%%xmm0 \n" - "movdqa %%xmm2,%%xmm7 \n" - "shufps $0x88,%%xmm6,%%xmm2 \n" - "shufps $0xdd,%%xmm6,%%xmm7 \n" - "pavgb %%xmm7,%%xmm2 \n" - "movdqa %%xmm0,%%xmm1 \n" - "movdqa %%xmm2,%%xmm6 \n" - "pmaddubsw %%xmm4,%%xmm0 \n" - "pmaddubsw %%xmm4,%%xmm2 \n" - "pmaddubsw %%xmm3,%%xmm1 \n" - "pmaddubsw %%xmm3,%%xmm6 \n" - "phaddw %%xmm2,%%xmm0 \n" - "phaddw %%xmm6,%%xmm1 \n" - "paddw %%xmm5,%%xmm0 \n" - "paddw %%xmm5,%%xmm1 \n" - "psraw $0x8,%%xmm0 \n" - "psraw $0x8,%%xmm1 \n" - "packsswb %%xmm1,%%xmm0 \n" - "sub $0x10,%3 \n" - "movlps %%xmm0," MEMACCESS(1) " \n" - BUNDLEALIGN - MEMOPMEM(movhps,xmm0,0x00,1,2,1) // movhps %%xmm0,(%1,%2,1) - "lea " MEMLEA(0x8,1) ",%1 \n" - "jg 1b \n" - : "+r"(src_argb0), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+rm"(width) // %3 - : "r"((intptr_t)(src_stride_argb)) // %4 - : "memory", "cc" -#if defined(__native_client__) && defined(__x86_64__) - , "r14" -#endif -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm6", "xmm7" -#endif - ); -} - -void ARGBToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb, - 
uint8* dst_u, uint8* dst_v, int width) { - asm volatile ( - "movdqa %0,%%xmm4 \n" - "movdqa %1,%%xmm3 \n" - "movdqa %2,%%xmm5 \n" - : - : "m"(kARGBToU), // %0 - "m"(kARGBToV), // %1 - "m"(kAddUV128) // %2 - ); - asm volatile ( - "sub %1,%2 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n" - "movdqu " MEMACCESS2(0x30,0) ",%%xmm6 \n" - BUNDLEALIGN - MEMOPREG(movdqu,0x00,0,4,1,xmm7) // movdqu (%0,%4,1),%%xmm7 - "pavgb %%xmm7,%%xmm0 \n" - MEMOPREG(movdqu,0x10,0,4,1,xmm7) // movdqu 0x10(%0,%4,1),%%xmm7 - "pavgb %%xmm7,%%xmm1 \n" - MEMOPREG(movdqu,0x20,0,4,1,xmm7) // movdqu 0x20(%0,%4,1),%%xmm7 - "pavgb %%xmm7,%%xmm2 \n" - MEMOPREG(movdqu,0x30,0,4,1,xmm7) // movdqu 0x30(%0,%4,1),%%xmm7 - "pavgb %%xmm7,%%xmm6 \n" - "lea " MEMLEA(0x40,0) ",%0 \n" - "movdqa %%xmm0,%%xmm7 \n" - "shufps $0x88,%%xmm1,%%xmm0 \n" - "shufps $0xdd,%%xmm1,%%xmm7 \n" - "pavgb %%xmm7,%%xmm0 \n" - "movdqa %%xmm2,%%xmm7 \n" - "shufps $0x88,%%xmm6,%%xmm2 \n" - "shufps $0xdd,%%xmm6,%%xmm7 \n" - "pavgb %%xmm7,%%xmm2 \n" - "movdqa %%xmm0,%%xmm1 \n" - "movdqa %%xmm2,%%xmm6 \n" - "pmaddubsw %%xmm4,%%xmm0 \n" - "pmaddubsw %%xmm4,%%xmm2 \n" - "pmaddubsw %%xmm3,%%xmm1 \n" - "pmaddubsw %%xmm3,%%xmm6 \n" - "phaddw %%xmm2,%%xmm0 \n" - "phaddw %%xmm6,%%xmm1 \n" - "psraw $0x8,%%xmm0 \n" - "psraw $0x8,%%xmm1 \n" - "packsswb %%xmm1,%%xmm0 \n" - "paddb %%xmm5,%%xmm0 \n" - "sub $0x10,%3 \n" - "movlps %%xmm0," MEMACCESS(1) " \n" - BUNDLEALIGN - MEMOPMEM(movhps,xmm0,0x00,1,2,1) // movhps %%xmm0,(%1,%2,1) - "lea " MEMLEA(0x8,1) ",%1 \n" - "jg 1b \n" - : "+r"(src_argb0), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+rm"(width) // %3 - : "r"((intptr_t)(src_stride_argb)) // %4 - : "memory", "cc" -#if defined(__native_client__) && defined(__x86_64__) - , "r14" -#endif -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm6", "xmm7" -#endif - ); -} - -void ARGBToUVJRow_Unaligned_SSSE3(const uint8* src_argb0, int 
src_stride_argb, - uint8* dst_u, uint8* dst_v, int width) { - asm volatile ( - "movdqa %0,%%xmm4 \n" - "movdqa %1,%%xmm3 \n" - "movdqa %2,%%xmm5 \n" - : - : "m"(kARGBToUJ), // %0 - "m"(kARGBToVJ), // %1 - "m"(kAddUVJ128) // %2 - ); - asm volatile ( - "sub %1,%2 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n" - "movdqu " MEMACCESS2(0x30,0) ",%%xmm6 \n" - BUNDLEALIGN - MEMOPREG(movdqu,0x00,0,4,1,xmm7) // movdqu (%0,%4,1),%%xmm7 - "pavgb %%xmm7,%%xmm0 \n" - MEMOPREG(movdqu,0x10,0,4,1,xmm7) // movdqu 0x10(%0,%4,1),%%xmm7 - "pavgb %%xmm7,%%xmm1 \n" - MEMOPREG(movdqu,0x20,0,4,1,xmm7) // movdqu 0x20(%0,%4,1),%%xmm7 - "pavgb %%xmm7,%%xmm2 \n" - MEMOPREG(movdqu,0x30,0,4,1,xmm7) // movdqu 0x30(%0,%4,1),%%xmm7 - "pavgb %%xmm7,%%xmm6 \n" - "lea " MEMLEA(0x40,0) ",%0 \n" - "movdqa %%xmm0,%%xmm7 \n" - "shufps $0x88,%%xmm1,%%xmm0 \n" - "shufps $0xdd,%%xmm1,%%xmm7 \n" - "pavgb %%xmm7,%%xmm0 \n" - "movdqa %%xmm2,%%xmm7 \n" - "shufps $0x88,%%xmm6,%%xmm2 \n" - "shufps $0xdd,%%xmm6,%%xmm7 \n" - "pavgb %%xmm7,%%xmm2 \n" - "movdqa %%xmm0,%%xmm1 \n" - "movdqa %%xmm2,%%xmm6 \n" - "pmaddubsw %%xmm4,%%xmm0 \n" - "pmaddubsw %%xmm4,%%xmm2 \n" - "pmaddubsw %%xmm3,%%xmm1 \n" - "pmaddubsw %%xmm3,%%xmm6 \n" - "phaddw %%xmm2,%%xmm0 \n" - "phaddw %%xmm6,%%xmm1 \n" - "paddw %%xmm5,%%xmm0 \n" - "paddw %%xmm5,%%xmm1 \n" - "psraw $0x8,%%xmm0 \n" - "psraw $0x8,%%xmm1 \n" - "packsswb %%xmm1,%%xmm0 \n" - "sub $0x10,%3 \n" - "movlps %%xmm0," MEMACCESS(1) " \n" - BUNDLEALIGN - MEMOPMEM(movhps,xmm0,0x00,1,2,1) // movhps %%xmm0,(%1,%2,1) - "lea " MEMLEA(0x8,1) ",%1 \n" - "jg 1b \n" - : "+r"(src_argb0), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+rm"(width) // %3 - : "r"((intptr_t)(src_stride_argb)) - : "memory", "cc" -#if defined(__native_client__) && defined(__x86_64__) - , "r14" -#endif -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm6", "xmm7" -#endif - ); -} - -void 
ARGBToUV444Row_SSSE3(const uint8* src_argb, uint8* dst_u, uint8* dst_v, - int width) { - asm volatile ( - "movdqa %0,%%xmm4 \n" - "movdqa %1,%%xmm3 \n" - "movdqa %2,%%xmm5 \n" - : - : "m"(kARGBToU), // %0 - "m"(kARGBToV), // %1 - "m"(kAddUV128) // %2 - ); - asm volatile ( - "sub %1,%2 \n" - LABELALIGN - "1: \n" - "movdqa " MEMACCESS(0) ",%%xmm0 \n" - "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "movdqa " MEMACCESS2(0x20,0) ",%%xmm2 \n" - "movdqa " MEMACCESS2(0x30,0) ",%%xmm6 \n" - "pmaddubsw %%xmm4,%%xmm0 \n" - "pmaddubsw %%xmm4,%%xmm1 \n" - "pmaddubsw %%xmm4,%%xmm2 \n" - "pmaddubsw %%xmm4,%%xmm6 \n" - "phaddw %%xmm1,%%xmm0 \n" - "phaddw %%xmm6,%%xmm2 \n" - "psraw $0x8,%%xmm0 \n" - "psraw $0x8,%%xmm2 \n" - "packsswb %%xmm2,%%xmm0 \n" - "paddb %%xmm5,%%xmm0 \n" - "sub $0x10,%3 \n" - "movdqa %%xmm0," MEMACCESS(1) " \n" - "movdqa " MEMACCESS(0) ",%%xmm0 \n" - "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "movdqa " MEMACCESS2(0x20,0) ",%%xmm2 \n" - "movdqa " MEMACCESS2(0x30,0) ",%%xmm6 \n" - "pmaddubsw %%xmm3,%%xmm0 \n" - "pmaddubsw %%xmm3,%%xmm1 \n" - "pmaddubsw %%xmm3,%%xmm2 \n" - "pmaddubsw %%xmm3,%%xmm6 \n" - "phaddw %%xmm1,%%xmm0 \n" - "phaddw %%xmm6,%%xmm2 \n" - "psraw $0x8,%%xmm0 \n" - "psraw $0x8,%%xmm2 \n" - "packsswb %%xmm2,%%xmm0 \n" - "paddb %%xmm5,%%xmm0 \n" - "lea " MEMLEA(0x40,0) ",%0 \n" - BUNDLEALIGN - MEMOPMEM(movdqa,xmm0,0x00,1,2,1) // movdqa %%xmm0,(%1,%2,1) - "lea " MEMLEA(0x10,1) ",%1 \n" - "jg 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+rm"(width) // %3 - : - : "memory", "cc" -#if defined(__native_client__) && defined(__x86_64__) - , "r14" -#endif -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm6" -#endif - ); -} - -void ARGBToUV444Row_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_u, - uint8* dst_v, int width) { - asm volatile ( - "movdqa %0,%%xmm4 \n" - "movdqa %1,%%xmm3 \n" - "movdqa %2,%%xmm5 \n" - : - : "m"(kARGBToU), // %0 - "m"(kARGBToV), // %1 - "m"(kAddUV128) // %2 - ); - asm volatile ( - "sub 
%1,%2 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n" - "movdqu " MEMACCESS2(0x30,0) ",%%xmm6 \n" - "pmaddubsw %%xmm4,%%xmm0 \n" - "pmaddubsw %%xmm4,%%xmm1 \n" - "pmaddubsw %%xmm4,%%xmm2 \n" - "pmaddubsw %%xmm4,%%xmm6 \n" - "phaddw %%xmm1,%%xmm0 \n" - "phaddw %%xmm6,%%xmm2 \n" - "psraw $0x8,%%xmm0 \n" - "psraw $0x8,%%xmm2 \n" - "packsswb %%xmm2,%%xmm0 \n" - "paddb %%xmm5,%%xmm0 \n" - "sub $0x10,%3 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n" - "movdqu " MEMACCESS2(0x30,0) ",%%xmm6 \n" - "pmaddubsw %%xmm3,%%xmm0 \n" - "pmaddubsw %%xmm3,%%xmm1 \n" - "pmaddubsw %%xmm3,%%xmm2 \n" - "pmaddubsw %%xmm3,%%xmm6 \n" - "phaddw %%xmm1,%%xmm0 \n" - "phaddw %%xmm6,%%xmm2 \n" - "psraw $0x8,%%xmm0 \n" - "psraw $0x8,%%xmm2 \n" - "packsswb %%xmm2,%%xmm0 \n" - "paddb %%xmm5,%%xmm0 \n" - "lea " MEMLEA(0x40,0) ",%0 \n" - BUNDLEALIGN - MEMOPMEM(movdqu,xmm0,0x00,1,2,1) // movdqu %%xmm0,(%1,%2,1) - "lea " MEMLEA(0x10,1) ",%1 \n" - "jg 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+rm"(width) // %3 - : - : "memory", "cc" -#if defined(__native_client__) && defined(__x86_64__) - , "r14" -#endif -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm6" -#endif - ); -} - -void ARGBToUV422Row_SSSE3(const uint8* src_argb0, - uint8* dst_u, uint8* dst_v, int width) { - asm volatile ( - "movdqa %0,%%xmm4 \n" - "movdqa %1,%%xmm3 \n" - "movdqa %2,%%xmm5 \n" - : - : "m"(kARGBToU), // %0 - "m"(kARGBToV), // %1 - "m"(kAddUV128) // %2 - ); - asm volatile ( - "sub %1,%2 \n" - LABELALIGN - "1: \n" - "movdqa " MEMACCESS(0) ",%%xmm0 \n" - "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "movdqa " MEMACCESS2(0x20,0) ",%%xmm2 \n" - "movdqa " MEMACCESS2(0x30,0) ",%%xmm6 \n" - "lea " MEMLEA(0x40,0) ",%0 \n" - "movdqa %%xmm0,%%xmm7 \n" - "shufps 
$0x88,%%xmm1,%%xmm0 \n" - "shufps $0xdd,%%xmm1,%%xmm7 \n" - "pavgb %%xmm7,%%xmm0 \n" - "movdqa %%xmm2,%%xmm7 \n" - "shufps $0x88,%%xmm6,%%xmm2 \n" - "shufps $0xdd,%%xmm6,%%xmm7 \n" - "pavgb %%xmm7,%%xmm2 \n" - "movdqa %%xmm0,%%xmm1 \n" - "movdqa %%xmm2,%%xmm6 \n" - "pmaddubsw %%xmm4,%%xmm0 \n" - "pmaddubsw %%xmm4,%%xmm2 \n" - "pmaddubsw %%xmm3,%%xmm1 \n" - "pmaddubsw %%xmm3,%%xmm6 \n" - "phaddw %%xmm2,%%xmm0 \n" - "phaddw %%xmm6,%%xmm1 \n" - "psraw $0x8,%%xmm0 \n" - "psraw $0x8,%%xmm1 \n" - "packsswb %%xmm1,%%xmm0 \n" - "paddb %%xmm5,%%xmm0 \n" - "sub $0x10,%3 \n" - "movlps %%xmm0," MEMACCESS(1) " \n" - BUNDLEALIGN - MEMOPMEM(movhps,xmm0,0x00,1,2,1) // movhps %%xmm0,(%1,%2,1) - "lea " MEMLEA(0x8,1) ",%1 \n" - "jg 1b \n" - : "+r"(src_argb0), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+rm"(width) // %3 - : - : "memory", "cc" -#if defined(__native_client__) && defined(__x86_64__) - , "r14" -#endif -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm6", "xmm7" -#endif - ); -} - -void ARGBToUV422Row_Unaligned_SSSE3(const uint8* src_argb0, - uint8* dst_u, uint8* dst_v, int width) { - asm volatile ( - "movdqa %0,%%xmm4 \n" - "movdqa %1,%%xmm3 \n" - "movdqa %2,%%xmm5 \n" - : - : "m"(kARGBToU), // %0 - "m"(kARGBToV), // %1 - "m"(kAddUV128) // %2 - ); - asm volatile ( - "sub %1,%2 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n" - "movdqu " MEMACCESS2(0x30,0) ",%%xmm6 \n" - "lea " MEMLEA(0x40,0) ",%0 \n" - "movdqa %%xmm0,%%xmm7 \n" - "shufps $0x88,%%xmm1,%%xmm0 \n" - "shufps $0xdd,%%xmm1,%%xmm7 \n" - "pavgb %%xmm7,%%xmm0 \n" - "movdqa %%xmm2,%%xmm7 \n" - "shufps $0x88,%%xmm6,%%xmm2 \n" - "shufps $0xdd,%%xmm6,%%xmm7 \n" - "pavgb %%xmm7,%%xmm2 \n" - "movdqa %%xmm0,%%xmm1 \n" - "movdqa %%xmm2,%%xmm6 \n" - "pmaddubsw %%xmm4,%%xmm0 \n" - "pmaddubsw %%xmm4,%%xmm2 \n" - "pmaddubsw %%xmm3,%%xmm1 \n" - "pmaddubsw %%xmm3,%%xmm6 \n" - "phaddw %%xmm2,%%xmm0 \n" - "phaddw 
%%xmm6,%%xmm1 \n" - "psraw $0x8,%%xmm0 \n" - "psraw $0x8,%%xmm1 \n" - "packsswb %%xmm1,%%xmm0 \n" - "paddb %%xmm5,%%xmm0 \n" - "sub $0x10,%3 \n" - "movlps %%xmm0," MEMACCESS(1) " \n" - BUNDLEALIGN - MEMOPMEM(movhps,xmm0,0x00,1,2,1) // movhps %%xmm0,(%1,%2,1) - "lea " MEMLEA(0x8,1) ",%1 \n" - "jg 1b \n" - : "+r"(src_argb0), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+rm"(width) // %3 - : - : "memory", "cc" -#if defined(__native_client__) && defined(__x86_64__) - , "r14" -#endif -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm6", "xmm7" -#endif - ); -} - -void BGRAToYRow_SSSE3(const uint8* src_bgra, uint8* dst_y, int pix) { - asm volatile ( - "movdqa %4,%%xmm5 \n" - "movdqa %3,%%xmm4 \n" - LABELALIGN - "1: \n" - "movdqa " MEMACCESS(0) ",%%xmm0 \n" - "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "movdqa " MEMACCESS2(0x20,0) ",%%xmm2 \n" - "movdqa " MEMACCESS2(0x30,0) ",%%xmm3 \n" - "pmaddubsw %%xmm4,%%xmm0 \n" - "pmaddubsw %%xmm4,%%xmm1 \n" - "pmaddubsw %%xmm4,%%xmm2 \n" - "pmaddubsw %%xmm4,%%xmm3 \n" - "lea " MEMLEA(0x40,0) ",%0 \n" - "phaddw %%xmm1,%%xmm0 \n" - "phaddw %%xmm3,%%xmm2 \n" - "psrlw $0x7,%%xmm0 \n" - "psrlw $0x7,%%xmm2 \n" - "packuswb %%xmm2,%%xmm0 \n" - "paddb %%xmm5,%%xmm0 \n" - "sub $0x10,%2 \n" - "movdqa %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "jg 1b \n" - : "+r"(src_bgra), // %0 - "+r"(dst_y), // %1 - "+r"(pix) // %2 - : "m"(kBGRAToY), // %3 - "m"(kAddY16) // %4 - : "memory", "cc" -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" -#endif - ); -} - -void BGRAToYRow_Unaligned_SSSE3(const uint8* src_bgra, uint8* dst_y, int pix) { - asm volatile ( - "movdqa %4,%%xmm5 \n" - "movdqa %3,%%xmm4 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n" - "movdqu " MEMACCESS2(0x30,0) ",%%xmm3 \n" - "pmaddubsw %%xmm4,%%xmm0 \n" - "pmaddubsw %%xmm4,%%xmm1 \n" - "pmaddubsw %%xmm4,%%xmm2 \n" - "pmaddubsw 
%%xmm4,%%xmm3 \n" - "lea " MEMLEA(0x40,0) ",%0 \n" - "phaddw %%xmm1,%%xmm0 \n" - "phaddw %%xmm3,%%xmm2 \n" - "psrlw $0x7,%%xmm0 \n" - "psrlw $0x7,%%xmm2 \n" - "packuswb %%xmm2,%%xmm0 \n" - "paddb %%xmm5,%%xmm0 \n" - "sub $0x10,%2 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "jg 1b \n" - : "+r"(src_bgra), // %0 - "+r"(dst_y), // %1 - "+r"(pix) // %2 - : "m"(kBGRAToY), // %3 - "m"(kAddY16) // %4 - : "memory", "cc" -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" -#endif - ); -} - -void BGRAToUVRow_SSSE3(const uint8* src_bgra0, int src_stride_bgra, - uint8* dst_u, uint8* dst_v, int width) { - asm volatile ( - "movdqa %0,%%xmm4 \n" - "movdqa %1,%%xmm3 \n" - "movdqa %2,%%xmm5 \n" - : - : "m"(kBGRAToU), // %0 - "m"(kBGRAToV), // %1 - "m"(kAddUV128) // %2 - ); - asm volatile ( - "sub %1,%2 \n" - LABELALIGN - "1: \n" - "movdqa " MEMACCESS(0) ",%%xmm0 \n" - "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "movdqa " MEMACCESS2(0x20,0) ",%%xmm2 \n" - "movdqa " MEMACCESS2(0x30,0) ",%%xmm6 \n" - BUNDLEALIGN - MEMOPREG(pavgb,0x00,0,4,1,xmm0) // pavgb (%0,%4,1),%%xmm0 - MEMOPREG(pavgb,0x10,0,4,1,xmm1) // pavgb 0x10(%0,%4,1),%%xmm1 - MEMOPREG(pavgb,0x20,0,4,1,xmm2) // pavgb 0x20(%0,%4,1),%%xmm2 - MEMOPREG(pavgb,0x30,0,4,1,xmm6) // pavgb 0x30(%0,%4,1),%%xmm6 - "lea " MEMLEA(0x40,0) ",%0 \n" - "movdqa %%xmm0,%%xmm7 \n" - "shufps $0x88,%%xmm1,%%xmm0 \n" - "shufps $0xdd,%%xmm1,%%xmm7 \n" - "pavgb %%xmm7,%%xmm0 \n" - "movdqa %%xmm2,%%xmm7 \n" - "shufps $0x88,%%xmm6,%%xmm2 \n" - "shufps $0xdd,%%xmm6,%%xmm7 \n" - "pavgb %%xmm7,%%xmm2 \n" - "movdqa %%xmm0,%%xmm1 \n" - "movdqa %%xmm2,%%xmm6 \n" - "pmaddubsw %%xmm4,%%xmm0 \n" - "pmaddubsw %%xmm4,%%xmm2 \n" - "pmaddubsw %%xmm3,%%xmm1 \n" - "pmaddubsw %%xmm3,%%xmm6 \n" - "phaddw %%xmm2,%%xmm0 \n" - "phaddw %%xmm6,%%xmm1 \n" - "psraw $0x8,%%xmm0 \n" - "psraw $0x8,%%xmm1 \n" - "packsswb %%xmm1,%%xmm0 \n" - "paddb %%xmm5,%%xmm0 \n" - "sub $0x10,%3 \n" - "movlps %%xmm0," MEMACCESS(1) " \n" - 
BUNDLEALIGN - MEMOPMEM(movhps,xmm0,0x00,1,2,1) // movhps %%xmm0,(%1,%2,1) - "lea " MEMLEA(0x8,1) ",%1 \n" - "jg 1b \n" - : "+r"(src_bgra0), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+rm"(width) // %3 - : "r"((intptr_t)(src_stride_bgra)) // %4 - : "memory", "cc" -#if defined(__native_client__) && defined(__x86_64__) - , "r14" -#endif -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm6", "xmm7" -#endif - ); -} - -void BGRAToUVRow_Unaligned_SSSE3(const uint8* src_bgra0, int src_stride_bgra, - uint8* dst_u, uint8* dst_v, int width) { - asm volatile ( - "movdqa %0,%%xmm4 \n" - "movdqa %1,%%xmm3 \n" - "movdqa %2,%%xmm5 \n" - : - : "m"(kBGRAToU), // %0 - "m"(kBGRAToV), // %1 - "m"(kAddUV128) // %2 - ); - asm volatile ( - "sub %1,%2 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n" - "movdqu " MEMACCESS2(0x30,0) ",%%xmm6 \n" - BUNDLEALIGN - MEMOPREG(movdqu,0x00,0,4,1,xmm7) // movdqu (%0,%4,1),%%xmm7 - "pavgb %%xmm7,%%xmm0 \n" - MEMOPREG(movdqu,0x10,0,4,1,xmm7) // movdqu 0x10(%0,%4,1),%%xmm7 - "pavgb %%xmm7,%%xmm1 \n" - MEMOPREG(movdqu,0x20,0,4,1,xmm7) // movdqu 0x20(%0,%4,1),%%xmm7 - "pavgb %%xmm7,%%xmm2 \n" - MEMOPREG(movdqu,0x30,0,4,1,xmm7) // movdqu 0x30(%0,%4,1),%%xmm7 - "pavgb %%xmm7,%%xmm6 \n" - "lea " MEMLEA(0x40,0) ",%0 \n" - "movdqa %%xmm0,%%xmm7 \n" - "shufps $0x88,%%xmm1,%%xmm0 \n" - "shufps $0xdd,%%xmm1,%%xmm7 \n" - "pavgb %%xmm7,%%xmm0 \n" - "movdqa %%xmm2,%%xmm7 \n" - "shufps $0x88,%%xmm6,%%xmm2 \n" - "shufps $0xdd,%%xmm6,%%xmm7 \n" - "pavgb %%xmm7,%%xmm2 \n" - "movdqa %%xmm0,%%xmm1 \n" - "movdqa %%xmm2,%%xmm6 \n" - "pmaddubsw %%xmm4,%%xmm0 \n" - "pmaddubsw %%xmm4,%%xmm2 \n" - "pmaddubsw %%xmm3,%%xmm1 \n" - "pmaddubsw %%xmm3,%%xmm6 \n" - "phaddw %%xmm2,%%xmm0 \n" - "phaddw %%xmm6,%%xmm1 \n" - "psraw $0x8,%%xmm0 \n" - "psraw $0x8,%%xmm1 \n" - "packsswb %%xmm1,%%xmm0 \n" - "paddb %%xmm5,%%xmm0 \n" - "sub $0x10,%3 \n" - "movlps %%xmm0," 
MEMACCESS(1) " \n" - BUNDLEALIGN - MEMOPMEM(movhps,xmm0,0x00,1,2,1) // movhps %%xmm0,(%1,%2,1) - "lea " MEMLEA(0x8,1) ",%1 \n" - "jg 1b \n" - : "+r"(src_bgra0), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+rm"(width) // %3 - : "r"((intptr_t)(src_stride_bgra)) // %4 - : "memory", "cc" -#if defined(__native_client__) && defined(__x86_64__) - , "r14" -#endif -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm6", "xmm7" -#endif - ); -} - -void ABGRToYRow_SSSE3(const uint8* src_abgr, uint8* dst_y, int pix) { - asm volatile ( - "movdqa %4,%%xmm5 \n" - "movdqa %3,%%xmm4 \n" - LABELALIGN - "1: \n" - "movdqa " MEMACCESS(0) ",%%xmm0 \n" - "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "movdqa " MEMACCESS2(0x20,0) ",%%xmm2 \n" - "movdqa " MEMACCESS2(0x30,0) ",%%xmm3 \n" - "pmaddubsw %%xmm4,%%xmm0 \n" - "pmaddubsw %%xmm4,%%xmm1 \n" - "pmaddubsw %%xmm4,%%xmm2 \n" - "pmaddubsw %%xmm4,%%xmm3 \n" - "lea " MEMLEA(0x40,0) ",%0 \n" - "phaddw %%xmm1,%%xmm0 \n" - "phaddw %%xmm3,%%xmm2 \n" - "psrlw $0x7,%%xmm0 \n" - "psrlw $0x7,%%xmm2 \n" - "packuswb %%xmm2,%%xmm0 \n" - "paddb %%xmm5,%%xmm0 \n" - "sub $0x10,%2 \n" - "movdqa %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "jg 1b \n" - : "+r"(src_abgr), // %0 - "+r"(dst_y), // %1 - "+r"(pix) // %2 - : "m"(kABGRToY), // %3 - "m"(kAddY16) // %4 - : "memory", "cc" -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" -#endif - ); -} - -void ABGRToYRow_Unaligned_SSSE3(const uint8* src_abgr, uint8* dst_y, int pix) { - asm volatile ( - "movdqa %4,%%xmm5 \n" - "movdqa %3,%%xmm4 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n" - "movdqu " MEMACCESS2(0x30,0) ",%%xmm3 \n" - "pmaddubsw %%xmm4,%%xmm0 \n" - "pmaddubsw %%xmm4,%%xmm1 \n" - "pmaddubsw %%xmm4,%%xmm2 \n" - "pmaddubsw %%xmm4,%%xmm3 \n" - "lea " MEMLEA(0x40,0) ",%0 \n" - "phaddw %%xmm1,%%xmm0 \n" - "phaddw %%xmm3,%%xmm2 \n" - "psrlw $0x7,%%xmm0 
\n" - "psrlw $0x7,%%xmm2 \n" - "packuswb %%xmm2,%%xmm0 \n" - "paddb %%xmm5,%%xmm0 \n" - "sub $0x10,%2 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "jg 1b \n" - : "+r"(src_abgr), // %0 - "+r"(dst_y), // %1 - "+r"(pix) // %2 - : "m"(kABGRToY), // %3 - "m"(kAddY16) // %4 - : "memory", "cc" -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" -#endif - ); -} - -void RGBAToYRow_SSSE3(const uint8* src_rgba, uint8* dst_y, int pix) { - asm volatile ( - "movdqa %4,%%xmm5 \n" - "movdqa %3,%%xmm4 \n" - LABELALIGN - "1: \n" - "movdqa " MEMACCESS(0) ",%%xmm0 \n" - "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "movdqa " MEMACCESS2(0x20,0) ",%%xmm2 \n" - "movdqa " MEMACCESS2(0x30,0) ",%%xmm3 \n" - "pmaddubsw %%xmm4,%%xmm0 \n" - "pmaddubsw %%xmm4,%%xmm1 \n" - "pmaddubsw %%xmm4,%%xmm2 \n" - "pmaddubsw %%xmm4,%%xmm3 \n" - "lea " MEMLEA(0x40,0) ",%0 \n" - "phaddw %%xmm1,%%xmm0 \n" - "phaddw %%xmm3,%%xmm2 \n" - "psrlw $0x7,%%xmm0 \n" - "psrlw $0x7,%%xmm2 \n" - "packuswb %%xmm2,%%xmm0 \n" - "paddb %%xmm5,%%xmm0 \n" - "sub $0x10,%2 \n" - "movdqa %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "jg 1b \n" - : "+r"(src_rgba), // %0 - "+r"(dst_y), // %1 - "+r"(pix) // %2 - : "m"(kRGBAToY), // %3 - "m"(kAddY16) // %4 - : "memory", "cc" -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" -#endif - ); -} - -void RGBAToYRow_Unaligned_SSSE3(const uint8* src_rgba, uint8* dst_y, int pix) { - asm volatile ( - "movdqa %4,%%xmm5 \n" - "movdqa %3,%%xmm4 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n" - "movdqu " MEMACCESS2(0x30,0) ",%%xmm3 \n" - "pmaddubsw %%xmm4,%%xmm0 \n" - "pmaddubsw %%xmm4,%%xmm1 \n" - "pmaddubsw %%xmm4,%%xmm2 \n" - "pmaddubsw %%xmm4,%%xmm3 \n" - "lea " MEMLEA(0x40,0) ",%0 \n" - "phaddw %%xmm1,%%xmm0 \n" - "phaddw %%xmm3,%%xmm2 \n" - "psrlw $0x7,%%xmm0 \n" - "psrlw $0x7,%%xmm2 \n" - 
"packuswb %%xmm2,%%xmm0 \n" - "paddb %%xmm5,%%xmm0 \n" - "sub $0x10,%2 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "jg 1b \n" - : "+r"(src_rgba), // %0 - "+r"(dst_y), // %1 - "+r"(pix) // %2 - : "m"(kRGBAToY), // %3 - "m"(kAddY16) // %4 - : "memory", "cc" -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" -#endif - ); -} - -void ABGRToUVRow_SSSE3(const uint8* src_abgr0, int src_stride_abgr, - uint8* dst_u, uint8* dst_v, int width) { - asm volatile ( - "movdqa %0,%%xmm4 \n" - "movdqa %1,%%xmm3 \n" - "movdqa %2,%%xmm5 \n" - : - : "m"(kABGRToU), // %0 - "m"(kABGRToV), // %1 - "m"(kAddUV128) // %2 - ); - asm volatile ( - "sub %1,%2 \n" - LABELALIGN - "1: \n" - "movdqa " MEMACCESS(0) ",%%xmm0 \n" - "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "movdqa " MEMACCESS2(0x20,0) ",%%xmm2 \n" - "movdqa " MEMACCESS2(0x30,0) ",%%xmm6 \n" - BUNDLEALIGN - MEMOPREG(pavgb,0x00,0,4,1,xmm0) // pavgb (%0,%4,1),%%xmm0 - MEMOPREG(pavgb,0x10,0,4,1,xmm1) // pavgb 0x10(%0,%4,1),%%xmm1 - MEMOPREG(pavgb,0x20,0,4,1,xmm2) // pavgb 0x20(%0,%4,1),%%xmm2 - MEMOPREG(pavgb,0x30,0,4,1,xmm6) // pavgb 0x30(%0,%4,1),%%xmm6 - "lea " MEMLEA(0x40,0) ",%0 \n" - "movdqa %%xmm0,%%xmm7 \n" - "shufps $0x88,%%xmm1,%%xmm0 \n" - "shufps $0xdd,%%xmm1,%%xmm7 \n" - "pavgb %%xmm7,%%xmm0 \n" - "movdqa %%xmm2,%%xmm7 \n" - "shufps $0x88,%%xmm6,%%xmm2 \n" - "shufps $0xdd,%%xmm6,%%xmm7 \n" - "pavgb %%xmm7,%%xmm2 \n" - "movdqa %%xmm0,%%xmm1 \n" - "movdqa %%xmm2,%%xmm6 \n" - "pmaddubsw %%xmm4,%%xmm0 \n" - "pmaddubsw %%xmm4,%%xmm2 \n" - "pmaddubsw %%xmm3,%%xmm1 \n" - "pmaddubsw %%xmm3,%%xmm6 \n" - "phaddw %%xmm2,%%xmm0 \n" - "phaddw %%xmm6,%%xmm1 \n" - "psraw $0x8,%%xmm0 \n" - "psraw $0x8,%%xmm1 \n" - "packsswb %%xmm1,%%xmm0 \n" - "paddb %%xmm5,%%xmm0 \n" - "sub $0x10,%3 \n" - "movlps %%xmm0," MEMACCESS(1) " \n" - BUNDLEALIGN - MEMOPMEM(movhps,xmm0,0x00,1,2,1) // movhps %%xmm0,(%1,%2,1) - "lea " MEMLEA(0x8,1) ",%1 \n" - "jg 1b \n" - : "+r"(src_abgr0), // %0 - "+r"(dst_u), 
// %1 - "+r"(dst_v), // %2 - "+rm"(width) // %3 - : "r"((intptr_t)(src_stride_abgr)) // %4 - : "memory", "cc" -#if defined(__native_client__) && defined(__x86_64__) - , "r14" -#endif -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm6", "xmm7" -#endif - ); -} - -void ABGRToUVRow_Unaligned_SSSE3(const uint8* src_abgr0, int src_stride_abgr, - uint8* dst_u, uint8* dst_v, int width) { - asm volatile ( - "movdqa %0,%%xmm4 \n" - "movdqa %1,%%xmm3 \n" - "movdqa %2,%%xmm5 \n" - : - : "m"(kABGRToU), // %0 - "m"(kABGRToV), // %1 - "m"(kAddUV128) // %2 - ); - asm volatile ( - "sub %1,%2 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n" - "movdqu " MEMACCESS2(0x30,0) ",%%xmm6 \n" - BUNDLEALIGN - MEMOPREG(movdqu,0x00,0,4,1,xmm7) // movdqu (%0,%4,1),%%xmm7 - "pavgb %%xmm7,%%xmm0 \n" - MEMOPREG(movdqu,0x10,0,4,1,xmm7) // movdqu 0x10(%0,%4,1),%%xmm7 - "pavgb %%xmm7,%%xmm1 \n" - MEMOPREG(movdqu,0x20,0,4,1,xmm7) // movdqu 0x20(%0,%4,1),%%xmm7 - "pavgb %%xmm7,%%xmm2 \n" - MEMOPREG(movdqu,0x30,0,4,1,xmm7) // movdqu 0x30(%0,%4,1),%%xmm7 - "pavgb %%xmm7,%%xmm6 \n" - "lea " MEMLEA(0x40,0) ",%0 \n" - "movdqa %%xmm0,%%xmm7 \n" - "shufps $0x88,%%xmm1,%%xmm0 \n" - "shufps $0xdd,%%xmm1,%%xmm7 \n" - "pavgb %%xmm7,%%xmm0 \n" - "movdqa %%xmm2,%%xmm7 \n" - "shufps $0x88,%%xmm6,%%xmm2 \n" - "shufps $0xdd,%%xmm6,%%xmm7 \n" - "pavgb %%xmm7,%%xmm2 \n" - "movdqa %%xmm0,%%xmm1 \n" - "movdqa %%xmm2,%%xmm6 \n" - "pmaddubsw %%xmm4,%%xmm0 \n" - "pmaddubsw %%xmm4,%%xmm2 \n" - "pmaddubsw %%xmm3,%%xmm1 \n" - "pmaddubsw %%xmm3,%%xmm6 \n" - "phaddw %%xmm2,%%xmm0 \n" - "phaddw %%xmm6,%%xmm1 \n" - "psraw $0x8,%%xmm0 \n" - "psraw $0x8,%%xmm1 \n" - "packsswb %%xmm1,%%xmm0 \n" - "paddb %%xmm5,%%xmm0 \n" - "sub $0x10,%3 \n" - "movlps %%xmm0," MEMACCESS(1) " \n" - BUNDLEALIGN - MEMOPMEM(movhps,xmm0,0x00,1,2,1) // movhps %%xmm0,(%1,%2,1) - "lea " MEMLEA(0x8,1) ",%1 \n" - "jg 1b \n" - : "+r"(src_abgr0), // %0 - 
"+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+rm"(width) // %3 - : "r"((intptr_t)(src_stride_abgr)) // %4 - : "memory", "cc" -#if defined(__native_client__) && defined(__x86_64__) - , "r14" -#endif -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm6", "xmm7" -#endif - ); -} - -void RGBAToUVRow_SSSE3(const uint8* src_rgba0, int src_stride_rgba, - uint8* dst_u, uint8* dst_v, int width) { - asm volatile ( - "movdqa %0,%%xmm4 \n" - "movdqa %1,%%xmm3 \n" - "movdqa %2,%%xmm5 \n" - : - : "m"(kRGBAToU), // %0 - "m"(kRGBAToV), // %1 - "m"(kAddUV128) // %2 - ); - asm volatile ( - "sub %1,%2 \n" - LABELALIGN - "1: \n" - "movdqa " MEMACCESS(0) ",%%xmm0 \n" - "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "movdqa " MEMACCESS2(0x20,0) ",%%xmm2 \n" - "movdqa " MEMACCESS2(0x30,0) ",%%xmm6 \n" - BUNDLEALIGN - MEMOPREG(pavgb,0x00,0,4,1,xmm0) // pavgb (%0,%4,1),%%xmm0 - MEMOPREG(pavgb,0x10,0,4,1,xmm1) // pavgb 0x10(%0,%4,1),%%xmm1 - MEMOPREG(pavgb,0x20,0,4,1,xmm2) // pavgb 0x20(%0,%4,1),%%xmm2 - MEMOPREG(pavgb,0x30,0,4,1,xmm6) // pavgb 0x30(%0,%4,1),%%xmm6 - "lea " MEMLEA(0x40,0) ",%0 \n" - "movdqa %%xmm0,%%xmm7 \n" - "shufps $0x88,%%xmm1,%%xmm0 \n" - "shufps $0xdd,%%xmm1,%%xmm7 \n" - "pavgb %%xmm7,%%xmm0 \n" - "movdqa %%xmm2,%%xmm7 \n" - "shufps $0x88,%%xmm6,%%xmm2 \n" - "shufps $0xdd,%%xmm6,%%xmm7 \n" - "pavgb %%xmm7,%%xmm2 \n" - "movdqa %%xmm0,%%xmm1 \n" - "movdqa %%xmm2,%%xmm6 \n" - "pmaddubsw %%xmm4,%%xmm0 \n" - "pmaddubsw %%xmm4,%%xmm2 \n" - "pmaddubsw %%xmm3,%%xmm1 \n" - "pmaddubsw %%xmm3,%%xmm6 \n" - "phaddw %%xmm2,%%xmm0 \n" - "phaddw %%xmm6,%%xmm1 \n" - "psraw $0x8,%%xmm0 \n" - "psraw $0x8,%%xmm1 \n" - "packsswb %%xmm1,%%xmm0 \n" - "paddb %%xmm5,%%xmm0 \n" - "sub $0x10,%3 \n" - "movlps %%xmm0," MEMACCESS(1) " \n" - BUNDLEALIGN - MEMOPMEM(movhps,xmm0,0x00,1,2,1) // movhps %%xmm0,(%1,%2,1) - "lea " MEMLEA(0x8,1) ",%1 \n" - "jg 1b \n" - : "+r"(src_rgba0), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+rm"(width) // %3 - : "r"((intptr_t)(src_stride_rgba)) - : "memory", 
"cc" -#if defined(__native_client__) && defined(__x86_64__) - , "r14" -#endif -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm6", "xmm7" -#endif - ); -} - -void RGBAToUVRow_Unaligned_SSSE3(const uint8* src_rgba0, int src_stride_rgba, - uint8* dst_u, uint8* dst_v, int width) { - asm volatile ( - "movdqa %0,%%xmm4 \n" - "movdqa %1,%%xmm3 \n" - "movdqa %2,%%xmm5 \n" - : - : "m"(kRGBAToU), // %0 - "m"(kRGBAToV), // %1 - "m"(kAddUV128) // %2 - ); - asm volatile ( - "sub %1,%2 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "movdqu " MEMACCESS2(0x20,0) ",%%xmm2 \n" - "movdqu " MEMACCESS2(0x30,0) ",%%xmm6 \n" - BUNDLEALIGN - MEMOPREG(movdqu,0x00,0,4,1,xmm7) // movdqu (%0,%4,1),%%xmm7 - "pavgb %%xmm7,%%xmm0 \n" - MEMOPREG(movdqu,0x10,0,4,1,xmm7) // movdqu 0x10(%0,%4,1),%%xmm7 - "pavgb %%xmm7,%%xmm1 \n" - MEMOPREG(movdqu,0x20,0,4,1,xmm7) // movdqu 0x20(%0,%4,1),%%xmm7 - "pavgb %%xmm7,%%xmm2 \n" - MEMOPREG(movdqu,0x30,0,4,1,xmm7) // movdqu 0x30(%0,%4,1),%%xmm7 - "pavgb %%xmm7,%%xmm6 \n" - "lea " MEMLEA(0x40,0) ",%0 \n" - "movdqa %%xmm0,%%xmm7 \n" - "shufps $0x88,%%xmm1,%%xmm0 \n" - "shufps $0xdd,%%xmm1,%%xmm7 \n" - "pavgb %%xmm7,%%xmm0 \n" - "movdqa %%xmm2,%%xmm7 \n" - "shufps $0x88,%%xmm6,%%xmm2 \n" - "shufps $0xdd,%%xmm6,%%xmm7 \n" - "pavgb %%xmm7,%%xmm2 \n" - "movdqa %%xmm0,%%xmm1 \n" - "movdqa %%xmm2,%%xmm6 \n" - "pmaddubsw %%xmm4,%%xmm0 \n" - "pmaddubsw %%xmm4,%%xmm2 \n" - "pmaddubsw %%xmm3,%%xmm1 \n" - "pmaddubsw %%xmm3,%%xmm6 \n" - "phaddw %%xmm2,%%xmm0 \n" - "phaddw %%xmm6,%%xmm1 \n" - "psraw $0x8,%%xmm0 \n" - "psraw $0x8,%%xmm1 \n" - "packsswb %%xmm1,%%xmm0 \n" - "paddb %%xmm5,%%xmm0 \n" - "sub $0x10,%3 \n" - "movlps %%xmm0," MEMACCESS(1) " \n" - BUNDLEALIGN - MEMOPMEM(movhps,xmm0,0x00,1,2,1) // movhps %%xmm0,(%1,%2,1) - "lea " MEMLEA(0x8,1) ",%1 \n" - "jg 1b \n" - : "+r"(src_rgba0), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+rm"(width) // %3 - : "r"((intptr_t)(src_stride_rgba)) // %4 - : 
"memory", "cc" -#if defined(__native_client__) && defined(__x86_64__) - , "r14" -#endif -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm6", "xmm7" -#endif - ); -} -#endif // HAS_ARGBTOUVROW_SSSE3 - -#ifdef HAS_I422TOARGBROW_SSSE3 -#define UB 127 /* min(63,(int8)(2.018 * 64)) */ -#define UG -25 /* (int8)(-0.391 * 64 - 0.5) */ -#define UR 0 - -#define VB 0 -#define VG -52 /* (int8)(-0.813 * 64 - 0.5) */ -#define VR 102 /* (int8)(1.596 * 64 + 0.5) */ - -// Bias -#define BB UB * 128 + VB * 128 -#define BG UG * 128 + VG * 128 -#define BR UR * 128 + VR * 128 - -#define YG 74 /* (int8)(1.164 * 64 + 0.5) */ - -struct { - vec8 kUVToB; // 0 - vec8 kUVToG; // 16 - vec8 kUVToR; // 32 - vec16 kUVBiasB; // 48 - vec16 kUVBiasG; // 64 - vec16 kUVBiasR; // 80 - vec16 kYSub16; // 96 - vec16 kYToRgb; // 112 - vec8 kVUToB; // 128 - vec8 kVUToG; // 144 - vec8 kVUToR; // 160 -} static SIMD_ALIGNED(kYuvConstants) = { - { UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB }, - { UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG }, - { UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR }, - { BB, BB, BB, BB, BB, BB, BB, BB }, - { BG, BG, BG, BG, BG, BG, BG, BG }, - { BR, BR, BR, BR, BR, BR, BR, BR }, - { 16, 16, 16, 16, 16, 16, 16, 16 }, - { YG, YG, YG, YG, YG, YG, YG, YG }, - { VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB }, - { VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG }, - { VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR } -}; - - -// Read 8 UV from 411 -#define READYUV444 \ - "movq " MEMACCESS([u_buf]) ",%%xmm0 \n" \ - BUNDLEALIGN \ - MEMOPREG(movq, 0x00, [u_buf], [v_buf], 1, xmm1) \ - "lea " MEMLEA(0x8, [u_buf]) ",%[u_buf] \n" \ - "punpcklbw %%xmm1,%%xmm0 \n" - -// Read 4 UV from 422, upsample to 8 UV -#define READYUV422 \ - "movd " MEMACCESS([u_buf]) ",%%xmm0 \n" \ - BUNDLEALIGN \ - MEMOPREG(movd, 0x00, [u_buf], [v_buf], 1, xmm1) \ - "lea " MEMLEA(0x4, [u_buf]) ",%[u_buf] \n" \ - 
"punpcklbw %%xmm1,%%xmm0 \n" \ - "punpcklwd %%xmm0,%%xmm0 \n" - -// Read 2 UV from 411, upsample to 8 UV -#define READYUV411 \ - "movd " MEMACCESS([u_buf]) ",%%xmm0 \n" \ - BUNDLEALIGN \ - MEMOPREG(movd, 0x00, [u_buf], [v_buf], 1, xmm1) \ - "lea " MEMLEA(0x2, [u_buf]) ",%[u_buf] \n" \ - "punpcklbw %%xmm1,%%xmm0 \n" \ - "punpcklwd %%xmm0,%%xmm0 \n" \ - "punpckldq %%xmm0,%%xmm0 \n" - -// Read 4 UV from NV12, upsample to 8 UV -#define READNV12 \ - "movq " MEMACCESS([uv_buf]) ",%%xmm0 \n" \ - "lea " MEMLEA(0x8, [uv_buf]) ",%[uv_buf] \n" \ - "punpcklwd %%xmm0,%%xmm0 \n" - -// Convert 8 pixels: 8 UV and 8 Y -#define YUVTORGB \ - "movdqa %%xmm0,%%xmm1 \n" \ - "movdqa %%xmm0,%%xmm2 \n" \ - "pmaddubsw " MEMACCESS([kYuvConstants]) ",%%xmm0 \n" \ - "pmaddubsw " MEMACCESS2(16, [kYuvConstants]) ",%%xmm1 \n" \ - "pmaddubsw " MEMACCESS2(32, [kYuvConstants]) ",%%xmm2 \n" \ - "psubw " MEMACCESS2(48, [kYuvConstants]) ",%%xmm0 \n" \ - "psubw " MEMACCESS2(64, [kYuvConstants]) ",%%xmm1 \n" \ - "psubw " MEMACCESS2(80, [kYuvConstants]) ",%%xmm2 \n" \ - "movq " MEMACCESS([y_buf]) ",%%xmm3 \n" \ - "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" \ - "punpcklbw %%xmm4,%%xmm3 \n" \ - "psubsw " MEMACCESS2(96, [kYuvConstants]) ",%%xmm3 \n" \ - "pmullw " MEMACCESS2(112, [kYuvConstants]) ",%%xmm3 \n" \ - "paddsw %%xmm3,%%xmm0 \n" \ - "paddsw %%xmm3,%%xmm1 \n" \ - "paddsw %%xmm3,%%xmm2 \n" \ - "psraw $0x6,%%xmm0 \n" \ - "psraw $0x6,%%xmm1 \n" \ - "psraw $0x6,%%xmm2 \n" \ - "packuswb %%xmm0,%%xmm0 \n" \ - "packuswb %%xmm1,%%xmm1 \n" \ - "packuswb %%xmm2,%%xmm2 \n" - -// Convert 8 pixels: 8 VU and 8 Y -#define YVUTORGB \ - "movdqa %%xmm0,%%xmm1 \n" \ - "movdqa %%xmm0,%%xmm2 \n" \ - "pmaddubsw " MEMACCESS2(128, [kYuvConstants]) ",%%xmm0 \n" \ - "pmaddubsw " MEMACCESS2(144, [kYuvConstants]) ",%%xmm1 \n" \ - "pmaddubsw " MEMACCESS2(160, [kYuvConstants]) ",%%xmm2 \n" \ - "psubw " MEMACCESS2(48, [kYuvConstants]) ",%%xmm0 \n" \ - "psubw " MEMACCESS2(64, [kYuvConstants]) ",%%xmm1 \n" \ - "psubw " MEMACCESS2(80, 
[kYuvConstants]) ",%%xmm2 \n" \ - "movq " MEMACCESS([y_buf]) ",%%xmm3 \n" \ - "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" \ - "punpcklbw %%xmm4,%%xmm3 \n" \ - "psubsw " MEMACCESS2(96, [kYuvConstants]) ",%%xmm3 \n" \ - "pmullw " MEMACCESS2(112, [kYuvConstants]) ",%%xmm3 \n" \ - "paddsw %%xmm3,%%xmm0 \n" \ - "paddsw %%xmm3,%%xmm1 \n" \ - "paddsw %%xmm3,%%xmm2 \n" \ - "psraw $0x6,%%xmm0 \n" \ - "psraw $0x6,%%xmm1 \n" \ - "psraw $0x6,%%xmm2 \n" \ - "packuswb %%xmm0,%%xmm0 \n" \ - "packuswb %%xmm1,%%xmm1 \n" \ - "packuswb %%xmm2,%%xmm2 \n" - -void OMITFP I444ToARGBRow_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_argb, - int width) { - asm volatile ( - "sub %[u_buf],%[v_buf] \n" - "pcmpeqb %%xmm5,%%xmm5 \n" - "pxor %%xmm4,%%xmm4 \n" - LABELALIGN - "1: \n" - READYUV444 - YUVTORGB - "punpcklbw %%xmm1,%%xmm0 \n" - "punpcklbw %%xmm5,%%xmm2 \n" - "movdqa %%xmm0,%%xmm1 \n" - "punpcklwd %%xmm2,%%xmm0 \n" - "punpckhwd %%xmm2,%%xmm1 \n" - "movdqa %%xmm0," MEMACCESS([dst_argb]) " \n" - "movdqa %%xmm1," MEMACCESS2(0x10,[dst_argb]) " \n" - "lea " MEMLEA(0x20,[dst_argb]) ",%[dst_argb] \n" - "sub $0x8,%[width] \n" - "jg 1b \n" - : [y_buf]"+r"(y_buf), // %[y_buf] - [u_buf]"+r"(u_buf), // %[u_buf] - [v_buf]"+r"(v_buf), // %[v_buf] - [dst_argb]"+r"(dst_argb), // %[dst_argb] - [width]"+rm"(width) // %[width] - : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants] - : "memory", "cc" -#if defined(__native_client__) && defined(__x86_64__) - , "r14" -#endif -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" -#endif - ); -} - -void OMITFP I422ToRGB24Row_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_rgb24, - int width) { -// fpic 32 bit gcc 4.2 on OSX runs out of GPR regs. 
-#if defined(__i386__) - asm volatile ( - "movdqa %[kShuffleMaskARGBToRGB24_0],%%xmm5 \n" - "movdqa %[kShuffleMaskARGBToRGB24],%%xmm6 \n" - :: [kShuffleMaskARGBToRGB24_0]"m"(kShuffleMaskARGBToRGB24_0), - [kShuffleMaskARGBToRGB24]"m"(kShuffleMaskARGBToRGB24)); -#endif - - asm volatile ( -#if !defined(__i386__) - "movdqa %[kShuffleMaskARGBToRGB24_0],%%xmm5 \n" - "movdqa %[kShuffleMaskARGBToRGB24],%%xmm6 \n" -#endif - "sub %[u_buf],%[v_buf] \n" - "pxor %%xmm4,%%xmm4 \n" - LABELALIGN - "1: \n" - READYUV422 - YUVTORGB - "punpcklbw %%xmm1,%%xmm0 \n" - "punpcklbw %%xmm2,%%xmm2 \n" - "movdqa %%xmm0,%%xmm1 \n" - "punpcklwd %%xmm2,%%xmm0 \n" - "punpckhwd %%xmm2,%%xmm1 \n" - "pshufb %%xmm5,%%xmm0 \n" - "pshufb %%xmm6,%%xmm1 \n" - "palignr $0xc,%%xmm0,%%xmm1 \n" - "movq %%xmm0," MEMACCESS([dst_rgb24]) "\n" - "movdqu %%xmm1," MEMACCESS2(0x8,[dst_rgb24]) "\n" - "lea " MEMLEA(0x18,[dst_rgb24]) ",%[dst_rgb24] \n" - "sub $0x8,%[width] \n" - "jg 1b \n" - : [y_buf]"+r"(y_buf), // %[y_buf] - [u_buf]"+r"(u_buf), // %[u_buf] - [v_buf]"+r"(v_buf), // %[v_buf] - [dst_rgb24]"+r"(dst_rgb24), // %[dst_rgb24] - [width]"+rm"(width) // %[width] - : [kYuvConstants]"r"(&kYuvConstants.kUVToB) -#if !defined(__i386__) - , [kShuffleMaskARGBToRGB24_0]"m"(kShuffleMaskARGBToRGB24_0), - [kShuffleMaskARGBToRGB24]"m"(kShuffleMaskARGBToRGB24) -#endif - : "memory", "cc" -#if defined(__native_client__) && defined(__x86_64__) - , "r14" -#endif -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6" -#endif - ); -} - -void OMITFP I422ToRAWRow_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_raw, - int width) { -// fpic 32 bit gcc 4.2 on OSX runs out of GPR regs. 
-#if defined(__i386__) - asm volatile ( - "movdqa %[kShuffleMaskARGBToRAW_0],%%xmm5 \n" - "movdqa %[kShuffleMaskARGBToRAW],%%xmm6 \n" - :: [kShuffleMaskARGBToRAW_0]"m"(kShuffleMaskARGBToRAW_0), - [kShuffleMaskARGBToRAW]"m"(kShuffleMaskARGBToRAW)); -#endif - - asm volatile ( -#if !defined(__i386__) - "movdqa %[kShuffleMaskARGBToRAW_0],%%xmm5 \n" - "movdqa %[kShuffleMaskARGBToRAW],%%xmm6 \n" -#endif - "sub %[u_buf],%[v_buf] \n" - "pxor %%xmm4,%%xmm4 \n" - LABELALIGN - "1: \n" - READYUV422 - YUVTORGB - "punpcklbw %%xmm1,%%xmm0 \n" - "punpcklbw %%xmm2,%%xmm2 \n" - "movdqa %%xmm0,%%xmm1 \n" - "punpcklwd %%xmm2,%%xmm0 \n" - "punpckhwd %%xmm2,%%xmm1 \n" - "pshufb %%xmm5,%%xmm0 \n" - "pshufb %%xmm6,%%xmm1 \n" - "palignr $0xc,%%xmm0,%%xmm1 \n" - "movq %%xmm0," MEMACCESS([dst_raw]) " \n" - "movdqu %%xmm1," MEMACCESS2(0x8,[dst_raw]) "\n" - "lea " MEMLEA(0x18,[dst_raw]) ",%[dst_raw] \n" - "sub $0x8,%[width] \n" - "jg 1b \n" - : [y_buf]"+r"(y_buf), // %[y_buf] - [u_buf]"+r"(u_buf), // %[u_buf] - [v_buf]"+r"(v_buf), // %[v_buf] - [dst_raw]"+r"(dst_raw), // %[dst_raw] - [width]"+rm"(width) // %[width] - : [kYuvConstants]"r"(&kYuvConstants.kUVToB) -#if !defined(__i386__) - , [kShuffleMaskARGBToRAW_0]"m"(kShuffleMaskARGBToRAW_0), - [kShuffleMaskARGBToRAW]"m"(kShuffleMaskARGBToRAW) -#endif - : "memory", "cc" -#if defined(__native_client__) && defined(__x86_64__) - , "r14" -#endif -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6" -#endif - ); -} - -void OMITFP I422ToARGBRow_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_argb, - int width) { - asm volatile ( - "sub %[u_buf],%[v_buf] \n" - "pcmpeqb %%xmm5,%%xmm5 \n" - "pxor %%xmm4,%%xmm4 \n" - LABELALIGN - "1: \n" - READYUV422 - YUVTORGB - "punpcklbw %%xmm1,%%xmm0 \n" - "punpcklbw %%xmm5,%%xmm2 \n" - "movdqa %%xmm0,%%xmm1 \n" - "punpcklwd %%xmm2,%%xmm0 \n" - "punpckhwd %%xmm2,%%xmm1 \n" - "movdqa %%xmm0," MEMACCESS([dst_argb]) "\n" - "movdqa %%xmm1," 
MEMACCESS2(0x10,[dst_argb]) "\n" - "lea " MEMLEA(0x20,[dst_argb]) ",%[dst_argb] \n" - "sub $0x8,%[width] \n" - "jg 1b \n" - : [y_buf]"+r"(y_buf), // %[y_buf] - [u_buf]"+r"(u_buf), // %[u_buf] - [v_buf]"+r"(v_buf), // %[v_buf] - [dst_argb]"+r"(dst_argb), // %[dst_argb] - [width]"+rm"(width) // %[width] - : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants] - : "memory", "cc" -#if defined(__native_client__) && defined(__x86_64__) - , "r14" -#endif -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" -#endif - ); -} - -void OMITFP I411ToARGBRow_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_argb, - int width) { - asm volatile ( - "sub %[u_buf],%[v_buf] \n" - "pcmpeqb %%xmm5,%%xmm5 \n" - "pxor %%xmm4,%%xmm4 \n" - LABELALIGN - "1: \n" - READYUV411 - YUVTORGB - "punpcklbw %%xmm1,%%xmm0 \n" - "punpcklbw %%xmm5,%%xmm2 \n" - "movdqa %%xmm0,%%xmm1 \n" - "punpcklwd %%xmm2,%%xmm0 \n" - "punpckhwd %%xmm2,%%xmm1 \n" - "movdqa %%xmm0," MEMACCESS([dst_argb]) "\n" - "movdqa %%xmm1," MEMACCESS2(0x10,[dst_argb]) "\n" - "lea " MEMLEA(0x20,[dst_argb]) ",%[dst_argb] \n" - "sub $0x8,%[width] \n" - "jg 1b \n" - : [y_buf]"+r"(y_buf), // %[y_buf] - [u_buf]"+r"(u_buf), // %[u_buf] - [v_buf]"+r"(v_buf), // %[v_buf] - [dst_argb]"+r"(dst_argb), // %[dst_argb] - [width]"+rm"(width) // %[width] - : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants] - : "memory", "cc" -#if defined(__native_client__) && defined(__x86_64__) - , "r14" -#endif -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" -#endif - ); -} - -void OMITFP NV12ToARGBRow_SSSE3(const uint8* y_buf, - const uint8* uv_buf, - uint8* dst_argb, - int width) { - asm volatile ( - "pcmpeqb %%xmm5,%%xmm5 \n" - "pxor %%xmm4,%%xmm4 \n" - LABELALIGN - "1: \n" - READNV12 - YUVTORGB - "punpcklbw %%xmm1,%%xmm0 \n" - "punpcklbw %%xmm5,%%xmm2 \n" - "movdqa %%xmm0,%%xmm1 \n" - "punpcklwd %%xmm2,%%xmm0 \n" - "punpckhwd %%xmm2,%%xmm1 \n" - "movdqa %%xmm0," 
MEMACCESS([dst_argb]) "\n" - "movdqa %%xmm1," MEMACCESS2(0x10,[dst_argb]) "\n" - "lea " MEMLEA(0x20,[dst_argb]) ",%[dst_argb] \n" - "sub $0x8,%[width] \n" - "jg 1b \n" - : [y_buf]"+r"(y_buf), // %[y_buf] - [uv_buf]"+r"(uv_buf), // %[uv_buf] - [dst_argb]"+r"(dst_argb), // %[dst_argb] - [width]"+rm"(width) // %[width] - : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants] - : "memory", "cc" - // Does not use r14. -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" -#endif - ); -} - -void OMITFP NV21ToARGBRow_SSSE3(const uint8* y_buf, - const uint8* uv_buf, - uint8* dst_argb, - int width) { - asm volatile ( - "pcmpeqb %%xmm5,%%xmm5 \n" - "pxor %%xmm4,%%xmm4 \n" - LABELALIGN - "1: \n" - READNV12 - YVUTORGB - "punpcklbw %%xmm1,%%xmm0 \n" - "punpcklbw %%xmm5,%%xmm2 \n" - "movdqa %%xmm0,%%xmm1 \n" - "punpcklwd %%xmm2,%%xmm0 \n" - "punpckhwd %%xmm2,%%xmm1 \n" - "movdqa %%xmm0," MEMACCESS([dst_argb]) "\n" - "movdqa %%xmm1," MEMACCESS2(0x10,[dst_argb]) "\n" - "lea " MEMLEA(0x20,[dst_argb]) ",%[dst_argb] \n" - "sub $0x8,%[width] \n" - "jg 1b \n" - : [y_buf]"+r"(y_buf), // %[y_buf] - [uv_buf]"+r"(uv_buf), // %[uv_buf] - [dst_argb]"+r"(dst_argb), // %[dst_argb] - [width]"+rm"(width) // %[width] - : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants] - : "memory", "cc" - // Does not use r14. 
-#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" -#endif - ); -} - -void OMITFP I444ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_argb, - int width) { - asm volatile ( - "sub %[u_buf],%[v_buf] \n" - "pcmpeqb %%xmm5,%%xmm5 \n" - "pxor %%xmm4,%%xmm4 \n" - LABELALIGN - "1: \n" - READYUV444 - YUVTORGB - "punpcklbw %%xmm1,%%xmm0 \n" - "punpcklbw %%xmm5,%%xmm2 \n" - "movdqa %%xmm0,%%xmm1 \n" - "punpcklwd %%xmm2,%%xmm0 \n" - "punpckhwd %%xmm2,%%xmm1 \n" - "movdqu %%xmm0," MEMACCESS([dst_argb]) "\n" - "movdqu %%xmm1," MEMACCESS2(0x10,[dst_argb]) "\n" - "lea " MEMLEA(0x20,[dst_argb]) ",%[dst_argb] \n" - "sub $0x8,%[width] \n" - "jg 1b \n" - : [y_buf]"+r"(y_buf), // %[y_buf] - [u_buf]"+r"(u_buf), // %[u_buf] - [v_buf]"+r"(v_buf), // %[v_buf] - [dst_argb]"+r"(dst_argb), // %[dst_argb] - [width]"+rm"(width) // %[width] - : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants] - : "memory", "cc" -#if defined(__native_client__) && defined(__x86_64__) - , "r14" -#endif -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" -#endif - ); -} - -void OMITFP I422ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_argb, - int width) { - asm volatile ( - "sub %[u_buf],%[v_buf] \n" - "pcmpeqb %%xmm5,%%xmm5 \n" - "pxor %%xmm4,%%xmm4 \n" - LABELALIGN - "1: \n" - READYUV422 - YUVTORGB - "punpcklbw %%xmm1,%%xmm0 \n" - "punpcklbw %%xmm5,%%xmm2 \n" - "movdqa %%xmm0,%%xmm1 \n" - "punpcklwd %%xmm2,%%xmm0 \n" - "punpckhwd %%xmm2,%%xmm1 \n" - "movdqu %%xmm0," MEMACCESS([dst_argb]) "\n" - "movdqu %%xmm1," MEMACCESS2(0x10,[dst_argb]) "\n" - "lea " MEMLEA(0x20,[dst_argb]) ",%[dst_argb] \n" - "sub $0x8,%[width] \n" - "jg 1b \n" - : [y_buf]"+r"(y_buf), // %[y_buf] - [u_buf]"+r"(u_buf), // %[u_buf] - [v_buf]"+r"(v_buf), // %[v_buf] - [dst_argb]"+r"(dst_argb), // %[dst_argb] - [width]"+rm"(width) // %[width] - : 
[kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants] - : "memory", "cc" -#if defined(__native_client__) && defined(__x86_64__) - , "r14" -#endif -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" -#endif - ); -} - -void OMITFP I411ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_argb, - int width) { - asm volatile ( - "sub %[u_buf],%[v_buf] \n" - "pcmpeqb %%xmm5,%%xmm5 \n" - "pxor %%xmm4,%%xmm4 \n" - LABELALIGN - "1: \n" - READYUV411 - YUVTORGB - "punpcklbw %%xmm1,%%xmm0 \n" - "punpcklbw %%xmm5,%%xmm2 \n" - "movdqa %%xmm0,%%xmm1 \n" - "punpcklwd %%xmm2,%%xmm0 \n" - "punpckhwd %%xmm2,%%xmm1 \n" - "movdqu %%xmm0," MEMACCESS([dst_argb]) "\n" - "movdqu %%xmm1," MEMACCESS2(0x10,[dst_argb]) "\n" - "lea " MEMLEA(0x20,[dst_argb]) ",%[dst_argb] \n" - "sub $0x8,%[width] \n" - "jg 1b \n" - : [y_buf]"+r"(y_buf), // %[y_buf] - [u_buf]"+r"(u_buf), // %[u_buf] - [v_buf]"+r"(v_buf), // %[v_buf] - [dst_argb]"+r"(dst_argb), // %[dst_argb] - [width]"+rm"(width) // %[width] - : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants] - : "memory", "cc" -#if defined(__native_client__) && defined(__x86_64__) - , "r14" -#endif -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" -#endif - ); -} - -void OMITFP NV12ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, - const uint8* uv_buf, - uint8* dst_argb, - int width) { - asm volatile ( - "pcmpeqb %%xmm5,%%xmm5 \n" - "pxor %%xmm4,%%xmm4 \n" - LABELALIGN - "1: \n" - READNV12 - YUVTORGB - "punpcklbw %%xmm1,%%xmm0 \n" - "punpcklbw %%xmm5,%%xmm2 \n" - "movdqa %%xmm0,%%xmm1 \n" - "punpcklwd %%xmm2,%%xmm0 \n" - "punpckhwd %%xmm2,%%xmm1 \n" - "movdqu %%xmm0," MEMACCESS([dst_argb]) "\n" - "movdqu %%xmm1," MEMACCESS2(0x10,[dst_argb]) "\n" - "lea " MEMLEA(0x20,[dst_argb]) ",%[dst_argb] \n" - "sub $0x8,%[width] \n" - "jg 1b \n" - : [y_buf]"+r"(y_buf), // %[y_buf] - [uv_buf]"+r"(uv_buf), // %[uv_buf] - [dst_argb]"+r"(dst_argb), // %[dst_argb] - 
[width]"+rm"(width) // %[width] - : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants] - : "memory", "cc" - // Does not use r14. -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" -#endif - ); -} - -void OMITFP NV21ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, - const uint8* uv_buf, - uint8* dst_argb, - int width) { - asm volatile ( - "pcmpeqb %%xmm5,%%xmm5 \n" - "pxor %%xmm4,%%xmm4 \n" - LABELALIGN - "1: \n" - READNV12 - YVUTORGB - "punpcklbw %%xmm1,%%xmm0 \n" - "punpcklbw %%xmm5,%%xmm2 \n" - "movdqa %%xmm0,%%xmm1 \n" - "punpcklwd %%xmm2,%%xmm0 \n" - "punpckhwd %%xmm2,%%xmm1 \n" - "movdqu %%xmm0," MEMACCESS([dst_argb]) "\n" - "movdqu %%xmm1," MEMACCESS2(0x10,[dst_argb]) "\n" - "lea " MEMLEA(0x20,[dst_argb]) ",%[dst_argb] \n" - "sub $0x8,%[width] \n" - "jg 1b \n" - : [y_buf]"+r"(y_buf), // %[y_buf] - [uv_buf]"+r"(uv_buf), // %[uv_buf] - [dst_argb]"+r"(dst_argb), // %[dst_argb] - [width]"+rm"(width) // %[width] - : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants] - : "memory", "cc" - // Does not use r14. 
-#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" -#endif - ); -} - -void OMITFP I422ToBGRARow_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_bgra, - int width) { - asm volatile ( - "sub %[u_buf],%[v_buf] \n" - "pcmpeqb %%xmm5,%%xmm5 \n" - "pxor %%xmm4,%%xmm4 \n" - LABELALIGN - "1: \n" - READYUV422 - YUVTORGB - "pcmpeqb %%xmm5,%%xmm5 \n" - "punpcklbw %%xmm0,%%xmm1 \n" - "punpcklbw %%xmm2,%%xmm5 \n" - "movdqa %%xmm5,%%xmm0 \n" - "punpcklwd %%xmm1,%%xmm5 \n" - "punpckhwd %%xmm1,%%xmm0 \n" - "movdqa %%xmm5," MEMACCESS([dst_bgra]) "\n" - "movdqa %%xmm0," MEMACCESS2(0x10,[dst_bgra]) "\n" - "lea " MEMLEA(0x20,[dst_bgra]) ",%[dst_bgra] \n" - "sub $0x8,%[width] \n" - "jg 1b \n" - : [y_buf]"+r"(y_buf), // %[y_buf] - [u_buf]"+r"(u_buf), // %[u_buf] - [v_buf]"+r"(v_buf), // %[v_buf] - [dst_bgra]"+r"(dst_bgra), // %[dst_bgra] - [width]"+rm"(width) // %[width] - : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants] - : "memory", "cc" -#if defined(__native_client__) && defined(__x86_64__) - , "r14" -#endif -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" -#endif - ); -} - -void OMITFP I422ToABGRRow_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_abgr, - int width) { - asm volatile ( - "sub %[u_buf],%[v_buf] \n" - "pcmpeqb %%xmm5,%%xmm5 \n" - "pxor %%xmm4,%%xmm4 \n" - LABELALIGN - "1: \n" - READYUV422 - YUVTORGB - "punpcklbw %%xmm1,%%xmm2 \n" - "punpcklbw %%xmm5,%%xmm0 \n" - "movdqa %%xmm2,%%xmm1 \n" - "punpcklwd %%xmm0,%%xmm2 \n" - "punpckhwd %%xmm0,%%xmm1 \n" - "movdqa %%xmm2," MEMACCESS([dst_abgr]) "\n" - "movdqa %%xmm1," MEMACCESS2(0x10,[dst_abgr]) "\n" - "lea " MEMLEA(0x20,[dst_abgr]) ",%[dst_abgr] \n" - "sub $0x8,%[width] \n" - "jg 1b \n" - : [y_buf]"+r"(y_buf), // %[y_buf] - [u_buf]"+r"(u_buf), // %[u_buf] - [v_buf]"+r"(v_buf), // %[v_buf] - [dst_abgr]"+r"(dst_abgr), // %[dst_abgr] - [width]"+rm"(width) // %[width] - : 
[kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants] - : "memory", "cc" -#if defined(__native_client__) && defined(__x86_64__) - , "r14" -#endif -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" -#endif - ); -} - -void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_rgba, - int width) { - asm volatile ( - "sub %[u_buf],%[v_buf] \n" - "pcmpeqb %%xmm5,%%xmm5 \n" - "pxor %%xmm4,%%xmm4 \n" - LABELALIGN - "1: \n" - READYUV422 - YUVTORGB - "pcmpeqb %%xmm5,%%xmm5 \n" - "punpcklbw %%xmm2,%%xmm1 \n" - "punpcklbw %%xmm0,%%xmm5 \n" - "movdqa %%xmm5,%%xmm0 \n" - "punpcklwd %%xmm1,%%xmm5 \n" - "punpckhwd %%xmm1,%%xmm0 \n" - "movdqa %%xmm5," MEMACCESS([dst_rgba]) "\n" - "movdqa %%xmm0," MEMACCESS2(0x10,[dst_rgba]) "\n" - "lea " MEMLEA(0x20,[dst_rgba]) ",%[dst_rgba] \n" - "sub $0x8,%[width] \n" - "jg 1b \n" - : [y_buf]"+r"(y_buf), // %[y_buf] - [u_buf]"+r"(u_buf), // %[u_buf] - [v_buf]"+r"(v_buf), // %[v_buf] - [dst_rgba]"+r"(dst_rgba), // %[dst_rgba] - [width]"+rm"(width) // %[width] - : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants] - : "memory", "cc" -#if defined(__native_client__) && defined(__x86_64__) - , "r14" -#endif -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" -#endif - ); -} - -void OMITFP I422ToBGRARow_Unaligned_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_bgra, - int width) { - asm volatile ( - "sub %[u_buf],%[v_buf] \n" - "pcmpeqb %%xmm5,%%xmm5 \n" - "pxor %%xmm4,%%xmm4 \n" - LABELALIGN - "1: \n" - READYUV422 - YUVTORGB - "pcmpeqb %%xmm5,%%xmm5 \n" - "punpcklbw %%xmm0,%%xmm1 \n" - "punpcklbw %%xmm2,%%xmm5 \n" - "movdqa %%xmm5,%%xmm0 \n" - "punpcklwd %%xmm1,%%xmm5 \n" - "punpckhwd %%xmm1,%%xmm0 \n" - "movdqu %%xmm5," MEMACCESS([dst_bgra]) "\n" - "movdqu %%xmm0," MEMACCESS2(0x10,[dst_bgra]) "\n" - "lea " MEMLEA(0x20,[dst_bgra]) ",%[dst_bgra] \n" - "sub $0x8,%[width] \n" - "jg 1b \n" - : 
[y_buf]"+r"(y_buf), // %[y_buf] - [u_buf]"+r"(u_buf), // %[u_buf] - [v_buf]"+r"(v_buf), // %[v_buf] - [dst_bgra]"+r"(dst_bgra), // %[dst_bgra] - [width]"+rm"(width) // %[width] - : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants] - : "memory", "cc" -#if defined(__native_client__) && defined(__x86_64__) - , "r14" -#endif -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" -#endif - ); -} - -void OMITFP I422ToABGRRow_Unaligned_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_abgr, - int width) { - asm volatile ( - "sub %[u_buf],%[v_buf] \n" - "pcmpeqb %%xmm5,%%xmm5 \n" - "pxor %%xmm4,%%xmm4 \n" - LABELALIGN - "1: \n" - READYUV422 - YUVTORGB - "punpcklbw %%xmm1,%%xmm2 \n" - "punpcklbw %%xmm5,%%xmm0 \n" - "movdqa %%xmm2,%%xmm1 \n" - "punpcklwd %%xmm0,%%xmm2 \n" - "punpckhwd %%xmm0,%%xmm1 \n" - "movdqu %%xmm2," MEMACCESS([dst_abgr]) "\n" - "movdqu %%xmm1," MEMACCESS2(0x10,[dst_abgr]) "\n" - "lea " MEMLEA(0x20,[dst_abgr]) ",%[dst_abgr] \n" - "sub $0x8,%[width] \n" - "jg 1b \n" - : [y_buf]"+r"(y_buf), // %[y_buf] - [u_buf]"+r"(u_buf), // %[u_buf] - [v_buf]"+r"(v_buf), // %[v_buf] - [dst_abgr]"+r"(dst_abgr), // %[dst_abgr] - [width]"+rm"(width) // %[width] - : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants] - : "memory", "cc" -#if defined(__native_client__) && defined(__x86_64__) - , "r14" -#endif -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" -#endif - ); -} - -void OMITFP I422ToRGBARow_Unaligned_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_rgba, - int width) { - asm volatile ( - "sub %[u_buf],%[v_buf] \n" - "pcmpeqb %%xmm5,%%xmm5 \n" - "pxor %%xmm4,%%xmm4 \n" - LABELALIGN - "1: \n" - READYUV422 - YUVTORGB - "pcmpeqb %%xmm5,%%xmm5 \n" - "punpcklbw %%xmm2,%%xmm1 \n" - "punpcklbw %%xmm0,%%xmm5 \n" - "movdqa %%xmm5,%%xmm0 \n" - "punpcklwd %%xmm1,%%xmm5 \n" - "punpckhwd %%xmm1,%%xmm0 \n" - "movdqu %%xmm5," MEMACCESS([dst_rgba]) 
"\n" - "movdqu %%xmm0," MEMACCESS2(0x10,[dst_rgba]) "\n" - "lea " MEMLEA(0x20,[dst_rgba]) ",%[dst_rgba] \n" - "sub $0x8,%[width] \n" - "jg 1b \n" - : [y_buf]"+r"(y_buf), // %[y_buf] - [u_buf]"+r"(u_buf), // %[u_buf] - [v_buf]"+r"(v_buf), // %[v_buf] - [dst_rgba]"+r"(dst_rgba), // %[dst_rgba] - [width]"+rm"(width) // %[width] - : [kYuvConstants]"r"(&kYuvConstants.kUVToB) // %[kYuvConstants] - : "memory", "cc" -#if defined(__native_client__) && defined(__x86_64__) - , "r14" -#endif -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" -#endif - ); -} - -#endif // HAS_I422TOARGBROW_SSSE3 - -#ifdef HAS_YTOARGBROW_SSE2 -void YToARGBRow_SSE2(const uint8* y_buf, - uint8* dst_argb, - int width) { - asm volatile ( - "pxor %%xmm5,%%xmm5 \n" - "pcmpeqb %%xmm4,%%xmm4 \n" - "pslld $0x18,%%xmm4 \n" - "mov $0x00100010,%%eax \n" - "movd %%eax,%%xmm3 \n" - "pshufd $0x0,%%xmm3,%%xmm3 \n" - "mov $0x004a004a,%%eax \n" - "movd %%eax,%%xmm2 \n" - "pshufd $0x0,%%xmm2,%%xmm2 \n" - LABELALIGN - "1: \n" - // Step 1: Scale Y contribution to 8 G values. G = (y - 16) * 1.164 - "movq " MEMACCESS(0) ",%%xmm0 \n" - "lea " MEMLEA(0x8,0) ",%0 \n" - "punpcklbw %%xmm5,%%xmm0 \n" - "psubusw %%xmm3,%%xmm0 \n" - "pmullw %%xmm2,%%xmm0 \n" - "psrlw $6, %%xmm0 \n" - "packuswb %%xmm0,%%xmm0 \n" - - // Step 2: Weave into ARGB - "punpcklbw %%xmm0,%%xmm0 \n" - "movdqa %%xmm0,%%xmm1 \n" - "punpcklwd %%xmm0,%%xmm0 \n" - "punpckhwd %%xmm1,%%xmm1 \n" - "por %%xmm4,%%xmm0 \n" - "por %%xmm4,%%xmm1 \n" - "movdqa %%xmm0," MEMACCESS(1) " \n" - "movdqa %%xmm1," MEMACCESS2(0x10,1) " \n" - "lea " MEMLEA(0x20,1) ",%1 \n" - - "sub $0x8,%2 \n" - "jg 1b \n" - : "+r"(y_buf), // %0 - "+r"(dst_argb), // %1 - "+rm"(width) // %2 - : - : "memory", "cc", "eax" -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4" -#endif - ); -} -#endif // HAS_YTOARGBROW_SSE2 - -#ifdef HAS_MIRRORROW_SSSE3 -// Shuffle table for reversing the bytes. 
-static uvec8 kShuffleMirror = { - 15u, 14u, 13u, 12u, 11u, 10u, 9u, 8u, 7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u -}; - -void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width) { - intptr_t temp_width = (intptr_t)(width); - asm volatile ( - "movdqa %3,%%xmm5 \n" - "lea " MEMLEA(-0x10,0) ",%0 \n" - LABELALIGN - "1: \n" - MEMOPREG(movdqa,0x00,0,2,1,xmm0) // movdqa (%0,%2),%%xmm0 - "pshufb %%xmm5,%%xmm0 \n" - "sub $0x10,%2 \n" - "movdqa %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "jg 1b \n" - : "+r"(src), // %0 - "+r"(dst), // %1 - "+r"(temp_width) // %2 - : "m"(kShuffleMirror) // %3 - : "memory", "cc" -#if defined(__native_client__) && defined(__x86_64__) - , "r14" -#endif -#if defined(__SSE2__) - , "xmm0", "xmm5" -#endif - ); -} -#endif // HAS_MIRRORROW_SSSE3 - -#ifdef HAS_MIRRORROW_SSE2 -void MirrorRow_SSE2(const uint8* src, uint8* dst, int width) { - intptr_t temp_width = (intptr_t)(width); - asm volatile ( - "lea " MEMLEA(-0x10,0) ",%0 \n" - LABELALIGN - "1: \n" - MEMOPREG(movdqu,0x00,0,2,1,xmm0) // movdqu (%0,%2),%%xmm0 - "movdqa %%xmm0,%%xmm1 \n" - "psllw $0x8,%%xmm0 \n" - "psrlw $0x8,%%xmm1 \n" - "por %%xmm1,%%xmm0 \n" - "pshuflw $0x1b,%%xmm0,%%xmm0 \n" - "pshufhw $0x1b,%%xmm0,%%xmm0 \n" - "pshufd $0x4e,%%xmm0,%%xmm0 \n" - "sub $0x10,%2 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1)",%1 \n" - "jg 1b \n" - : "+r"(src), // %0 - "+r"(dst), // %1 - "+r"(temp_width) // %2 - : - : "memory", "cc" -#if defined(__native_client__) && defined(__x86_64__) - , "r14" -#endif -#if defined(__SSE2__) - , "xmm0", "xmm1" -#endif - ); -} -#endif // HAS_MIRRORROW_SSE2 - -#ifdef HAS_MIRRORROW_UV_SSSE3 -// Shuffle table for reversing the bytes of UV channels. 
-static uvec8 kShuffleMirrorUV = { - 14u, 12u, 10u, 8u, 6u, 4u, 2u, 0u, 15u, 13u, 11u, 9u, 7u, 5u, 3u, 1u -}; -void MirrorUVRow_SSSE3(const uint8* src, uint8* dst_u, uint8* dst_v, - int width) { - intptr_t temp_width = (intptr_t)(width); - asm volatile ( - "movdqa %4,%%xmm1 \n" - "lea " MEMLEA4(-0x10,0,3,2) ",%0 \n" - "sub %1,%2 \n" - LABELALIGN - "1: \n" - "movdqa " MEMACCESS(0) ",%%xmm0 \n" - "lea " MEMLEA(-0x10,0) ",%0 \n" - "pshufb %%xmm1,%%xmm0 \n" - "sub $8,%3 \n" - "movlpd %%xmm0," MEMACCESS(1) " \n" - BUNDLEALIGN - MEMOPMEM(movhpd,xmm0,0x00,1,2,1) // movhpd %%xmm0,(%1,%2) - "lea " MEMLEA(0x8,1) ",%1 \n" - "jg 1b \n" - : "+r"(src), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+r"(temp_width) // %3 - : "m"(kShuffleMirrorUV) // %4 - : "memory", "cc" -#if defined(__native_client__) && defined(__x86_64__) - , "r14" -#endif -#if defined(__SSE2__) - , "xmm0", "xmm1" -#endif - ); -} -#endif // HAS_MIRRORROW_UV_SSSE3 - -#ifdef HAS_ARGBMIRRORROW_SSSE3 -// Shuffle table for reversing the bytes. 
-static uvec8 kARGBShuffleMirror = { - 12u, 13u, 14u, 15u, 8u, 9u, 10u, 11u, 4u, 5u, 6u, 7u, 0u, 1u, 2u, 3u -}; - -void ARGBMirrorRow_SSSE3(const uint8* src, uint8* dst, int width) { - intptr_t temp_width = (intptr_t)(width); - asm volatile ( - "lea " MEMLEA4(-0x10,0,2,4) ",%0 \n" - "movdqa %3,%%xmm5 \n" - LABELALIGN - "1: \n" - "movdqa " MEMACCESS(0) ",%%xmm0 \n" - "pshufb %%xmm5,%%xmm0 \n" - "lea " MEMLEA(-0x10,0) ",%0 \n" - "sub $0x4,%2 \n" - "movdqa %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "jg 1b \n" - : "+r"(src), // %0 - "+r"(dst), // %1 - "+r"(temp_width) // %2 - : "m"(kARGBShuffleMirror) // %3 - : "memory", "cc" -#if defined(__SSE2__) - , "xmm0", "xmm5" -#endif - ); -} -#endif // HAS_ARGBMIRRORROW_SSSE3 - -#ifdef HAS_SPLITUVROW_SSE2 -void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) { - asm volatile ( - "pcmpeqb %%xmm5,%%xmm5 \n" - "psrlw $0x8,%%xmm5 \n" - "sub %1,%2 \n" - LABELALIGN - "1: \n" - "movdqa " MEMACCESS(0) ",%%xmm0 \n" - "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "lea " MEMLEA(0x20,0) ",%0 \n" - "movdqa %%xmm0,%%xmm2 \n" - "movdqa %%xmm1,%%xmm3 \n" - "pand %%xmm5,%%xmm0 \n" - "pand %%xmm5,%%xmm1 \n" - "packuswb %%xmm1,%%xmm0 \n" - "psrlw $0x8,%%xmm2 \n" - "psrlw $0x8,%%xmm3 \n" - "packuswb %%xmm3,%%xmm2 \n" - "movdqa %%xmm0," MEMACCESS(1) " \n" - MEMOPMEM(movdqa,xmm2,0x00,1,2,1) // movdqa %%xmm2,(%1,%2) - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x10,%3 \n" - "jg 1b \n" - : "+r"(src_uv), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+r"(pix) // %3 - : - : "memory", "cc" -#if defined(__native_client__) && defined(__x86_64__) - , "r14" -#endif -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" -#endif - ); -} - -void SplitUVRow_Unaligned_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, - int pix) { - asm volatile ( - "pcmpeqb %%xmm5,%%xmm5 \n" - "psrlw $0x8,%%xmm5 \n" - "sub %1,%2 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " 
MEMACCESS2(0x10,0) ",%%xmm1 \n" - "lea " MEMLEA(0x20,0) ",%0 \n" - "movdqa %%xmm0,%%xmm2 \n" - "movdqa %%xmm1,%%xmm3 \n" - "pand %%xmm5,%%xmm0 \n" - "pand %%xmm5,%%xmm1 \n" - "packuswb %%xmm1,%%xmm0 \n" - "psrlw $0x8,%%xmm2 \n" - "psrlw $0x8,%%xmm3 \n" - "packuswb %%xmm3,%%xmm2 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - MEMOPMEM(movdqu,xmm2,0x00,1,2,1) // movdqu %%xmm2,(%1,%2) - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x10,%3 \n" - "jg 1b \n" - : "+r"(src_uv), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+r"(pix) // %3 - : - : "memory", "cc" -#if defined(__native_client__) && defined(__x86_64__) - , "r14" -#endif -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" -#endif - ); -} -#endif // HAS_SPLITUVROW_SSE2 - -#ifdef HAS_MERGEUVROW_SSE2 -void MergeUVRow_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv, - int width) { - asm volatile ( - "sub %0,%1 \n" - LABELALIGN - "1: \n" - "movdqa " MEMACCESS(0) ",%%xmm0 \n" - MEMOPREG(movdqa,0x00,0,1,1,xmm1) // movdqa (%0,%1,1),%%xmm1 - "lea " MEMLEA(0x10,0) ",%0 \n" - "movdqa %%xmm0,%%xmm2 \n" - "punpcklbw %%xmm1,%%xmm0 \n" - "punpckhbw %%xmm1,%%xmm2 \n" - "movdqa %%xmm0," MEMACCESS(2) " \n" - "movdqa %%xmm2," MEMACCESS2(0x10,2) " \n" - "lea " MEMLEA(0x20,2) ",%2 \n" - "sub $0x10,%3 \n" - "jg 1b \n" - : "+r"(src_u), // %0 - "+r"(src_v), // %1 - "+r"(dst_uv), // %2 - "+r"(width) // %3 - : - : "memory", "cc" -#if defined(__native_client__) && defined(__x86_64__) - , "r14" -#endif -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2" -#endif - ); -} - -void MergeUVRow_Unaligned_SSE2(const uint8* src_u, const uint8* src_v, - uint8* dst_uv, int width) { - asm volatile ( - "sub %0,%1 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - MEMOPREG(movdqu,0x00,0,1,1,xmm1) // movdqu (%0,%1,1),%%xmm1 - "lea " MEMLEA(0x10,0) ",%0 \n" - "movdqa %%xmm0,%%xmm2 \n" - "punpcklbw %%xmm1,%%xmm0 \n" - "punpckhbw %%xmm1,%%xmm2 \n" - "movdqu %%xmm0," MEMACCESS(2) " \n" - "movdqu %%xmm2," MEMACCESS2(0x10,2) " 
\n" - "lea " MEMLEA(0x20,2) ",%2 \n" - "sub $0x10,%3 \n" - "jg 1b \n" - : "+r"(src_u), // %0 - "+r"(src_v), // %1 - "+r"(dst_uv), // %2 - "+r"(width) // %3 - : - : "memory", "cc" -#if defined(__native_client__) && defined(__x86_64__) - , "r14" -#endif -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2" -#endif - ); -} -#endif // HAS_MERGEUVROW_SSE2 - -#ifdef HAS_COPYROW_SSE2 -void CopyRow_SSE2(const uint8* src, uint8* dst, int count) { - asm volatile ( - LABELALIGN - "1: \n" - "movdqa " MEMACCESS(0) ",%%xmm0 \n" - "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "lea " MEMLEA(0x20,0) ",%0 \n" - "movdqa %%xmm0," MEMACCESS(1) " \n" - "movdqa %%xmm1," MEMACCESS2(0x10,1) " \n" - "lea " MEMLEA(0x20,1) ",%1 \n" - "sub $0x20,%2 \n" - "jg 1b \n" - : "+r"(src), // %0 - "+r"(dst), // %1 - "+r"(count) // %2 - : - : "memory", "cc" -#if defined(__SSE2__) - , "xmm0", "xmm1" -#endif - ); -} -#endif // HAS_COPYROW_SSE2 - -#ifdef HAS_COPYROW_X86 -void CopyRow_X86(const uint8* src, uint8* dst, int width) { - size_t width_tmp = (size_t)(width); - asm volatile ( - "shr $0x2,%2 \n" - "rep movsl " MEMMOVESTRING(0,1) " \n" - : "+S"(src), // %0 - "+D"(dst), // %1 - "+c"(width_tmp) // %2 - : - : "memory", "cc" - ); -} -#endif // HAS_COPYROW_X86 - -#ifdef HAS_COPYROW_ERMS -// Unaligned Multiple of 1. 
-void CopyRow_ERMS(const uint8* src, uint8* dst, int width) { - size_t width_tmp = (size_t)(width); - asm volatile ( - "rep movsb " MEMMOVESTRING(0,1) " \n" - : "+S"(src), // %0 - "+D"(dst), // %1 - "+c"(width_tmp) // %2 - : - : "memory", "cc" - ); -} -#endif // HAS_COPYROW_ERMS - -#ifdef HAS_ARGBCOPYALPHAROW_SSE2 -// width in pixels -void ARGBCopyAlphaRow_SSE2(const uint8* src, uint8* dst, int width) { - asm volatile ( - "pcmpeqb %%xmm0,%%xmm0 \n" - "pslld $0x18,%%xmm0 \n" - "pcmpeqb %%xmm1,%%xmm1 \n" - "psrld $0x8,%%xmm1 \n" - LABELALIGN - "1: \n" - "movdqa " MEMACCESS(0) ",%%xmm2 \n" - "movdqa " MEMACCESS2(0x10,0) ",%%xmm3 \n" - "lea " MEMLEA(0x20,0) ",%0 \n" - "movdqa " MEMACCESS(1) ",%%xmm4 \n" - "movdqa " MEMACCESS2(0x10,1) ",%%xmm5 \n" - "pand %%xmm0,%%xmm2 \n" - "pand %%xmm0,%%xmm3 \n" - "pand %%xmm1,%%xmm4 \n" - "pand %%xmm1,%%xmm5 \n" - "por %%xmm4,%%xmm2 \n" - "por %%xmm5,%%xmm3 \n" - "movdqa %%xmm2," MEMACCESS(1) " \n" - "movdqa %%xmm3," MEMACCESS2(0x10,1) " \n" - "lea " MEMLEA(0x20,1) ",%1 \n" - "sub $0x8,%2 \n" - "jg 1b \n" - : "+r"(src), // %0 - "+r"(dst), // %1 - "+r"(width) // %2 - : - : "memory", "cc" -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" -#endif - ); -} -#endif // HAS_ARGBCOPYALPHAROW_SSE2 - -#ifdef HAS_ARGBCOPYALPHAROW_AVX2 -// width in pixels -void ARGBCopyAlphaRow_AVX2(const uint8* src, uint8* dst, int width) { - asm volatile ( - "vpcmpeqb %%ymm0,%%ymm0,%%ymm0 \n" - "vpsrld $0x8,%%ymm0,%%ymm0 \n" - LABELALIGN - "1: \n" - "vmovdqu " MEMACCESS(0) ",%%ymm1 \n" - "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm2 \n" - "lea " MEMLEA(0x40,0) ",%0 \n" - "vpblendvb %%ymm0," MEMACCESS(1) ",%%ymm1,%%ymm1 \n" - "vpblendvb %%ymm0," MEMACCESS2(0x20,1) ",%%ymm2,%%ymm2 \n" - "vmovdqu %%ymm1," MEMACCESS(1) " \n" - "vmovdqu %%ymm2," MEMACCESS2(0x20,1) " \n" - "lea " MEMLEA(0x40,1) ",%1 \n" - "sub $0x10,%2 \n" - "jg 1b \n" - "vzeroupper \n" - : "+r"(src), // %0 - "+r"(dst), // %1 - "+r"(width) // %2 - : - : "memory", "cc" -#if 
defined(__SSE2__) - , "xmm0", "xmm1", "xmm2" -#endif - ); -} -#endif // HAS_ARGBCOPYALPHAROW_AVX2 - -#ifdef HAS_ARGBCOPYYTOALPHAROW_SSE2 -// width in pixels -void ARGBCopyYToAlphaRow_SSE2(const uint8* src, uint8* dst, int width) { - asm volatile ( - "pcmpeqb %%xmm0,%%xmm0 \n" - "pslld $0x18,%%xmm0 \n" - "pcmpeqb %%xmm1,%%xmm1 \n" - "psrld $0x8,%%xmm1 \n" - LABELALIGN - "1: \n" - "movq " MEMACCESS(0) ",%%xmm2 \n" - "lea " MEMLEA(0x8,0) ",%0 \n" - "punpcklbw %%xmm2,%%xmm2 \n" - "punpckhwd %%xmm2,%%xmm3 \n" - "punpcklwd %%xmm2,%%xmm2 \n" - "movdqa " MEMACCESS(1) ",%%xmm4 \n" - "movdqa " MEMACCESS2(0x10,1) ",%%xmm5 \n" - "pand %%xmm0,%%xmm2 \n" - "pand %%xmm0,%%xmm3 \n" - "pand %%xmm1,%%xmm4 \n" - "pand %%xmm1,%%xmm5 \n" - "por %%xmm4,%%xmm2 \n" - "por %%xmm5,%%xmm3 \n" - "movdqa %%xmm2," MEMACCESS(1) " \n" - "movdqa %%xmm3," MEMACCESS2(0x10,1) " \n" - "lea " MEMLEA(0x20,1) ",%1 \n" - "sub $0x8,%2 \n" - "jg 1b \n" - : "+r"(src), // %0 - "+r"(dst), // %1 - "+r"(width) // %2 - : - : "memory", "cc" -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" -#endif - ); -} -#endif // HAS_ARGBCOPYYTOALPHAROW_SSE2 - -#ifdef HAS_ARGBCOPYYTOALPHAROW_AVX2 -// width in pixels -void ARGBCopyYToAlphaRow_AVX2(const uint8* src, uint8* dst, int width) { - asm volatile ( - "vpcmpeqb %%ymm0,%%ymm0,%%ymm0 \n" - "vpsrld $0x8,%%ymm0,%%ymm0 \n" - LABELALIGN - "1: \n" - "vpmovzxbd " MEMACCESS(0) ",%%ymm1 \n" - "vpmovzxbd " MEMACCESS2(0x8,0) ",%%ymm2 \n" - "lea " MEMLEA(0x10,0) ",%0 \n" - "vpslld $0x18,%%ymm1,%%ymm1 \n" - "vpslld $0x18,%%ymm2,%%ymm2 \n" - "vpblendvb %%ymm0," MEMACCESS(1) ",%%ymm1,%%ymm1 \n" - "vpblendvb %%ymm0," MEMACCESS2(0x20,1) ",%%ymm2,%%ymm2 \n" - "vmovdqu %%ymm1," MEMACCESS(1) " \n" - "vmovdqu %%ymm2," MEMACCESS2(0x20,1) " \n" - "lea " MEMLEA(0x40,1) ",%1 \n" - "sub $0x10,%2 \n" - "jg 1b \n" - "vzeroupper \n" - : "+r"(src), // %0 - "+r"(dst), // %1 - "+r"(width) // %2 - : - : "memory", "cc" -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2" -#endif - ); 
-} -#endif // HAS_ARGBCOPYYTOALPHAROW_AVX2 - -#ifdef HAS_SETROW_X86 -void SetRow_X86(uint8* dst, uint32 v32, int width) { - size_t width_tmp = (size_t)(width); - asm volatile ( - "shr $0x2,%1 \n" - "rep stosl " MEMSTORESTRING(eax,0) " \n" - : "+D"(dst), // %0 - "+c"(width_tmp) // %1 - : "a"(v32) // %2 - : "memory", "cc"); -} - -void ARGBSetRows_X86(uint8* dst, uint32 v32, int width, - int dst_stride, int height) { - for (int y = 0; y < height; ++y) { - size_t width_tmp = (size_t)(width); - uint32* d = (uint32*)(dst); - asm volatile ( - "rep stosl " MEMSTORESTRING(eax,0) " \n" - : "+D"(d), // %0 - "+c"(width_tmp) // %1 - : "a"(v32) // %2 - : "memory", "cc"); - dst += dst_stride; - } -} -#endif // HAS_SETROW_X86 - -#ifdef HAS_YUY2TOYROW_SSE2 -void YUY2ToYRow_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix) { - asm volatile ( - "pcmpeqb %%xmm5,%%xmm5 \n" - "psrlw $0x8,%%xmm5 \n" - LABELALIGN - "1: \n" - "movdqa " MEMACCESS(0) ",%%xmm0 \n" - "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "lea " MEMLEA(0x20,0) ",%0 \n" - "pand %%xmm5,%%xmm0 \n" - "pand %%xmm5,%%xmm1 \n" - "packuswb %%xmm1,%%xmm0 \n" - "movdqa %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x10,%2 \n" - "jg 1b \n" - : "+r"(src_yuy2), // %0 - "+r"(dst_y), // %1 - "+r"(pix) // %2 - : - : "memory", "cc" -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm5" -#endif - ); -} - -void YUY2ToUVRow_SSE2(const uint8* src_yuy2, int stride_yuy2, - uint8* dst_u, uint8* dst_v, int pix) { - asm volatile ( - "pcmpeqb %%xmm5,%%xmm5 \n" - "psrlw $0x8,%%xmm5 \n" - "sub %1,%2 \n" - LABELALIGN - "1: \n" - "movdqa " MEMACCESS(0) ",%%xmm0 \n" - "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n" - BUNDLEALIGN - MEMOPREG(movdqa,0x00,0,4,1,xmm2) // movdqa (%0,%4,1),%%xmm2 - MEMOPREG(movdqa,0x10,0,4,1,xmm3) // movdqa 0x10(%0,%4,1),%%xmm3 - "lea " MEMLEA(0x20,0) ",%0 \n" - "pavgb %%xmm2,%%xmm0 \n" - "pavgb %%xmm3,%%xmm1 \n" - "psrlw $0x8,%%xmm0 \n" - "psrlw $0x8,%%xmm1 \n" - "packuswb %%xmm1,%%xmm0 \n" - "movdqa 
%%xmm0,%%xmm1 \n" - "pand %%xmm5,%%xmm0 \n" - "packuswb %%xmm0,%%xmm0 \n" - "psrlw $0x8,%%xmm1 \n" - "packuswb %%xmm1,%%xmm1 \n" - "movq %%xmm0," MEMACCESS(1) " \n" - BUNDLEALIGN - MEMOPMEM(movq,xmm1,0x00,1,2,1) // movq %%xmm1,(%1,%2) - "lea " MEMLEA(0x8,1) ",%1 \n" - "sub $0x10,%3 \n" - "jg 1b \n" - : "+r"(src_yuy2), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+r"(pix) // %3 - : "r"((intptr_t)(stride_yuy2)) // %4 - : "memory", "cc" -#if defined(__native_client__) && defined(__x86_64__) - , "r14" -#endif -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" -#endif - ); -} - -void YUY2ToUV422Row_SSE2(const uint8* src_yuy2, - uint8* dst_u, uint8* dst_v, int pix) { - asm volatile ( - "pcmpeqb %%xmm5,%%xmm5 \n" - "psrlw $0x8,%%xmm5 \n" - "sub %1,%2 \n" - LABELALIGN - "1: \n" - "movdqa " MEMACCESS(0) ",%%xmm0 \n" - "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "lea " MEMLEA(0x20,0) ",%0 \n" - "psrlw $0x8,%%xmm0 \n" - "psrlw $0x8,%%xmm1 \n" - "packuswb %%xmm1,%%xmm0 \n" - "movdqa %%xmm0,%%xmm1 \n" - "pand %%xmm5,%%xmm0 \n" - "packuswb %%xmm0,%%xmm0 \n" - "psrlw $0x8,%%xmm1 \n" - "packuswb %%xmm1,%%xmm1 \n" - "movq %%xmm0," MEMACCESS(1) " \n" - BUNDLEALIGN - MEMOPMEM(movq,xmm1,0x00,1,2,1) // movq %%xmm1,(%1,%2) - "lea " MEMLEA(0x8,1) ",%1 \n" - "sub $0x10,%3 \n" - "jg 1b \n" - : "+r"(src_yuy2), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+r"(pix) // %3 - : - : "memory", "cc" -#if defined(__native_client__) && defined(__x86_64__) - , "r14" -#endif -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm5" -#endif - ); -} - -void YUY2ToYRow_Unaligned_SSE2(const uint8* src_yuy2, - uint8* dst_y, int pix) { - asm volatile ( - "pcmpeqb %%xmm5,%%xmm5 \n" - "psrlw $0x8,%%xmm5 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "lea " MEMLEA(0x20,0) ",%0 \n" - "pand %%xmm5,%%xmm0 \n" - "pand %%xmm5,%%xmm1 \n" - "packuswb %%xmm1,%%xmm0 \n" - "sub $0x10,%2 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "lea " 
MEMLEA(0x10,1) ",%1 \n" - "jg 1b \n" - : "+r"(src_yuy2), // %0 - "+r"(dst_y), // %1 - "+r"(pix) // %2 - : - : "memory", "cc" -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm5" -#endif - ); -} - -void YUY2ToUVRow_Unaligned_SSE2(const uint8* src_yuy2, - int stride_yuy2, - uint8* dst_u, uint8* dst_v, int pix) { - asm volatile ( - "pcmpeqb %%xmm5,%%xmm5 \n" - "psrlw $0x8,%%xmm5 \n" - "sub %1,%2 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - BUNDLEALIGN - MEMOPREG(movdqu,0x00,0,4,1,xmm2) // movdqu (%0,%4,1),%%xmm2 - MEMOPREG(movdqu,0x10,0,4,1,xmm3) // movdqu 0x10(%0,%4,1),%%xmm3 - "lea " MEMLEA(0x20,0) ",%0 \n" - "pavgb %%xmm2,%%xmm0 \n" - "pavgb %%xmm3,%%xmm1 \n" - "psrlw $0x8,%%xmm0 \n" - "psrlw $0x8,%%xmm1 \n" - "packuswb %%xmm1,%%xmm0 \n" - "movdqa %%xmm0,%%xmm1 \n" - "pand %%xmm5,%%xmm0 \n" - "packuswb %%xmm0,%%xmm0 \n" - "psrlw $0x8,%%xmm1 \n" - "packuswb %%xmm1,%%xmm1 \n" - "movq %%xmm0," MEMACCESS(1) " \n" - BUNDLEALIGN - MEMOPMEM(movq,xmm1,0x00,1,2,1) // movq %%xmm1,(%1,%2) - "lea " MEMLEA(0x8,1) ",%1 \n" - "sub $0x10,%3 \n" - "jg 1b \n" - : "+r"(src_yuy2), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+r"(pix) // %3 - : "r"((intptr_t)(stride_yuy2)) // %4 - : "memory", "cc" -#if defined(__native_client__) && defined(__x86_64__) - , "r14" -#endif -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" -#endif - ); -} - -void YUY2ToUV422Row_Unaligned_SSE2(const uint8* src_yuy2, - uint8* dst_u, uint8* dst_v, int pix) { - asm volatile ( - "pcmpeqb %%xmm5,%%xmm5 \n" - "psrlw $0x8,%%xmm5 \n" - "sub %1,%2 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "lea " MEMLEA(0x20,0) ",%0 \n" - "psrlw $0x8,%%xmm0 \n" - "psrlw $0x8,%%xmm1 \n" - "packuswb %%xmm1,%%xmm0 \n" - "movdqa %%xmm0,%%xmm1 \n" - "pand %%xmm5,%%xmm0 \n" - "packuswb %%xmm0,%%xmm0 \n" - "psrlw $0x8,%%xmm1 \n" - "packuswb %%xmm1,%%xmm1 \n" - "movq %%xmm0," MEMACCESS(1) " 
\n" - BUNDLEALIGN - MEMOPMEM(movq,xmm1,0x00,1,2,1) // movq %%xmm1,(%1,%2) - "lea " MEMLEA(0x8,1) ",%1 \n" - "sub $0x10,%3 \n" - "jg 1b \n" - : "+r"(src_yuy2), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+r"(pix) // %3 - : - : "memory", "cc" -#if defined(__native_client__) && defined(__x86_64__) - , "r14" -#endif -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm5" -#endif - ); -} - -void UYVYToYRow_SSE2(const uint8* src_uyvy, uint8* dst_y, int pix) { - asm volatile ( - LABELALIGN - "1: \n" - "movdqa " MEMACCESS(0) ",%%xmm0 \n" - "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "lea " MEMLEA(0x20,0) ",%0 \n" - "psrlw $0x8,%%xmm0 \n" - "psrlw $0x8,%%xmm1 \n" - "packuswb %%xmm1,%%xmm0 \n" - "sub $0x10,%2 \n" - "movdqa %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "jg 1b \n" - : "+r"(src_uyvy), // %0 - "+r"(dst_y), // %1 - "+r"(pix) // %2 - : - : "memory", "cc" -#if defined(__SSE2__) - , "xmm0", "xmm1" -#endif - ); -} - -void UYVYToUVRow_SSE2(const uint8* src_uyvy, int stride_uyvy, - uint8* dst_u, uint8* dst_v, int pix) { - asm volatile ( - "pcmpeqb %%xmm5,%%xmm5 \n" - "psrlw $0x8,%%xmm5 \n" - "sub %1,%2 \n" - LABELALIGN - "1: \n" - "movdqa " MEMACCESS(0) ",%%xmm0 \n" - "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n" - BUNDLEALIGN - MEMOPREG(movdqa,0x00,0,4,1,xmm2) // movdqa (%0,%4,1),%%xmm2 - MEMOPREG(movdqa,0x10,0,4,1,xmm3) // movdqa 0x10(%0,%4,1),%%xmm3 - "lea " MEMLEA(0x20,0) ",%0 \n" - "pavgb %%xmm2,%%xmm0 \n" - "pavgb %%xmm3,%%xmm1 \n" - "pand %%xmm5,%%xmm0 \n" - "pand %%xmm5,%%xmm1 \n" - "packuswb %%xmm1,%%xmm0 \n" - "movdqa %%xmm0,%%xmm1 \n" - "pand %%xmm5,%%xmm0 \n" - "packuswb %%xmm0,%%xmm0 \n" - "psrlw $0x8,%%xmm1 \n" - "packuswb %%xmm1,%%xmm1 \n" - "movq %%xmm0," MEMACCESS(1) " \n" - BUNDLEALIGN - MEMOPMEM(movq,xmm1,0x00,1,2,1) // movq %%xmm1,(%1,%2) - "lea " MEMLEA(0x8,1) ",%1 \n" - "sub $0x10,%3 \n" - "jg 1b \n" - : "+r"(src_uyvy), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+r"(pix) // %3 - : "r"((intptr_t)(stride_uyvy)) // %4 - : 
"memory", "cc" -#if defined(__native_client__) && defined(__x86_64__) - , "r14" -#endif -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" -#endif - ); -} - -void UYVYToUV422Row_SSE2(const uint8* src_uyvy, - uint8* dst_u, uint8* dst_v, int pix) { - asm volatile ( - "pcmpeqb %%xmm5,%%xmm5 \n" - "psrlw $0x8,%%xmm5 \n" - "sub %1,%2 \n" - LABELALIGN - "1: \n" - "movdqa " MEMACCESS(0) ",%%xmm0 \n" - "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "lea " MEMLEA(0x20,0) ",%0 \n" - "pand %%xmm5,%%xmm0 \n" - "pand %%xmm5,%%xmm1 \n" - "packuswb %%xmm1,%%xmm0 \n" - "movdqa %%xmm0,%%xmm1 \n" - "pand %%xmm5,%%xmm0 \n" - "packuswb %%xmm0,%%xmm0 \n" - "psrlw $0x8,%%xmm1 \n" - "packuswb %%xmm1,%%xmm1 \n" - "movq %%xmm0," MEMACCESS(1) " \n" - BUNDLEALIGN - MEMOPMEM(movq,xmm1,0x00,1,2,1) // movq %%xmm1,(%1,%2) - "lea " MEMLEA(0x8,1) ",%1 \n" - "sub $0x10,%3 \n" - "jg 1b \n" - : "+r"(src_uyvy), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+r"(pix) // %3 - : - : "memory", "cc" -#if defined(__native_client__) && defined(__x86_64__) - , "r14" -#endif -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm5" -#endif - ); -} - -void UYVYToYRow_Unaligned_SSE2(const uint8* src_uyvy, - uint8* dst_y, int pix) { - asm volatile ( - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "lea " MEMLEA(0x20,0) ",%0 \n" - "psrlw $0x8,%%xmm0 \n" - "psrlw $0x8,%%xmm1 \n" - "packuswb %%xmm1,%%xmm0 \n" - "sub $0x10,%2 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "jg 1b \n" - : "+r"(src_uyvy), // %0 - "+r"(dst_y), // %1 - "+r"(pix) // %2 - : - : "memory", "cc" -#if defined(__SSE2__) - , "xmm0", "xmm1" -#endif - ); -} - -void UYVYToUVRow_Unaligned_SSE2(const uint8* src_uyvy, int stride_uyvy, - uint8* dst_u, uint8* dst_v, int pix) { - asm volatile ( - "pcmpeqb %%xmm5,%%xmm5 \n" - "psrlw $0x8,%%xmm5 \n" - "sub %1,%2 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) 
",%%xmm1 \n" - BUNDLEALIGN - MEMOPREG(movdqu,0x00,0,4,1,xmm2) // movdqu (%0,%4,1),%%xmm2 - MEMOPREG(movdqu,0x10,0,4,1,xmm3) // movdqu 0x10(%0,%4,1),%%xmm3 - "lea " MEMLEA(0x20,0) ",%0 \n" - "pavgb %%xmm2,%%xmm0 \n" - "pavgb %%xmm3,%%xmm1 \n" - "pand %%xmm5,%%xmm0 \n" - "pand %%xmm5,%%xmm1 \n" - "packuswb %%xmm1,%%xmm0 \n" - "movdqa %%xmm0,%%xmm1 \n" - "pand %%xmm5,%%xmm0 \n" - "packuswb %%xmm0,%%xmm0 \n" - "psrlw $0x8,%%xmm1 \n" - "packuswb %%xmm1,%%xmm1 \n" - "movq %%xmm0," MEMACCESS(1) " \n" - BUNDLEALIGN - MEMOPMEM(movq,xmm1,0x00,1,2,1) // movq %%xmm1,(%1,%2) - "lea " MEMLEA(0x8,1) ",%1 \n" - "sub $0x10,%3 \n" - "jg 1b \n" - : "+r"(src_uyvy), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+r"(pix) // %3 - : "r"((intptr_t)(stride_uyvy)) // %4 - : "memory", "cc" -#if defined(__native_client__) && defined(__x86_64__) - , "r14" -#endif -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" -#endif - ); -} - -void UYVYToUV422Row_Unaligned_SSE2(const uint8* src_uyvy, - uint8* dst_u, uint8* dst_v, int pix) { - asm volatile ( - "pcmpeqb %%xmm5,%%xmm5 \n" - "psrlw $0x8,%%xmm5 \n" - "sub %1,%2 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "lea " MEMLEA(0x20,0) ",%0 \n" - "pand %%xmm5,%%xmm0 \n" - "pand %%xmm5,%%xmm1 \n" - "packuswb %%xmm1,%%xmm0 \n" - "movdqa %%xmm0,%%xmm1 \n" - "pand %%xmm5,%%xmm0 \n" - "packuswb %%xmm0,%%xmm0 \n" - "psrlw $0x8,%%xmm1 \n" - "packuswb %%xmm1,%%xmm1 \n" - "movq %%xmm0," MEMACCESS(1) " \n" - BUNDLEALIGN - MEMOPMEM(movq,xmm1,0x00,1,2,1) // movq %%xmm1,(%1,%2) - "lea " MEMLEA(0x8,1) ",%1 \n" - "sub $0x10,%3 \n" - "jg 1b \n" - : "+r"(src_uyvy), // %0 - "+r"(dst_u), // %1 - "+r"(dst_v), // %2 - "+r"(pix) // %3 - : - : "memory", "cc" -#if defined(__native_client__) && defined(__x86_64__) - , "r14" -#endif -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm5" -#endif - ); -} -#endif // HAS_YUY2TOYROW_SSE2 - -#ifdef HAS_ARGBBLENDROW_SSE2 -// Blend 8 pixels at a time. 
-void ARGBBlendRow_SSE2(const uint8* src_argb0, const uint8* src_argb1, - uint8* dst_argb, int width) { - asm volatile ( - "pcmpeqb %%xmm7,%%xmm7 \n" - "psrlw $0xf,%%xmm7 \n" - "pcmpeqb %%xmm6,%%xmm6 \n" - "psrlw $0x8,%%xmm6 \n" - "pcmpeqb %%xmm5,%%xmm5 \n" - "psllw $0x8,%%xmm5 \n" - "pcmpeqb %%xmm4,%%xmm4 \n" - "pslld $0x18,%%xmm4 \n" - "sub $0x1,%3 \n" - "je 91f \n" - "jl 99f \n" - - // 1 pixel loop until destination pointer is aligned. - "10: \n" - "test $0xf,%2 \n" - "je 19f \n" - "movd " MEMACCESS(0) ",%%xmm3 \n" - "lea " MEMLEA(0x4,0) ",%0 \n" - "movdqa %%xmm3,%%xmm0 \n" - "pxor %%xmm4,%%xmm3 \n" - "movd " MEMACCESS(1) ",%%xmm2 \n" - "psrlw $0x8,%%xmm3 \n" - "pshufhw $0xf5,%%xmm3,%%xmm3 \n" - "pshuflw $0xf5,%%xmm3,%%xmm3 \n" - "pand %%xmm6,%%xmm2 \n" - "paddw %%xmm7,%%xmm3 \n" - "pmullw %%xmm3,%%xmm2 \n" - "movd " MEMACCESS(1) ",%%xmm1 \n" - "lea " MEMLEA(0x4,1) ",%1 \n" - "psrlw $0x8,%%xmm1 \n" - "por %%xmm4,%%xmm0 \n" - "pmullw %%xmm3,%%xmm1 \n" - "psrlw $0x8,%%xmm2 \n" - "paddusb %%xmm2,%%xmm0 \n" - "pand %%xmm5,%%xmm1 \n" - "paddusb %%xmm1,%%xmm0 \n" - "sub $0x1,%3 \n" - "movd %%xmm0," MEMACCESS(2) " \n" - "lea " MEMLEA(0x4,2) ",%2 \n" - "jge 10b \n" - - "19: \n" - "add $1-4,%3 \n" - "jl 49f \n" - - // 4 pixel loop. 
- LABELALIGN - "41: \n" - "movdqu " MEMACCESS(0) ",%%xmm3 \n" - "lea " MEMLEA(0x10,0) ",%0 \n" - "movdqa %%xmm3,%%xmm0 \n" - "pxor %%xmm4,%%xmm3 \n" - "movdqu " MEMACCESS(1) ",%%xmm2 \n" - "psrlw $0x8,%%xmm3 \n" - "pshufhw $0xf5,%%xmm3,%%xmm3 \n" - "pshuflw $0xf5,%%xmm3,%%xmm3 \n" - "pand %%xmm6,%%xmm2 \n" - "paddw %%xmm7,%%xmm3 \n" - "pmullw %%xmm3,%%xmm2 \n" - "movdqu " MEMACCESS(1) ",%%xmm1 \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "psrlw $0x8,%%xmm1 \n" - "por %%xmm4,%%xmm0 \n" - "pmullw %%xmm3,%%xmm1 \n" - "psrlw $0x8,%%xmm2 \n" - "paddusb %%xmm2,%%xmm0 \n" - "pand %%xmm5,%%xmm1 \n" - "paddusb %%xmm1,%%xmm0 \n" - "sub $0x4,%3 \n" - "movdqa %%xmm0," MEMACCESS(2) " \n" - "lea " MEMLEA(0x10,2) ",%2 \n" - "jge 41b \n" - - "49: \n" - "add $0x3,%3 \n" - "jl 99f \n" - - // 1 pixel loop. - "91: \n" - "movd " MEMACCESS(0) ",%%xmm3 \n" - "lea " MEMLEA(0x4,0) ",%0 \n" - "movdqa %%xmm3,%%xmm0 \n" - "pxor %%xmm4,%%xmm3 \n" - "movd " MEMACCESS(1) ",%%xmm2 \n" - "psrlw $0x8,%%xmm3 \n" - "pshufhw $0xf5,%%xmm3,%%xmm3 \n" - "pshuflw $0xf5,%%xmm3,%%xmm3 \n" - "pand %%xmm6,%%xmm2 \n" - "paddw %%xmm7,%%xmm3 \n" - "pmullw %%xmm3,%%xmm2 \n" - "movd " MEMACCESS(1) ",%%xmm1 \n" - "lea " MEMLEA(0x4,1) ",%1 \n" - "psrlw $0x8,%%xmm1 \n" - "por %%xmm4,%%xmm0 \n" - "pmullw %%xmm3,%%xmm1 \n" - "psrlw $0x8,%%xmm2 \n" - "paddusb %%xmm2,%%xmm0 \n" - "pand %%xmm5,%%xmm1 \n" - "paddusb %%xmm1,%%xmm0 \n" - "sub $0x1,%3 \n" - "movd %%xmm0," MEMACCESS(2) " \n" - "lea " MEMLEA(0x4,2) ",%2 \n" - "jge 91b \n" - "99: \n" - : "+r"(src_argb0), // %0 - "+r"(src_argb1), // %1 - "+r"(dst_argb), // %2 - "+r"(width) // %3 - : - : "memory", "cc" -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" -#endif - ); -} -#endif // HAS_ARGBBLENDROW_SSE2 - -#ifdef HAS_ARGBBLENDROW_SSSE3 -// Shuffle table for isolating alpha. 
-static uvec8 kShuffleAlpha = { - 3u, 0x80, 3u, 0x80, 7u, 0x80, 7u, 0x80, - 11u, 0x80, 11u, 0x80, 15u, 0x80, 15u, 0x80 -}; - -// Blend 8 pixels at a time -// Shuffle table for reversing the bytes. - -// Same as SSE2, but replaces -// psrlw xmm3, 8 // alpha -// pshufhw xmm3, xmm3,0F5h // 8 alpha words -// pshuflw xmm3, xmm3,0F5h -// with.. -// pshufb xmm3, kShuffleAlpha // alpha - -void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1, - uint8* dst_argb, int width) { - asm volatile ( - "pcmpeqb %%xmm7,%%xmm7 \n" - "psrlw $0xf,%%xmm7 \n" - "pcmpeqb %%xmm6,%%xmm6 \n" - "psrlw $0x8,%%xmm6 \n" - "pcmpeqb %%xmm5,%%xmm5 \n" - "psllw $0x8,%%xmm5 \n" - "pcmpeqb %%xmm4,%%xmm4 \n" - "pslld $0x18,%%xmm4 \n" - "sub $0x1,%3 \n" - "je 91f \n" - "jl 99f \n" - - // 1 pixel loop until destination pointer is aligned. - "10: \n" - "test $0xf,%2 \n" - "je 19f \n" - "movd " MEMACCESS(0) ",%%xmm3 \n" - "lea " MEMLEA(0x4,0) ",%0 \n" - "movdqa %%xmm3,%%xmm0 \n" - "pxor %%xmm4,%%xmm3 \n" - "movd " MEMACCESS(1) ",%%xmm2 \n" - "pshufb %4,%%xmm3 \n" - "pand %%xmm6,%%xmm2 \n" - "paddw %%xmm7,%%xmm3 \n" - "pmullw %%xmm3,%%xmm2 \n" - "movd " MEMACCESS(1) ",%%xmm1 \n" - "lea " MEMLEA(0x4,1) ",%1 \n" - "psrlw $0x8,%%xmm1 \n" - "por %%xmm4,%%xmm0 \n" - "pmullw %%xmm3,%%xmm1 \n" - "psrlw $0x8,%%xmm2 \n" - "paddusb %%xmm2,%%xmm0 \n" - "pand %%xmm5,%%xmm1 \n" - "paddusb %%xmm1,%%xmm0 \n" - "sub $0x1,%3 \n" - "movd %%xmm0," MEMACCESS(2) " \n" - "lea " MEMLEA(0x4,2) ",%2 \n" - "jge 10b \n" - - "19: \n" - "add $1-4,%3 \n" - "jl 49f \n" - "test $0xf,%0 \n" - "jne 41f \n" - "test $0xf,%1 \n" - "jne 41f \n" - - // 4 pixel loop. 
- LABELALIGN - "40: \n" - "movdqa " MEMACCESS(0) ",%%xmm3 \n" - "lea " MEMLEA(0x10,0) ",%0 \n" - "movdqa %%xmm3,%%xmm0 \n" - "pxor %%xmm4,%%xmm3 \n" - "movdqa " MEMACCESS(1) ",%%xmm2 \n" - "pshufb %4,%%xmm3 \n" - "pand %%xmm6,%%xmm2 \n" - "paddw %%xmm7,%%xmm3 \n" - "pmullw %%xmm3,%%xmm2 \n" - "movdqa " MEMACCESS(1) ",%%xmm1 \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "psrlw $0x8,%%xmm1 \n" - "por %%xmm4,%%xmm0 \n" - "pmullw %%xmm3,%%xmm1 \n" - "psrlw $0x8,%%xmm2 \n" - "paddusb %%xmm2,%%xmm0 \n" - "pand %%xmm5,%%xmm1 \n" - "paddusb %%xmm1,%%xmm0 \n" - "sub $0x4,%3 \n" - "movdqa %%xmm0," MEMACCESS(2) " \n" - "lea " MEMLEA(0x10,2) ",%2 \n" - "jge 40b \n" - "jmp 49f \n" - - // 4 pixel unaligned loop. - LABELALIGN - "41: \n" - "movdqu " MEMACCESS(0) ",%%xmm3 \n" - "lea " MEMLEA(0x10,0) ",%0 \n" - "movdqa %%xmm3,%%xmm0 \n" - "pxor %%xmm4,%%xmm3 \n" - "movdqu " MEMACCESS(1) ",%%xmm2 \n" - "pshufb %4,%%xmm3 \n" - "pand %%xmm6,%%xmm2 \n" - "paddw %%xmm7,%%xmm3 \n" - "pmullw %%xmm3,%%xmm2 \n" - "movdqu " MEMACCESS(1) ",%%xmm1 \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "psrlw $0x8,%%xmm1 \n" - "por %%xmm4,%%xmm0 \n" - "pmullw %%xmm3,%%xmm1 \n" - "psrlw $0x8,%%xmm2 \n" - "paddusb %%xmm2,%%xmm0 \n" - "pand %%xmm5,%%xmm1 \n" - "paddusb %%xmm1,%%xmm0 \n" - "sub $0x4,%3 \n" - "movdqa %%xmm0," MEMACCESS(2) " \n" - "lea " MEMLEA(0x10,2) ",%2 \n" - "jge 41b \n" - - "49: \n" - "add $0x3,%3 \n" - "jl 99f \n" - - // 1 pixel loop. 
- "91: \n" - "movd " MEMACCESS(0) ",%%xmm3 \n" - "lea " MEMLEA(0x4,0) ",%0 \n" - "movdqa %%xmm3,%%xmm0 \n" - "pxor %%xmm4,%%xmm3 \n" - "movd " MEMACCESS(1) ",%%xmm2 \n" - "pshufb %4,%%xmm3 \n" - "pand %%xmm6,%%xmm2 \n" - "paddw %%xmm7,%%xmm3 \n" - "pmullw %%xmm3,%%xmm2 \n" - "movd " MEMACCESS(1) ",%%xmm1 \n" - "lea " MEMLEA(0x4,1) ",%1 \n" - "psrlw $0x8,%%xmm1 \n" - "por %%xmm4,%%xmm0 \n" - "pmullw %%xmm3,%%xmm1 \n" - "psrlw $0x8,%%xmm2 \n" - "paddusb %%xmm2,%%xmm0 \n" - "pand %%xmm5,%%xmm1 \n" - "paddusb %%xmm1,%%xmm0 \n" - "sub $0x1,%3 \n" - "movd %%xmm0," MEMACCESS(2) " \n" - "lea " MEMLEA(0x4,2) ",%2 \n" - "jge 91b \n" - "99: \n" - : "+r"(src_argb0), // %0 - "+r"(src_argb1), // %1 - "+r"(dst_argb), // %2 - "+r"(width) // %3 - : "m"(kShuffleAlpha) // %4 - : "memory", "cc" -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" -#endif - ); -} -#endif // HAS_ARGBBLENDROW_SSSE3 - -#ifdef HAS_ARGBATTENUATEROW_SSE2 -// Attenuate 4 pixels at a time. -// aligned to 16 bytes -void ARGBAttenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width) { - asm volatile ( - "pcmpeqb %%xmm4,%%xmm4 \n" - "pslld $0x18,%%xmm4 \n" - "pcmpeqb %%xmm5,%%xmm5 \n" - "psrld $0x8,%%xmm5 \n" - - // 4 pixel loop. 
- LABELALIGN - "1: \n" - "movdqa " MEMACCESS(0) ",%%xmm0 \n" - "punpcklbw %%xmm0,%%xmm0 \n" - "pshufhw $0xff,%%xmm0,%%xmm2 \n" - "pshuflw $0xff,%%xmm2,%%xmm2 \n" - "pmulhuw %%xmm2,%%xmm0 \n" - "movdqa " MEMACCESS(0) ",%%xmm1 \n" - "punpckhbw %%xmm1,%%xmm1 \n" - "pshufhw $0xff,%%xmm1,%%xmm2 \n" - "pshuflw $0xff,%%xmm2,%%xmm2 \n" - "pmulhuw %%xmm2,%%xmm1 \n" - "movdqa " MEMACCESS(0) ",%%xmm2 \n" - "lea " MEMLEA(0x10,0) ",%0 \n" - "psrlw $0x8,%%xmm0 \n" - "pand %%xmm4,%%xmm2 \n" - "psrlw $0x8,%%xmm1 \n" - "packuswb %%xmm1,%%xmm0 \n" - "pand %%xmm5,%%xmm0 \n" - "por %%xmm2,%%xmm0 \n" - "sub $0x4,%2 \n" - "movdqa %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "jg 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_argb), // %1 - "+r"(width) // %2 - : - : "memory", "cc" -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" -#endif - ); -} -#endif // HAS_ARGBATTENUATEROW_SSE2 - -#ifdef HAS_ARGBATTENUATEROW_SSSE3 -// Shuffle table duplicating alpha -static uvec8 kShuffleAlpha0 = { - 3u, 3u, 3u, 3u, 3u, 3u, 128u, 128u, 7u, 7u, 7u, 7u, 7u, 7u, 128u, 128u, -}; -static uvec8 kShuffleAlpha1 = { - 11u, 11u, 11u, 11u, 11u, 11u, 128u, 128u, - 15u, 15u, 15u, 15u, 15u, 15u, 128u, 128u, -}; -// Attenuate 4 pixels at a time. -// aligned to 16 bytes -void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) { - asm volatile ( - "pcmpeqb %%xmm3,%%xmm3 \n" - "pslld $0x18,%%xmm3 \n" - "movdqa %3,%%xmm4 \n" - "movdqa %4,%%xmm5 \n" - - // 4 pixel loop. 
- LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "pshufb %%xmm4,%%xmm0 \n" - "movdqu " MEMACCESS(0) ",%%xmm1 \n" - "punpcklbw %%xmm1,%%xmm1 \n" - "pmulhuw %%xmm1,%%xmm0 \n" - "movdqu " MEMACCESS(0) ",%%xmm1 \n" - "pshufb %%xmm5,%%xmm1 \n" - "movdqu " MEMACCESS(0) ",%%xmm2 \n" - "punpckhbw %%xmm2,%%xmm2 \n" - "pmulhuw %%xmm2,%%xmm1 \n" - "movdqu " MEMACCESS(0) ",%%xmm2 \n" - "lea " MEMLEA(0x10,0) ",%0 \n" - "pand %%xmm3,%%xmm2 \n" - "psrlw $0x8,%%xmm0 \n" - "psrlw $0x8,%%xmm1 \n" - "packuswb %%xmm1,%%xmm0 \n" - "por %%xmm2,%%xmm0 \n" - "sub $0x4,%2 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "jg 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_argb), // %1 - "+r"(width) // %2 - : "m"(kShuffleAlpha0), // %3 - "m"(kShuffleAlpha1) // %4 - : "memory", "cc" -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" -#endif - ); -} -#endif // HAS_ARGBATTENUATEROW_SSSE3 - -#ifdef HAS_ARGBUNATTENUATEROW_SSE2 -// Unattenuate 4 pixels at a time. -// aligned to 16 bytes -void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, - int width) { - uintptr_t alpha = 0; - asm volatile ( - // 4 pixel loop. 
- LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movzb " MEMACCESS2(0x03,0) ",%3 \n" - "punpcklbw %%xmm0,%%xmm0 \n" - MEMOPREG(movd,0x00,4,3,4,xmm2) // movd 0x0(%4,%3,4),%%xmm2 - "movzb " MEMACCESS2(0x07,0) ",%3 \n" - MEMOPREG(movd,0x00,4,3,4,xmm3) // movd 0x0(%4,%3,4),%%xmm3 - "pshuflw $0x40,%%xmm2,%%xmm2 \n" - "pshuflw $0x40,%%xmm3,%%xmm3 \n" - "movlhps %%xmm3,%%xmm2 \n" - "pmulhuw %%xmm2,%%xmm0 \n" - "movdqu " MEMACCESS(0) ",%%xmm1 \n" - "movzb " MEMACCESS2(0x0b,0) ",%3 \n" - "punpckhbw %%xmm1,%%xmm1 \n" - BUNDLEALIGN - MEMOPREG(movd,0x00,4,3,4,xmm2) // movd 0x0(%4,%3,4),%%xmm2 - "movzb " MEMACCESS2(0x0f,0) ",%3 \n" - MEMOPREG(movd,0x00,4,3,4,xmm3) // movd 0x0(%4,%3,4),%%xmm3 - "pshuflw $0x40,%%xmm2,%%xmm2 \n" - "pshuflw $0x40,%%xmm3,%%xmm3 \n" - "movlhps %%xmm3,%%xmm2 \n" - "pmulhuw %%xmm2,%%xmm1 \n" - "lea " MEMLEA(0x10,0) ",%0 \n" - "packuswb %%xmm1,%%xmm0 \n" - "sub $0x4,%2 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "jg 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_argb), // %1 - "+r"(width), // %2 - "+r"(alpha) // %3 - : "r"(fixed_invtbl8) // %4 - : "memory", "cc" -#if defined(__native_client__) && defined(__x86_64__) - , "r14" -#endif -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" -#endif - ); -} -#endif // HAS_ARGBUNATTENUATEROW_SSE2 - -#ifdef HAS_ARGBGRAYROW_SSSE3 -// Convert 8 ARGB pixels (64 bytes) to 8 Gray ARGB pixels -void ARGBGrayRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) { - asm volatile ( - "movdqa %3,%%xmm4 \n" - "movdqa %4,%%xmm5 \n" - - // 8 pixel loop. 
- LABELALIGN - "1: \n" - "movdqa " MEMACCESS(0) ",%%xmm0 \n" - "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "pmaddubsw %%xmm4,%%xmm0 \n" - "pmaddubsw %%xmm4,%%xmm1 \n" - "phaddw %%xmm1,%%xmm0 \n" - "paddw %%xmm5,%%xmm0 \n" - "psrlw $0x7,%%xmm0 \n" - "packuswb %%xmm0,%%xmm0 \n" - "movdqa " MEMACCESS(0) ",%%xmm2 \n" - "movdqa " MEMACCESS2(0x10,0) ",%%xmm3 \n" - "lea " MEMLEA(0x20,0) ",%0 \n" - "psrld $0x18,%%xmm2 \n" - "psrld $0x18,%%xmm3 \n" - "packuswb %%xmm3,%%xmm2 \n" - "packuswb %%xmm2,%%xmm2 \n" - "movdqa %%xmm0,%%xmm3 \n" - "punpcklbw %%xmm0,%%xmm0 \n" - "punpcklbw %%xmm2,%%xmm3 \n" - "movdqa %%xmm0,%%xmm1 \n" - "punpcklwd %%xmm3,%%xmm0 \n" - "punpckhwd %%xmm3,%%xmm1 \n" - "sub $0x8,%2 \n" - "movdqa %%xmm0," MEMACCESS(1) " \n" - "movdqa %%xmm1," MEMACCESS2(0x10,1) " \n" - "lea " MEMLEA(0x20,1) ",%1 \n" - "jg 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_argb), // %1 - "+r"(width) // %2 - : "m"(kARGBToYJ), // %3 - "m"(kAddYJ64) // %4 - : "memory", "cc" -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" -#endif - ); -} -#endif // HAS_ARGBGRAYROW_SSSE3 - -#ifdef HAS_ARGBSEPIAROW_SSSE3 -// b = (r * 35 + g * 68 + b * 17) >> 7 -// g = (r * 45 + g * 88 + b * 22) >> 7 -// r = (r * 50 + g * 98 + b * 24) >> 7 -// Constant for ARGB color to sepia tone -static vec8 kARGBToSepiaB = { - 17, 68, 35, 0, 17, 68, 35, 0, 17, 68, 35, 0, 17, 68, 35, 0 -}; - -static vec8 kARGBToSepiaG = { - 22, 88, 45, 0, 22, 88, 45, 0, 22, 88, 45, 0, 22, 88, 45, 0 -}; - -static vec8 kARGBToSepiaR = { - 24, 98, 50, 0, 24, 98, 50, 0, 24, 98, 50, 0, 24, 98, 50, 0 -}; - -// Convert 8 ARGB pixels (32 bytes) to 8 Sepia ARGB pixels. -void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width) { - asm volatile ( - "movdqa %2,%%xmm2 \n" - "movdqa %3,%%xmm3 \n" - "movdqa %4,%%xmm4 \n" - - // 8 pixel loop. 
- LABELALIGN - "1: \n" - "movdqa " MEMACCESS(0) ",%%xmm0 \n" - "movdqa " MEMACCESS2(0x10,0) ",%%xmm6 \n" - "pmaddubsw %%xmm2,%%xmm0 \n" - "pmaddubsw %%xmm2,%%xmm6 \n" - "phaddw %%xmm6,%%xmm0 \n" - "psrlw $0x7,%%xmm0 \n" - "packuswb %%xmm0,%%xmm0 \n" - "movdqa " MEMACCESS(0) ",%%xmm5 \n" - "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "pmaddubsw %%xmm3,%%xmm5 \n" - "pmaddubsw %%xmm3,%%xmm1 \n" - "phaddw %%xmm1,%%xmm5 \n" - "psrlw $0x7,%%xmm5 \n" - "packuswb %%xmm5,%%xmm5 \n" - "punpcklbw %%xmm5,%%xmm0 \n" - "movdqa " MEMACCESS(0) ",%%xmm5 \n" - "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "pmaddubsw %%xmm4,%%xmm5 \n" - "pmaddubsw %%xmm4,%%xmm1 \n" - "phaddw %%xmm1,%%xmm5 \n" - "psrlw $0x7,%%xmm5 \n" - "packuswb %%xmm5,%%xmm5 \n" - "movdqa " MEMACCESS(0) ",%%xmm6 \n" - "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "psrld $0x18,%%xmm6 \n" - "psrld $0x18,%%xmm1 \n" - "packuswb %%xmm1,%%xmm6 \n" - "packuswb %%xmm6,%%xmm6 \n" - "punpcklbw %%xmm6,%%xmm5 \n" - "movdqa %%xmm0,%%xmm1 \n" - "punpcklwd %%xmm5,%%xmm0 \n" - "punpckhwd %%xmm5,%%xmm1 \n" - "sub $0x8,%1 \n" - "movdqa %%xmm0," MEMACCESS(0) " \n" - "movdqa %%xmm1," MEMACCESS2(0x10,0) " \n" - "lea " MEMLEA(0x20,0) ",%0 \n" - "jg 1b \n" - : "+r"(dst_argb), // %0 - "+r"(width) // %1 - : "m"(kARGBToSepiaB), // %2 - "m"(kARGBToSepiaG), // %3 - "m"(kARGBToSepiaR) // %4 - : "memory", "cc" -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6" -#endif - ); -} -#endif // HAS_ARGBSEPIAROW_SSSE3 - -#ifdef HAS_ARGBCOLORMATRIXROW_SSSE3 -// Tranform 8 ARGB pixels (32 bytes) with color matrix. -// Same as Sepia except matrix is provided. -void ARGBColorMatrixRow_SSSE3(const uint8* src_argb, uint8* dst_argb, - const int8* matrix_argb, int width) { - asm volatile ( - "movdqu " MEMACCESS(3) ",%%xmm5 \n" - "pshufd $0x00,%%xmm5,%%xmm2 \n" - "pshufd $0x55,%%xmm5,%%xmm3 \n" - "pshufd $0xaa,%%xmm5,%%xmm4 \n" - "pshufd $0xff,%%xmm5,%%xmm5 \n" - - // 8 pixel loop. 
- LABELALIGN - "1: \n" - "movdqa " MEMACCESS(0) ",%%xmm0 \n" - "movdqa " MEMACCESS2(0x10,0) ",%%xmm7 \n" - "pmaddubsw %%xmm2,%%xmm0 \n" - "pmaddubsw %%xmm2,%%xmm7 \n" - "movdqa " MEMACCESS(0) ",%%xmm6 \n" - "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "pmaddubsw %%xmm3,%%xmm6 \n" - "pmaddubsw %%xmm3,%%xmm1 \n" - "phaddsw %%xmm7,%%xmm0 \n" - "phaddsw %%xmm1,%%xmm6 \n" - "psraw $0x6,%%xmm0 \n" - "psraw $0x6,%%xmm6 \n" - "packuswb %%xmm0,%%xmm0 \n" - "packuswb %%xmm6,%%xmm6 \n" - "punpcklbw %%xmm6,%%xmm0 \n" - "movdqa " MEMACCESS(0) ",%%xmm1 \n" - "movdqa " MEMACCESS2(0x10,0) ",%%xmm7 \n" - "pmaddubsw %%xmm4,%%xmm1 \n" - "pmaddubsw %%xmm4,%%xmm7 \n" - "phaddsw %%xmm7,%%xmm1 \n" - "movdqa " MEMACCESS(0) ",%%xmm6 \n" - "movdqa " MEMACCESS2(0x10,0) ",%%xmm7 \n" - "pmaddubsw %%xmm5,%%xmm6 \n" - "pmaddubsw %%xmm5,%%xmm7 \n" - "phaddsw %%xmm7,%%xmm6 \n" - "psraw $0x6,%%xmm1 \n" - "psraw $0x6,%%xmm6 \n" - "packuswb %%xmm1,%%xmm1 \n" - "packuswb %%xmm6,%%xmm6 \n" - "punpcklbw %%xmm6,%%xmm1 \n" - "movdqa %%xmm0,%%xmm6 \n" - "punpcklwd %%xmm1,%%xmm0 \n" - "punpckhwd %%xmm1,%%xmm6 \n" - "sub $0x8,%2 \n" - "movdqa %%xmm0," MEMACCESS(1) " \n" - "movdqa %%xmm6," MEMACCESS2(0x10,1) " \n" - "lea " MEMLEA(0x20,0) ",%0 \n" - "lea " MEMLEA(0x20,1) ",%1 \n" - "jg 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_argb), // %1 - "+r"(width) // %2 - : "r"(matrix_argb) // %3 - : "memory", "cc" -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" -#endif - ); -} -#endif // HAS_ARGBCOLORMATRIXROW_SSSE3 - -#ifdef HAS_ARGBQUANTIZEROW_SSE2 -// Quantize 4 ARGB pixels (16 bytes). 
-// aligned to 16 bytes -void ARGBQuantizeRow_SSE2(uint8* dst_argb, int scale, int interval_size, - int interval_offset, int width) { - asm volatile ( - "movd %2,%%xmm2 \n" - "movd %3,%%xmm3 \n" - "movd %4,%%xmm4 \n" - "pshuflw $0x40,%%xmm2,%%xmm2 \n" - "pshufd $0x44,%%xmm2,%%xmm2 \n" - "pshuflw $0x40,%%xmm3,%%xmm3 \n" - "pshufd $0x44,%%xmm3,%%xmm3 \n" - "pshuflw $0x40,%%xmm4,%%xmm4 \n" - "pshufd $0x44,%%xmm4,%%xmm4 \n" - "pxor %%xmm5,%%xmm5 \n" - "pcmpeqb %%xmm6,%%xmm6 \n" - "pslld $0x18,%%xmm6 \n" - - // 4 pixel loop. - LABELALIGN - "1: \n" - "movdqa " MEMACCESS(0) ",%%xmm0 \n" - "punpcklbw %%xmm5,%%xmm0 \n" - "pmulhuw %%xmm2,%%xmm0 \n" - "movdqa " MEMACCESS(0) ",%%xmm1 \n" - "punpckhbw %%xmm5,%%xmm1 \n" - "pmulhuw %%xmm2,%%xmm1 \n" - "pmullw %%xmm3,%%xmm0 \n" - "movdqa " MEMACCESS(0) ",%%xmm7 \n" - "pmullw %%xmm3,%%xmm1 \n" - "pand %%xmm6,%%xmm7 \n" - "paddw %%xmm4,%%xmm0 \n" - "paddw %%xmm4,%%xmm1 \n" - "packuswb %%xmm1,%%xmm0 \n" - "por %%xmm7,%%xmm0 \n" - "sub $0x4,%1 \n" - "movdqa %%xmm0," MEMACCESS(0) " \n" - "lea " MEMLEA(0x10,0) ",%0 \n" - "jg 1b \n" - : "+r"(dst_argb), // %0 - "+r"(width) // %1 - : "r"(scale), // %2 - "r"(interval_size), // %3 - "r"(interval_offset) // %4 - : "memory", "cc" -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" -#endif - ); -} -#endif // HAS_ARGBQUANTIZEROW_SSE2 - -#ifdef HAS_ARGBSHADEROW_SSE2 -// Shade 4 pixels at a time by specified value. -// Aligned to 16 bytes. -void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width, - uint32 value) { - asm volatile ( - "movd %3,%%xmm2 \n" - "punpcklbw %%xmm2,%%xmm2 \n" - "punpcklqdq %%xmm2,%%xmm2 \n" - - // 4 pixel loop. 
- LABELALIGN - "1: \n" - "movdqa " MEMACCESS(0) ",%%xmm0 \n" - "lea " MEMLEA(0x10,0) ",%0 \n" - "movdqa %%xmm0,%%xmm1 \n" - "punpcklbw %%xmm0,%%xmm0 \n" - "punpckhbw %%xmm1,%%xmm1 \n" - "pmulhuw %%xmm2,%%xmm0 \n" - "pmulhuw %%xmm2,%%xmm1 \n" - "psrlw $0x8,%%xmm0 \n" - "psrlw $0x8,%%xmm1 \n" - "packuswb %%xmm1,%%xmm0 \n" - "sub $0x4,%2 \n" - "movdqa %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "jg 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_argb), // %1 - "+r"(width) // %2 - : "r"(value) // %3 - : "memory", "cc" -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2" -#endif - ); -} -#endif // HAS_ARGBSHADEROW_SSE2 - -#ifdef HAS_ARGBMULTIPLYROW_SSE2 -// Multiply 2 rows of ARGB pixels together, 4 pixels at a time. -void ARGBMultiplyRow_SSE2(const uint8* src_argb0, const uint8* src_argb1, - uint8* dst_argb, int width) { - asm volatile ( - "pxor %%xmm5,%%xmm5 \n" - - // 4 pixel loop. - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "lea " MEMLEA(0x10,0) ",%0 \n" - "movdqu " MEMACCESS(1) ",%%xmm2 \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "movdqu %%xmm0,%%xmm1 \n" - "movdqu %%xmm2,%%xmm3 \n" - "punpcklbw %%xmm0,%%xmm0 \n" - "punpckhbw %%xmm1,%%xmm1 \n" - "punpcklbw %%xmm5,%%xmm2 \n" - "punpckhbw %%xmm5,%%xmm3 \n" - "pmulhuw %%xmm2,%%xmm0 \n" - "pmulhuw %%xmm3,%%xmm1 \n" - "packuswb %%xmm1,%%xmm0 \n" - "sub $0x4,%3 \n" - "movdqu %%xmm0," MEMACCESS(2) " \n" - "lea " MEMLEA(0x10,2) ",%2 \n" - "jg 1b \n" - : "+r"(src_argb0), // %0 - "+r"(src_argb1), // %1 - "+r"(dst_argb), // %2 - "+r"(width) // %3 - : - : "memory", "cc" -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" -#endif - ); -} -#endif // HAS_ARGBMULTIPLYROW_SSE2 - -#ifdef HAS_ARGBADDROW_SSE2 -// Add 2 rows of ARGB pixels together, 4 pixels at a time. -void ARGBAddRow_SSE2(const uint8* src_argb0, const uint8* src_argb1, - uint8* dst_argb, int width) { - asm volatile ( - // 4 pixel loop. 
- LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "lea " MEMLEA(0x10,0) ",%0 \n" - "movdqu " MEMACCESS(1) ",%%xmm1 \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "paddusb %%xmm1,%%xmm0 \n" - "sub $0x4,%3 \n" - "movdqu %%xmm0," MEMACCESS(2) " \n" - "lea " MEMLEA(0x10,2) ",%2 \n" - "jg 1b \n" - : "+r"(src_argb0), // %0 - "+r"(src_argb1), // %1 - "+r"(dst_argb), // %2 - "+r"(width) // %3 - : - : "memory", "cc" -#if defined(__SSE2__) - , "xmm0", "xmm1" -#endif - ); -} -#endif // HAS_ARGBADDROW_SSE2 - -#ifdef HAS_ARGBSUBTRACTROW_SSE2 -// Subtract 2 rows of ARGB pixels, 4 pixels at a time. -void ARGBSubtractRow_SSE2(const uint8* src_argb0, const uint8* src_argb1, - uint8* dst_argb, int width) { - asm volatile ( - // 4 pixel loop. - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "lea " MEMLEA(0x10,0) ",%0 \n" - "movdqu " MEMACCESS(1) ",%%xmm1 \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "psubusb %%xmm1,%%xmm0 \n" - "sub $0x4,%3 \n" - "movdqu %%xmm0," MEMACCESS(2) " \n" - "lea " MEMLEA(0x10,2) ",%2 \n" - "jg 1b \n" - : "+r"(src_argb0), // %0 - "+r"(src_argb1), // %1 - "+r"(dst_argb), // %2 - "+r"(width) // %3 - : - : "memory", "cc" -#if defined(__SSE2__) - , "xmm0", "xmm1" -#endif - ); -} -#endif // HAS_ARGBSUBTRACTROW_SSE2 - -#ifdef HAS_SOBELXROW_SSE2 -// SobelX as a matrix is -// -1 0 1 -// -2 0 2 -// -1 0 1 -void SobelXRow_SSE2(const uint8* src_y0, const uint8* src_y1, - const uint8* src_y2, uint8* dst_sobelx, int width) { - asm volatile ( - "sub %0,%1 \n" - "sub %0,%2 \n" - "sub %0,%3 \n" - "pxor %%xmm5,%%xmm5 \n" - - // 8 pixel loop. 
- LABELALIGN - "1: \n" - "movq " MEMACCESS(0) ",%%xmm0 \n" - "movq " MEMACCESS2(0x2,0) ",%%xmm1 \n" - "punpcklbw %%xmm5,%%xmm0 \n" - "punpcklbw %%xmm5,%%xmm1 \n" - "psubw %%xmm1,%%xmm0 \n" - BUNDLEALIGN - MEMOPREG(movq,0x00,0,1,1,xmm1) // movq (%0,%1,1),%%xmm1 - MEMOPREG(movq,0x02,0,1,1,xmm2) // movq 0x2(%0,%1,1),%%xmm2 - "punpcklbw %%xmm5,%%xmm1 \n" - "punpcklbw %%xmm5,%%xmm2 \n" - "psubw %%xmm2,%%xmm1 \n" - BUNDLEALIGN - MEMOPREG(movq,0x00,0,2,1,xmm2) // movq (%0,%2,1),%%xmm2 - MEMOPREG(movq,0x02,0,2,1,xmm3) // movq 0x2(%0,%2,1),%%xmm3 - "punpcklbw %%xmm5,%%xmm2 \n" - "punpcklbw %%xmm5,%%xmm3 \n" - "psubw %%xmm3,%%xmm2 \n" - "paddw %%xmm2,%%xmm0 \n" - "paddw %%xmm1,%%xmm0 \n" - "paddw %%xmm1,%%xmm0 \n" - "pxor %%xmm1,%%xmm1 \n" - "psubw %%xmm0,%%xmm1 \n" - "pmaxsw %%xmm1,%%xmm0 \n" - "packuswb %%xmm0,%%xmm0 \n" - "sub $0x8,%4 \n" - BUNDLEALIGN - MEMOPMEM(movq,xmm0,0x00,0,3,1) // movq %%xmm0,(%0,%3,1) - "lea " MEMLEA(0x8,0) ",%0 \n" - "jg 1b \n" - : "+r"(src_y0), // %0 - "+r"(src_y1), // %1 - "+r"(src_y2), // %2 - "+r"(dst_sobelx), // %3 - "+r"(width) // %4 - : - : "memory", "cc" -#if defined(__native_client__) && defined(__x86_64__) - , "r14" -#endif -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" -#endif - ); -} -#endif // HAS_SOBELXROW_SSE2 - -#ifdef HAS_SOBELYROW_SSE2 -// SobelY as a matrix is -// -1 -2 -1 -// 0 0 0 -// 1 2 1 -void SobelYRow_SSE2(const uint8* src_y0, const uint8* src_y1, - uint8* dst_sobely, int width) { - asm volatile ( - "sub %0,%1 \n" - "sub %0,%2 \n" - "pxor %%xmm5,%%xmm5 \n" - - // 8 pixel loop. 
- LABELALIGN - "1: \n" - "movq " MEMACCESS(0) ",%%xmm0 \n" - MEMOPREG(movq,0x00,0,1,1,xmm1) // movq (%0,%1,1),%%xmm1 - "punpcklbw %%xmm5,%%xmm0 \n" - "punpcklbw %%xmm5,%%xmm1 \n" - "psubw %%xmm1,%%xmm0 \n" - BUNDLEALIGN - "movq " MEMACCESS2(0x1,0) ",%%xmm1 \n" - MEMOPREG(movq,0x01,0,1,1,xmm2) // movq 0x1(%0,%1,1),%%xmm2 - "punpcklbw %%xmm5,%%xmm1 \n" - "punpcklbw %%xmm5,%%xmm2 \n" - "psubw %%xmm2,%%xmm1 \n" - BUNDLEALIGN - "movq " MEMACCESS2(0x2,0) ",%%xmm2 \n" - MEMOPREG(movq,0x02,0,1,1,xmm3) // movq 0x2(%0,%1,1),%%xmm3 - "punpcklbw %%xmm5,%%xmm2 \n" - "punpcklbw %%xmm5,%%xmm3 \n" - "psubw %%xmm3,%%xmm2 \n" - "paddw %%xmm2,%%xmm0 \n" - "paddw %%xmm1,%%xmm0 \n" - "paddw %%xmm1,%%xmm0 \n" - "pxor %%xmm1,%%xmm1 \n" - "psubw %%xmm0,%%xmm1 \n" - "pmaxsw %%xmm1,%%xmm0 \n" - "packuswb %%xmm0,%%xmm0 \n" - "sub $0x8,%3 \n" - BUNDLEALIGN - MEMOPMEM(movq,xmm0,0x00,0,2,1) // movq %%xmm0,(%0,%2,1) - "lea " MEMLEA(0x8,0) ",%0 \n" - "jg 1b \n" - : "+r"(src_y0), // %0 - "+r"(src_y1), // %1 - "+r"(dst_sobely), // %2 - "+r"(width) // %3 - : - : "memory", "cc" -#if defined(__native_client__) && defined(__x86_64__) - , "r14" -#endif -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" -#endif - ); -} -#endif // HAS_SOBELYROW_SSE2 - -#ifdef HAS_SOBELROW_SSE2 -// Adds Sobel X and Sobel Y and stores Sobel into ARGB. -// A = 255 -// R = Sobel -// G = Sobel -// B = Sobel -void SobelRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_argb, int width) { - asm volatile ( - "sub %0,%1 \n" - "pcmpeqb %%xmm5,%%xmm5 \n" - "pslld $0x18,%%xmm5 \n" - - // 8 pixel loop. 
- LABELALIGN - "1: \n" - "movdqa " MEMACCESS(0) ",%%xmm0 \n" - MEMOPREG(movdqa,0x00,0,1,1,xmm1) // movdqa (%0,%1,1),%%xmm1 - "lea " MEMLEA(0x10,0) ",%0 \n" - "paddusb %%xmm1,%%xmm0 \n" - "movdqa %%xmm0,%%xmm2 \n" - "punpcklbw %%xmm0,%%xmm2 \n" - "punpckhbw %%xmm0,%%xmm0 \n" - "movdqa %%xmm2,%%xmm1 \n" - "punpcklwd %%xmm2,%%xmm1 \n" - "punpckhwd %%xmm2,%%xmm2 \n" - "por %%xmm5,%%xmm1 \n" - "por %%xmm5,%%xmm2 \n" - "movdqa %%xmm0,%%xmm3 \n" - "punpcklwd %%xmm0,%%xmm3 \n" - "punpckhwd %%xmm0,%%xmm0 \n" - "por %%xmm5,%%xmm3 \n" - "por %%xmm5,%%xmm0 \n" - "sub $0x10,%3 \n" - "movdqa %%xmm1," MEMACCESS(2) " \n" - "movdqa %%xmm2," MEMACCESS2(0x10,2) " \n" - "movdqa %%xmm3," MEMACCESS2(0x20,2) " \n" - "movdqa %%xmm0," MEMACCESS2(0x30,2) " \n" - "lea " MEMLEA(0x40,2) ",%2 \n" - "jg 1b \n" - : "+r"(src_sobelx), // %0 - "+r"(src_sobely), // %1 - "+r"(dst_argb), // %2 - "+r"(width) // %3 - : - : "memory", "cc" -#if defined(__native_client__) && defined(__x86_64__) - , "r14" -#endif -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" -#endif - ); -} -#endif // HAS_SOBELROW_SSE2 - -#ifdef HAS_SOBELTOPLANEROW_SSE2 -// Adds Sobel X and Sobel Y and stores Sobel into a plane. -void SobelToPlaneRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_y, int width) { - asm volatile ( - "sub %0,%1 \n" - "pcmpeqb %%xmm5,%%xmm5 \n" - "pslld $0x18,%%xmm5 \n" - - // 8 pixel loop. 
- LABELALIGN - "1: \n" - "movdqa " MEMACCESS(0) ",%%xmm0 \n" - MEMOPREG(movdqa,0x00,0,1,1,xmm1) // movdqa (%0,%1,1),%%xmm1 - "lea " MEMLEA(0x10,0) ",%0 \n" - "paddusb %%xmm1,%%xmm0 \n" - "sub $0x10,%3 \n" - "movdqa %%xmm0," MEMACCESS(2) " \n" - "lea " MEMLEA(0x10,2) ",%2 \n" - "jg 1b \n" - : "+r"(src_sobelx), // %0 - "+r"(src_sobely), // %1 - "+r"(dst_y), // %2 - "+r"(width) // %3 - : - : "memory", "cc" -#if defined(__native_client__) && defined(__x86_64__) - , "r14" -#endif -#if defined(__SSE2__) - , "xmm0", "xmm1" -#endif - ); -} -#endif // HAS_SOBELTOPLANEROW_SSE2 - -#ifdef HAS_SOBELXYROW_SSE2 -// Mixes Sobel X, Sobel Y and Sobel into ARGB. -// A = 255 -// R = Sobel X -// G = Sobel -// B = Sobel Y -void SobelXYRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_argb, int width) { - asm volatile ( - "sub %0,%1 \n" - "pcmpeqb %%xmm5,%%xmm5 \n" - - // 8 pixel loop. - LABELALIGN - "1: \n" - "movdqa " MEMACCESS(0) ",%%xmm0 \n" - MEMOPREG(movdqa,0x00,0,1,1,xmm1) // movdqa (%0,%1,1),%%xmm1 - "lea " MEMLEA(0x10,0) ",%0 \n" - "movdqa %%xmm0,%%xmm2 \n" - "paddusb %%xmm1,%%xmm2 \n" - "movdqa %%xmm0,%%xmm3 \n" - "punpcklbw %%xmm5,%%xmm3 \n" - "punpckhbw %%xmm5,%%xmm0 \n" - "movdqa %%xmm1,%%xmm4 \n" - "punpcklbw %%xmm2,%%xmm4 \n" - "punpckhbw %%xmm2,%%xmm1 \n" - "movdqa %%xmm4,%%xmm6 \n" - "punpcklwd %%xmm3,%%xmm6 \n" - "punpckhwd %%xmm3,%%xmm4 \n" - "movdqa %%xmm1,%%xmm7 \n" - "punpcklwd %%xmm0,%%xmm7 \n" - "punpckhwd %%xmm0,%%xmm1 \n" - "sub $0x10,%3 \n" - "movdqa %%xmm6," MEMACCESS(2) " \n" - "movdqa %%xmm4," MEMACCESS2(0x10,2) " \n" - "movdqa %%xmm7," MEMACCESS2(0x20,2) " \n" - "movdqa %%xmm1," MEMACCESS2(0x30,2) " \n" - "lea " MEMLEA(0x40,2) ",%2 \n" - "jg 1b \n" - : "+r"(src_sobelx), // %0 - "+r"(src_sobely), // %1 - "+r"(dst_argb), // %2 - "+r"(width) // %3 - : - : "memory", "cc" -#if defined(__native_client__) && defined(__x86_64__) - , "r14" -#endif -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" -#endif 
- ); -} -#endif // HAS_SOBELXYROW_SSE2 - -#ifdef HAS_COMPUTECUMULATIVESUMROW_SSE2 -// Creates a table of cumulative sums where each value is a sum of all values -// above and to the left of the value, inclusive of the value. -void ComputeCumulativeSumRow_SSE2(const uint8* row, int32* cumsum, - const int32* previous_cumsum, int width) { - asm volatile ( - "pxor %%xmm0,%%xmm0 \n" - "pxor %%xmm1,%%xmm1 \n" - "sub $0x4,%3 \n" - "jl 49f \n" - "test $0xf,%1 \n" - "jne 49f \n" - - // 4 pixel loop \n" - LABELALIGN - "40: \n" - "movdqu " MEMACCESS(0) ",%%xmm2 \n" - "lea " MEMLEA(0x10,0) ",%0 \n" - "movdqa %%xmm2,%%xmm4 \n" - "punpcklbw %%xmm1,%%xmm2 \n" - "movdqa %%xmm2,%%xmm3 \n" - "punpcklwd %%xmm1,%%xmm2 \n" - "punpckhwd %%xmm1,%%xmm3 \n" - "punpckhbw %%xmm1,%%xmm4 \n" - "movdqa %%xmm4,%%xmm5 \n" - "punpcklwd %%xmm1,%%xmm4 \n" - "punpckhwd %%xmm1,%%xmm5 \n" - "paddd %%xmm2,%%xmm0 \n" - "movdqa " MEMACCESS(2) ",%%xmm2 \n" - "paddd %%xmm0,%%xmm2 \n" - "paddd %%xmm3,%%xmm0 \n" - "movdqa " MEMACCESS2(0x10,2) ",%%xmm3 \n" - "paddd %%xmm0,%%xmm3 \n" - "paddd %%xmm4,%%xmm0 \n" - "movdqa " MEMACCESS2(0x20,2) ",%%xmm4 \n" - "paddd %%xmm0,%%xmm4 \n" - "paddd %%xmm5,%%xmm0 \n" - "movdqa " MEMACCESS2(0x30,2) ",%%xmm5 \n" - "lea " MEMLEA(0x40,2) ",%2 \n" - "paddd %%xmm0,%%xmm5 \n" - "movdqa %%xmm2," MEMACCESS(1) " \n" - "movdqa %%xmm3," MEMACCESS2(0x10,1) " \n" - "movdqa %%xmm4," MEMACCESS2(0x20,1) " \n" - "movdqa %%xmm5," MEMACCESS2(0x30,1) " \n" - "lea " MEMLEA(0x40,1) ",%1 \n" - "sub $0x4,%3 \n" - "jge 40b \n" - - "49: \n" - "add $0x3,%3 \n" - "jl 19f \n" - - // 1 pixel loop \n" - LABELALIGN - "10: \n" - "movd " MEMACCESS(0) ",%%xmm2 \n" - "lea " MEMLEA(0x4,0) ",%0 \n" - "punpcklbw %%xmm1,%%xmm2 \n" - "punpcklwd %%xmm1,%%xmm2 \n" - "paddd %%xmm2,%%xmm0 \n" - "movdqu " MEMACCESS(2) ",%%xmm2 \n" - "lea " MEMLEA(0x10,2) ",%2 \n" - "paddd %%xmm0,%%xmm2 \n" - "movdqu %%xmm2," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x1,%3 \n" - "jge 10b \n" - - "19: \n" - : 
"+r"(row), // %0 - "+r"(cumsum), // %1 - "+r"(previous_cumsum), // %2 - "+r"(width) // %3 - : - : "memory", "cc" -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" -#endif - ); -} -#endif // HAS_COMPUTECUMULATIVESUMROW_SSE2 - -#ifdef HAS_CUMULATIVESUMTOAVERAGEROW_SSE2 -void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft, - int width, int area, uint8* dst, - int count) { - asm volatile ( - "movd %5,%%xmm5 \n" - "cvtdq2ps %%xmm5,%%xmm5 \n" - "rcpss %%xmm5,%%xmm4 \n" - "pshufd $0x0,%%xmm4,%%xmm4 \n" - "sub $0x4,%3 \n" - "jl 49f \n" - "cmpl $0x80,%5 \n" - "ja 40f \n" - - "pshufd $0x0,%%xmm5,%%xmm5 \n" - "pcmpeqb %%xmm6,%%xmm6 \n" - "psrld $0x10,%%xmm6 \n" - "cvtdq2ps %%xmm6,%%xmm6 \n" - "addps %%xmm6,%%xmm5 \n" - "mulps %%xmm4,%%xmm5 \n" - "cvtps2dq %%xmm5,%%xmm5 \n" - "packssdw %%xmm5,%%xmm5 \n" - - // 4 pixel small loop \n" - LABELALIGN - "4: \n" - "movdqa " MEMACCESS(0) ",%%xmm0 \n" - "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "movdqa " MEMACCESS2(0x20,0) ",%%xmm2 \n" - "movdqa " MEMACCESS2(0x30,0) ",%%xmm3 \n" - BUNDLEALIGN - MEMOPREG(psubd,0x00,0,4,4,xmm0) // psubd 0x00(%0,%4,4),%%xmm0 - MEMOPREG(psubd,0x10,0,4,4,xmm1) // psubd 0x10(%0,%4,4),%%xmm1 - MEMOPREG(psubd,0x20,0,4,4,xmm2) // psubd 0x20(%0,%4,4),%%xmm2 - MEMOPREG(psubd,0x30,0,4,4,xmm3) // psubd 0x30(%0,%4,4),%%xmm3 - "lea " MEMLEA(0x40,0) ",%0 \n" - "psubd " MEMACCESS(1) ",%%xmm0 \n" - "psubd " MEMACCESS2(0x10,1) ",%%xmm1 \n" - "psubd " MEMACCESS2(0x20,1) ",%%xmm2 \n" - "psubd " MEMACCESS2(0x30,1) ",%%xmm3 \n" - BUNDLEALIGN - MEMOPREG(paddd,0x00,1,4,4,xmm0) // paddd 0x00(%1,%4,4),%%xmm0 - MEMOPREG(paddd,0x10,1,4,4,xmm1) // paddd 0x10(%1,%4,4),%%xmm1 - MEMOPREG(paddd,0x20,1,4,4,xmm2) // paddd 0x20(%1,%4,4),%%xmm2 - MEMOPREG(paddd,0x30,1,4,4,xmm3) // paddd 0x30(%1,%4,4),%%xmm3 - "lea " MEMLEA(0x40,1) ",%1 \n" - "packssdw %%xmm1,%%xmm0 \n" - "packssdw %%xmm3,%%xmm2 \n" - "pmulhuw %%xmm5,%%xmm0 \n" - "pmulhuw %%xmm5,%%xmm2 \n" - "packuswb %%xmm2,%%xmm0 \n" - 
"movdqu %%xmm0," MEMACCESS(2) " \n" - "lea " MEMLEA(0x10,2) ",%2 \n" - "sub $0x4,%3 \n" - "jge 4b \n" - "jmp 49f \n" - - // 4 pixel loop \n" - LABELALIGN - "40: \n" - "movdqa " MEMACCESS(0) ",%%xmm0 \n" - "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "movdqa " MEMACCESS2(0x20,0) ",%%xmm2 \n" - "movdqa " MEMACCESS2(0x30,0) ",%%xmm3 \n" - BUNDLEALIGN - MEMOPREG(psubd,0x00,0,4,4,xmm0) // psubd 0x00(%0,%4,4),%%xmm0 - MEMOPREG(psubd,0x10,0,4,4,xmm1) // psubd 0x10(%0,%4,4),%%xmm1 - MEMOPREG(psubd,0x20,0,4,4,xmm2) // psubd 0x20(%0,%4,4),%%xmm2 - MEMOPREG(psubd,0x30,0,4,4,xmm3) // psubd 0x30(%0,%4,4),%%xmm3 - "lea " MEMLEA(0x40,0) ",%0 \n" - "psubd " MEMACCESS(1) ",%%xmm0 \n" - "psubd " MEMACCESS2(0x10,1) ",%%xmm1 \n" - "psubd " MEMACCESS2(0x20,1) ",%%xmm2 \n" - "psubd " MEMACCESS2(0x30,1) ",%%xmm3 \n" - BUNDLEALIGN - MEMOPREG(paddd,0x00,1,4,4,xmm0) // paddd 0x00(%1,%4,4),%%xmm0 - MEMOPREG(paddd,0x10,1,4,4,xmm1) // paddd 0x10(%1,%4,4),%%xmm1 - MEMOPREG(paddd,0x20,1,4,4,xmm2) // paddd 0x20(%1,%4,4),%%xmm2 - MEMOPREG(paddd,0x30,1,4,4,xmm3) // paddd 0x30(%1,%4,4),%%xmm3 - "lea " MEMLEA(0x40,1) ",%1 \n" - "cvtdq2ps %%xmm0,%%xmm0 \n" - "cvtdq2ps %%xmm1,%%xmm1 \n" - "mulps %%xmm4,%%xmm0 \n" - "mulps %%xmm4,%%xmm1 \n" - "cvtdq2ps %%xmm2,%%xmm2 \n" - "cvtdq2ps %%xmm3,%%xmm3 \n" - "mulps %%xmm4,%%xmm2 \n" - "mulps %%xmm4,%%xmm3 \n" - "cvtps2dq %%xmm0,%%xmm0 \n" - "cvtps2dq %%xmm1,%%xmm1 \n" - "cvtps2dq %%xmm2,%%xmm2 \n" - "cvtps2dq %%xmm3,%%xmm3 \n" - "packssdw %%xmm1,%%xmm0 \n" - "packssdw %%xmm3,%%xmm2 \n" - "packuswb %%xmm2,%%xmm0 \n" - "movdqu %%xmm0," MEMACCESS(2) " \n" - "lea " MEMLEA(0x10,2) ",%2 \n" - "sub $0x4,%3 \n" - "jge 40b \n" - - "49: \n" - "add $0x3,%3 \n" - "jl 19f \n" - - // 1 pixel loop \n" - LABELALIGN - "10: \n" - "movdqa " MEMACCESS(0) ",%%xmm0 \n" - MEMOPREG(psubd,0x00,0,4,4,xmm0) // psubd 0x00(%0,%4,4),%%xmm0 - "lea " MEMLEA(0x10,0) ",%0 \n" - "psubd " MEMACCESS(1) ",%%xmm0 \n" - BUNDLEALIGN - MEMOPREG(paddd,0x00,1,4,4,xmm0) // paddd 0x00(%1,%4,4),%%xmm0 - 
"lea " MEMLEA(0x10,1) ",%1 \n" - "cvtdq2ps %%xmm0,%%xmm0 \n" - "mulps %%xmm4,%%xmm0 \n" - "cvtps2dq %%xmm0,%%xmm0 \n" - "packssdw %%xmm0,%%xmm0 \n" - "packuswb %%xmm0,%%xmm0 \n" - "movd %%xmm0," MEMACCESS(2) " \n" - "lea " MEMLEA(0x4,2) ",%2 \n" - "sub $0x1,%3 \n" - "jge 10b \n" - "19: \n" - : "+r"(topleft), // %0 - "+r"(botleft), // %1 - "+r"(dst), // %2 - "+rm"(count) // %3 - : "r"((intptr_t)(width)), // %4 - "rm"(area) // %5 - : "memory", "cc" -#if defined(__native_client__) && defined(__x86_64__) - , "r14" -#endif -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6" -#endif - ); -} -#endif // HAS_CUMULATIVESUMTOAVERAGEROW_SSE2 - -#ifdef HAS_ARGBAFFINEROW_SSE2 -// Copy ARGB pixels from source image with slope to a row of destination. -LIBYUV_API -void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride, - uint8* dst_argb, const float* src_dudv, int width) { - intptr_t src_argb_stride_temp = src_argb_stride; - intptr_t temp = 0; - asm volatile ( - "movq " MEMACCESS(3) ",%%xmm2 \n" - "movq " MEMACCESS2(0x08,3) ",%%xmm7 \n" - "shl $0x10,%1 \n" - "add $0x4,%1 \n" - "movd %1,%%xmm5 \n" - "sub $0x4,%4 \n" - "jl 49f \n" - - "pshufd $0x44,%%xmm7,%%xmm7 \n" - "pshufd $0x0,%%xmm5,%%xmm5 \n" - "movdqa %%xmm2,%%xmm0 \n" - "addps %%xmm7,%%xmm0 \n" - "movlhps %%xmm0,%%xmm2 \n" - "movdqa %%xmm7,%%xmm4 \n" - "addps %%xmm4,%%xmm4 \n" - "movdqa %%xmm2,%%xmm3 \n" - "addps %%xmm4,%%xmm3 \n" - "addps %%xmm4,%%xmm4 \n" - - // 4 pixel loop \n" - LABELALIGN - "40: \n" - "cvttps2dq %%xmm2,%%xmm0 \n" // x, y float to int first 2 - "cvttps2dq %%xmm3,%%xmm1 \n" // x, y float to int next 2 - "packssdw %%xmm1,%%xmm0 \n" // x, y as 8 shorts - "pmaddwd %%xmm5,%%xmm0 \n" // off = x * 4 + y * stride - "movd %%xmm0,%k1 \n" - "pshufd $0x39,%%xmm0,%%xmm0 \n" - "movd %%xmm0,%k5 \n" - "pshufd $0x39,%%xmm0,%%xmm0 \n" - BUNDLEALIGN - MEMOPREG(movd,0x00,0,1,1,xmm1) // movd (%0,%1,1),%%xmm1 - MEMOPREG(movd,0x00,0,5,1,xmm6) // movd (%0,%5,1),%%xmm6 - "punpckldq 
%%xmm6,%%xmm1 \n" - "addps %%xmm4,%%xmm2 \n" - "movq %%xmm1," MEMACCESS(2) " \n" - "movd %%xmm0,%k1 \n" - "pshufd $0x39,%%xmm0,%%xmm0 \n" - "movd %%xmm0,%k5 \n" - BUNDLEALIGN - MEMOPREG(movd,0x00,0,1,1,xmm0) // movd (%0,%1,1),%%xmm0 - MEMOPREG(movd,0x00,0,5,1,xmm6) // movd (%0,%5,1),%%xmm6 - "punpckldq %%xmm6,%%xmm0 \n" - "addps %%xmm4,%%xmm3 \n" - "sub $0x4,%4 \n" - "movq %%xmm0," MEMACCESS2(0x08,2) " \n" - "lea " MEMLEA(0x10,2) ",%2 \n" - "jge 40b \n" - - "49: \n" - "add $0x3,%4 \n" - "jl 19f \n" - - // 1 pixel loop \n" - LABELALIGN - "10: \n" - "cvttps2dq %%xmm2,%%xmm0 \n" - "packssdw %%xmm0,%%xmm0 \n" - "pmaddwd %%xmm5,%%xmm0 \n" - "addps %%xmm7,%%xmm2 \n" - "movd %%xmm0,%k1 \n" - BUNDLEALIGN - MEMOPREG(movd,0x00,0,1,1,xmm0) // movd (%0,%1,1),%%xmm0 - "sub $0x1,%4 \n" - "movd %%xmm0," MEMACCESS(2) " \n" - "lea " MEMLEA(0x04,2) ",%2 \n" - "jge 10b \n" - "19: \n" - : "+r"(src_argb), // %0 - "+r"(src_argb_stride_temp), // %1 - "+r"(dst_argb), // %2 - "+r"(src_dudv), // %3 - "+rm"(width), // %4 - "+r"(temp) // %5 - : - : "memory", "cc" -#if defined(__native_client__) && defined(__x86_64__) - , "r14" -#endif -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" -#endif - ); -} -#endif // HAS_ARGBAFFINEROW_SSE2 - -#ifdef HAS_INTERPOLATEROW_SSSE3 -// Bilinear filter 16x2 -> 16x1 -void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr, - ptrdiff_t src_stride, int dst_width, - int source_y_fraction) { - asm volatile ( - "sub %1,%0 \n" - "shr %3 \n" - "cmp $0x0,%3 \n" - "je 100f \n" - "cmp $0x20,%3 \n" - "je 75f \n" - "cmp $0x40,%3 \n" - "je 50f \n" - "cmp $0x60,%3 \n" - "je 25f \n" - - "movd %3,%%xmm0 \n" - "neg %3 \n" - "add $0x80,%3 \n" - "movd %3,%%xmm5 \n" - "punpcklbw %%xmm0,%%xmm5 \n" - "punpcklwd %%xmm5,%%xmm5 \n" - "pshufd $0x0,%%xmm5,%%xmm5 \n" - - // General purpose row blend. 
- LABELALIGN - "1: \n" - "movdqa " MEMACCESS(1) ",%%xmm0 \n" - MEMOPREG(movdqa,0x00,1,4,1,xmm2) - "movdqa %%xmm0,%%xmm1 \n" - "punpcklbw %%xmm2,%%xmm0 \n" - "punpckhbw %%xmm2,%%xmm1 \n" - "pmaddubsw %%xmm5,%%xmm0 \n" - "pmaddubsw %%xmm5,%%xmm1 \n" - "psrlw $0x7,%%xmm0 \n" - "psrlw $0x7,%%xmm1 \n" - "packuswb %%xmm1,%%xmm0 \n" - "sub $0x10,%2 \n" - BUNDLEALIGN - MEMOPMEM(movdqa,xmm0,0x00,1,0,1) - "lea " MEMLEA(0x10,1) ",%1 \n" - "jg 1b \n" - "jmp 99f \n" - - // Blend 25 / 75. - LABELALIGN - "25: \n" - "movdqa " MEMACCESS(1) ",%%xmm0 \n" - MEMOPREG(movdqa,0x00,1,4,1,xmm1) - "pavgb %%xmm1,%%xmm0 \n" - "pavgb %%xmm1,%%xmm0 \n" - "sub $0x10,%2 \n" - BUNDLEALIGN - MEMOPMEM(movdqa,xmm0,0x00,1,0,1) - "lea " MEMLEA(0x10,1) ",%1 \n" - "jg 25b \n" - "jmp 99f \n" - - // Blend 50 / 50. - LABELALIGN - "50: \n" - "movdqa " MEMACCESS(1) ",%%xmm0 \n" - MEMOPREG(movdqa,0x00,1,4,1,xmm1) - "pavgb %%xmm1,%%xmm0 \n" - "sub $0x10,%2 \n" - BUNDLEALIGN - MEMOPMEM(movdqa,xmm0,0x00,1,0,1) - "lea " MEMLEA(0x10,1) ",%1 \n" - "jg 50b \n" - "jmp 99f \n" - - // Blend 75 / 25. - LABELALIGN - "75: \n" - "movdqa " MEMACCESS(1) ",%%xmm1 \n" - MEMOPREG(movdqa,0x00,1,4,1,xmm0) - "pavgb %%xmm1,%%xmm0 \n" - "pavgb %%xmm1,%%xmm0 \n" - "sub $0x10,%2 \n" - BUNDLEALIGN - MEMOPMEM(movdqa,xmm0,0x00,1,0,1) - "lea " MEMLEA(0x10,1) ",%1 \n" - "jg 75b \n" - "jmp 99f \n" - - // Blend 100 / 0 - Copy row unchanged. 
- LABELALIGN - "100: \n" - "movdqa " MEMACCESS(1) ",%%xmm0 \n" - "sub $0x10,%2 \n" - MEMOPMEM(movdqa,xmm0,0x00,1,0,1) - "lea " MEMLEA(0x10,1) ",%1 \n" - "jg 100b \n" - - "99: \n" - : "+r"(dst_ptr), // %0 - "+r"(src_ptr), // %1 - "+r"(dst_width), // %2 - "+r"(source_y_fraction) // %3 - : "r"((intptr_t)(src_stride)) // %4 - : "memory", "cc" -#if defined(__native_client__) && defined(__x86_64__) - , "r14" -#endif -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm5" -#endif - ); -} -#endif // HAS_INTERPOLATEROW_SSSE3 - -#ifdef HAS_INTERPOLATEROW_SSE2 -// Bilinear filter 16x2 -> 16x1 -void InterpolateRow_SSE2(uint8* dst_ptr, const uint8* src_ptr, - ptrdiff_t src_stride, int dst_width, - int source_y_fraction) { - asm volatile ( - "sub %1,%0 \n" - "shr %3 \n" - "cmp $0x0,%3 \n" - "je 100f \n" - "cmp $0x20,%3 \n" - "je 75f \n" - "cmp $0x40,%3 \n" - "je 50f \n" - "cmp $0x60,%3 \n" - "je 25f \n" - - "movd %3,%%xmm0 \n" - "neg %3 \n" - "add $0x80,%3 \n" - "movd %3,%%xmm5 \n" - "punpcklbw %%xmm0,%%xmm5 \n" - "punpcklwd %%xmm5,%%xmm5 \n" - "pshufd $0x0,%%xmm5,%%xmm5 \n" - "pxor %%xmm4,%%xmm4 \n" - - // General purpose row blend. - LABELALIGN - "1: \n" - "movdqa " MEMACCESS(1) ",%%xmm0 \n" - MEMOPREG(movdqa,0x00,1,4,1,xmm2) // movdqa (%1,%4,1),%%xmm2 - "movdqa %%xmm0,%%xmm1 \n" - "movdqa %%xmm2,%%xmm3 \n" - "punpcklbw %%xmm4,%%xmm2 \n" - "punpckhbw %%xmm4,%%xmm3 \n" - "punpcklbw %%xmm4,%%xmm0 \n" - "punpckhbw %%xmm4,%%xmm1 \n" - "psubw %%xmm0,%%xmm2 \n" - "psubw %%xmm1,%%xmm3 \n" - "paddw %%xmm2,%%xmm2 \n" - "paddw %%xmm3,%%xmm3 \n" - "pmulhw %%xmm5,%%xmm2 \n" - "pmulhw %%xmm5,%%xmm3 \n" - "paddw %%xmm2,%%xmm0 \n" - "paddw %%xmm3,%%xmm1 \n" - "packuswb %%xmm1,%%xmm0 \n" - "sub $0x10,%2 \n" - BUNDLEALIGN - MEMOPMEM(movdqa,xmm0,0x00,1,0,1) // movdqa %%xmm0,(%1,%0,1) - "lea " MEMLEA(0x10,1) ",%1 \n" - "jg 1b \n" - "jmp 99f \n" - - // Blend 25 / 75. 
- LABELALIGN - "25: \n" - "movdqa " MEMACCESS(1) ",%%xmm0 \n" - MEMOPREG(movdqa,0x00,1,4,1,xmm1) // movdqa (%1,%4,1),%%xmm1 - "pavgb %%xmm1,%%xmm0 \n" - "pavgb %%xmm1,%%xmm0 \n" - "sub $0x10,%2 \n" - BUNDLEALIGN - MEMOPMEM(movdqa,xmm0,0x00,1,0,1) // movdqa %%xmm0,(%1,%0,1) - "lea " MEMLEA(0x10,1) ",%1 \n" - "jg 25b \n" - "jmp 99f \n" - - // Blend 50 / 50. - LABELALIGN - "50: \n" - "movdqa " MEMACCESS(1) ",%%xmm0 \n" - MEMOPREG(movdqa,0x00,1,4,1,xmm1) // movdqa (%1,%4,1),%%xmm1 - "pavgb %%xmm1,%%xmm0 \n" - "sub $0x10,%2 \n" - BUNDLEALIGN - MEMOPMEM(movdqa,xmm0,0x00,1,0,1) // movdqa %%xmm0,(%1,%0,1) - "lea " MEMLEA(0x10,1) ",%1 \n" - "jg 50b \n" - "jmp 99f \n" - - // Blend 75 / 25. - LABELALIGN - "75: \n" - "movdqa " MEMACCESS(1) ",%%xmm1 \n" - MEMOPREG(movdqa,0x00,1,4,1,xmm0) // movdqa (%1,%4,1),%%xmm0 - "pavgb %%xmm1,%%xmm0 \n" - "pavgb %%xmm1,%%xmm0 \n" - "sub $0x10,%2 \n" - BUNDLEALIGN - MEMOPMEM(movdqa,xmm0,0x00,1,0,1) // movdqa %%xmm0,(%1,%0,1) - "lea " MEMLEA(0x10,1) ",%1 \n" - "jg 75b \n" - "jmp 99f \n" - - // Blend 100 / 0 - Copy row unchanged. 
- LABELALIGN - "100: \n" - "movdqa " MEMACCESS(1) ",%%xmm0 \n" - "sub $0x10,%2 \n" - MEMOPMEM(movdqa,xmm0,0x00,1,0,1) // movdqa %%xmm0,(%1,%0,1) - "lea " MEMLEA(0x10,1) ",%1 \n" - "jg 100b \n" - - "99: \n" - : "+r"(dst_ptr), // %0 - "+r"(src_ptr), // %1 - "+r"(dst_width), // %2 - "+r"(source_y_fraction) // %3 - : "r"((intptr_t)(src_stride)) // %4 - : "memory", "cc" -#if defined(__native_client__) && defined(__x86_64__) - , "r14" -#endif -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" -#endif - ); -} -#endif // HAS_INTERPOLATEROW_SSE2 - -#ifdef HAS_INTERPOLATEROW_SSSE3 -// Bilinear filter 16x2 -> 16x1 -void InterpolateRow_Unaligned_SSSE3(uint8* dst_ptr, const uint8* src_ptr, - ptrdiff_t src_stride, int dst_width, - int source_y_fraction) { - asm volatile ( - "sub %1,%0 \n" - "shr %3 \n" - "cmp $0x0,%3 \n" - "je 100f \n" - "cmp $0x20,%3 \n" - "je 75f \n" - "cmp $0x40,%3 \n" - "je 50f \n" - "cmp $0x60,%3 \n" - "je 25f \n" - - "movd %3,%%xmm0 \n" - "neg %3 \n" - "add $0x80,%3 \n" - "movd %3,%%xmm5 \n" - "punpcklbw %%xmm0,%%xmm5 \n" - "punpcklwd %%xmm5,%%xmm5 \n" - "pshufd $0x0,%%xmm5,%%xmm5 \n" - - // General purpose row blend. - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(1) ",%%xmm0 \n" - MEMOPREG(movdqu,0x00,1,4,1,xmm2) - "movdqu %%xmm0,%%xmm1 \n" - "punpcklbw %%xmm2,%%xmm0 \n" - "punpckhbw %%xmm2,%%xmm1 \n" - "pmaddubsw %%xmm5,%%xmm0 \n" - "pmaddubsw %%xmm5,%%xmm1 \n" - "psrlw $0x7,%%xmm0 \n" - "psrlw $0x7,%%xmm1 \n" - "packuswb %%xmm1,%%xmm0 \n" - "sub $0x10,%2 \n" - BUNDLEALIGN - MEMOPMEM(movdqu,xmm0,0x00,1,0,1) - "lea " MEMLEA(0x10,1) ",%1 \n" - "jg 1b \n" - "jmp 99f \n" - - // Blend 25 / 75. - LABELALIGN - "25: \n" - "movdqu " MEMACCESS(1) ",%%xmm0 \n" - MEMOPREG(movdqu,0x00,1,4,1,xmm1) - "pavgb %%xmm1,%%xmm0 \n" - "pavgb %%xmm1,%%xmm0 \n" - "sub $0x10,%2 \n" - BUNDLEALIGN - MEMOPMEM(movdqu,xmm0,0x00,1,0,1) - "lea " MEMLEA(0x10,1) ",%1 \n" - "jg 25b \n" - "jmp 99f \n" - - // Blend 50 / 50. 
- LABELALIGN - "50: \n" - "movdqu " MEMACCESS(1) ",%%xmm0 \n" - MEMOPREG(movdqu,0x00,1,4,1,xmm1) - "pavgb %%xmm1,%%xmm0 \n" - "sub $0x10,%2 \n" - BUNDLEALIGN - MEMOPMEM(movdqu,xmm0,0x00,1,0,1) - "lea " MEMLEA(0x10,1) ",%1 \n" - "jg 50b \n" - "jmp 99f \n" - - // Blend 75 / 25. - LABELALIGN - "75: \n" - "movdqu " MEMACCESS(1) ",%%xmm1 \n" - MEMOPREG(movdqu,0x00,1,4,1,xmm0) - "pavgb %%xmm1,%%xmm0 \n" - "pavgb %%xmm1,%%xmm0 \n" - "sub $0x10,%2 \n" - BUNDLEALIGN - MEMOPMEM(movdqu,xmm0,0x00,1,0,1) - "lea " MEMLEA(0x10,1) ",%1 \n" - "jg 75b \n" - "jmp 99f \n" - - // Blend 100 / 0 - Copy row unchanged. - LABELALIGN - "100: \n" - "movdqu " MEMACCESS(1) ",%%xmm0 \n" - "sub $0x10,%2 \n" - MEMOPMEM(movdqu,xmm0,0x00,1,0,1) - "lea " MEMLEA(0x10,1) ",%1 \n" - "jg 100b \n" - - "99: \n" - : "+r"(dst_ptr), // %0 - "+r"(src_ptr), // %1 - "+r"(dst_width), // %2 - "+r"(source_y_fraction) // %3 - : "r"((intptr_t)(src_stride)) // %4 - : "memory", "cc" -#if defined(__native_client__) && defined(__x86_64__) - , "r14" -#endif -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm5" -#endif - ); -} -#endif // HAS_INTERPOLATEROW_SSSE3 - -#ifdef HAS_INTERPOLATEROW_SSE2 -// Bilinear filter 16x2 -> 16x1 -void InterpolateRow_Unaligned_SSE2(uint8* dst_ptr, const uint8* src_ptr, - ptrdiff_t src_stride, int dst_width, - int source_y_fraction) { - asm volatile ( - "sub %1,%0 \n" - "shr %3 \n" - "cmp $0x0,%3 \n" - "je 100f \n" - "cmp $0x20,%3 \n" - "je 75f \n" - "cmp $0x40,%3 \n" - "je 50f \n" - "cmp $0x60,%3 \n" - "je 25f \n" - - "movd %3,%%xmm0 \n" - "neg %3 \n" - "add $0x80,%3 \n" - "movd %3,%%xmm5 \n" - "punpcklbw %%xmm0,%%xmm5 \n" - "punpcklwd %%xmm5,%%xmm5 \n" - "pshufd $0x0,%%xmm5,%%xmm5 \n" - "pxor %%xmm4,%%xmm4 \n" - - // General purpose row blend. 
- LABELALIGN - "1: \n" - "movdqu " MEMACCESS(1) ",%%xmm0 \n" - MEMOPREG(movdqu,0x00,1,4,1,xmm2) // movdqu (%1,%4,1),%%xmm2 - "movdqu %%xmm0,%%xmm1 \n" - "movdqu %%xmm2,%%xmm3 \n" - "punpcklbw %%xmm4,%%xmm2 \n" - "punpckhbw %%xmm4,%%xmm3 \n" - "punpcklbw %%xmm4,%%xmm0 \n" - "punpckhbw %%xmm4,%%xmm1 \n" - "psubw %%xmm0,%%xmm2 \n" - "psubw %%xmm1,%%xmm3 \n" - "paddw %%xmm2,%%xmm2 \n" - "paddw %%xmm3,%%xmm3 \n" - "pmulhw %%xmm5,%%xmm2 \n" - "pmulhw %%xmm5,%%xmm3 \n" - "paddw %%xmm2,%%xmm0 \n" - "paddw %%xmm3,%%xmm1 \n" - "packuswb %%xmm1,%%xmm0 \n" - "sub $0x10,%2 \n" - BUNDLEALIGN - MEMOPMEM(movdqu,xmm0,0x00,1,0,1) // movdqu %%xmm0,(%1,%0,1) - "lea " MEMLEA(0x10,1) ",%1 \n" - "jg 1b \n" - "jmp 99f \n" - - // Blend 25 / 75. - LABELALIGN - "25: \n" - "movdqu " MEMACCESS(1) ",%%xmm0 \n" - MEMOPREG(movdqu,0x00,1,4,1,xmm1) // movdqu (%1,%4,1),%%xmm1 - "pavgb %%xmm1,%%xmm0 \n" - "pavgb %%xmm1,%%xmm0 \n" - "sub $0x10,%2 \n" - BUNDLEALIGN - MEMOPMEM(movdqu,xmm0,0x00,1,0,1) // movdqu %%xmm0,(%1,%0,1) - "lea " MEMLEA(0x10,1) ",%1 \n" - "jg 25b \n" - "jmp 99f \n" - - // Blend 50 / 50. - LABELALIGN - "50: \n" - "movdqu " MEMACCESS(1) ",%%xmm0 \n" - MEMOPREG(movdqu,0x00,1,4,1,xmm1) // movdqu (%1,%4,1),%%xmm1 - "pavgb %%xmm1,%%xmm0 \n" - "sub $0x10,%2 \n" - BUNDLEALIGN - MEMOPMEM(movdqu,xmm0,0x00,1,0,1) // movdqu %%xmm0,(%1,%0,1) - "lea " MEMLEA(0x10,1) ",%1 \n" - "jg 50b \n" - "jmp 99f \n" - - // Blend 75 / 25. - LABELALIGN - "75: \n" - "movdqu " MEMACCESS(1) ",%%xmm1 \n" - MEMOPREG(movdqu,0x00,1,4,1,xmm0) // movdqu (%1,%4,1),%%xmm0 - "pavgb %%xmm1,%%xmm0 \n" - "pavgb %%xmm1,%%xmm0 \n" - "sub $0x10,%2 \n" - BUNDLEALIGN - MEMOPMEM(movdqu,xmm0,0x00,1,0,1) // movdqu %%xmm0,(%1,%0,1) - "lea " MEMLEA(0x10,1) ",%1 \n" - "jg 75b \n" - "jmp 99f \n" - - // Blend 100 / 0 - Copy row unchanged. 
- LABELALIGN - "100: \n" - "movdqu " MEMACCESS(1) ",%%xmm0 \n" - "sub $0x10,%2 \n" - MEMOPMEM(movdqu,xmm0,0x00,1,0,1) // movdqu %%xmm0,(%1,%0,1) - "lea " MEMLEA(0x10,1) ",%1 \n" - "jg 100b \n" - - "99: \n" - : "+r"(dst_ptr), // %0 - "+r"(src_ptr), // %1 - "+r"(dst_width), // %2 - "+r"(source_y_fraction) // %3 - : "r"((intptr_t)(src_stride)) // %4 - : "memory", "cc" -#if defined(__native_client__) && defined(__x86_64__) - , "r14" -#endif -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" -#endif - ); -} -#endif // HAS_INTERPOLATEROW_SSE2 - -#ifdef HAS_HALFROW_SSE2 -void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride, - uint8* dst_uv, int pix) { - asm volatile ( - "sub %0,%1 \n" - LABELALIGN - "1: \n" - "movdqa " MEMACCESS(0) ",%%xmm0 \n" - MEMOPREG(pavgb,0x00,0,3,1,xmm0) // pavgb (%0,%3),%%xmm0 - "sub $0x10,%2 \n" - MEMOPMEM(movdqa,xmm0,0x00,0,1,1) // movdqa %%xmm0,(%0,%1) - "lea " MEMLEA(0x10,0) ",%0 \n" - "jg 1b \n" - : "+r"(src_uv), // %0 - "+r"(dst_uv), // %1 - "+r"(pix) // %2 - : "r"((intptr_t)(src_uv_stride)) // %3 - : "memory", "cc" -#if defined(__SSE2__) - , "xmm0" -#endif - ); -} -#endif // HAS_HALFROW_SSE2 - -#ifdef HAS_ARGBTOBAYERROW_SSSE3 -void ARGBToBayerRow_SSSE3(const uint8* src_argb, uint8* dst_bayer, - uint32 selector, int pix) { - asm volatile ( - // NaCL caveat - assumes movd is from GPR - "movd %3,%%xmm5 \n" - "pshufd $0x0,%%xmm5,%%xmm5 \n" - LABELALIGN - "1: \n" - "movdqa " MEMACCESS(0) ",%%xmm0 \n" - "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "lea " MEMLEA(0x20,0) ",%0 \n" - "pshufb %%xmm5,%%xmm0 \n" - "pshufb %%xmm5,%%xmm1 \n" - "punpckldq %%xmm1,%%xmm0 \n" - "sub $0x8,%2 \n" - "movq %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x8,1) ",%1 \n" - "jg 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_bayer), // %1 - "+r"(pix) // %2 - : "g"(selector) // %3 - : "memory", "cc" -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm5" -#endif - ); -} -#endif // HAS_ARGBTOBAYERROW_SSSE3 - -#ifdef HAS_ARGBTOBAYERGGROW_SSE2 -void 
ARGBToBayerGGRow_SSE2(const uint8* src_argb, uint8* dst_bayer, - uint32 selector, int pix) { - asm volatile ( - "pcmpeqb %%xmm5,%%xmm5 \n" - "psrld $0x18,%%xmm5 \n" - LABELALIGN - "1: \n" - "movdqa " MEMACCESS(0) ",%%xmm0 \n" - "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "lea " MEMLEA(0x20,0) ",%0 \n" - "psrld $0x8,%%xmm0 \n" - "psrld $0x8,%%xmm1 \n" - "pand %%xmm5,%%xmm0 \n" - "pand %%xmm5,%%xmm1 \n" - "packssdw %%xmm1,%%xmm0 \n" - "packuswb %%xmm1,%%xmm0 \n" - "sub $0x8,%2 \n" - "movq %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x8,1) ",%1 \n" - "jg 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_bayer), // %1 - "+r"(pix) // %2 - : - : "memory", "cc" -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm5" -#endif - ); -} -#endif // HAS_ARGBTOBAYERGGROW_SSE2 - -#ifdef HAS_ARGBSHUFFLEROW_SSSE3 -// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA. -void ARGBShuffleRow_SSSE3(const uint8* src_argb, uint8* dst_argb, - const uint8* shuffler, int pix) { - asm volatile ( - "movdqa " MEMACCESS(3) ",%%xmm5 \n" - LABELALIGN - "1: \n" - "movdqa " MEMACCESS(0) ",%%xmm0 \n" - "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "lea " MEMLEA(0x20,0) ",%0 \n" - "pshufb %%xmm5,%%xmm0 \n" - "pshufb %%xmm5,%%xmm1 \n" - "sub $0x8,%2 \n" - "movdqa %%xmm0," MEMACCESS(1) " \n" - "movdqa %%xmm1," MEMACCESS2(0x10,1) " \n" - "lea " MEMLEA(0x20,1) ",%1 \n" - "jg 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_argb), // %1 - "+r"(pix) // %2 - : "r"(shuffler) // %3 - : "memory", "cc" -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm5" -#endif - ); -} - -void ARGBShuffleRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_argb, - const uint8* shuffler, int pix) { - asm volatile ( - "movdqa " MEMACCESS(3) ",%%xmm5 \n" - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "lea " MEMLEA(0x20,0) ",%0 \n" - "pshufb %%xmm5,%%xmm0 \n" - "pshufb %%xmm5,%%xmm1 \n" - "sub $0x8,%2 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "movdqu %%xmm1," MEMACCESS2(0x10,1) " \n" - 
"lea " MEMLEA(0x20,1) ",%1 \n" - "jg 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_argb), // %1 - "+r"(pix) // %2 - : "r"(shuffler) // %3 - : "memory", "cc" -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm5" -#endif - ); -} -#endif // HAS_ARGBSHUFFLEROW_SSSE3 - -#ifdef HAS_ARGBSHUFFLEROW_AVX2 -// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA. -void ARGBShuffleRow_AVX2(const uint8* src_argb, uint8* dst_argb, - const uint8* shuffler, int pix) { - asm volatile ( - "vbroadcastf128 " MEMACCESS(3) ",%%ymm5 \n" - LABELALIGN - "1: \n" - "vmovdqu " MEMACCESS(0) ",%%ymm0 \n" - "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n" - "lea " MEMLEA(0x40,0) ",%0 \n" - "vpshufb %%ymm5,%%ymm0,%%ymm0 \n" - "vpshufb %%ymm5,%%ymm1,%%ymm1 \n" - "sub $0x10,%2 \n" - "vmovdqu %%ymm0," MEMACCESS(1) " \n" - "vmovdqu %%ymm1," MEMACCESS2(0x20,1) " \n" - "lea " MEMLEA(0x40,1) ",%1 \n" - "jg 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_argb), // %1 - "+r"(pix) // %2 - : "r"(shuffler) // %3 - : "memory", "cc" -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm5" -#endif - ); -} -#endif // HAS_ARGBSHUFFLEROW_AVX2 - -#ifdef HAS_ARGBSHUFFLEROW_SSE2 -// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA. 
-void ARGBShuffleRow_SSE2(const uint8* src_argb, uint8* dst_argb, - const uint8* shuffler, int pix) { - uintptr_t pixel_temp = 0u; - asm volatile ( - "pxor %%xmm5,%%xmm5 \n" - "mov " MEMACCESS(4) ",%k2 \n" - "cmp $0x3000102,%k2 \n" - "je 3012f \n" - "cmp $0x10203,%k2 \n" - "je 123f \n" - "cmp $0x30201,%k2 \n" - "je 321f \n" - "cmp $0x2010003,%k2 \n" - "je 2103f \n" - - LABELALIGN - "1: \n" - "movzb " MEMACCESS(4) ",%2 \n" - MEMOPARG(movzb,0x00,0,2,1,2) " \n" // movzb (%0,%2,1),%2 - "mov %b2," MEMACCESS(1) " \n" - "movzb " MEMACCESS2(0x1,4) ",%2 \n" - MEMOPARG(movzb,0x00,0,2,1,2) " \n" // movzb (%0,%2,1),%2 - "mov %b2," MEMACCESS2(0x1,1) " \n" - BUNDLEALIGN - "movzb " MEMACCESS2(0x2,4) ",%2 \n" - MEMOPARG(movzb,0x00,0,2,1,2) " \n" // movzb (%0,%2,1),%2 - "mov %b2," MEMACCESS2(0x2,1) " \n" - "movzb " MEMACCESS2(0x3,4) ",%2 \n" - MEMOPARG(movzb,0x00,0,2,1,2) " \n" // movzb (%0,%2,1),%2 - "mov %b2," MEMACCESS2(0x3,1) " \n" - "lea " MEMLEA(0x4,0) ",%0 \n" - "lea " MEMLEA(0x4,1) ",%1 \n" - "sub $0x1,%3 \n" - "jg 1b \n" - "jmp 99f \n" - - LABELALIGN - "123: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "lea " MEMLEA(0x10,0) ",%0 \n" - "movdqa %%xmm0,%%xmm1 \n" - "punpcklbw %%xmm5,%%xmm0 \n" - "punpckhbw %%xmm5,%%xmm1 \n" - "pshufhw $0x1b,%%xmm0,%%xmm0 \n" - "pshuflw $0x1b,%%xmm0,%%xmm0 \n" - "pshufhw $0x1b,%%xmm1,%%xmm1 \n" - "pshuflw $0x1b,%%xmm1,%%xmm1 \n" - "packuswb %%xmm1,%%xmm0 \n" - "sub $0x4,%3 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "jg 123b \n" - "jmp 99f \n" - - LABELALIGN - "321: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "lea " MEMLEA(0x10,0) ",%0 \n" - "movdqa %%xmm0,%%xmm1 \n" - "punpcklbw %%xmm5,%%xmm0 \n" - "punpckhbw %%xmm5,%%xmm1 \n" - "pshufhw $0x39,%%xmm0,%%xmm0 \n" - "pshuflw $0x39,%%xmm0,%%xmm0 \n" - "pshufhw $0x39,%%xmm1,%%xmm1 \n" - "pshuflw $0x39,%%xmm1,%%xmm1 \n" - "packuswb %%xmm1,%%xmm0 \n" - "sub $0x4,%3 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "jg 321b \n" - "jmp 99f \n" 
- - LABELALIGN - "2103: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "lea " MEMLEA(0x10,0) ",%0 \n" - "movdqa %%xmm0,%%xmm1 \n" - "punpcklbw %%xmm5,%%xmm0 \n" - "punpckhbw %%xmm5,%%xmm1 \n" - "pshufhw $0x93,%%xmm0,%%xmm0 \n" - "pshuflw $0x93,%%xmm0,%%xmm0 \n" - "pshufhw $0x93,%%xmm1,%%xmm1 \n" - "pshuflw $0x93,%%xmm1,%%xmm1 \n" - "packuswb %%xmm1,%%xmm0 \n" - "sub $0x4,%3 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "jg 2103b \n" - "jmp 99f \n" - - LABELALIGN - "3012: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "lea " MEMLEA(0x10,0) ",%0 \n" - "movdqa %%xmm0,%%xmm1 \n" - "punpcklbw %%xmm5,%%xmm0 \n" - "punpckhbw %%xmm5,%%xmm1 \n" - "pshufhw $0xc6,%%xmm0,%%xmm0 \n" - "pshuflw $0xc6,%%xmm0,%%xmm0 \n" - "pshufhw $0xc6,%%xmm1,%%xmm1 \n" - "pshuflw $0xc6,%%xmm1,%%xmm1 \n" - "packuswb %%xmm1,%%xmm0 \n" - "sub $0x4,%3 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "jg 3012b \n" - - "99: \n" - : "+r"(src_argb), // %0 - "+r"(dst_argb), // %1 - "+d"(pixel_temp), // %2 - "+r"(pix) // %3 - : "r"(shuffler) // %4 - : "memory", "cc" -#if defined(__native_client__) && defined(__x86_64__) - , "r14" -#endif -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm5" -#endif - ); -} -#endif // HAS_ARGBSHUFFLEROW_SSE2 - -#ifdef HAS_I422TOYUY2ROW_SSE2 -void I422ToYUY2Row_SSE2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_frame, int width) { - asm volatile ( - "sub %1,%2 \n" - LABELALIGN - "1: \n" - "movq " MEMACCESS(1) ",%%xmm2 \n" - MEMOPREG(movq,0x00,1,2,1,xmm3) // movq (%1,%2,1),%%xmm3 - "lea " MEMLEA(0x8,1) ",%1 \n" - "punpcklbw %%xmm3,%%xmm2 \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "lea " MEMLEA(0x10,0) ",%0 \n" - "movdqa %%xmm0,%%xmm1 \n" - "punpcklbw %%xmm2,%%xmm0 \n" - "punpckhbw %%xmm2,%%xmm1 \n" - "movdqu %%xmm0," MEMACCESS(3) " \n" - "movdqu %%xmm1," MEMACCESS2(0x10,3) " \n" - "lea " MEMLEA(0x20,3) ",%3 \n" - "sub $0x10,%4 \n" - "jg 1b \n" - : "+r"(src_y), // %0 - "+r"(src_u), // %1 - 
"+r"(src_v), // %2 - "+r"(dst_frame), // %3 - "+rm"(width) // %4 - : - : "memory", "cc" -#if defined(__native_client__) && defined(__x86_64__) - , "r14" -#endif -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3" -#endif - ); -} -#endif // HAS_I422TOYUY2ROW_SSE2 - -#ifdef HAS_I422TOUYVYROW_SSE2 -void I422ToUYVYRow_SSE2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_frame, int width) { - asm volatile ( - "sub %1,%2 \n" - LABELALIGN - "1: \n" - "movq " MEMACCESS(1) ",%%xmm2 \n" - MEMOPREG(movq,0x00,1,2,1,xmm3) // movq (%1,%2,1),%%xmm3 - "lea " MEMLEA(0x8,1) ",%1 \n" - "punpcklbw %%xmm3,%%xmm2 \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqa %%xmm2,%%xmm1 \n" - "lea " MEMLEA(0x10,0) ",%0 \n" - "punpcklbw %%xmm0,%%xmm1 \n" - "punpckhbw %%xmm0,%%xmm2 \n" - "movdqu %%xmm1," MEMACCESS(3) " \n" - "movdqu %%xmm2," MEMACCESS2(0x10,3) " \n" - "lea " MEMLEA(0x20,3) ",%3 \n" - "sub $0x10,%4 \n" - "jg 1b \n" - : "+r"(src_y), // %0 - "+r"(src_u), // %1 - "+r"(src_v), // %2 - "+r"(dst_frame), // %3 - "+rm"(width) // %4 - : - : "memory", "cc" -#if defined(__native_client__) && defined(__x86_64__) - , "r14" -#endif -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3" -#endif - ); -} -#endif // HAS_I422TOUYVYROW_SSE2 - -#ifdef HAS_ARGBPOLYNOMIALROW_SSE2 -void ARGBPolynomialRow_SSE2(const uint8* src_argb, - uint8* dst_argb, const float* poly, - int width) { - asm volatile ( - "pxor %%xmm3,%%xmm3 \n" - - // 2 pixel loop. 
- LABELALIGN - "1: \n" - "movq " MEMACCESS(0) ",%%xmm0 \n" - "lea " MEMLEA(0x8,0) ",%0 \n" - "punpcklbw %%xmm3,%%xmm0 \n" - "movdqa %%xmm0,%%xmm4 \n" - "punpcklwd %%xmm3,%%xmm0 \n" - "punpckhwd %%xmm3,%%xmm4 \n" - "cvtdq2ps %%xmm0,%%xmm0 \n" - "cvtdq2ps %%xmm4,%%xmm4 \n" - "movdqa %%xmm0,%%xmm1 \n" - "movdqa %%xmm4,%%xmm5 \n" - "mulps " MEMACCESS2(0x10,3) ",%%xmm0 \n" - "mulps " MEMACCESS2(0x10,3) ",%%xmm4 \n" - "addps " MEMACCESS(3) ",%%xmm0 \n" - "addps " MEMACCESS(3) ",%%xmm4 \n" - "movdqa %%xmm1,%%xmm2 \n" - "movdqa %%xmm5,%%xmm6 \n" - "mulps %%xmm1,%%xmm2 \n" - "mulps %%xmm5,%%xmm6 \n" - "mulps %%xmm2,%%xmm1 \n" - "mulps %%xmm6,%%xmm5 \n" - "mulps " MEMACCESS2(0x20,3) ",%%xmm2 \n" - "mulps " MEMACCESS2(0x20,3) ",%%xmm6 \n" - "mulps " MEMACCESS2(0x30,3) ",%%xmm1 \n" - "mulps " MEMACCESS2(0x30,3) ",%%xmm5 \n" - "addps %%xmm2,%%xmm0 \n" - "addps %%xmm6,%%xmm4 \n" - "addps %%xmm1,%%xmm0 \n" - "addps %%xmm5,%%xmm4 \n" - "cvttps2dq %%xmm0,%%xmm0 \n" - "cvttps2dq %%xmm4,%%xmm4 \n" - "packuswb %%xmm4,%%xmm0 \n" - "packuswb %%xmm0,%%xmm0 \n" - "sub $0x2,%2 \n" - "movq %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x8,1) ",%1 \n" - "jg 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_argb), // %1 - "+r"(width) // %2 - : "r"(poly) // %3 - : "memory", "cc" -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6" -#endif - ); -} -#endif // HAS_ARGBPOLYNOMIALROW_SSE2 - -#ifdef HAS_ARGBPOLYNOMIALROW_AVX2 -void ARGBPolynomialRow_AVX2(const uint8* src_argb, - uint8* dst_argb, const float* poly, - int width) { - asm volatile ( - "vbroadcastf128 " MEMACCESS(3) ",%%ymm4 \n" - "vbroadcastf128 " MEMACCESS2(0x10,3) ",%%ymm5 \n" - "vbroadcastf128 " MEMACCESS2(0x20,3) ",%%ymm6 \n" - "vbroadcastf128 " MEMACCESS2(0x30,3) ",%%ymm7 \n" - - // 2 pixel loop. 
- LABELALIGN - "1: \n" - "vpmovzxbd " MEMACCESS(0) ",%%ymm0 \n" // 2 ARGB pixels - "lea " MEMLEA(0x8,0) ",%0 \n" - "vcvtdq2ps %%ymm0,%%ymm0 \n" // X 8 floats - "vmulps %%ymm0,%%ymm0,%%ymm2 \n" // X * X - "vmulps %%ymm7,%%ymm0,%%ymm3 \n" // C3 * X - "vfmadd132ps %%ymm5,%%ymm4,%%ymm0 \n" // result = C0 + C1 * X - "vfmadd231ps %%ymm6,%%ymm2,%%ymm0 \n" // result += C2 * X * X - "vfmadd231ps %%ymm3,%%ymm2,%%ymm0 \n" // result += C3 * X * X * X - "vcvttps2dq %%ymm0,%%ymm0 \n" - "vpackusdw %%ymm0,%%ymm0,%%ymm0 \n" - "vpermq $0xd8,%%ymm0,%%ymm0 \n" - "vpackuswb %%xmm0,%%xmm0,%%xmm0 \n" - "sub $0x2,%2 \n" - "vmovq %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x8,1) ",%1 \n" - "jg 1b \n" - "vzeroupper \n" - : "+r"(src_argb), // %0 - "+r"(dst_argb), // %1 - "+r"(width) // %2 - : "r"(poly) // %3 - : "memory", "cc" -#if defined(__SSE2__) -// TODO(fbarchard): declare ymm usage when applicable. - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" -#endif - ); -} -#endif // HAS_ARGBPOLYNOMIALROW_AVX2 - -#ifdef HAS_ARGBCOLORTABLEROW_X86 -// Tranform ARGB pixels with color table. -void ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, - int width) { - uintptr_t pixel_temp = 0u; - asm volatile ( - // 1 pixel loop. 
- LABELALIGN - "1: \n" - "movzb " MEMACCESS(0) ",%1 \n" - "lea " MEMLEA(0x4,0) ",%0 \n" - MEMOPARG(movzb,0x00,3,1,4,1) " \n" // movzb (%3,%1,4),%1 - "mov %b1," MEMACCESS2(-0x4,0) " \n" - "movzb " MEMACCESS2(-0x3,0) ",%1 \n" - MEMOPARG(movzb,0x01,3,1,4,1) " \n" // movzb 0x1(%3,%1,4),%1 - "mov %b1," MEMACCESS2(-0x3,0) " \n" - "movzb " MEMACCESS2(-0x2,0) ",%1 \n" - MEMOPARG(movzb,0x02,3,1,4,1) " \n" // movzb 0x2(%3,%1,4),%1 - "mov %b1," MEMACCESS2(-0x2,0) " \n" - "movzb " MEMACCESS2(-0x1,0) ",%1 \n" - MEMOPARG(movzb,0x03,3,1,4,1) " \n" // movzb 0x3(%3,%1,4),%1 - "mov %b1," MEMACCESS2(-0x1,0) " \n" - "dec %2 \n" - "jg 1b \n" - : "+r"(dst_argb), // %0 - "+d"(pixel_temp), // %1 - "+r"(width) // %2 - : "r"(table_argb) // %3 - : "memory", "cc"); -} -#endif // HAS_ARGBCOLORTABLEROW_X86 - -#ifdef HAS_RGBCOLORTABLEROW_X86 -// Tranform RGB pixels with color table. -void RGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width) { - uintptr_t pixel_temp = 0u; - asm volatile ( - // 1 pixel loop. - LABELALIGN - "1: \n" - "movzb " MEMACCESS(0) ",%1 \n" - "lea " MEMLEA(0x4,0) ",%0 \n" - MEMOPARG(movzb,0x00,3,1,4,1) " \n" // movzb (%3,%1,4),%1 - "mov %b1," MEMACCESS2(-0x4,0) " \n" - "movzb " MEMACCESS2(-0x3,0) ",%1 \n" - MEMOPARG(movzb,0x01,3,1,4,1) " \n" // movzb 0x1(%3,%1,4),%1 - "mov %b1," MEMACCESS2(-0x3,0) " \n" - "movzb " MEMACCESS2(-0x2,0) ",%1 \n" - MEMOPARG(movzb,0x02,3,1,4,1) " \n" // movzb 0x2(%3,%1,4),%1 - "mov %b1," MEMACCESS2(-0x2,0) " \n" - "dec %2 \n" - "jg 1b \n" - : "+r"(dst_argb), // %0 - "+d"(pixel_temp), // %1 - "+r"(width) // %2 - : "r"(table_argb) // %3 - : "memory", "cc"); -} -#endif // HAS_RGBCOLORTABLEROW_X86 - -#ifdef HAS_ARGBLUMACOLORTABLEROW_SSSE3 -// Tranform RGB pixels with luma table. 
-void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb, uint8* dst_argb, - int width, - const uint8* luma, uint32 lumacoeff) { - uintptr_t pixel_temp = 0u; - uintptr_t table_temp = 0u; - asm volatile ( - "movd %6,%%xmm3 \n" - "pshufd $0x0,%%xmm3,%%xmm3 \n" - "pcmpeqb %%xmm4,%%xmm4 \n" - "psllw $0x8,%%xmm4 \n" - "pxor %%xmm5,%%xmm5 \n" - - // 4 pixel loop. - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(2) ",%%xmm0 \n" - "pmaddubsw %%xmm3,%%xmm0 \n" - "phaddw %%xmm0,%%xmm0 \n" - "pand %%xmm4,%%xmm0 \n" - "punpcklwd %%xmm5,%%xmm0 \n" - "movd %%xmm0,%k1 \n" // 32 bit offset - "add %5,%1 \n" - "pshufd $0x39,%%xmm0,%%xmm0 \n" - - "movzb " MEMACCESS(2) ",%0 \n" - MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0 - "mov %b0," MEMACCESS(3) " \n" - "movzb " MEMACCESS2(0x1,2) ",%0 \n" - MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0 - "mov %b0," MEMACCESS2(0x1,3) " \n" - "movzb " MEMACCESS2(0x2,2) ",%0 \n" - MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0 - "mov %b0," MEMACCESS2(0x2,3) " \n" - "movzb " MEMACCESS2(0x3,2) ",%0 \n" - "mov %b0," MEMACCESS2(0x3,3) " \n" - - "movd %%xmm0,%k1 \n" // 32 bit offset - "add %5,%1 \n" - "pshufd $0x39,%%xmm0,%%xmm0 \n" - - "movzb " MEMACCESS2(0x4,2) ",%0 \n" - MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0 - "mov %b0," MEMACCESS2(0x4,3) " \n" - BUNDLEALIGN - "movzb " MEMACCESS2(0x5,2) ",%0 \n" - MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0 - "mov %b0," MEMACCESS2(0x5,3) " \n" - "movzb " MEMACCESS2(0x6,2) ",%0 \n" - MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0 - "mov %b0," MEMACCESS2(0x6,3) " \n" - "movzb " MEMACCESS2(0x7,2) ",%0 \n" - "mov %b0," MEMACCESS2(0x7,3) " \n" - - "movd %%xmm0,%k1 \n" // 32 bit offset - "add %5,%1 \n" - "pshufd $0x39,%%xmm0,%%xmm0 \n" - - "movzb " MEMACCESS2(0x8,2) ",%0 \n" - MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0 - "mov %b0," MEMACCESS2(0x8,3) " \n" - "movzb " MEMACCESS2(0x9,2) ",%0 \n" - MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb 
(%1,%0,1),%0 - "mov %b0," MEMACCESS2(0x9,3) " \n" - "movzb " MEMACCESS2(0xa,2) ",%0 \n" - MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0 - "mov %b0," MEMACCESS2(0xa,3) " \n" - "movzb " MEMACCESS2(0xb,2) ",%0 \n" - "mov %b0," MEMACCESS2(0xb,3) " \n" - - "movd %%xmm0,%k1 \n" // 32 bit offset - "add %5,%1 \n" - - "movzb " MEMACCESS2(0xc,2) ",%0 \n" - MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0 - "mov %b0," MEMACCESS2(0xc,3) " \n" - "movzb " MEMACCESS2(0xd,2) ",%0 \n" - MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0 - "mov %b0," MEMACCESS2(0xd,3) " \n" - "movzb " MEMACCESS2(0xe,2) ",%0 \n" - MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0 - "mov %b0," MEMACCESS2(0xe,3) " \n" - "movzb " MEMACCESS2(0xf,2) ",%0 \n" - "mov %b0," MEMACCESS2(0xf,3) " \n" - "sub $0x4,%4 \n" - "lea " MEMLEA(0x10,2) ",%2 \n" - "lea " MEMLEA(0x10,3) ",%3 \n" - "jg 1b \n" - : "+d"(pixel_temp), // %0 - "+a"(table_temp), // %1 - "+r"(src_argb), // %2 - "+r"(dst_argb), // %3 - "+rm"(width) // %4 - : "r"(luma), // %5 - "rm"(lumacoeff) // %6 - : "memory", "cc" -#if defined(__SSE2__) - , "xmm0", "xmm3", "xmm4", "xmm5" -#endif - ); -} -#endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 - -#endif // defined(__x86_64__) || defined(__i386__) - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/row_win.cc b/drivers/theoraplayer/src/YUV/libyuv/src/row_win.cc deleted file mode 100755 index f13e4d7ae5..0000000000 --- a/drivers/theoraplayer/src/YUV/libyuv/src/row_win.cc +++ /dev/null @@ -1,7284 +0,0 @@ -/* - * Copyright 2011 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include "libyuv/row.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -// This module is for Visual C x86. -#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER) - -#ifdef HAS_ARGBTOYROW_SSSE3 - -// Constants for ARGB. -static const vec8 kARGBToY = { - 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0 -}; - -// JPeg full range. -static const vec8 kARGBToYJ = { - 15, 75, 38, 0, 15, 75, 38, 0, 15, 75, 38, 0, 15, 75, 38, 0 -}; - -static const vec8 kARGBToU = { - 112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0 -}; - -static const vec8 kARGBToUJ = { - 127, -84, -43, 0, 127, -84, -43, 0, 127, -84, -43, 0, 127, -84, -43, 0 -}; - -static const vec8 kARGBToV = { - -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0, -}; - -static const vec8 kARGBToVJ = { - -20, -107, 127, 0, -20, -107, 127, 0, -20, -107, 127, 0, -20, -107, 127, 0 -}; - -// vpermd for vphaddw + vpackuswb vpermd. -static const lvec32 kPermdARGBToY_AVX = { - 0, 4, 1, 5, 2, 6, 3, 7 -}; - -// vpshufb for vphaddw + vpackuswb packed to shorts. -static const lvec8 kShufARGBToUV_AVX = { - 0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15, - 0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15, -}; - -// Constants for BGRA. -static const vec8 kBGRAToY = { - 0, 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13 -}; - -static const vec8 kBGRAToU = { - 0, -38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112 -}; - -static const vec8 kBGRAToV = { - 0, 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18 -}; - -// Constants for ABGR. -static const vec8 kABGRToY = { - 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13, 0 -}; - -static const vec8 kABGRToU = { - -38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112, 0 -}; - -static const vec8 kABGRToV = { - 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18, 0 -}; - -// Constants for RGBA. 
-static const vec8 kRGBAToY = { - 0, 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33 -}; - -static const vec8 kRGBAToU = { - 0, 112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38 -}; - -static const vec8 kRGBAToV = { - 0, -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112 -}; - -static const uvec8 kAddY16 = { - 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u -}; - -static const vec16 kAddYJ64 = { - 64, 64, 64, 64, 64, 64, 64, 64 -}; - -static const uvec8 kAddUV128 = { - 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, - 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u -}; - -static const uvec16 kAddUVJ128 = { - 0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u -}; - -// Shuffle table for converting RGB24 to ARGB. -static const uvec8 kShuffleMaskRGB24ToARGB = { - 0u, 1u, 2u, 12u, 3u, 4u, 5u, 13u, 6u, 7u, 8u, 14u, 9u, 10u, 11u, 15u -}; - -// Shuffle table for converting RAW to ARGB. -static const uvec8 kShuffleMaskRAWToARGB = { - 2u, 1u, 0u, 12u, 5u, 4u, 3u, 13u, 8u, 7u, 6u, 14u, 11u, 10u, 9u, 15u -}; - -// Shuffle table for converting ARGB to RGB24. -static const uvec8 kShuffleMaskARGBToRGB24 = { - 0u, 1u, 2u, 4u, 5u, 6u, 8u, 9u, 10u, 12u, 13u, 14u, 128u, 128u, 128u, 128u -}; - -// Shuffle table for converting ARGB to RAW. -static const uvec8 kShuffleMaskARGBToRAW = { - 2u, 1u, 0u, 6u, 5u, 4u, 10u, 9u, 8u, 14u, 13u, 12u, 128u, 128u, 128u, 128u -}; - -// Shuffle table for converting ARGBToRGB24 for I422ToRGB24. First 8 + next 4 -static const uvec8 kShuffleMaskARGBToRGB24_0 = { - 0u, 1u, 2u, 4u, 5u, 6u, 8u, 9u, 128u, 128u, 128u, 128u, 10u, 12u, 13u, 14u -}; - -// Shuffle table for converting ARGB to RAW. -static const uvec8 kShuffleMaskARGBToRAW_0 = { - 2u, 1u, 0u, 6u, 5u, 4u, 10u, 9u, 128u, 128u, 128u, 128u, 8u, 14u, 13u, 12u -}; - -// Duplicates gray value 3 times and fills in alpha opaque. 
-__declspec(naked) __declspec(align(16)) -void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix) { - __asm { - mov eax, [esp + 4] // src_y - mov edx, [esp + 8] // dst_argb - mov ecx, [esp + 12] // pix - pcmpeqb xmm5, xmm5 // generate mask 0xff000000 - pslld xmm5, 24 - - align 4 - convertloop: - movq xmm0, qword ptr [eax] - lea eax, [eax + 8] - punpcklbw xmm0, xmm0 - movdqa xmm1, xmm0 - punpcklwd xmm0, xmm0 - punpckhwd xmm1, xmm1 - por xmm0, xmm5 - por xmm1, xmm5 - movdqa [edx], xmm0 - movdqa [edx + 16], xmm1 - lea edx, [edx + 32] - sub ecx, 8 - jg convertloop - ret - } -} - -__declspec(naked) __declspec(align(16)) -void I400ToARGBRow_Unaligned_SSE2(const uint8* src_y, uint8* dst_argb, - int pix) { - __asm { - mov eax, [esp + 4] // src_y - mov edx, [esp + 8] // dst_argb - mov ecx, [esp + 12] // pix - pcmpeqb xmm5, xmm5 // generate mask 0xff000000 - pslld xmm5, 24 - - align 4 - convertloop: - movq xmm0, qword ptr [eax] - lea eax, [eax + 8] - punpcklbw xmm0, xmm0 - movdqa xmm1, xmm0 - punpcklwd xmm0, xmm0 - punpckhwd xmm1, xmm1 - por xmm0, xmm5 - por xmm1, xmm5 - movdqu [edx], xmm0 - movdqu [edx + 16], xmm1 - lea edx, [edx + 32] - sub ecx, 8 - jg convertloop - ret - } -} - -__declspec(naked) __declspec(align(16)) -void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix) { - __asm { - mov eax, [esp + 4] // src_rgb24 - mov edx, [esp + 8] // dst_argb - mov ecx, [esp + 12] // pix - pcmpeqb xmm5, xmm5 // generate mask 0xff000000 - pslld xmm5, 24 - movdqa xmm4, kShuffleMaskRGB24ToARGB - - align 4 - convertloop: - movdqu xmm0, [eax] - movdqu xmm1, [eax + 16] - movdqu xmm3, [eax + 32] - lea eax, [eax + 48] - movdqa xmm2, xmm3 - palignr xmm2, xmm1, 8 // xmm2 = { xmm3[0:3] xmm1[8:15]} - pshufb xmm2, xmm4 - por xmm2, xmm5 - palignr xmm1, xmm0, 12 // xmm1 = { xmm3[0:7] xmm0[12:15]} - pshufb xmm0, xmm4 - movdqa [edx + 32], xmm2 - por xmm0, xmm5 - pshufb xmm1, xmm4 - movdqa [edx], xmm0 - por xmm1, xmm5 - palignr xmm3, xmm3, 4 // xmm3 = { 
xmm3[4:15]} - pshufb xmm3, xmm4 - movdqa [edx + 16], xmm1 - por xmm3, xmm5 - sub ecx, 16 - movdqa [edx + 48], xmm3 - lea edx, [edx + 64] - jg convertloop - ret - } -} - -__declspec(naked) __declspec(align(16)) -void RAWToARGBRow_SSSE3(const uint8* src_raw, uint8* dst_argb, - int pix) { - __asm { - mov eax, [esp + 4] // src_raw - mov edx, [esp + 8] // dst_argb - mov ecx, [esp + 12] // pix - pcmpeqb xmm5, xmm5 // generate mask 0xff000000 - pslld xmm5, 24 - movdqa xmm4, kShuffleMaskRAWToARGB - - align 4 - convertloop: - movdqu xmm0, [eax] - movdqu xmm1, [eax + 16] - movdqu xmm3, [eax + 32] - lea eax, [eax + 48] - movdqa xmm2, xmm3 - palignr xmm2, xmm1, 8 // xmm2 = { xmm3[0:3] xmm1[8:15]} - pshufb xmm2, xmm4 - por xmm2, xmm5 - palignr xmm1, xmm0, 12 // xmm1 = { xmm3[0:7] xmm0[12:15]} - pshufb xmm0, xmm4 - movdqa [edx + 32], xmm2 - por xmm0, xmm5 - pshufb xmm1, xmm4 - movdqa [edx], xmm0 - por xmm1, xmm5 - palignr xmm3, xmm3, 4 // xmm3 = { xmm3[4:15]} - pshufb xmm3, xmm4 - movdqa [edx + 16], xmm1 - por xmm3, xmm5 - sub ecx, 16 - movdqa [edx + 48], xmm3 - lea edx, [edx + 64] - jg convertloop - ret - } -} - -// pmul method to replicate bits. -// Math to replicate bits: -// (v << 8) | (v << 3) -// v * 256 + v * 8 -// v * (256 + 8) -// G shift of 5 is incorporated, so shift is 5 + 8 and 5 + 3 -// 20 instructions. 
-__declspec(naked) __declspec(align(16)) -void RGB565ToARGBRow_SSE2(const uint8* src_rgb565, uint8* dst_argb, - int pix) { - __asm { - mov eax, 0x01080108 // generate multiplier to repeat 5 bits - movd xmm5, eax - pshufd xmm5, xmm5, 0 - mov eax, 0x20802080 // multiplier shift by 5 and then repeat 6 bits - movd xmm6, eax - pshufd xmm6, xmm6, 0 - pcmpeqb xmm3, xmm3 // generate mask 0xf800f800 for Red - psllw xmm3, 11 - pcmpeqb xmm4, xmm4 // generate mask 0x07e007e0 for Green - psllw xmm4, 10 - psrlw xmm4, 5 - pcmpeqb xmm7, xmm7 // generate mask 0xff00ff00 for Alpha - psllw xmm7, 8 - - mov eax, [esp + 4] // src_rgb565 - mov edx, [esp + 8] // dst_argb - mov ecx, [esp + 12] // pix - sub edx, eax - sub edx, eax - - align 4 - convertloop: - movdqu xmm0, [eax] // fetch 8 pixels of bgr565 - movdqa xmm1, xmm0 - movdqa xmm2, xmm0 - pand xmm1, xmm3 // R in upper 5 bits - psllw xmm2, 11 // B in upper 5 bits - pmulhuw xmm1, xmm5 // * (256 + 8) - pmulhuw xmm2, xmm5 // * (256 + 8) - psllw xmm1, 8 - por xmm1, xmm2 // RB - pand xmm0, xmm4 // G in middle 6 bits - pmulhuw xmm0, xmm6 // << 5 * (256 + 4) - por xmm0, xmm7 // AG - movdqa xmm2, xmm1 - punpcklbw xmm1, xmm0 - punpckhbw xmm2, xmm0 - movdqa [eax * 2 + edx], xmm1 // store 4 pixels of ARGB - movdqa [eax * 2 + edx + 16], xmm2 // store next 4 pixels of ARGB - lea eax, [eax + 16] - sub ecx, 8 - jg convertloop - ret - } -} - -// 24 instructions -__declspec(naked) __declspec(align(16)) -void ARGB1555ToARGBRow_SSE2(const uint8* src_argb1555, uint8* dst_argb, - int pix) { - __asm { - mov eax, 0x01080108 // generate multiplier to repeat 5 bits - movd xmm5, eax - pshufd xmm5, xmm5, 0 - mov eax, 0x42004200 // multiplier shift by 6 and then repeat 5 bits - movd xmm6, eax - pshufd xmm6, xmm6, 0 - pcmpeqb xmm3, xmm3 // generate mask 0xf800f800 for Red - psllw xmm3, 11 - movdqa xmm4, xmm3 // generate mask 0x03e003e0 for Green - psrlw xmm4, 6 - pcmpeqb xmm7, xmm7 // generate mask 0xff00ff00 for Alpha - psllw xmm7, 8 - - mov eax, [esp + 4] // 
src_argb1555 - mov edx, [esp + 8] // dst_argb - mov ecx, [esp + 12] // pix - sub edx, eax - sub edx, eax - - align 4 - convertloop: - movdqu xmm0, [eax] // fetch 8 pixels of 1555 - movdqa xmm1, xmm0 - movdqa xmm2, xmm0 - psllw xmm1, 1 // R in upper 5 bits - psllw xmm2, 11 // B in upper 5 bits - pand xmm1, xmm3 - pmulhuw xmm2, xmm5 // * (256 + 8) - pmulhuw xmm1, xmm5 // * (256 + 8) - psllw xmm1, 8 - por xmm1, xmm2 // RB - movdqa xmm2, xmm0 - pand xmm0, xmm4 // G in middle 5 bits - psraw xmm2, 8 // A - pmulhuw xmm0, xmm6 // << 6 * (256 + 8) - pand xmm2, xmm7 - por xmm0, xmm2 // AG - movdqa xmm2, xmm1 - punpcklbw xmm1, xmm0 - punpckhbw xmm2, xmm0 - movdqa [eax * 2 + edx], xmm1 // store 4 pixels of ARGB - movdqa [eax * 2 + edx + 16], xmm2 // store next 4 pixels of ARGB - lea eax, [eax + 16] - sub ecx, 8 - jg convertloop - ret - } -} - -// 18 instructions. -__declspec(naked) __declspec(align(16)) -void ARGB4444ToARGBRow_SSE2(const uint8* src_argb4444, uint8* dst_argb, - int pix) { - __asm { - mov eax, 0x0f0f0f0f // generate mask 0x0f0f0f0f - movd xmm4, eax - pshufd xmm4, xmm4, 0 - movdqa xmm5, xmm4 // 0xf0f0f0f0 for high nibbles - pslld xmm5, 4 - mov eax, [esp + 4] // src_argb4444 - mov edx, [esp + 8] // dst_argb - mov ecx, [esp + 12] // pix - sub edx, eax - sub edx, eax - - align 4 - convertloop: - movdqu xmm0, [eax] // fetch 8 pixels of bgra4444 - movdqa xmm2, xmm0 - pand xmm0, xmm4 // mask low nibbles - pand xmm2, xmm5 // mask high nibbles - movdqa xmm1, xmm0 - movdqa xmm3, xmm2 - psllw xmm1, 4 - psrlw xmm3, 4 - por xmm0, xmm1 - por xmm2, xmm3 - movdqa xmm1, xmm0 - punpcklbw xmm0, xmm2 - punpckhbw xmm1, xmm2 - movdqa [eax * 2 + edx], xmm0 // store 4 pixels of ARGB - movdqa [eax * 2 + edx + 16], xmm1 // store next 4 pixels of ARGB - lea eax, [eax + 16] - sub ecx, 8 - jg convertloop - ret - } -} - -__declspec(naked) __declspec(align(16)) -void ARGBToRGB24Row_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix) { - __asm { - mov eax, [esp + 4] // src_argb - mov edx, 
[esp + 8] // dst_rgb - mov ecx, [esp + 12] // pix - movdqa xmm6, kShuffleMaskARGBToRGB24 - - align 4 - convertloop: - movdqu xmm0, [eax] // fetch 16 pixels of argb - movdqu xmm1, [eax + 16] - movdqu xmm2, [eax + 32] - movdqu xmm3, [eax + 48] - lea eax, [eax + 64] - pshufb xmm0, xmm6 // pack 16 bytes of ARGB to 12 bytes of RGB - pshufb xmm1, xmm6 - pshufb xmm2, xmm6 - pshufb xmm3, xmm6 - movdqa xmm4, xmm1 // 4 bytes from 1 for 0 - psrldq xmm1, 4 // 8 bytes from 1 - pslldq xmm4, 12 // 4 bytes from 1 for 0 - movdqa xmm5, xmm2 // 8 bytes from 2 for 1 - por xmm0, xmm4 // 4 bytes from 1 for 0 - pslldq xmm5, 8 // 8 bytes from 2 for 1 - movdqu [edx], xmm0 // store 0 - por xmm1, xmm5 // 8 bytes from 2 for 1 - psrldq xmm2, 8 // 4 bytes from 2 - pslldq xmm3, 4 // 12 bytes from 3 for 2 - por xmm2, xmm3 // 12 bytes from 3 for 2 - movdqu [edx + 16], xmm1 // store 1 - movdqu [edx + 32], xmm2 // store 2 - lea edx, [edx + 48] - sub ecx, 16 - jg convertloop - ret - } -} - -__declspec(naked) __declspec(align(16)) -void ARGBToRAWRow_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix) { - __asm { - mov eax, [esp + 4] // src_argb - mov edx, [esp + 8] // dst_rgb - mov ecx, [esp + 12] // pix - movdqa xmm6, kShuffleMaskARGBToRAW - - align 4 - convertloop: - movdqu xmm0, [eax] // fetch 16 pixels of argb - movdqu xmm1, [eax + 16] - movdqu xmm2, [eax + 32] - movdqu xmm3, [eax + 48] - lea eax, [eax + 64] - pshufb xmm0, xmm6 // pack 16 bytes of ARGB to 12 bytes of RGB - pshufb xmm1, xmm6 - pshufb xmm2, xmm6 - pshufb xmm3, xmm6 - movdqa xmm4, xmm1 // 4 bytes from 1 for 0 - psrldq xmm1, 4 // 8 bytes from 1 - pslldq xmm4, 12 // 4 bytes from 1 for 0 - movdqa xmm5, xmm2 // 8 bytes from 2 for 1 - por xmm0, xmm4 // 4 bytes from 1 for 0 - pslldq xmm5, 8 // 8 bytes from 2 for 1 - movdqu [edx], xmm0 // store 0 - por xmm1, xmm5 // 8 bytes from 2 for 1 - psrldq xmm2, 8 // 4 bytes from 2 - pslldq xmm3, 4 // 12 bytes from 3 for 2 - por xmm2, xmm3 // 12 bytes from 3 for 2 - movdqu [edx + 16], xmm1 // store 
1 - movdqu [edx + 32], xmm2 // store 2 - lea edx, [edx + 48] - sub ecx, 16 - jg convertloop - ret - } -} - -__declspec(naked) __declspec(align(16)) -void ARGBToRGB565Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix) { - __asm { - mov eax, [esp + 4] // src_argb - mov edx, [esp + 8] // dst_rgb - mov ecx, [esp + 12] // pix - pcmpeqb xmm3, xmm3 // generate mask 0x0000001f - psrld xmm3, 27 - pcmpeqb xmm4, xmm4 // generate mask 0x000007e0 - psrld xmm4, 26 - pslld xmm4, 5 - pcmpeqb xmm5, xmm5 // generate mask 0xfffff800 - pslld xmm5, 11 - - align 4 - convertloop: - movdqa xmm0, [eax] // fetch 4 pixels of argb - movdqa xmm1, xmm0 // B - movdqa xmm2, xmm0 // G - pslld xmm0, 8 // R - psrld xmm1, 3 // B - psrld xmm2, 5 // G - psrad xmm0, 16 // R - pand xmm1, xmm3 // B - pand xmm2, xmm4 // G - pand xmm0, xmm5 // R - por xmm1, xmm2 // BG - por xmm0, xmm1 // BGR - packssdw xmm0, xmm0 - lea eax, [eax + 16] - movq qword ptr [edx], xmm0 // store 4 pixels of RGB565 - lea edx, [edx + 8] - sub ecx, 4 - jg convertloop - ret - } -} - -// TODO(fbarchard): Improve sign extension/packing. 
-__declspec(naked) __declspec(align(16)) -void ARGBToARGB1555Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix) { - __asm { - mov eax, [esp + 4] // src_argb - mov edx, [esp + 8] // dst_rgb - mov ecx, [esp + 12] // pix - pcmpeqb xmm4, xmm4 // generate mask 0x0000001f - psrld xmm4, 27 - movdqa xmm5, xmm4 // generate mask 0x000003e0 - pslld xmm5, 5 - movdqa xmm6, xmm4 // generate mask 0x00007c00 - pslld xmm6, 10 - pcmpeqb xmm7, xmm7 // generate mask 0xffff8000 - pslld xmm7, 15 - - align 4 - convertloop: - movdqa xmm0, [eax] // fetch 4 pixels of argb - movdqa xmm1, xmm0 // B - movdqa xmm2, xmm0 // G - movdqa xmm3, xmm0 // R - psrad xmm0, 16 // A - psrld xmm1, 3 // B - psrld xmm2, 6 // G - psrld xmm3, 9 // R - pand xmm0, xmm7 // A - pand xmm1, xmm4 // B - pand xmm2, xmm5 // G - pand xmm3, xmm6 // R - por xmm0, xmm1 // BA - por xmm2, xmm3 // GR - por xmm0, xmm2 // BGRA - packssdw xmm0, xmm0 - lea eax, [eax + 16] - movq qword ptr [edx], xmm0 // store 4 pixels of ARGB1555 - lea edx, [edx + 8] - sub ecx, 4 - jg convertloop - ret - } -} - -__declspec(naked) __declspec(align(16)) -void ARGBToARGB4444Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix) { - __asm { - mov eax, [esp + 4] // src_argb - mov edx, [esp + 8] // dst_rgb - mov ecx, [esp + 12] // pix - pcmpeqb xmm4, xmm4 // generate mask 0xf000f000 - psllw xmm4, 12 - movdqa xmm3, xmm4 // generate mask 0x00f000f0 - psrlw xmm3, 8 - - align 4 - convertloop: - movdqa xmm0, [eax] // fetch 4 pixels of argb - movdqa xmm1, xmm0 - pand xmm0, xmm3 // low nibble - pand xmm1, xmm4 // high nibble - psrl xmm0, 4 - psrl xmm1, 8 - por xmm0, xmm1 - packuswb xmm0, xmm0 - lea eax, [eax + 16] - movq qword ptr [edx], xmm0 // store 4 pixels of ARGB4444 - lea edx, [edx + 8] - sub ecx, 4 - jg convertloop - ret - } -} - -// Convert 16 ARGB pixels (64 bytes) to 16 Y values. 
-__declspec(naked) __declspec(align(16)) -void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) { - __asm { - mov eax, [esp + 4] /* src_argb */ - mov edx, [esp + 8] /* dst_y */ - mov ecx, [esp + 12] /* pix */ - movdqa xmm5, kAddY16 - movdqa xmm4, kARGBToY - - align 4 - convertloop: - movdqa xmm0, [eax] - movdqa xmm1, [eax + 16] - movdqa xmm2, [eax + 32] - movdqa xmm3, [eax + 48] - pmaddubsw xmm0, xmm4 - pmaddubsw xmm1, xmm4 - pmaddubsw xmm2, xmm4 - pmaddubsw xmm3, xmm4 - lea eax, [eax + 64] - phaddw xmm0, xmm1 - phaddw xmm2, xmm3 - psrlw xmm0, 7 - psrlw xmm2, 7 - packuswb xmm0, xmm2 - paddb xmm0, xmm5 - sub ecx, 16 - movdqa [edx], xmm0 - lea edx, [edx + 16] - jg convertloop - ret - } -} - -// Convert 16 ARGB pixels (64 bytes) to 16 Y values. -__declspec(naked) __declspec(align(16)) -void ARGBToYJRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) { - __asm { - mov eax, [esp + 4] /* src_argb */ - mov edx, [esp + 8] /* dst_y */ - mov ecx, [esp + 12] /* pix */ - movdqa xmm4, kARGBToYJ - movdqa xmm5, kAddYJ64 - - align 4 - convertloop: - movdqa xmm0, [eax] - movdqa xmm1, [eax + 16] - movdqa xmm2, [eax + 32] - movdqa xmm3, [eax + 48] - pmaddubsw xmm0, xmm4 - pmaddubsw xmm1, xmm4 - pmaddubsw xmm2, xmm4 - pmaddubsw xmm3, xmm4 - lea eax, [eax + 64] - phaddw xmm0, xmm1 - phaddw xmm2, xmm3 - paddw xmm0, xmm5 // Add .5 for rounding. - paddw xmm2, xmm5 - psrlw xmm0, 7 - psrlw xmm2, 7 - packuswb xmm0, xmm2 - sub ecx, 16 - movdqa [edx], xmm0 - lea edx, [edx + 16] - jg convertloop - ret - } -} - -#ifdef HAS_ARGBTOYROW_AVX2 -// Convert 32 ARGB pixels (128 bytes) to 32 Y values. 
-__declspec(naked) __declspec(align(32)) -void ARGBToYRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix) { - __asm { - mov eax, [esp + 4] /* src_argb */ - mov edx, [esp + 8] /* dst_y */ - mov ecx, [esp + 12] /* pix */ - vbroadcastf128 ymm4, kARGBToY - vbroadcastf128 ymm5, kAddY16 - vmovdqa ymm6, kPermdARGBToY_AVX - - align 4 - convertloop: - vmovdqu ymm0, [eax] - vmovdqu ymm1, [eax + 32] - vmovdqu ymm2, [eax + 64] - vmovdqu ymm3, [eax + 96] - vpmaddubsw ymm0, ymm0, ymm4 - vpmaddubsw ymm1, ymm1, ymm4 - vpmaddubsw ymm2, ymm2, ymm4 - vpmaddubsw ymm3, ymm3, ymm4 - lea eax, [eax + 128] - vphaddw ymm0, ymm0, ymm1 // mutates. - vphaddw ymm2, ymm2, ymm3 - vpsrlw ymm0, ymm0, 7 - vpsrlw ymm2, ymm2, 7 - vpackuswb ymm0, ymm0, ymm2 // mutates. - vpermd ymm0, ymm6, ymm0 // For vphaddw + vpackuswb mutation. - vpaddb ymm0, ymm0, ymm5 - sub ecx, 32 - vmovdqu [edx], ymm0 - lea edx, [edx + 32] - jg convertloop - vzeroupper - ret - } -} -#endif // HAS_ARGBTOYROW_AVX2 - -#ifdef HAS_ARGBTOYROW_AVX2 -// Convert 32 ARGB pixels (128 bytes) to 32 Y values. -__declspec(naked) __declspec(align(32)) -void ARGBToYJRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix) { - __asm { - mov eax, [esp + 4] /* src_argb */ - mov edx, [esp + 8] /* dst_y */ - mov ecx, [esp + 12] /* pix */ - vbroadcastf128 ymm4, kARGBToYJ - vbroadcastf128 ymm5, kAddYJ64 - vmovdqa ymm6, kPermdARGBToY_AVX - - align 4 - convertloop: - vmovdqu ymm0, [eax] - vmovdqu ymm1, [eax + 32] - vmovdqu ymm2, [eax + 64] - vmovdqu ymm3, [eax + 96] - vpmaddubsw ymm0, ymm0, ymm4 - vpmaddubsw ymm1, ymm1, ymm4 - vpmaddubsw ymm2, ymm2, ymm4 - vpmaddubsw ymm3, ymm3, ymm4 - lea eax, [eax + 128] - vphaddw ymm0, ymm0, ymm1 // mutates. - vphaddw ymm2, ymm2, ymm3 - vpaddw ymm0, ymm0, ymm5 // Add .5 for rounding. - vpaddw ymm2, ymm2, ymm5 - vpsrlw ymm0, ymm0, 7 - vpsrlw ymm2, ymm2, 7 - vpackuswb ymm0, ymm0, ymm2 // mutates. - vpermd ymm0, ymm6, ymm0 // For vphaddw + vpackuswb mutation. 
- sub ecx, 32 - vmovdqu [edx], ymm0 - lea edx, [edx + 32] - jg convertloop - - vzeroupper - ret - } -} -#endif // HAS_ARGBTOYJROW_AVX2 - -__declspec(naked) __declspec(align(16)) -void ARGBToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) { - __asm { - mov eax, [esp + 4] /* src_argb */ - mov edx, [esp + 8] /* dst_y */ - mov ecx, [esp + 12] /* pix */ - movdqa xmm5, kAddY16 - movdqa xmm4, kARGBToY - - align 4 - convertloop: - movdqu xmm0, [eax] - movdqu xmm1, [eax + 16] - movdqu xmm2, [eax + 32] - movdqu xmm3, [eax + 48] - pmaddubsw xmm0, xmm4 - pmaddubsw xmm1, xmm4 - pmaddubsw xmm2, xmm4 - pmaddubsw xmm3, xmm4 - lea eax, [eax + 64] - phaddw xmm0, xmm1 - phaddw xmm2, xmm3 - psrlw xmm0, 7 - psrlw xmm2, 7 - packuswb xmm0, xmm2 - paddb xmm0, xmm5 - sub ecx, 16 - movdqu [edx], xmm0 - lea edx, [edx + 16] - jg convertloop - ret - } -} - -__declspec(naked) __declspec(align(16)) -void ARGBToYJRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) { - __asm { - mov eax, [esp + 4] /* src_argb */ - mov edx, [esp + 8] /* dst_y */ - mov ecx, [esp + 12] /* pix */ - movdqa xmm4, kARGBToYJ - movdqa xmm5, kAddYJ64 - - align 4 - convertloop: - movdqu xmm0, [eax] - movdqu xmm1, [eax + 16] - movdqu xmm2, [eax + 32] - movdqu xmm3, [eax + 48] - pmaddubsw xmm0, xmm4 - pmaddubsw xmm1, xmm4 - pmaddubsw xmm2, xmm4 - pmaddubsw xmm3, xmm4 - lea eax, [eax + 64] - phaddw xmm0, xmm1 - phaddw xmm2, xmm3 - paddw xmm0, xmm5 - paddw xmm2, xmm5 - psrlw xmm0, 7 - psrlw xmm2, 7 - packuswb xmm0, xmm2 - sub ecx, 16 - movdqu [edx], xmm0 - lea edx, [edx + 16] - jg convertloop - ret - } -} - -__declspec(naked) __declspec(align(16)) -void BGRAToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) { - __asm { - mov eax, [esp + 4] /* src_argb */ - mov edx, [esp + 8] /* dst_y */ - mov ecx, [esp + 12] /* pix */ - movdqa xmm5, kAddY16 - movdqa xmm4, kBGRAToY - - align 4 - convertloop: - movdqa xmm0, [eax] - movdqa xmm1, [eax + 16] - movdqa xmm2, [eax + 32] - movdqa xmm3, [eax + 48] 
- pmaddubsw xmm0, xmm4 - pmaddubsw xmm1, xmm4 - pmaddubsw xmm2, xmm4 - pmaddubsw xmm3, xmm4 - lea eax, [eax + 64] - phaddw xmm0, xmm1 - phaddw xmm2, xmm3 - psrlw xmm0, 7 - psrlw xmm2, 7 - packuswb xmm0, xmm2 - paddb xmm0, xmm5 - sub ecx, 16 - movdqa [edx], xmm0 - lea edx, [edx + 16] - jg convertloop - ret - } -} - -__declspec(naked) __declspec(align(16)) -void BGRAToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) { - __asm { - mov eax, [esp + 4] /* src_argb */ - mov edx, [esp + 8] /* dst_y */ - mov ecx, [esp + 12] /* pix */ - movdqa xmm5, kAddY16 - movdqa xmm4, kBGRAToY - - align 4 - convertloop: - movdqu xmm0, [eax] - movdqu xmm1, [eax + 16] - movdqu xmm2, [eax + 32] - movdqu xmm3, [eax + 48] - pmaddubsw xmm0, xmm4 - pmaddubsw xmm1, xmm4 - pmaddubsw xmm2, xmm4 - pmaddubsw xmm3, xmm4 - lea eax, [eax + 64] - phaddw xmm0, xmm1 - phaddw xmm2, xmm3 - psrlw xmm0, 7 - psrlw xmm2, 7 - packuswb xmm0, xmm2 - paddb xmm0, xmm5 - sub ecx, 16 - movdqu [edx], xmm0 - lea edx, [edx + 16] - jg convertloop - ret - } -} - -__declspec(naked) __declspec(align(16)) -void ABGRToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) { - __asm { - mov eax, [esp + 4] /* src_argb */ - mov edx, [esp + 8] /* dst_y */ - mov ecx, [esp + 12] /* pix */ - movdqa xmm5, kAddY16 - movdqa xmm4, kABGRToY - - align 4 - convertloop: - movdqa xmm0, [eax] - movdqa xmm1, [eax + 16] - movdqa xmm2, [eax + 32] - movdqa xmm3, [eax + 48] - pmaddubsw xmm0, xmm4 - pmaddubsw xmm1, xmm4 - pmaddubsw xmm2, xmm4 - pmaddubsw xmm3, xmm4 - lea eax, [eax + 64] - phaddw xmm0, xmm1 - phaddw xmm2, xmm3 - psrlw xmm0, 7 - psrlw xmm2, 7 - packuswb xmm0, xmm2 - paddb xmm0, xmm5 - sub ecx, 16 - movdqa [edx], xmm0 - lea edx, [edx + 16] - jg convertloop - ret - } -} - -__declspec(naked) __declspec(align(16)) -void ABGRToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) { - __asm { - mov eax, [esp + 4] /* src_argb */ - mov edx, [esp + 8] /* dst_y */ - mov ecx, [esp + 12] /* pix */ - movdqa xmm5, 
kAddY16 - movdqa xmm4, kABGRToY - - align 4 - convertloop: - movdqu xmm0, [eax] - movdqu xmm1, [eax + 16] - movdqu xmm2, [eax + 32] - movdqu xmm3, [eax + 48] - pmaddubsw xmm0, xmm4 - pmaddubsw xmm1, xmm4 - pmaddubsw xmm2, xmm4 - pmaddubsw xmm3, xmm4 - lea eax, [eax + 64] - phaddw xmm0, xmm1 - phaddw xmm2, xmm3 - psrlw xmm0, 7 - psrlw xmm2, 7 - packuswb xmm0, xmm2 - paddb xmm0, xmm5 - sub ecx, 16 - movdqu [edx], xmm0 - lea edx, [edx + 16] - jg convertloop - ret - } -} - -__declspec(naked) __declspec(align(16)) -void RGBAToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) { - __asm { - mov eax, [esp + 4] /* src_argb */ - mov edx, [esp + 8] /* dst_y */ - mov ecx, [esp + 12] /* pix */ - movdqa xmm5, kAddY16 - movdqa xmm4, kRGBAToY - - align 4 - convertloop: - movdqa xmm0, [eax] - movdqa xmm1, [eax + 16] - movdqa xmm2, [eax + 32] - movdqa xmm3, [eax + 48] - pmaddubsw xmm0, xmm4 - pmaddubsw xmm1, xmm4 - pmaddubsw xmm2, xmm4 - pmaddubsw xmm3, xmm4 - lea eax, [eax + 64] - phaddw xmm0, xmm1 - phaddw xmm2, xmm3 - psrlw xmm0, 7 - psrlw xmm2, 7 - packuswb xmm0, xmm2 - paddb xmm0, xmm5 - sub ecx, 16 - movdqa [edx], xmm0 - lea edx, [edx + 16] - jg convertloop - ret - } -} - -__declspec(naked) __declspec(align(16)) -void RGBAToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) { - __asm { - mov eax, [esp + 4] /* src_argb */ - mov edx, [esp + 8] /* dst_y */ - mov ecx, [esp + 12] /* pix */ - movdqa xmm5, kAddY16 - movdqa xmm4, kRGBAToY - - align 4 - convertloop: - movdqu xmm0, [eax] - movdqu xmm1, [eax + 16] - movdqu xmm2, [eax + 32] - movdqu xmm3, [eax + 48] - pmaddubsw xmm0, xmm4 - pmaddubsw xmm1, xmm4 - pmaddubsw xmm2, xmm4 - pmaddubsw xmm3, xmm4 - lea eax, [eax + 64] - phaddw xmm0, xmm1 - phaddw xmm2, xmm3 - psrlw xmm0, 7 - psrlw xmm2, 7 - packuswb xmm0, xmm2 - paddb xmm0, xmm5 - sub ecx, 16 - movdqu [edx], xmm0 - lea edx, [edx + 16] - jg convertloop - ret - } -} - -__declspec(naked) __declspec(align(16)) -void ARGBToUVRow_SSSE3(const uint8* src_argb0, 
int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width) { - __asm { - push esi - push edi - mov eax, [esp + 8 + 4] // src_argb - mov esi, [esp + 8 + 8] // src_stride_argb - mov edx, [esp + 8 + 12] // dst_u - mov edi, [esp + 8 + 16] // dst_v - mov ecx, [esp + 8 + 20] // pix - movdqa xmm7, kARGBToU - movdqa xmm6, kARGBToV - movdqa xmm5, kAddUV128 - sub edi, edx // stride from u to v - - align 4 - convertloop: - /* step 1 - subsample 16x2 argb pixels to 8x1 */ - movdqa xmm0, [eax] - movdqa xmm1, [eax + 16] - movdqa xmm2, [eax + 32] - movdqa xmm3, [eax + 48] - pavgb xmm0, [eax + esi] - pavgb xmm1, [eax + esi + 16] - pavgb xmm2, [eax + esi + 32] - pavgb xmm3, [eax + esi + 48] - lea eax, [eax + 64] - movdqa xmm4, xmm0 - shufps xmm0, xmm1, 0x88 - shufps xmm4, xmm1, 0xdd - pavgb xmm0, xmm4 - movdqa xmm4, xmm2 - shufps xmm2, xmm3, 0x88 - shufps xmm4, xmm3, 0xdd - pavgb xmm2, xmm4 - - // step 2 - convert to U and V - // from here down is very similar to Y code except - // instead of 16 different pixels, its 8 pixels of U and 8 of V - movdqa xmm1, xmm0 - movdqa xmm3, xmm2 - pmaddubsw xmm0, xmm7 // U - pmaddubsw xmm2, xmm7 - pmaddubsw xmm1, xmm6 // V - pmaddubsw xmm3, xmm6 - phaddw xmm0, xmm2 - phaddw xmm1, xmm3 - psraw xmm0, 8 - psraw xmm1, 8 - packsswb xmm0, xmm1 - paddb xmm0, xmm5 // -> unsigned - - // step 3 - store 8 U and 8 V values - sub ecx, 16 - movlps qword ptr [edx], xmm0 // U - movhps qword ptr [edx + edi], xmm0 // V - lea edx, [edx + 8] - jg convertloop - - pop edi - pop esi - ret - } -} - -__declspec(naked) __declspec(align(16)) -void ARGBToUVJRow_SSSE3(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width) { - __asm { - push esi - push edi - mov eax, [esp + 8 + 4] // src_argb - mov esi, [esp + 8 + 8] // src_stride_argb - mov edx, [esp + 8 + 12] // dst_u - mov edi, [esp + 8 + 16] // dst_v - mov ecx, [esp + 8 + 20] // pix - movdqa xmm7, kARGBToUJ - movdqa xmm6, kARGBToVJ - movdqa xmm5, kAddUVJ128 - sub edi, edx // stride from u 
to v - - align 4 - convertloop: - /* step 1 - subsample 16x2 argb pixels to 8x1 */ - movdqa xmm0, [eax] - movdqa xmm1, [eax + 16] - movdqa xmm2, [eax + 32] - movdqa xmm3, [eax + 48] - pavgb xmm0, [eax + esi] - pavgb xmm1, [eax + esi + 16] - pavgb xmm2, [eax + esi + 32] - pavgb xmm3, [eax + esi + 48] - lea eax, [eax + 64] - movdqa xmm4, xmm0 - shufps xmm0, xmm1, 0x88 - shufps xmm4, xmm1, 0xdd - pavgb xmm0, xmm4 - movdqa xmm4, xmm2 - shufps xmm2, xmm3, 0x88 - shufps xmm4, xmm3, 0xdd - pavgb xmm2, xmm4 - - // step 2 - convert to U and V - // from here down is very similar to Y code except - // instead of 16 different pixels, its 8 pixels of U and 8 of V - movdqa xmm1, xmm0 - movdqa xmm3, xmm2 - pmaddubsw xmm0, xmm7 // U - pmaddubsw xmm2, xmm7 - pmaddubsw xmm1, xmm6 // V - pmaddubsw xmm3, xmm6 - phaddw xmm0, xmm2 - phaddw xmm1, xmm3 - paddw xmm0, xmm5 // +.5 rounding -> unsigned - paddw xmm1, xmm5 - psraw xmm0, 8 - psraw xmm1, 8 - packsswb xmm0, xmm1 - - // step 3 - store 8 U and 8 V values - sub ecx, 16 - movlps qword ptr [edx], xmm0 // U - movhps qword ptr [edx + edi], xmm0 // V - lea edx, [edx + 8] - jg convertloop - - pop edi - pop esi - ret - } -} - -#ifdef HAS_ARGBTOUVROW_AVX2 -__declspec(naked) __declspec(align(32)) -void ARGBToUVRow_AVX2(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width) { - __asm { - push esi - push edi - mov eax, [esp + 8 + 4] // src_argb - mov esi, [esp + 8 + 8] // src_stride_argb - mov edx, [esp + 8 + 12] // dst_u - mov edi, [esp + 8 + 16] // dst_v - mov ecx, [esp + 8 + 20] // pix - vbroadcastf128 ymm5, kAddUV128 - vbroadcastf128 ymm6, kARGBToV - vbroadcastf128 ymm7, kARGBToU - sub edi, edx // stride from u to v - - align 4 - convertloop: - /* step 1 - subsample 32x2 argb pixels to 16x1 */ - vmovdqu ymm0, [eax] - vmovdqu ymm1, [eax + 32] - vmovdqu ymm2, [eax + 64] - vmovdqu ymm3, [eax + 96] - vpavgb ymm0, ymm0, [eax + esi] - vpavgb ymm1, ymm1, [eax + esi + 32] - vpavgb ymm2, ymm2, [eax + esi + 64] - vpavgb 
ymm3, ymm3, [eax + esi + 96] - lea eax, [eax + 128] - vshufps ymm4, ymm0, ymm1, 0x88 - vshufps ymm0, ymm0, ymm1, 0xdd - vpavgb ymm0, ymm0, ymm4 // mutated by vshufps - vshufps ymm4, ymm2, ymm3, 0x88 - vshufps ymm2, ymm2, ymm3, 0xdd - vpavgb ymm2, ymm2, ymm4 // mutated by vshufps - - // step 2 - convert to U and V - // from here down is very similar to Y code except - // instead of 32 different pixels, its 16 pixels of U and 16 of V - vpmaddubsw ymm1, ymm0, ymm7 // U - vpmaddubsw ymm3, ymm2, ymm7 - vpmaddubsw ymm0, ymm0, ymm6 // V - vpmaddubsw ymm2, ymm2, ymm6 - vphaddw ymm1, ymm1, ymm3 // mutates - vphaddw ymm0, ymm0, ymm2 - vpsraw ymm1, ymm1, 8 - vpsraw ymm0, ymm0, 8 - vpacksswb ymm0, ymm1, ymm0 // mutates - vpermq ymm0, ymm0, 0xd8 // For vpacksswb - vpshufb ymm0, ymm0, kShufARGBToUV_AVX // For vshufps + vphaddw - vpaddb ymm0, ymm0, ymm5 // -> unsigned - - // step 3 - store 16 U and 16 V values - sub ecx, 32 - vextractf128 [edx], ymm0, 0 // U - vextractf128 [edx + edi], ymm0, 1 // V - lea edx, [edx + 16] - jg convertloop - - pop edi - pop esi - vzeroupper - ret - } -} -#endif // HAS_ARGBTOUVROW_AVX2 - -__declspec(naked) __declspec(align(16)) -void ARGBToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width) { - __asm { - push esi - push edi - mov eax, [esp + 8 + 4] // src_argb - mov esi, [esp + 8 + 8] // src_stride_argb - mov edx, [esp + 8 + 12] // dst_u - mov edi, [esp + 8 + 16] // dst_v - mov ecx, [esp + 8 + 20] // pix - movdqa xmm7, kARGBToU - movdqa xmm6, kARGBToV - movdqa xmm5, kAddUV128 - sub edi, edx // stride from u to v - - align 4 - convertloop: - /* step 1 - subsample 16x2 argb pixels to 8x1 */ - movdqu xmm0, [eax] - movdqu xmm1, [eax + 16] - movdqu xmm2, [eax + 32] - movdqu xmm3, [eax + 48] - movdqu xmm4, [eax + esi] - pavgb xmm0, xmm4 - movdqu xmm4, [eax + esi + 16] - pavgb xmm1, xmm4 - movdqu xmm4, [eax + esi + 32] - pavgb xmm2, xmm4 - movdqu xmm4, [eax + esi + 48] - pavgb xmm3, xmm4 - lea eax, 
[eax + 64] - movdqa xmm4, xmm0 - shufps xmm0, xmm1, 0x88 - shufps xmm4, xmm1, 0xdd - pavgb xmm0, xmm4 - movdqa xmm4, xmm2 - shufps xmm2, xmm3, 0x88 - shufps xmm4, xmm3, 0xdd - pavgb xmm2, xmm4 - - // step 2 - convert to U and V - // from here down is very similar to Y code except - // instead of 16 different pixels, its 8 pixels of U and 8 of V - movdqa xmm1, xmm0 - movdqa xmm3, xmm2 - pmaddubsw xmm0, xmm7 // U - pmaddubsw xmm2, xmm7 - pmaddubsw xmm1, xmm6 // V - pmaddubsw xmm3, xmm6 - phaddw xmm0, xmm2 - phaddw xmm1, xmm3 - psraw xmm0, 8 - psraw xmm1, 8 - packsswb xmm0, xmm1 - paddb xmm0, xmm5 // -> unsigned - - // step 3 - store 8 U and 8 V values - sub ecx, 16 - movlps qword ptr [edx], xmm0 // U - movhps qword ptr [edx + edi], xmm0 // V - lea edx, [edx + 8] - jg convertloop - - pop edi - pop esi - ret - } -} - -__declspec(naked) __declspec(align(16)) -void ARGBToUVJRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width) { - __asm { - push esi - push edi - mov eax, [esp + 8 + 4] // src_argb - mov esi, [esp + 8 + 8] // src_stride_argb - mov edx, [esp + 8 + 12] // dst_u - mov edi, [esp + 8 + 16] // dst_v - mov ecx, [esp + 8 + 20] // pix - movdqa xmm7, kARGBToUJ - movdqa xmm6, kARGBToVJ - movdqa xmm5, kAddUVJ128 - sub edi, edx // stride from u to v - - align 4 - convertloop: - /* step 1 - subsample 16x2 argb pixels to 8x1 */ - movdqu xmm0, [eax] - movdqu xmm1, [eax + 16] - movdqu xmm2, [eax + 32] - movdqu xmm3, [eax + 48] - movdqu xmm4, [eax + esi] - pavgb xmm0, xmm4 - movdqu xmm4, [eax + esi + 16] - pavgb xmm1, xmm4 - movdqu xmm4, [eax + esi + 32] - pavgb xmm2, xmm4 - movdqu xmm4, [eax + esi + 48] - pavgb xmm3, xmm4 - lea eax, [eax + 64] - movdqa xmm4, xmm0 - shufps xmm0, xmm1, 0x88 - shufps xmm4, xmm1, 0xdd - pavgb xmm0, xmm4 - movdqa xmm4, xmm2 - shufps xmm2, xmm3, 0x88 - shufps xmm4, xmm3, 0xdd - pavgb xmm2, xmm4 - - // step 2 - convert to U and V - // from here down is very similar to Y code except - // instead 
of 16 different pixels, its 8 pixels of U and 8 of V - movdqa xmm1, xmm0 - movdqa xmm3, xmm2 - pmaddubsw xmm0, xmm7 // U - pmaddubsw xmm2, xmm7 - pmaddubsw xmm1, xmm6 // V - pmaddubsw xmm3, xmm6 - phaddw xmm0, xmm2 - phaddw xmm1, xmm3 - paddw xmm0, xmm5 // +.5 rounding -> unsigned - paddw xmm1, xmm5 - psraw xmm0, 8 - psraw xmm1, 8 - packsswb xmm0, xmm1 - - // step 3 - store 8 U and 8 V values - sub ecx, 16 - movlps qword ptr [edx], xmm0 // U - movhps qword ptr [edx + edi], xmm0 // V - lea edx, [edx + 8] - jg convertloop - - pop edi - pop esi - ret - } -} - -__declspec(naked) __declspec(align(16)) -void ARGBToUV444Row_SSSE3(const uint8* src_argb0, - uint8* dst_u, uint8* dst_v, int width) { - __asm { - push edi - mov eax, [esp + 4 + 4] // src_argb - mov edx, [esp + 4 + 8] // dst_u - mov edi, [esp + 4 + 12] // dst_v - mov ecx, [esp + 4 + 16] // pix - movdqa xmm7, kARGBToU - movdqa xmm6, kARGBToV - movdqa xmm5, kAddUV128 - sub edi, edx // stride from u to v - - align 4 - convertloop: - /* convert to U and V */ - movdqa xmm0, [eax] // U - movdqa xmm1, [eax + 16] - movdqa xmm2, [eax + 32] - movdqa xmm3, [eax + 48] - pmaddubsw xmm0, xmm7 - pmaddubsw xmm1, xmm7 - pmaddubsw xmm2, xmm7 - pmaddubsw xmm3, xmm7 - phaddw xmm0, xmm1 - phaddw xmm2, xmm3 - psraw xmm0, 8 - psraw xmm2, 8 - packsswb xmm0, xmm2 - paddb xmm0, xmm5 - sub ecx, 16 - movdqa [edx], xmm0 - - movdqa xmm0, [eax] // V - movdqa xmm1, [eax + 16] - movdqa xmm2, [eax + 32] - movdqa xmm3, [eax + 48] - pmaddubsw xmm0, xmm6 - pmaddubsw xmm1, xmm6 - pmaddubsw xmm2, xmm6 - pmaddubsw xmm3, xmm6 - phaddw xmm0, xmm1 - phaddw xmm2, xmm3 - psraw xmm0, 8 - psraw xmm2, 8 - packsswb xmm0, xmm2 - paddb xmm0, xmm5 - lea eax, [eax + 64] - movdqa [edx + edi], xmm0 - lea edx, [edx + 16] - jg convertloop - - pop edi - ret - } -} - -__declspec(naked) __declspec(align(16)) -void ARGBToUV444Row_Unaligned_SSSE3(const uint8* src_argb0, - uint8* dst_u, uint8* dst_v, int width) { - __asm { - push edi - mov eax, [esp + 4 + 4] // src_argb - 
mov edx, [esp + 4 + 8] // dst_u - mov edi, [esp + 4 + 12] // dst_v - mov ecx, [esp + 4 + 16] // pix - movdqa xmm7, kARGBToU - movdqa xmm6, kARGBToV - movdqa xmm5, kAddUV128 - sub edi, edx // stride from u to v - - align 4 - convertloop: - /* convert to U and V */ - movdqu xmm0, [eax] // U - movdqu xmm1, [eax + 16] - movdqu xmm2, [eax + 32] - movdqu xmm3, [eax + 48] - pmaddubsw xmm0, xmm7 - pmaddubsw xmm1, xmm7 - pmaddubsw xmm2, xmm7 - pmaddubsw xmm3, xmm7 - phaddw xmm0, xmm1 - phaddw xmm2, xmm3 - psraw xmm0, 8 - psraw xmm2, 8 - packsswb xmm0, xmm2 - paddb xmm0, xmm5 - sub ecx, 16 - movdqu [edx], xmm0 - - movdqu xmm0, [eax] // V - movdqu xmm1, [eax + 16] - movdqu xmm2, [eax + 32] - movdqu xmm3, [eax + 48] - pmaddubsw xmm0, xmm6 - pmaddubsw xmm1, xmm6 - pmaddubsw xmm2, xmm6 - pmaddubsw xmm3, xmm6 - phaddw xmm0, xmm1 - phaddw xmm2, xmm3 - psraw xmm0, 8 - psraw xmm2, 8 - packsswb xmm0, xmm2 - paddb xmm0, xmm5 - lea eax, [eax + 64] - movdqu [edx + edi], xmm0 - lea edx, [edx + 16] - jg convertloop - - pop edi - ret - } -} - -__declspec(naked) __declspec(align(16)) -void ARGBToUV422Row_SSSE3(const uint8* src_argb0, - uint8* dst_u, uint8* dst_v, int width) { - __asm { - push edi - mov eax, [esp + 4 + 4] // src_argb - mov edx, [esp + 4 + 8] // dst_u - mov edi, [esp + 4 + 12] // dst_v - mov ecx, [esp + 4 + 16] // pix - movdqa xmm7, kARGBToU - movdqa xmm6, kARGBToV - movdqa xmm5, kAddUV128 - sub edi, edx // stride from u to v - - align 4 - convertloop: - /* step 1 - subsample 16x2 argb pixels to 8x1 */ - movdqa xmm0, [eax] - movdqa xmm1, [eax + 16] - movdqa xmm2, [eax + 32] - movdqa xmm3, [eax + 48] - lea eax, [eax + 64] - movdqa xmm4, xmm0 - shufps xmm0, xmm1, 0x88 - shufps xmm4, xmm1, 0xdd - pavgb xmm0, xmm4 - movdqa xmm4, xmm2 - shufps xmm2, xmm3, 0x88 - shufps xmm4, xmm3, 0xdd - pavgb xmm2, xmm4 - - // step 2 - convert to U and V - // from here down is very similar to Y code except - // instead of 16 different pixels, its 8 pixels of U and 8 of V - movdqa xmm1, xmm0 - 
movdqa xmm3, xmm2 - pmaddubsw xmm0, xmm7 // U - pmaddubsw xmm2, xmm7 - pmaddubsw xmm1, xmm6 // V - pmaddubsw xmm3, xmm6 - phaddw xmm0, xmm2 - phaddw xmm1, xmm3 - psraw xmm0, 8 - psraw xmm1, 8 - packsswb xmm0, xmm1 - paddb xmm0, xmm5 // -> unsigned - - // step 3 - store 8 U and 8 V values - sub ecx, 16 - movlps qword ptr [edx], xmm0 // U - movhps qword ptr [edx + edi], xmm0 // V - lea edx, [edx + 8] - jg convertloop - - pop edi - ret - } -} - -__declspec(naked) __declspec(align(16)) -void ARGBToUV422Row_Unaligned_SSSE3(const uint8* src_argb0, - uint8* dst_u, uint8* dst_v, int width) { - __asm { - push edi - mov eax, [esp + 4 + 4] // src_argb - mov edx, [esp + 4 + 8] // dst_u - mov edi, [esp + 4 + 12] // dst_v - mov ecx, [esp + 4 + 16] // pix - movdqa xmm7, kARGBToU - movdqa xmm6, kARGBToV - movdqa xmm5, kAddUV128 - sub edi, edx // stride from u to v - - align 4 - convertloop: - /* step 1 - subsample 16x2 argb pixels to 8x1 */ - movdqu xmm0, [eax] - movdqu xmm1, [eax + 16] - movdqu xmm2, [eax + 32] - movdqu xmm3, [eax + 48] - lea eax, [eax + 64] - movdqa xmm4, xmm0 - shufps xmm0, xmm1, 0x88 - shufps xmm4, xmm1, 0xdd - pavgb xmm0, xmm4 - movdqa xmm4, xmm2 - shufps xmm2, xmm3, 0x88 - shufps xmm4, xmm3, 0xdd - pavgb xmm2, xmm4 - - // step 2 - convert to U and V - // from here down is very similar to Y code except - // instead of 16 different pixels, its 8 pixels of U and 8 of V - movdqa xmm1, xmm0 - movdqa xmm3, xmm2 - pmaddubsw xmm0, xmm7 // U - pmaddubsw xmm2, xmm7 - pmaddubsw xmm1, xmm6 // V - pmaddubsw xmm3, xmm6 - phaddw xmm0, xmm2 - phaddw xmm1, xmm3 - psraw xmm0, 8 - psraw xmm1, 8 - packsswb xmm0, xmm1 - paddb xmm0, xmm5 // -> unsigned - - // step 3 - store 8 U and 8 V values - sub ecx, 16 - movlps qword ptr [edx], xmm0 // U - movhps qword ptr [edx + edi], xmm0 // V - lea edx, [edx + 8] - jg convertloop - - pop edi - ret - } -} - -__declspec(naked) __declspec(align(16)) -void BGRAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* 
dst_v, int width) { - __asm { - push esi - push edi - mov eax, [esp + 8 + 4] // src_argb - mov esi, [esp + 8 + 8] // src_stride_argb - mov edx, [esp + 8 + 12] // dst_u - mov edi, [esp + 8 + 16] // dst_v - mov ecx, [esp + 8 + 20] // pix - movdqa xmm7, kBGRAToU - movdqa xmm6, kBGRAToV - movdqa xmm5, kAddUV128 - sub edi, edx // stride from u to v - - align 4 - convertloop: - /* step 1 - subsample 16x2 argb pixels to 8x1 */ - movdqa xmm0, [eax] - movdqa xmm1, [eax + 16] - movdqa xmm2, [eax + 32] - movdqa xmm3, [eax + 48] - pavgb xmm0, [eax + esi] - pavgb xmm1, [eax + esi + 16] - pavgb xmm2, [eax + esi + 32] - pavgb xmm3, [eax + esi + 48] - lea eax, [eax + 64] - movdqa xmm4, xmm0 - shufps xmm0, xmm1, 0x88 - shufps xmm4, xmm1, 0xdd - pavgb xmm0, xmm4 - movdqa xmm4, xmm2 - shufps xmm2, xmm3, 0x88 - shufps xmm4, xmm3, 0xdd - pavgb xmm2, xmm4 - - // step 2 - convert to U and V - // from here down is very similar to Y code except - // instead of 16 different pixels, its 8 pixels of U and 8 of V - movdqa xmm1, xmm0 - movdqa xmm3, xmm2 - pmaddubsw xmm0, xmm7 // U - pmaddubsw xmm2, xmm7 - pmaddubsw xmm1, xmm6 // V - pmaddubsw xmm3, xmm6 - phaddw xmm0, xmm2 - phaddw xmm1, xmm3 - psraw xmm0, 8 - psraw xmm1, 8 - packsswb xmm0, xmm1 - paddb xmm0, xmm5 // -> unsigned - - // step 3 - store 8 U and 8 V values - sub ecx, 16 - movlps qword ptr [edx], xmm0 // U - movhps qword ptr [edx + edi], xmm0 // V - lea edx, [edx + 8] - jg convertloop - - pop edi - pop esi - ret - } -} - -__declspec(naked) __declspec(align(16)) -void BGRAToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width) { - __asm { - push esi - push edi - mov eax, [esp + 8 + 4] // src_argb - mov esi, [esp + 8 + 8] // src_stride_argb - mov edx, [esp + 8 + 12] // dst_u - mov edi, [esp + 8 + 16] // dst_v - mov ecx, [esp + 8 + 20] // pix - movdqa xmm7, kBGRAToU - movdqa xmm6, kBGRAToV - movdqa xmm5, kAddUV128 - sub edi, edx // stride from u to v - - align 4 - convertloop: - /* 
step 1 - subsample 16x2 argb pixels to 8x1 */ - movdqu xmm0, [eax] - movdqu xmm1, [eax + 16] - movdqu xmm2, [eax + 32] - movdqu xmm3, [eax + 48] - movdqu xmm4, [eax + esi] - pavgb xmm0, xmm4 - movdqu xmm4, [eax + esi + 16] - pavgb xmm1, xmm4 - movdqu xmm4, [eax + esi + 32] - pavgb xmm2, xmm4 - movdqu xmm4, [eax + esi + 48] - pavgb xmm3, xmm4 - lea eax, [eax + 64] - movdqa xmm4, xmm0 - shufps xmm0, xmm1, 0x88 - shufps xmm4, xmm1, 0xdd - pavgb xmm0, xmm4 - movdqa xmm4, xmm2 - shufps xmm2, xmm3, 0x88 - shufps xmm4, xmm3, 0xdd - pavgb xmm2, xmm4 - - // step 2 - convert to U and V - // from here down is very similar to Y code except - // instead of 16 different pixels, its 8 pixels of U and 8 of V - movdqa xmm1, xmm0 - movdqa xmm3, xmm2 - pmaddubsw xmm0, xmm7 // U - pmaddubsw xmm2, xmm7 - pmaddubsw xmm1, xmm6 // V - pmaddubsw xmm3, xmm6 - phaddw xmm0, xmm2 - phaddw xmm1, xmm3 - psraw xmm0, 8 - psraw xmm1, 8 - packsswb xmm0, xmm1 - paddb xmm0, xmm5 // -> unsigned - - // step 3 - store 8 U and 8 V values - sub ecx, 16 - movlps qword ptr [edx], xmm0 // U - movhps qword ptr [edx + edi], xmm0 // V - lea edx, [edx + 8] - jg convertloop - - pop edi - pop esi - ret - } -} - -__declspec(naked) __declspec(align(16)) -void ABGRToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width) { - __asm { - push esi - push edi - mov eax, [esp + 8 + 4] // src_argb - mov esi, [esp + 8 + 8] // src_stride_argb - mov edx, [esp + 8 + 12] // dst_u - mov edi, [esp + 8 + 16] // dst_v - mov ecx, [esp + 8 + 20] // pix - movdqa xmm7, kABGRToU - movdqa xmm6, kABGRToV - movdqa xmm5, kAddUV128 - sub edi, edx // stride from u to v - - align 4 - convertloop: - /* step 1 - subsample 16x2 argb pixels to 8x1 */ - movdqa xmm0, [eax] - movdqa xmm1, [eax + 16] - movdqa xmm2, [eax + 32] - movdqa xmm3, [eax + 48] - pavgb xmm0, [eax + esi] - pavgb xmm1, [eax + esi + 16] - pavgb xmm2, [eax + esi + 32] - pavgb xmm3, [eax + esi + 48] - lea eax, [eax + 64] - movdqa xmm4, xmm0 - 
shufps xmm0, xmm1, 0x88 - shufps xmm4, xmm1, 0xdd - pavgb xmm0, xmm4 - movdqa xmm4, xmm2 - shufps xmm2, xmm3, 0x88 - shufps xmm4, xmm3, 0xdd - pavgb xmm2, xmm4 - - // step 2 - convert to U and V - // from here down is very similar to Y code except - // instead of 16 different pixels, its 8 pixels of U and 8 of V - movdqa xmm1, xmm0 - movdqa xmm3, xmm2 - pmaddubsw xmm0, xmm7 // U - pmaddubsw xmm2, xmm7 - pmaddubsw xmm1, xmm6 // V - pmaddubsw xmm3, xmm6 - phaddw xmm0, xmm2 - phaddw xmm1, xmm3 - psraw xmm0, 8 - psraw xmm1, 8 - packsswb xmm0, xmm1 - paddb xmm0, xmm5 // -> unsigned - - // step 3 - store 8 U and 8 V values - sub ecx, 16 - movlps qword ptr [edx], xmm0 // U - movhps qword ptr [edx + edi], xmm0 // V - lea edx, [edx + 8] - jg convertloop - - pop edi - pop esi - ret - } -} - -__declspec(naked) __declspec(align(16)) -void ABGRToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width) { - __asm { - push esi - push edi - mov eax, [esp + 8 + 4] // src_argb - mov esi, [esp + 8 + 8] // src_stride_argb - mov edx, [esp + 8 + 12] // dst_u - mov edi, [esp + 8 + 16] // dst_v - mov ecx, [esp + 8 + 20] // pix - movdqa xmm7, kABGRToU - movdqa xmm6, kABGRToV - movdqa xmm5, kAddUV128 - sub edi, edx // stride from u to v - - align 4 - convertloop: - /* step 1 - subsample 16x2 argb pixels to 8x1 */ - movdqu xmm0, [eax] - movdqu xmm1, [eax + 16] - movdqu xmm2, [eax + 32] - movdqu xmm3, [eax + 48] - movdqu xmm4, [eax + esi] - pavgb xmm0, xmm4 - movdqu xmm4, [eax + esi + 16] - pavgb xmm1, xmm4 - movdqu xmm4, [eax + esi + 32] - pavgb xmm2, xmm4 - movdqu xmm4, [eax + esi + 48] - pavgb xmm3, xmm4 - lea eax, [eax + 64] - movdqa xmm4, xmm0 - shufps xmm0, xmm1, 0x88 - shufps xmm4, xmm1, 0xdd - pavgb xmm0, xmm4 - movdqa xmm4, xmm2 - shufps xmm2, xmm3, 0x88 - shufps xmm4, xmm3, 0xdd - pavgb xmm2, xmm4 - - // step 2 - convert to U and V - // from here down is very similar to Y code except - // instead of 16 different pixels, its 8 pixels 
of U and 8 of V - movdqa xmm1, xmm0 - movdqa xmm3, xmm2 - pmaddubsw xmm0, xmm7 // U - pmaddubsw xmm2, xmm7 - pmaddubsw xmm1, xmm6 // V - pmaddubsw xmm3, xmm6 - phaddw xmm0, xmm2 - phaddw xmm1, xmm3 - psraw xmm0, 8 - psraw xmm1, 8 - packsswb xmm0, xmm1 - paddb xmm0, xmm5 // -> unsigned - - // step 3 - store 8 U and 8 V values - sub ecx, 16 - movlps qword ptr [edx], xmm0 // U - movhps qword ptr [edx + edi], xmm0 // V - lea edx, [edx + 8] - jg convertloop - - pop edi - pop esi - ret - } -} - -__declspec(naked) __declspec(align(16)) -void RGBAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width) { - __asm { - push esi - push edi - mov eax, [esp + 8 + 4] // src_argb - mov esi, [esp + 8 + 8] // src_stride_argb - mov edx, [esp + 8 + 12] // dst_u - mov edi, [esp + 8 + 16] // dst_v - mov ecx, [esp + 8 + 20] // pix - movdqa xmm7, kRGBAToU - movdqa xmm6, kRGBAToV - movdqa xmm5, kAddUV128 - sub edi, edx // stride from u to v - - align 4 - convertloop: - /* step 1 - subsample 16x2 argb pixels to 8x1 */ - movdqa xmm0, [eax] - movdqa xmm1, [eax + 16] - movdqa xmm2, [eax + 32] - movdqa xmm3, [eax + 48] - pavgb xmm0, [eax + esi] - pavgb xmm1, [eax + esi + 16] - pavgb xmm2, [eax + esi + 32] - pavgb xmm3, [eax + esi + 48] - lea eax, [eax + 64] - movdqa xmm4, xmm0 - shufps xmm0, xmm1, 0x88 - shufps xmm4, xmm1, 0xdd - pavgb xmm0, xmm4 - movdqa xmm4, xmm2 - shufps xmm2, xmm3, 0x88 - shufps xmm4, xmm3, 0xdd - pavgb xmm2, xmm4 - - // step 2 - convert to U and V - // from here down is very similar to Y code except - // instead of 16 different pixels, its 8 pixels of U and 8 of V - movdqa xmm1, xmm0 - movdqa xmm3, xmm2 - pmaddubsw xmm0, xmm7 // U - pmaddubsw xmm2, xmm7 - pmaddubsw xmm1, xmm6 // V - pmaddubsw xmm3, xmm6 - phaddw xmm0, xmm2 - phaddw xmm1, xmm3 - psraw xmm0, 8 - psraw xmm1, 8 - packsswb xmm0, xmm1 - paddb xmm0, xmm5 // -> unsigned - - // step 3 - store 8 U and 8 V values - sub ecx, 16 - movlps qword ptr [edx], xmm0 // U - movhps 
qword ptr [edx + edi], xmm0 // V - lea edx, [edx + 8] - jg convertloop - - pop edi - pop esi - ret - } -} - -__declspec(naked) __declspec(align(16)) -void RGBAToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb, - uint8* dst_u, uint8* dst_v, int width) { - __asm { - push esi - push edi - mov eax, [esp + 8 + 4] // src_argb - mov esi, [esp + 8 + 8] // src_stride_argb - mov edx, [esp + 8 + 12] // dst_u - mov edi, [esp + 8 + 16] // dst_v - mov ecx, [esp + 8 + 20] // pix - movdqa xmm7, kRGBAToU - movdqa xmm6, kRGBAToV - movdqa xmm5, kAddUV128 - sub edi, edx // stride from u to v - - align 4 - convertloop: - /* step 1 - subsample 16x2 argb pixels to 8x1 */ - movdqu xmm0, [eax] - movdqu xmm1, [eax + 16] - movdqu xmm2, [eax + 32] - movdqu xmm3, [eax + 48] - movdqu xmm4, [eax + esi] - pavgb xmm0, xmm4 - movdqu xmm4, [eax + esi + 16] - pavgb xmm1, xmm4 - movdqu xmm4, [eax + esi + 32] - pavgb xmm2, xmm4 - movdqu xmm4, [eax + esi + 48] - pavgb xmm3, xmm4 - lea eax, [eax + 64] - movdqa xmm4, xmm0 - shufps xmm0, xmm1, 0x88 - shufps xmm4, xmm1, 0xdd - pavgb xmm0, xmm4 - movdqa xmm4, xmm2 - shufps xmm2, xmm3, 0x88 - shufps xmm4, xmm3, 0xdd - pavgb xmm2, xmm4 - - // step 2 - convert to U and V - // from here down is very similar to Y code except - // instead of 16 different pixels, its 8 pixels of U and 8 of V - movdqa xmm1, xmm0 - movdqa xmm3, xmm2 - pmaddubsw xmm0, xmm7 // U - pmaddubsw xmm2, xmm7 - pmaddubsw xmm1, xmm6 // V - pmaddubsw xmm3, xmm6 - phaddw xmm0, xmm2 - phaddw xmm1, xmm3 - psraw xmm0, 8 - psraw xmm1, 8 - packsswb xmm0, xmm1 - paddb xmm0, xmm5 // -> unsigned - - // step 3 - store 8 U and 8 V values - sub ecx, 16 - movlps qword ptr [edx], xmm0 // U - movhps qword ptr [edx + edi], xmm0 // V - lea edx, [edx + 8] - jg convertloop - - pop edi - pop esi - ret - } -} -#endif // HAS_ARGBTOYROW_SSSE3 - -#define YG 74 /* (int8)(1.164 * 64 + 0.5) */ - -#define UB 127 /* min(63,(int8)(2.018 * 64)) */ -#define UG -25 /* (int8)(-0.391 * 64 - 0.5) */ -#define UR 0 
- -#define VB 0 -#define VG -52 /* (int8)(-0.813 * 64 - 0.5) */ -#define VR 102 /* (int8)(1.596 * 64 + 0.5) */ - -// Bias -#define BB UB * 128 + VB * 128 -#define BG UG * 128 + VG * 128 -#define BR UR * 128 + VR * 128 - -#ifdef HAS_I422TOARGBROW_AVX2 - -static const lvec8 kUVToB_AVX = { - UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, - UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB -}; -static const lvec8 kUVToR_AVX = { - UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, - UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR -}; -static const lvec8 kUVToG_AVX = { - UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, - UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG -}; -static const lvec16 kYToRgb_AVX = { - YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG -}; -static const lvec16 kYSub16_AVX = { - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16 -}; -static const lvec16 kUVBiasB_AVX = { - BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB -}; -static const lvec16 kUVBiasG_AVX = { - BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG -}; -static const lvec16 kUVBiasR_AVX = { - BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR -}; - -// 16 pixels -// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). 
-__declspec(naked) __declspec(align(16)) -void I422ToARGBRow_AVX2(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_argb, - int width) { - __asm { - push esi - push edi - mov eax, [esp + 8 + 4] // Y - mov esi, [esp + 8 + 8] // U - mov edi, [esp + 8 + 12] // V - mov edx, [esp + 8 + 16] // argb - mov ecx, [esp + 8 + 20] // width - sub edi, esi - vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha - vpxor ymm4, ymm4, ymm4 - - align 4 - convertloop: - vmovq xmm0, qword ptr [esi] // U - vmovq xmm1, qword ptr [esi + edi] // V - lea esi, [esi + 8] - vpunpcklbw ymm0, ymm0, ymm1 // UV - vpermq ymm0, ymm0, 0xd8 - vpunpcklwd ymm0, ymm0, ymm0 // UVUV - vpmaddubsw ymm2, ymm0, kUVToB_AVX // scale B UV - vpmaddubsw ymm1, ymm0, kUVToG_AVX // scale G UV - vpmaddubsw ymm0, ymm0, kUVToR_AVX // scale R UV - vpsubw ymm2, ymm2, kUVBiasB_AVX // unbias back to signed - vpsubw ymm1, ymm1, kUVBiasG_AVX - vpsubw ymm0, ymm0, kUVBiasR_AVX - - // Step 2: Find Y contribution to 16 R,G,B values - vmovdqu xmm3, [eax] // NOLINT - lea eax, [eax + 16] - vpermq ymm3, ymm3, 0xd8 - vpunpcklbw ymm3, ymm3, ymm4 - vpsubsw ymm3, ymm3, kYSub16_AVX - vpmullw ymm3, ymm3, kYToRgb_AVX - vpaddsw ymm2, ymm2, ymm3 // B += Y - vpaddsw ymm1, ymm1, ymm3 // G += Y - vpaddsw ymm0, ymm0, ymm3 // R += Y - vpsraw ymm2, ymm2, 6 - vpsraw ymm1, ymm1, 6 - vpsraw ymm0, ymm0, 6 - vpackuswb ymm2, ymm2, ymm2 // B - vpackuswb ymm1, ymm1, ymm1 // G - vpackuswb ymm0, ymm0, ymm0 // R - - // Step 3: Weave into ARGB - vpunpcklbw ymm2, ymm2, ymm1 // BG - vpermq ymm2, ymm2, 0xd8 - vpunpcklbw ymm0, ymm0, ymm5 // RA - vpermq ymm0, ymm0, 0xd8 - vpunpcklwd ymm1, ymm2, ymm0 // BGRA first 8 pixels - vpunpckhwd ymm2, ymm2, ymm0 // BGRA next 8 pixels - vmovdqu [edx], ymm1 - vmovdqu [edx + 32], ymm2 - lea edx, [edx + 64] - sub ecx, 16 - jg convertloop - vzeroupper - - pop edi - pop esi - ret - } -} -#endif // HAS_I422TOARGBROW_AVX2 - -#ifdef HAS_I422TOARGBROW_SSSE3 - -static const vec8 kUVToB = { - UB, VB, 
UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB -}; - -static const vec8 kUVToR = { - UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR -}; - -static const vec8 kUVToG = { - UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG -}; - -static const vec8 kVUToB = { - VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, -}; - -static const vec8 kVUToR = { - VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, -}; - -static const vec8 kVUToG = { - VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, -}; - -static const vec16 kYToRgb = { YG, YG, YG, YG, YG, YG, YG, YG }; -static const vec16 kYSub16 = { 16, 16, 16, 16, 16, 16, 16, 16 }; -static const vec16 kUVBiasB = { BB, BB, BB, BB, BB, BB, BB, BB }; -static const vec16 kUVBiasG = { BG, BG, BG, BG, BG, BG, BG, BG }; -static const vec16 kUVBiasR = { BR, BR, BR, BR, BR, BR, BR, BR }; - -// TODO(fbarchard): Read that does half size on Y and treats 420 as 444. - -// Read 8 UV from 444. -#define READYUV444 __asm { \ - __asm movq xmm0, qword ptr [esi] /* U */ /* NOLINT */ \ - __asm movq xmm1, qword ptr [esi + edi] /* V */ /* NOLINT */ \ - __asm lea esi, [esi + 8] \ - __asm punpcklbw xmm0, xmm1 /* UV */ \ - } - -// Read 4 UV from 422, upsample to 8 UV. -#define READYUV422 __asm { \ - __asm movd xmm0, [esi] /* U */ \ - __asm movd xmm1, [esi + edi] /* V */ \ - __asm lea esi, [esi + 4] \ - __asm punpcklbw xmm0, xmm1 /* UV */ \ - __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \ - } - -// Read 2 UV from 411, upsample to 8 UV. -#define READYUV411 __asm { \ - __asm movzx ebx, word ptr [esi] /* U */ /* NOLINT */ \ - __asm movd xmm0, ebx \ - __asm movzx ebx, word ptr [esi + edi] /* V */ /* NOLINT */ \ - __asm movd xmm1, ebx \ - __asm lea esi, [esi + 2] \ - __asm punpcklbw xmm0, xmm1 /* UV */ \ - __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \ - __asm punpckldq xmm0, xmm0 /* UVUV (upsample) */ \ - } - -// Read 4 UV from NV12, upsample to 8 UV. 
-#define READNV12 __asm { \ - __asm movq xmm0, qword ptr [esi] /* UV */ /* NOLINT */ \ - __asm lea esi, [esi + 8] \ - __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \ - } - -// Convert 8 pixels: 8 UV and 8 Y. -#define YUVTORGB __asm { \ - /* Step 1: Find 4 UV contributions to 8 R,G,B values */ \ - __asm movdqa xmm1, xmm0 \ - __asm movdqa xmm2, xmm0 \ - __asm pmaddubsw xmm0, kUVToB /* scale B UV */ \ - __asm pmaddubsw xmm1, kUVToG /* scale G UV */ \ - __asm pmaddubsw xmm2, kUVToR /* scale R UV */ \ - __asm psubw xmm0, kUVBiasB /* unbias back to signed */ \ - __asm psubw xmm1, kUVBiasG \ - __asm psubw xmm2, kUVBiasR \ - /* Step 2: Find Y contribution to 8 R,G,B values */ \ - __asm movq xmm3, qword ptr [eax] /* NOLINT */ \ - __asm lea eax, [eax + 8] \ - __asm punpcklbw xmm3, xmm4 \ - __asm psubsw xmm3, kYSub16 \ - __asm pmullw xmm3, kYToRgb \ - __asm paddsw xmm0, xmm3 /* B += Y */ \ - __asm paddsw xmm1, xmm3 /* G += Y */ \ - __asm paddsw xmm2, xmm3 /* R += Y */ \ - __asm psraw xmm0, 6 \ - __asm psraw xmm1, 6 \ - __asm psraw xmm2, 6 \ - __asm packuswb xmm0, xmm0 /* B */ \ - __asm packuswb xmm1, xmm1 /* G */ \ - __asm packuswb xmm2, xmm2 /* R */ \ - } - -// Convert 8 pixels: 8 VU and 8 Y. 
-#define YVUTORGB __asm { \ - /* Step 1: Find 4 UV contributions to 8 R,G,B values */ \ - __asm movdqa xmm1, xmm0 \ - __asm movdqa xmm2, xmm0 \ - __asm pmaddubsw xmm0, kVUToB /* scale B UV */ \ - __asm pmaddubsw xmm1, kVUToG /* scale G UV */ \ - __asm pmaddubsw xmm2, kVUToR /* scale R UV */ \ - __asm psubw xmm0, kUVBiasB /* unbias back to signed */ \ - __asm psubw xmm1, kUVBiasG \ - __asm psubw xmm2, kUVBiasR \ - /* Step 2: Find Y contribution to 8 R,G,B values */ \ - __asm movq xmm3, qword ptr [eax] /* NOLINT */ \ - __asm lea eax, [eax + 8] \ - __asm punpcklbw xmm3, xmm4 \ - __asm psubsw xmm3, kYSub16 \ - __asm pmullw xmm3, kYToRgb \ - __asm paddsw xmm0, xmm3 /* B += Y */ \ - __asm paddsw xmm1, xmm3 /* G += Y */ \ - __asm paddsw xmm2, xmm3 /* R += Y */ \ - __asm psraw xmm0, 6 \ - __asm psraw xmm1, 6 \ - __asm psraw xmm2, 6 \ - __asm packuswb xmm0, xmm0 /* B */ \ - __asm packuswb xmm1, xmm1 /* G */ \ - __asm packuswb xmm2, xmm2 /* R */ \ - } - -// 8 pixels, dest aligned 16. -// 8 UV values, mixed with 8 Y producing 8 ARGB (32 bytes). -__declspec(naked) __declspec(align(16)) -void I444ToARGBRow_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_argb, - int width) { - __asm { - push esi - push edi - mov eax, [esp + 8 + 4] // Y - mov esi, [esp + 8 + 8] // U - mov edi, [esp + 8 + 12] // V - mov edx, [esp + 8 + 16] // argb - mov ecx, [esp + 8 + 20] // width - sub edi, esi - pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha - pxor xmm4, xmm4 - - align 4 - convertloop: - READYUV444 - YUVTORGB - - // Step 3: Weave into ARGB - punpcklbw xmm0, xmm1 // BG - punpcklbw xmm2, xmm5 // RA - movdqa xmm1, xmm0 - punpcklwd xmm0, xmm2 // BGRA first 4 pixels - punpckhwd xmm1, xmm2 // BGRA next 4 pixels - movdqa [edx], xmm0 - movdqa [edx + 16], xmm1 - lea edx, [edx + 32] - sub ecx, 8 - jg convertloop - - pop edi - pop esi - ret - } -} - -// 8 pixels, dest aligned 16. -// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). 
-__declspec(naked) __declspec(align(16)) -void I422ToRGB24Row_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_rgb24, - int width) { - __asm { - push esi - push edi - mov eax, [esp + 8 + 4] // Y - mov esi, [esp + 8 + 8] // U - mov edi, [esp + 8 + 12] // V - mov edx, [esp + 8 + 16] // rgb24 - mov ecx, [esp + 8 + 20] // width - sub edi, esi - pxor xmm4, xmm4 - movdqa xmm5, kShuffleMaskARGBToRGB24_0 - movdqa xmm6, kShuffleMaskARGBToRGB24 - - align 4 - convertloop: - READYUV422 - YUVTORGB - - // Step 3: Weave into RRGB - punpcklbw xmm0, xmm1 // BG - punpcklbw xmm2, xmm2 // RR - movdqa xmm1, xmm0 - punpcklwd xmm0, xmm2 // BGRR first 4 pixels - punpckhwd xmm1, xmm2 // BGRR next 4 pixels - pshufb xmm0, xmm5 // Pack into first 8 and last 4 bytes. - pshufb xmm1, xmm6 // Pack into first 12 bytes. - palignr xmm1, xmm0, 12 // last 4 bytes of xmm0 + 12 from xmm1 - movq qword ptr [edx], xmm0 // First 8 bytes - movdqu [edx + 8], xmm1 // Last 16 bytes. = 24 bytes, 8 RGB pixels. - lea edx, [edx + 24] - sub ecx, 8 - jg convertloop - - pop edi - pop esi - ret - } -} - -// 8 pixels, dest aligned 16. -// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). -__declspec(naked) __declspec(align(16)) -void I422ToRAWRow_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_raw, - int width) { - __asm { - push esi - push edi - mov eax, [esp + 8 + 4] // Y - mov esi, [esp + 8 + 8] // U - mov edi, [esp + 8 + 12] // V - mov edx, [esp + 8 + 16] // raw - mov ecx, [esp + 8 + 20] // width - sub edi, esi - pxor xmm4, xmm4 - movdqa xmm5, kShuffleMaskARGBToRAW_0 - movdqa xmm6, kShuffleMaskARGBToRAW - - align 4 - convertloop: - READYUV422 - YUVTORGB - - // Step 3: Weave into RRGB - punpcklbw xmm0, xmm1 // BG - punpcklbw xmm2, xmm2 // RR - movdqa xmm1, xmm0 - punpcklwd xmm0, xmm2 // BGRR first 4 pixels - punpckhwd xmm1, xmm2 // BGRR next 4 pixels - pshufb xmm0, xmm5 // Pack into first 8 and last 4 bytes. 
- pshufb xmm1, xmm6 // Pack into first 12 bytes. - palignr xmm1, xmm0, 12 // last 4 bytes of xmm0 + 12 from xmm1 - movq qword ptr [edx], xmm0 // First 8 bytes - movdqu [edx + 8], xmm1 // Last 16 bytes. = 24 bytes, 8 RGB pixels. - lea edx, [edx + 24] - sub ecx, 8 - jg convertloop - - pop edi - pop esi - ret - } -} - -// 8 pixels, dest unaligned. -// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). -__declspec(naked) __declspec(align(16)) -void I422ToRGB565Row_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb565_buf, - int width) { - __asm { - push esi - push edi - mov eax, [esp + 8 + 4] // Y - mov esi, [esp + 8 + 8] // U - mov edi, [esp + 8 + 12] // V - mov edx, [esp + 8 + 16] // rgb565 - mov ecx, [esp + 8 + 20] // width - sub edi, esi - pxor xmm4, xmm4 - pcmpeqb xmm5, xmm5 // generate mask 0x0000001f - psrld xmm5, 27 - pcmpeqb xmm6, xmm6 // generate mask 0x000007e0 - psrld xmm6, 26 - pslld xmm6, 5 - pcmpeqb xmm7, xmm7 // generate mask 0xfffff800 - pslld xmm7, 11 - - align 4 - convertloop: - READYUV422 - YUVTORGB - - // Step 3: Weave into RRGB - punpcklbw xmm0, xmm1 // BG - punpcklbw xmm2, xmm2 // RR - movdqa xmm1, xmm0 - punpcklwd xmm0, xmm2 // BGRR first 4 pixels - punpckhwd xmm1, xmm2 // BGRR next 4 pixels - - // Step 3b: RRGB -> RGB565 - movdqa xmm3, xmm0 // B first 4 pixels of argb - movdqa xmm2, xmm0 // G - pslld xmm0, 8 // R - psrld xmm3, 3 // B - psrld xmm2, 5 // G - psrad xmm0, 16 // R - pand xmm3, xmm5 // B - pand xmm2, xmm6 // G - pand xmm0, xmm7 // R - por xmm3, xmm2 // BG - por xmm0, xmm3 // BGR - movdqa xmm3, xmm1 // B next 4 pixels of argb - movdqa xmm2, xmm1 // G - pslld xmm1, 8 // R - psrld xmm3, 3 // B - psrld xmm2, 5 // G - psrad xmm1, 16 // R - pand xmm3, xmm5 // B - pand xmm2, xmm6 // G - pand xmm1, xmm7 // R - por xmm3, xmm2 // BG - por xmm1, xmm3 // BGR - packssdw xmm0, xmm1 - sub ecx, 8 - movdqu [edx], xmm0 // store 8 pixels of RGB565 - lea edx, [edx + 16] - jg convertloop - - pop edi 
- pop esi - ret - } -} - -// 8 pixels, dest aligned 16. -// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). -__declspec(naked) __declspec(align(16)) -void I422ToARGBRow_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_argb, - int width) { - __asm { - push esi - push edi - mov eax, [esp + 8 + 4] // Y - mov esi, [esp + 8 + 8] // U - mov edi, [esp + 8 + 12] // V - mov edx, [esp + 8 + 16] // argb - mov ecx, [esp + 8 + 20] // width - sub edi, esi - pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha - pxor xmm4, xmm4 - - align 4 - convertloop: - READYUV422 - YUVTORGB - - // Step 3: Weave into ARGB - punpcklbw xmm0, xmm1 // BG - punpcklbw xmm2, xmm5 // RA - movdqa xmm1, xmm0 - punpcklwd xmm0, xmm2 // BGRA first 4 pixels - punpckhwd xmm1, xmm2 // BGRA next 4 pixels - movdqa [edx], xmm0 - movdqa [edx + 16], xmm1 - lea edx, [edx + 32] - sub ecx, 8 - jg convertloop - - pop edi - pop esi - ret - } -} - -// 8 pixels, dest aligned 16. -// 2 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). -// Similar to I420 but duplicate UV once more. 
-__declspec(naked) __declspec(align(16)) -void I411ToARGBRow_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_argb, - int width) { - __asm { - push ebx - push esi - push edi - mov eax, [esp + 12 + 4] // Y - mov esi, [esp + 12 + 8] // U - mov edi, [esp + 12 + 12] // V - mov edx, [esp + 12 + 16] // argb - mov ecx, [esp + 12 + 20] // width - sub edi, esi - pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha - pxor xmm4, xmm4 - - align 4 - convertloop: - READYUV411 // modifies EBX - YUVTORGB - - // Step 3: Weave into ARGB - punpcklbw xmm0, xmm1 // BG - punpcklbw xmm2, xmm5 // RA - movdqa xmm1, xmm0 - punpcklwd xmm0, xmm2 // BGRA first 4 pixels - punpckhwd xmm1, xmm2 // BGRA next 4 pixels - movdqa [edx], xmm0 - movdqa [edx + 16], xmm1 - lea edx, [edx + 32] - sub ecx, 8 - jg convertloop - - pop edi - pop esi - pop ebx - ret - } -} - -// 8 pixels, dest aligned 16. -// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). -__declspec(naked) __declspec(align(16)) -void NV12ToARGBRow_SSSE3(const uint8* y_buf, - const uint8* uv_buf, - uint8* dst_argb, - int width) { - __asm { - push esi - mov eax, [esp + 4 + 4] // Y - mov esi, [esp + 4 + 8] // UV - mov edx, [esp + 4 + 12] // argb - mov ecx, [esp + 4 + 16] // width - pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha - pxor xmm4, xmm4 - - align 4 - convertloop: - READNV12 - YUVTORGB - - // Step 3: Weave into ARGB - punpcklbw xmm0, xmm1 // BG - punpcklbw xmm2, xmm5 // RA - movdqa xmm1, xmm0 - punpcklwd xmm0, xmm2 // BGRA first 4 pixels - punpckhwd xmm1, xmm2 // BGRA next 4 pixels - movdqa [edx], xmm0 - movdqa [edx + 16], xmm1 - lea edx, [edx + 32] - sub ecx, 8 - jg convertloop - - pop esi - ret - } -} - -// 8 pixels, dest aligned 16. -// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). 
-__declspec(naked) __declspec(align(16)) -void NV21ToARGBRow_SSSE3(const uint8* y_buf, - const uint8* uv_buf, - uint8* dst_argb, - int width) { - __asm { - push esi - mov eax, [esp + 4 + 4] // Y - mov esi, [esp + 4 + 8] // VU - mov edx, [esp + 4 + 12] // argb - mov ecx, [esp + 4 + 16] // width - pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha - pxor xmm4, xmm4 - - align 4 - convertloop: - READNV12 - YVUTORGB - - // Step 3: Weave into ARGB - punpcklbw xmm0, xmm1 // BG - punpcklbw xmm2, xmm5 // RA - movdqa xmm1, xmm0 - punpcklwd xmm0, xmm2 // BGRA first 4 pixels - punpckhwd xmm1, xmm2 // BGRA next 4 pixels - movdqa [edx], xmm0 - movdqa [edx + 16], xmm1 - lea edx, [edx + 32] - sub ecx, 8 - jg convertloop - - pop esi - ret - } -} - -// 8 pixels, unaligned. -// 8 UV values, mixed with 8 Y producing 8 ARGB (32 bytes). -__declspec(naked) __declspec(align(16)) -void I444ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_argb, - int width) { - __asm { - push esi - push edi - mov eax, [esp + 8 + 4] // Y - mov esi, [esp + 8 + 8] // U - mov edi, [esp + 8 + 12] // V - mov edx, [esp + 8 + 16] // argb - mov ecx, [esp + 8 + 20] // width - sub edi, esi - pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha - pxor xmm4, xmm4 - - align 4 - convertloop: - READYUV444 - YUVTORGB - - // Step 3: Weave into ARGB - punpcklbw xmm0, xmm1 // BG - punpcklbw xmm2, xmm5 // RA - movdqa xmm1, xmm0 - punpcklwd xmm0, xmm2 // BGRA first 4 pixels - punpckhwd xmm1, xmm2 // BGRA next 4 pixels - movdqu [edx], xmm0 - movdqu [edx + 16], xmm1 - lea edx, [edx + 32] - sub ecx, 8 - jg convertloop - - pop edi - pop esi - ret - } -} - -// 8 pixels, unaligned. -// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). 
-__declspec(naked) __declspec(align(16)) -void I422ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_argb, - int width) { - __asm { - push esi - push edi - mov eax, [esp + 8 + 4] // Y - mov esi, [esp + 8 + 8] // U - mov edi, [esp + 8 + 12] // V - mov edx, [esp + 8 + 16] // argb - mov ecx, [esp + 8 + 20] // width - sub edi, esi - pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha - pxor xmm4, xmm4 - - align 4 - convertloop: - READYUV422 - YUVTORGB - - // Step 3: Weave into ARGB - punpcklbw xmm0, xmm1 // BG - punpcklbw xmm2, xmm5 // RA - movdqa xmm1, xmm0 - punpcklwd xmm0, xmm2 // BGRA first 4 pixels - punpckhwd xmm1, xmm2 // BGRA next 4 pixels - movdqu [edx], xmm0 - movdqu [edx + 16], xmm1 - lea edx, [edx + 32] - sub ecx, 8 - jg convertloop - - pop edi - pop esi - ret - } -} - -// 8 pixels, unaligned. -// 2 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). -// Similar to I420 but duplicate UV once more. -__declspec(naked) __declspec(align(16)) -void I411ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_argb, - int width) { - __asm { - push ebx - push esi - push edi - mov eax, [esp + 12 + 4] // Y - mov esi, [esp + 12 + 8] // U - mov edi, [esp + 12 + 12] // V - mov edx, [esp + 12 + 16] // argb - mov ecx, [esp + 12 + 20] // width - sub edi, esi - pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha - pxor xmm4, xmm4 - - align 4 - convertloop: - READYUV411 // modifies EBX - YUVTORGB - - // Step 3: Weave into ARGB - punpcklbw xmm0, xmm1 // BG - punpcklbw xmm2, xmm5 // RA - movdqa xmm1, xmm0 - punpcklwd xmm0, xmm2 // BGRA first 4 pixels - punpckhwd xmm1, xmm2 // BGRA next 4 pixels - movdqu [edx], xmm0 - movdqu [edx + 16], xmm1 - lea edx, [edx + 32] - sub ecx, 8 - jg convertloop - - pop edi - pop esi - pop ebx - ret - } -} - -// 8 pixels, dest aligned 16. -// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). 
-__declspec(naked) __declspec(align(16)) -void NV12ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, - const uint8* uv_buf, - uint8* dst_argb, - int width) { - __asm { - push esi - mov eax, [esp + 4 + 4] // Y - mov esi, [esp + 4 + 8] // UV - mov edx, [esp + 4 + 12] // argb - mov ecx, [esp + 4 + 16] // width - pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha - pxor xmm4, xmm4 - - align 4 - convertloop: - READNV12 - YUVTORGB - - // Step 3: Weave into ARGB - punpcklbw xmm0, xmm1 // BG - punpcklbw xmm2, xmm5 // RA - movdqa xmm1, xmm0 - punpcklwd xmm0, xmm2 // BGRA first 4 pixels - punpckhwd xmm1, xmm2 // BGRA next 4 pixels - movdqu [edx], xmm0 - movdqu [edx + 16], xmm1 - lea edx, [edx + 32] - sub ecx, 8 - jg convertloop - - pop esi - ret - } -} - -// 8 pixels, dest aligned 16. -// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). -__declspec(naked) __declspec(align(16)) -void NV21ToARGBRow_Unaligned_SSSE3(const uint8* y_buf, - const uint8* uv_buf, - uint8* dst_argb, - int width) { - __asm { - push esi - mov eax, [esp + 4 + 4] // Y - mov esi, [esp + 4 + 8] // VU - mov edx, [esp + 4 + 12] // argb - mov ecx, [esp + 4 + 16] // width - pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha - pxor xmm4, xmm4 - - align 4 - convertloop: - READNV12 - YVUTORGB - - // Step 3: Weave into ARGB - punpcklbw xmm0, xmm1 // BG - punpcklbw xmm2, xmm5 // RA - movdqa xmm1, xmm0 - punpcklwd xmm0, xmm2 // BGRA first 4 pixels - punpckhwd xmm1, xmm2 // BGRA next 4 pixels - movdqu [edx], xmm0 - movdqu [edx + 16], xmm1 - lea edx, [edx + 32] - sub ecx, 8 - jg convertloop - - pop esi - ret - } -} - -__declspec(naked) __declspec(align(16)) -void I422ToBGRARow_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_bgra, - int width) { - __asm { - push esi - push edi - mov eax, [esp + 8 + 4] // Y - mov esi, [esp + 8 + 8] // U - mov edi, [esp + 8 + 12] // V - mov edx, [esp + 8 + 16] // bgra - mov ecx, [esp + 8 + 20] // width - sub edi, esi - pxor xmm4, 
xmm4 - - align 4 - convertloop: - READYUV422 - YUVTORGB - - // Step 3: Weave into BGRA - pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha - punpcklbw xmm1, xmm0 // GB - punpcklbw xmm5, xmm2 // AR - movdqa xmm0, xmm5 - punpcklwd xmm5, xmm1 // BGRA first 4 pixels - punpckhwd xmm0, xmm1 // BGRA next 4 pixels - movdqa [edx], xmm5 - movdqa [edx + 16], xmm0 - lea edx, [edx + 32] - sub ecx, 8 - jg convertloop - - pop edi - pop esi - ret - } -} - -__declspec(naked) __declspec(align(16)) -void I422ToBGRARow_Unaligned_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_bgra, - int width) { - __asm { - push esi - push edi - mov eax, [esp + 8 + 4] // Y - mov esi, [esp + 8 + 8] // U - mov edi, [esp + 8 + 12] // V - mov edx, [esp + 8 + 16] // bgra - mov ecx, [esp + 8 + 20] // width - sub edi, esi - pxor xmm4, xmm4 - - align 4 - convertloop: - READYUV422 - YUVTORGB - - // Step 3: Weave into BGRA - pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha - punpcklbw xmm1, xmm0 // GB - punpcklbw xmm5, xmm2 // AR - movdqa xmm0, xmm5 - punpcklwd xmm5, xmm1 // BGRA first 4 pixels - punpckhwd xmm0, xmm1 // BGRA next 4 pixels - movdqu [edx], xmm5 - movdqu [edx + 16], xmm0 - lea edx, [edx + 32] - sub ecx, 8 - jg convertloop - - pop edi - pop esi - ret - } -} - -__declspec(naked) __declspec(align(16)) -void I422ToABGRRow_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_abgr, - int width) { - __asm { - push esi - push edi - mov eax, [esp + 8 + 4] // Y - mov esi, [esp + 8 + 8] // U - mov edi, [esp + 8 + 12] // V - mov edx, [esp + 8 + 16] // abgr - mov ecx, [esp + 8 + 20] // width - sub edi, esi - pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha - pxor xmm4, xmm4 - - align 4 - convertloop: - READYUV422 - YUVTORGB - - // Step 3: Weave into ARGB - punpcklbw xmm2, xmm1 // RG - punpcklbw xmm0, xmm5 // BA - movdqa xmm1, xmm2 - punpcklwd xmm2, xmm0 // RGBA first 4 pixels - punpckhwd xmm1, xmm0 // RGBA next 4 pixels - movdqa [edx], 
xmm2 - movdqa [edx + 16], xmm1 - lea edx, [edx + 32] - sub ecx, 8 - jg convertloop - - pop edi - pop esi - ret - } -} - -__declspec(naked) __declspec(align(16)) -void I422ToABGRRow_Unaligned_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_abgr, - int width) { - __asm { - push esi - push edi - mov eax, [esp + 8 + 4] // Y - mov esi, [esp + 8 + 8] // U - mov edi, [esp + 8 + 12] // V - mov edx, [esp + 8 + 16] // abgr - mov ecx, [esp + 8 + 20] // width - sub edi, esi - pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha - pxor xmm4, xmm4 - - align 4 - convertloop: - READYUV422 - YUVTORGB - - // Step 3: Weave into ARGB - punpcklbw xmm2, xmm1 // RG - punpcklbw xmm0, xmm5 // BA - movdqa xmm1, xmm2 - punpcklwd xmm2, xmm0 // RGBA first 4 pixels - punpckhwd xmm1, xmm0 // RGBA next 4 pixels - movdqu [edx], xmm2 - movdqu [edx + 16], xmm1 - lea edx, [edx + 32] - sub ecx, 8 - jg convertloop - - pop edi - pop esi - ret - } -} - -__declspec(naked) __declspec(align(16)) -void I422ToRGBARow_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_rgba, - int width) { - __asm { - push esi - push edi - mov eax, [esp + 8 + 4] // Y - mov esi, [esp + 8 + 8] // U - mov edi, [esp + 8 + 12] // V - mov edx, [esp + 8 + 16] // rgba - mov ecx, [esp + 8 + 20] // width - sub edi, esi - pxor xmm4, xmm4 - - align 4 - convertloop: - READYUV422 - YUVTORGB - - // Step 3: Weave into RGBA - pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha - punpcklbw xmm1, xmm2 // GR - punpcklbw xmm5, xmm0 // AB - movdqa xmm0, xmm5 - punpcklwd xmm5, xmm1 // RGBA first 4 pixels - punpckhwd xmm0, xmm1 // RGBA next 4 pixels - movdqa [edx], xmm5 - movdqa [edx + 16], xmm0 - lea edx, [edx + 32] - sub ecx, 8 - jg convertloop - - pop edi - pop esi - ret - } -} - -__declspec(naked) __declspec(align(16)) -void I422ToRGBARow_Unaligned_SSSE3(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* dst_rgba, - int width) { - __asm { - push esi - push edi 
- mov eax, [esp + 8 + 4] // Y - mov esi, [esp + 8 + 8] // U - mov edi, [esp + 8 + 12] // V - mov edx, [esp + 8 + 16] // rgba - mov ecx, [esp + 8 + 20] // width - sub edi, esi - pxor xmm4, xmm4 - - align 4 - convertloop: - READYUV422 - YUVTORGB - - // Step 3: Weave into RGBA - pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha - punpcklbw xmm1, xmm2 // GR - punpcklbw xmm5, xmm0 // AB - movdqa xmm0, xmm5 - punpcklwd xmm5, xmm1 // RGBA first 4 pixels - punpckhwd xmm0, xmm1 // RGBA next 4 pixels - movdqu [edx], xmm5 - movdqu [edx + 16], xmm0 - lea edx, [edx + 32] - sub ecx, 8 - jg convertloop - - pop edi - pop esi - ret - } -} - -#endif // HAS_I422TOARGBROW_SSSE3 - -#ifdef HAS_YTOARGBROW_SSE2 -__declspec(naked) __declspec(align(16)) -void YToARGBRow_SSE2(const uint8* y_buf, - uint8* rgb_buf, - int width) { - __asm { - pxor xmm5, xmm5 - pcmpeqb xmm4, xmm4 // generate mask 0xff000000 - pslld xmm4, 24 - mov eax, 0x00100010 - movd xmm3, eax - pshufd xmm3, xmm3, 0 - mov eax, 0x004a004a // 74 - movd xmm2, eax - pshufd xmm2, xmm2,0 - mov eax, [esp + 4] // Y - mov edx, [esp + 8] // rgb - mov ecx, [esp + 12] // width - - align 4 - convertloop: - // Step 1: Scale Y contribution to 8 G values. G = (y - 16) * 1.164 - movq xmm0, qword ptr [eax] - lea eax, [eax + 8] - punpcklbw xmm0, xmm5 // 0.Y - psubusw xmm0, xmm3 - pmullw xmm0, xmm2 - psrlw xmm0, 6 - packuswb xmm0, xmm0 // G - - // Step 2: Weave into ARGB - punpcklbw xmm0, xmm0 // GG - movdqa xmm1, xmm0 - punpcklwd xmm0, xmm0 // BGRA first 4 pixels - punpckhwd xmm1, xmm1 // BGRA next 4 pixels - por xmm0, xmm4 - por xmm1, xmm4 - movdqa [edx], xmm0 - movdqa [edx + 16], xmm1 - lea edx, [edx + 32] - sub ecx, 8 - jg convertloop - - ret - } -} -#endif // HAS_YTOARGBROW_SSE2 - -#ifdef HAS_MIRRORROW_SSSE3 -// Shuffle table for reversing the bytes. 
-static const uvec8 kShuffleMirror = { - 15u, 14u, 13u, 12u, 11u, 10u, 9u, 8u, 7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u -}; - -__declspec(naked) __declspec(align(16)) -void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width) { - __asm { - mov eax, [esp + 4] // src - mov edx, [esp + 8] // dst - mov ecx, [esp + 12] // width - movdqa xmm5, kShuffleMirror - lea eax, [eax - 16] - - align 4 - convertloop: - movdqa xmm0, [eax + ecx] - pshufb xmm0, xmm5 - sub ecx, 16 - movdqa [edx], xmm0 - lea edx, [edx + 16] - jg convertloop - ret - } -} -#endif // HAS_MIRRORROW_SSSE3 - -#ifdef HAS_MIRRORROW_AVX2 -// Shuffle table for reversing the bytes. -static const ulvec8 kShuffleMirror_AVX2 = { - 15u, 14u, 13u, 12u, 11u, 10u, 9u, 8u, 7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u, - 15u, 14u, 13u, 12u, 11u, 10u, 9u, 8u, 7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u -}; - -__declspec(naked) __declspec(align(16)) -void MirrorRow_AVX2(const uint8* src, uint8* dst, int width) { - __asm { - mov eax, [esp + 4] // src - mov edx, [esp + 8] // dst - mov ecx, [esp + 12] // width - vmovdqa ymm5, kShuffleMirror_AVX2 - lea eax, [eax - 32] - - align 4 - convertloop: - vmovdqu ymm0, [eax + ecx] - vpshufb ymm0, ymm0, ymm5 - vpermq ymm0, ymm0, 0x4e // swap high and low halfs - sub ecx, 32 - vmovdqu [edx], ymm0 - lea edx, [edx + 32] - jg convertloop - vzeroupper - ret - } -} -#endif // HAS_MIRRORROW_AVX2 - -#ifdef HAS_MIRRORROW_SSE2 -// SSE2 version has movdqu so it can be used on unaligned buffers when SSSE3 -// version can not. 
-__declspec(naked) __declspec(align(16)) -void MirrorRow_SSE2(const uint8* src, uint8* dst, int width) { - __asm { - mov eax, [esp + 4] // src - mov edx, [esp + 8] // dst - mov ecx, [esp + 12] // width - lea eax, [eax - 16] - - align 4 - convertloop: - movdqu xmm0, [eax + ecx] - movdqa xmm1, xmm0 // swap bytes - psllw xmm0, 8 - psrlw xmm1, 8 - por xmm0, xmm1 - pshuflw xmm0, xmm0, 0x1b // swap words - pshufhw xmm0, xmm0, 0x1b - pshufd xmm0, xmm0, 0x4e // swap qwords - sub ecx, 16 - movdqu [edx], xmm0 - lea edx, [edx + 16] - jg convertloop - ret - } -} -#endif // HAS_MIRRORROW_SSE2 - -#ifdef HAS_MIRRORROW_UV_SSSE3 -// Shuffle table for reversing the bytes of UV channels. -static const uvec8 kShuffleMirrorUV = { - 14u, 12u, 10u, 8u, 6u, 4u, 2u, 0u, 15u, 13u, 11u, 9u, 7u, 5u, 3u, 1u -}; - -__declspec(naked) __declspec(align(16)) -void MirrorUVRow_SSSE3(const uint8* src, uint8* dst_u, uint8* dst_v, - int width) { - __asm { - push edi - mov eax, [esp + 4 + 4] // src - mov edx, [esp + 4 + 8] // dst_u - mov edi, [esp + 4 + 12] // dst_v - mov ecx, [esp + 4 + 16] // width - movdqa xmm1, kShuffleMirrorUV - lea eax, [eax + ecx * 2 - 16] - sub edi, edx - - align 4 - convertloop: - movdqa xmm0, [eax] - lea eax, [eax - 16] - pshufb xmm0, xmm1 - sub ecx, 8 - movlpd qword ptr [edx], xmm0 - movhpd qword ptr [edx + edi], xmm0 - lea edx, [edx + 8] - jg convertloop - - pop edi - ret - } -} -#endif // HAS_MIRRORROW_UV_SSSE3 - -#ifdef HAS_ARGBMIRRORROW_SSSE3 -// Shuffle table for reversing the bytes. -static const uvec8 kARGBShuffleMirror = { - 12u, 13u, 14u, 15u, 8u, 9u, 10u, 11u, 4u, 5u, 6u, 7u, 0u, 1u, 2u, 3u -}; - -__declspec(naked) __declspec(align(16)) -void ARGBMirrorRow_SSSE3(const uint8* src, uint8* dst, int width) { - __asm { - mov eax, [esp + 4] // src - mov edx, [esp + 8] // dst - mov ecx, [esp + 12] // width - lea eax, [eax - 16 + ecx * 4] // last 4 pixels. 
- movdqa xmm5, kARGBShuffleMirror - - align 4 - convertloop: - movdqa xmm0, [eax] - lea eax, [eax - 16] - pshufb xmm0, xmm5 - sub ecx, 4 - movdqa [edx], xmm0 - lea edx, [edx + 16] - jg convertloop - ret - } -} -#endif // HAS_ARGBMIRRORROW_SSSE3 - -#ifdef HAS_ARGBMIRRORROW_AVX2 -// Shuffle table for reversing the bytes. -static const ulvec32 kARGBShuffleMirror_AVX2 = { - 7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u -}; - -__declspec(naked) __declspec(align(16)) -void ARGBMirrorRow_AVX2(const uint8* src, uint8* dst, int width) { - __asm { - mov eax, [esp + 4] // src - mov edx, [esp + 8] // dst - mov ecx, [esp + 12] // width - lea eax, [eax - 32] - vmovdqa ymm5, kARGBShuffleMirror_AVX2 - - align 4 - convertloop: - vpermd ymm0, ymm5, [eax + ecx * 4] // permute dword order - sub ecx, 8 - vmovdqu [edx], ymm0 - lea edx, [edx + 32] - jg convertloop - vzeroupper - ret - } -} -#endif // HAS_ARGBMIRRORROW_AVX2 - -#ifdef HAS_SPLITUVROW_SSE2 -__declspec(naked) __declspec(align(16)) -void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) { - __asm { - push edi - mov eax, [esp + 4 + 4] // src_uv - mov edx, [esp + 4 + 8] // dst_u - mov edi, [esp + 4 + 12] // dst_v - mov ecx, [esp + 4 + 16] // pix - pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff - psrlw xmm5, 8 - sub edi, edx - - align 4 - convertloop: - movdqa xmm0, [eax] - movdqa xmm1, [eax + 16] - lea eax, [eax + 32] - movdqa xmm2, xmm0 - movdqa xmm3, xmm1 - pand xmm0, xmm5 // even bytes - pand xmm1, xmm5 - packuswb xmm0, xmm1 - psrlw xmm2, 8 // odd bytes - psrlw xmm3, 8 - packuswb xmm2, xmm3 - movdqa [edx], xmm0 - movdqa [edx + edi], xmm2 - lea edx, [edx + 16] - sub ecx, 16 - jg convertloop - - pop edi - ret - } -} - -__declspec(naked) __declspec(align(16)) -void SplitUVRow_Unaligned_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, - int pix) { - __asm { - push edi - mov eax, [esp + 4 + 4] // src_uv - mov edx, [esp + 4 + 8] // dst_u - mov edi, [esp + 4 + 12] // dst_v - mov ecx, [esp + 4 + 16] // pix - pcmpeqb 
xmm5, xmm5 // generate mask 0x00ff00ff - psrlw xmm5, 8 - sub edi, edx - - align 4 - convertloop: - movdqu xmm0, [eax] - movdqu xmm1, [eax + 16] - lea eax, [eax + 32] - movdqa xmm2, xmm0 - movdqa xmm3, xmm1 - pand xmm0, xmm5 // even bytes - pand xmm1, xmm5 - packuswb xmm0, xmm1 - psrlw xmm2, 8 // odd bytes - psrlw xmm3, 8 - packuswb xmm2, xmm3 - movdqu [edx], xmm0 - movdqu [edx + edi], xmm2 - lea edx, [edx + 16] - sub ecx, 16 - jg convertloop - - pop edi - ret - } -} -#endif // HAS_SPLITUVROW_SSE2 - -#ifdef HAS_SPLITUVROW_AVX2 -__declspec(naked) __declspec(align(16)) -void SplitUVRow_AVX2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) { - __asm { - push edi - mov eax, [esp + 4 + 4] // src_uv - mov edx, [esp + 4 + 8] // dst_u - mov edi, [esp + 4 + 12] // dst_v - mov ecx, [esp + 4 + 16] // pix - vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0x00ff00ff - vpsrlw ymm5, ymm5, 8 - sub edi, edx - - align 4 - convertloop: - vmovdqu ymm0, [eax] - vmovdqu ymm1, [eax + 32] - lea eax, [eax + 64] - vpsrlw ymm2, ymm0, 8 // odd bytes - vpsrlw ymm3, ymm1, 8 - vpand ymm0, ymm0, ymm5 // even bytes - vpand ymm1, ymm1, ymm5 - vpackuswb ymm0, ymm0, ymm1 - vpackuswb ymm2, ymm2, ymm3 - vpermq ymm0, ymm0, 0xd8 - vpermq ymm2, ymm2, 0xd8 - vmovdqu [edx], ymm0 - vmovdqu [edx + edi], ymm2 - lea edx, [edx + 32] - sub ecx, 32 - jg convertloop - - pop edi - vzeroupper - ret - } -} -#endif // HAS_SPLITUVROW_AVX2 - -#ifdef HAS_MERGEUVROW_SSE2 -__declspec(naked) __declspec(align(16)) -void MergeUVRow_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv, - int width) { - __asm { - push edi - mov eax, [esp + 4 + 4] // src_u - mov edx, [esp + 4 + 8] // src_v - mov edi, [esp + 4 + 12] // dst_uv - mov ecx, [esp + 4 + 16] // width - sub edx, eax - - align 4 - convertloop: - movdqa xmm0, [eax] // read 16 U's - movdqa xmm1, [eax + edx] // and 16 V's - lea eax, [eax + 16] - movdqa xmm2, xmm0 - punpcklbw xmm0, xmm1 // first 8 UV pairs - punpckhbw xmm2, xmm1 // next 8 UV pairs - movdqa [edi], xmm0 
- movdqa [edi + 16], xmm2 - lea edi, [edi + 32] - sub ecx, 16 - jg convertloop - - pop edi - ret - } -} - -__declspec(naked) __declspec(align(16)) -void MergeUVRow_Unaligned_SSE2(const uint8* src_u, const uint8* src_v, - uint8* dst_uv, int width) { - __asm { - push edi - mov eax, [esp + 4 + 4] // src_u - mov edx, [esp + 4 + 8] // src_v - mov edi, [esp + 4 + 12] // dst_uv - mov ecx, [esp + 4 + 16] // width - sub edx, eax - - align 4 - convertloop: - movdqu xmm0, [eax] // read 16 U's - movdqu xmm1, [eax + edx] // and 16 V's - lea eax, [eax + 16] - movdqa xmm2, xmm0 - punpcklbw xmm0, xmm1 // first 8 UV pairs - punpckhbw xmm2, xmm1 // next 8 UV pairs - movdqu [edi], xmm0 - movdqu [edi + 16], xmm2 - lea edi, [edi + 32] - sub ecx, 16 - jg convertloop - - pop edi - ret - } -} -#endif // HAS_MERGEUVROW_SSE2 - -#ifdef HAS_MERGEUVROW_AVX2 -__declspec(naked) __declspec(align(16)) -void MergeUVRow_AVX2(const uint8* src_u, const uint8* src_v, uint8* dst_uv, - int width) { - __asm { - push edi - mov eax, [esp + 4 + 4] // src_u - mov edx, [esp + 4 + 8] // src_v - mov edi, [esp + 4 + 12] // dst_uv - mov ecx, [esp + 4 + 16] // width - sub edx, eax - - align 4 - convertloop: - vmovdqu ymm0, [eax] // read 32 U's - vmovdqu ymm1, [eax + edx] // and 32 V's - lea eax, [eax + 32] - vpunpcklbw ymm2, ymm0, ymm1 // low 16 UV pairs. mutated qqword 0,2 - vpunpckhbw ymm0, ymm0, ymm1 // high 16 UV pairs. mutated qqword 1,3 - vperm2i128 ymm1, ymm2, ymm0, 0x20 // low 128 of ymm2 and low 128 of ymm0 - vperm2i128 ymm2, ymm2, ymm0, 0x31 // high 128 of ymm2 and high 128 of ymm0 - vmovdqu [edi], ymm1 - vmovdqu [edi + 32], ymm2 - lea edi, [edi + 64] - sub ecx, 32 - jg convertloop - - pop edi - vzeroupper - ret - } -} -#endif // HAS_MERGEUVROW_AVX2 - -#ifdef HAS_COPYROW_SSE2 -// CopyRow copys 'count' bytes using a 16 byte load/store, 32 bytes at time. 
-__declspec(naked) __declspec(align(16)) -void CopyRow_SSE2(const uint8* src, uint8* dst, int count) { - __asm { - mov eax, [esp + 4] // src - mov edx, [esp + 8] // dst - mov ecx, [esp + 12] // count - - align 4 - convertloop: - movdqa xmm0, [eax] - movdqa xmm1, [eax + 16] - lea eax, [eax + 32] - movdqa [edx], xmm0 - movdqa [edx + 16], xmm1 - lea edx, [edx + 32] - sub ecx, 32 - jg convertloop - ret - } -} -#endif // HAS_COPYROW_SSE2 - -// Unaligned Multiple of 1. -__declspec(naked) __declspec(align(16)) -void CopyRow_ERMS(const uint8* src, uint8* dst, int count) { - __asm { - mov eax, esi - mov edx, edi - mov esi, [esp + 4] // src - mov edi, [esp + 8] // dst - mov ecx, [esp + 12] // count - rep movsb - mov edi, edx - mov esi, eax - ret - } -} - -#ifdef HAS_COPYROW_X86 -__declspec(naked) __declspec(align(16)) -void CopyRow_X86(const uint8* src, uint8* dst, int count) { - __asm { - mov eax, esi - mov edx, edi - mov esi, [esp + 4] // src - mov edi, [esp + 8] // dst - mov ecx, [esp + 12] // count - shr ecx, 2 - rep movsd - mov edi, edx - mov esi, eax - ret - } -} -#endif // HAS_COPYROW_X86 - -#ifdef HAS_ARGBCOPYALPHAROW_SSE2 -// width in pixels -__declspec(naked) __declspec(align(16)) -void ARGBCopyAlphaRow_SSE2(const uint8* src, uint8* dst, int width) { - __asm { - mov eax, [esp + 4] // src - mov edx, [esp + 8] // dst - mov ecx, [esp + 12] // count - pcmpeqb xmm0, xmm0 // generate mask 0xff000000 - pslld xmm0, 24 - pcmpeqb xmm1, xmm1 // generate mask 0x00ffffff - psrld xmm1, 8 - - align 4 - convertloop: - movdqa xmm2, [eax] - movdqa xmm3, [eax + 16] - lea eax, [eax + 32] - movdqa xmm4, [edx] - movdqa xmm5, [edx + 16] - pand xmm2, xmm0 - pand xmm3, xmm0 - pand xmm4, xmm1 - pand xmm5, xmm1 - por xmm2, xmm4 - por xmm3, xmm5 - movdqa [edx], xmm2 - movdqa [edx + 16], xmm3 - lea edx, [edx + 32] - sub ecx, 8 - jg convertloop - - ret - } -} -#endif // HAS_ARGBCOPYALPHAROW_SSE2 - -#ifdef HAS_ARGBCOPYALPHAROW_AVX2 -// width in pixels -__declspec(naked) __declspec(align(16)) 
-void ARGBCopyAlphaRow_AVX2(const uint8* src, uint8* dst, int width) { - __asm { - mov eax, [esp + 4] // src - mov edx, [esp + 8] // dst - mov ecx, [esp + 12] // count - vpcmpeqb ymm0, ymm0, ymm0 - vpsrld ymm0, ymm0, 8 // generate mask 0x00ffffff - - align 4 - convertloop: - vmovdqu ymm1, [eax] - vmovdqu ymm2, [eax + 32] - lea eax, [eax + 64] - vpblendvb ymm1, ymm1, [edx], ymm0 - vpblendvb ymm2, ymm2, [edx + 32], ymm0 - vmovdqu [edx], ymm1 - vmovdqu [edx + 32], ymm2 - lea edx, [edx + 64] - sub ecx, 16 - jg convertloop - - vzeroupper - ret - } -} -#endif // HAS_ARGBCOPYALPHAROW_AVX2 - -#ifdef HAS_ARGBCOPYYTOALPHAROW_SSE2 -// width in pixels -__declspec(naked) __declspec(align(16)) -void ARGBCopyYToAlphaRow_SSE2(const uint8* src, uint8* dst, int width) { - __asm { - mov eax, [esp + 4] // src - mov edx, [esp + 8] // dst - mov ecx, [esp + 12] // count - pcmpeqb xmm0, xmm0 // generate mask 0xff000000 - pslld xmm0, 24 - pcmpeqb xmm1, xmm1 // generate mask 0x00ffffff - psrld xmm1, 8 - - align 4 - convertloop: - movq xmm2, qword ptr [eax] // 8 Y's - lea eax, [eax + 8] - punpcklbw xmm2, xmm2 - punpckhwd xmm3, xmm2 - punpcklwd xmm2, xmm2 - movdqa xmm4, [edx] - movdqa xmm5, [edx + 16] - pand xmm2, xmm0 - pand xmm3, xmm0 - pand xmm4, xmm1 - pand xmm5, xmm1 - por xmm2, xmm4 - por xmm3, xmm5 - movdqa [edx], xmm2 - movdqa [edx + 16], xmm3 - lea edx, [edx + 32] - sub ecx, 8 - jg convertloop - - ret - } -} -#endif // HAS_ARGBCOPYYTOALPHAROW_SSE2 - -#ifdef HAS_ARGBCOPYYTOALPHAROW_AVX2 -// width in pixels -__declspec(naked) __declspec(align(16)) -void ARGBCopyYToAlphaRow_AVX2(const uint8* src, uint8* dst, int width) { - __asm { - mov eax, [esp + 4] // src - mov edx, [esp + 8] // dst - mov ecx, [esp + 12] // count - vpcmpeqb ymm0, ymm0, ymm0 - vpsrld ymm0, ymm0, 8 // generate mask 0x00ffffff - - align 4 - convertloop: - vpmovzxbd ymm1, qword ptr [eax] - vpmovzxbd ymm2, qword ptr [eax + 8] - lea eax, [eax + 16] - vpslld ymm1, ymm1, 24 - vpslld ymm2, ymm2, 24 - vpblendvb ymm1, ymm1, 
[edx], ymm0 - vpblendvb ymm2, ymm2, [edx + 32], ymm0 - vmovdqu [edx], ymm1 - vmovdqu [edx + 32], ymm2 - lea edx, [edx + 64] - sub ecx, 16 - jg convertloop - - vzeroupper - ret - } -} -#endif // HAS_ARGBCOPYYTOALPHAROW_AVX2 - -#ifdef HAS_SETROW_X86 -// SetRow8 writes 'count' bytes using a 32 bit value repeated. -__declspec(naked) __declspec(align(16)) -void SetRow_X86(uint8* dst, uint32 v32, int count) { - __asm { - mov edx, edi - mov edi, [esp + 4] // dst - mov eax, [esp + 8] // v32 - mov ecx, [esp + 12] // count - shr ecx, 2 - rep stosd - mov edi, edx - ret - } -} - -// SetRow32 writes 'count' words using a 32 bit value repeated. -__declspec(naked) __declspec(align(16)) -void ARGBSetRows_X86(uint8* dst, uint32 v32, int width, - int dst_stride, int height) { - __asm { - push esi - push edi - push ebp - mov edi, [esp + 12 + 4] // dst - mov eax, [esp + 12 + 8] // v32 - mov ebp, [esp + 12 + 12] // width - mov edx, [esp + 12 + 16] // dst_stride - mov esi, [esp + 12 + 20] // height - lea ecx, [ebp * 4] - sub edx, ecx // stride - width * 4 - - align 4 - convertloop: - mov ecx, ebp - rep stosd - add edi, edx - sub esi, 1 - jg convertloop - - pop ebp - pop edi - pop esi - ret - } -} -#endif // HAS_SETROW_X86 - -#ifdef HAS_YUY2TOYROW_AVX2 -__declspec(naked) __declspec(align(16)) -void YUY2ToYRow_AVX2(const uint8* src_yuy2, - uint8* dst_y, int pix) { - __asm { - mov eax, [esp + 4] // src_yuy2 - mov edx, [esp + 8] // dst_y - mov ecx, [esp + 12] // pix - vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0x00ff00ff - vpsrlw ymm5, ymm5, 8 - - align 4 - convertloop: - vmovdqu ymm0, [eax] - vmovdqu ymm1, [eax + 32] - lea eax, [eax + 64] - vpand ymm0, ymm0, ymm5 // even bytes are Y - vpand ymm1, ymm1, ymm5 - vpackuswb ymm0, ymm0, ymm1 // mutates. 
- vpermq ymm0, ymm0, 0xd8 - sub ecx, 32 - vmovdqu [edx], ymm0 - lea edx, [edx + 32] - jg convertloop - vzeroupper - ret - } -} - -__declspec(naked) __declspec(align(16)) -void YUY2ToUVRow_AVX2(const uint8* src_yuy2, int stride_yuy2, - uint8* dst_u, uint8* dst_v, int pix) { - __asm { - push esi - push edi - mov eax, [esp + 8 + 4] // src_yuy2 - mov esi, [esp + 8 + 8] // stride_yuy2 - mov edx, [esp + 8 + 12] // dst_u - mov edi, [esp + 8 + 16] // dst_v - mov ecx, [esp + 8 + 20] // pix - vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0x00ff00ff - vpsrlw ymm5, ymm5, 8 - sub edi, edx - - align 4 - convertloop: - vmovdqu ymm0, [eax] - vmovdqu ymm1, [eax + 32] - vpavgb ymm0, ymm0, [eax + esi] - vpavgb ymm1, ymm1, [eax + esi + 32] - lea eax, [eax + 64] - vpsrlw ymm0, ymm0, 8 // YUYV -> UVUV - vpsrlw ymm1, ymm1, 8 - vpackuswb ymm0, ymm0, ymm1 // mutates. - vpermq ymm0, ymm0, 0xd8 - vpand ymm1, ymm0, ymm5 // U - vpsrlw ymm0, ymm0, 8 // V - vpackuswb ymm1, ymm1, ymm1 // mutates. - vpackuswb ymm0, ymm0, ymm0 // mutates. - vpermq ymm1, ymm1, 0xd8 - vpermq ymm0, ymm0, 0xd8 - vextractf128 [edx], ymm1, 0 // U - vextractf128 [edx + edi], ymm0, 0 // V - lea edx, [edx + 16] - sub ecx, 32 - jg convertloop - - pop edi - pop esi - vzeroupper - ret - } -} - -__declspec(naked) __declspec(align(16)) -void YUY2ToUV422Row_AVX2(const uint8* src_yuy2, - uint8* dst_u, uint8* dst_v, int pix) { - __asm { - push edi - mov eax, [esp + 4 + 4] // src_yuy2 - mov edx, [esp + 4 + 8] // dst_u - mov edi, [esp + 4 + 12] // dst_v - mov ecx, [esp + 4 + 16] // pix - vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0x00ff00ff - vpsrlw ymm5, ymm5, 8 - sub edi, edx - - align 4 - convertloop: - vmovdqu ymm0, [eax] - vmovdqu ymm1, [eax + 32] - lea eax, [eax + 64] - vpsrlw ymm0, ymm0, 8 // YUYV -> UVUV - vpsrlw ymm1, ymm1, 8 - vpackuswb ymm0, ymm0, ymm1 // mutates. - vpermq ymm0, ymm0, 0xd8 - vpand ymm1, ymm0, ymm5 // U - vpsrlw ymm0, ymm0, 8 // V - vpackuswb ymm1, ymm1, ymm1 // mutates. 
- vpackuswb ymm0, ymm0, ymm0 // mutates. - vpermq ymm1, ymm1, 0xd8 - vpermq ymm0, ymm0, 0xd8 - vextractf128 [edx], ymm1, 0 // U - vextractf128 [edx + edi], ymm0, 0 // V - lea edx, [edx + 16] - sub ecx, 32 - jg convertloop - - pop edi - vzeroupper - ret - } -} - -__declspec(naked) __declspec(align(16)) -void UYVYToYRow_AVX2(const uint8* src_uyvy, - uint8* dst_y, int pix) { - __asm { - mov eax, [esp + 4] // src_uyvy - mov edx, [esp + 8] // dst_y - mov ecx, [esp + 12] // pix - - align 4 - convertloop: - vmovdqu ymm0, [eax] - vmovdqu ymm1, [eax + 32] - lea eax, [eax + 64] - vpsrlw ymm0, ymm0, 8 // odd bytes are Y - vpsrlw ymm1, ymm1, 8 - vpackuswb ymm0, ymm0, ymm1 // mutates. - vpermq ymm0, ymm0, 0xd8 - sub ecx, 32 - vmovdqu [edx], ymm0 - lea edx, [edx + 32] - jg convertloop - ret - vzeroupper - } -} - -__declspec(naked) __declspec(align(16)) -void UYVYToUVRow_AVX2(const uint8* src_uyvy, int stride_uyvy, - uint8* dst_u, uint8* dst_v, int pix) { - __asm { - push esi - push edi - mov eax, [esp + 8 + 4] // src_yuy2 - mov esi, [esp + 8 + 8] // stride_yuy2 - mov edx, [esp + 8 + 12] // dst_u - mov edi, [esp + 8 + 16] // dst_v - mov ecx, [esp + 8 + 20] // pix - vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0x00ff00ff - vpsrlw ymm5, ymm5, 8 - sub edi, edx - - align 4 - convertloop: - vmovdqu ymm0, [eax] - vmovdqu ymm1, [eax + 32] - vpavgb ymm0, ymm0, [eax + esi] - vpavgb ymm1, ymm1, [eax + esi + 32] - lea eax, [eax + 64] - vpand ymm0, ymm0, ymm5 // UYVY -> UVUV - vpand ymm1, ymm1, ymm5 - vpackuswb ymm0, ymm0, ymm1 // mutates. - vpermq ymm0, ymm0, 0xd8 - vpand ymm1, ymm0, ymm5 // U - vpsrlw ymm0, ymm0, 8 // V - vpackuswb ymm1, ymm1, ymm1 // mutates. - vpackuswb ymm0, ymm0, ymm0 // mutates. 
- vpermq ymm1, ymm1, 0xd8 - vpermq ymm0, ymm0, 0xd8 - vextractf128 [edx], ymm1, 0 // U - vextractf128 [edx + edi], ymm0, 0 // V - lea edx, [edx + 16] - sub ecx, 32 - jg convertloop - - pop edi - pop esi - vzeroupper - ret - } -} - -__declspec(naked) __declspec(align(16)) -void UYVYToUV422Row_AVX2(const uint8* src_uyvy, - uint8* dst_u, uint8* dst_v, int pix) { - __asm { - push edi - mov eax, [esp + 4 + 4] // src_yuy2 - mov edx, [esp + 4 + 8] // dst_u - mov edi, [esp + 4 + 12] // dst_v - mov ecx, [esp + 4 + 16] // pix - vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0x00ff00ff - vpsrlw ymm5, ymm5, 8 - sub edi, edx - - align 4 - convertloop: - vmovdqu ymm0, [eax] - vmovdqu ymm1, [eax + 32] - lea eax, [eax + 64] - vpand ymm0, ymm0, ymm5 // UYVY -> UVUV - vpand ymm1, ymm1, ymm5 - vpackuswb ymm0, ymm0, ymm1 // mutates. - vpermq ymm0, ymm0, 0xd8 - vpand ymm1, ymm0, ymm5 // U - vpsrlw ymm0, ymm0, 8 // V - vpackuswb ymm1, ymm1, ymm1 // mutates. - vpackuswb ymm0, ymm0, ymm0 // mutates. - vpermq ymm1, ymm1, 0xd8 - vpermq ymm0, ymm0, 0xd8 - vextractf128 [edx], ymm1, 0 // U - vextractf128 [edx + edi], ymm0, 0 // V - lea edx, [edx + 16] - sub ecx, 32 - jg convertloop - - pop edi - vzeroupper - ret - } -} -#endif // HAS_YUY2TOYROW_AVX2 - -#ifdef HAS_YUY2TOYROW_SSE2 -__declspec(naked) __declspec(align(16)) -void YUY2ToYRow_SSE2(const uint8* src_yuy2, - uint8* dst_y, int pix) { - __asm { - mov eax, [esp + 4] // src_yuy2 - mov edx, [esp + 8] // dst_y - mov ecx, [esp + 12] // pix - pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff - psrlw xmm5, 8 - - align 4 - convertloop: - movdqa xmm0, [eax] - movdqa xmm1, [eax + 16] - lea eax, [eax + 32] - pand xmm0, xmm5 // even bytes are Y - pand xmm1, xmm5 - packuswb xmm0, xmm1 - sub ecx, 16 - movdqa [edx], xmm0 - lea edx, [edx + 16] - jg convertloop - ret - } -} - -__declspec(naked) __declspec(align(16)) -void YUY2ToUVRow_SSE2(const uint8* src_yuy2, int stride_yuy2, - uint8* dst_u, uint8* dst_v, int pix) { - __asm { - push esi - push edi - mov eax, 
[esp + 8 + 4] // src_yuy2 - mov esi, [esp + 8 + 8] // stride_yuy2 - mov edx, [esp + 8 + 12] // dst_u - mov edi, [esp + 8 + 16] // dst_v - mov ecx, [esp + 8 + 20] // pix - pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff - psrlw xmm5, 8 - sub edi, edx - - align 4 - convertloop: - movdqa xmm0, [eax] - movdqa xmm1, [eax + 16] - movdqa xmm2, [eax + esi] - movdqa xmm3, [eax + esi + 16] - lea eax, [eax + 32] - pavgb xmm0, xmm2 - pavgb xmm1, xmm3 - psrlw xmm0, 8 // YUYV -> UVUV - psrlw xmm1, 8 - packuswb xmm0, xmm1 - movdqa xmm1, xmm0 - pand xmm0, xmm5 // U - packuswb xmm0, xmm0 - psrlw xmm1, 8 // V - packuswb xmm1, xmm1 - movq qword ptr [edx], xmm0 - movq qword ptr [edx + edi], xmm1 - lea edx, [edx + 8] - sub ecx, 16 - jg convertloop - - pop edi - pop esi - ret - } -} - -__declspec(naked) __declspec(align(16)) -void YUY2ToUV422Row_SSE2(const uint8* src_yuy2, - uint8* dst_u, uint8* dst_v, int pix) { - __asm { - push edi - mov eax, [esp + 4 + 4] // src_yuy2 - mov edx, [esp + 4 + 8] // dst_u - mov edi, [esp + 4 + 12] // dst_v - mov ecx, [esp + 4 + 16] // pix - pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff - psrlw xmm5, 8 - sub edi, edx - - align 4 - convertloop: - movdqa xmm0, [eax] - movdqa xmm1, [eax + 16] - lea eax, [eax + 32] - psrlw xmm0, 8 // YUYV -> UVUV - psrlw xmm1, 8 - packuswb xmm0, xmm1 - movdqa xmm1, xmm0 - pand xmm0, xmm5 // U - packuswb xmm0, xmm0 - psrlw xmm1, 8 // V - packuswb xmm1, xmm1 - movq qword ptr [edx], xmm0 - movq qword ptr [edx + edi], xmm1 - lea edx, [edx + 8] - sub ecx, 16 - jg convertloop - - pop edi - ret - } -} - -__declspec(naked) __declspec(align(16)) -void YUY2ToYRow_Unaligned_SSE2(const uint8* src_yuy2, - uint8* dst_y, int pix) { - __asm { - mov eax, [esp + 4] // src_yuy2 - mov edx, [esp + 8] // dst_y - mov ecx, [esp + 12] // pix - pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff - psrlw xmm5, 8 - - align 4 - convertloop: - movdqu xmm0, [eax] - movdqu xmm1, [eax + 16] - lea eax, [eax + 32] - pand xmm0, xmm5 // even bytes are Y - pand xmm1, 
xmm5 - packuswb xmm0, xmm1 - sub ecx, 16 - movdqu [edx], xmm0 - lea edx, [edx + 16] - jg convertloop - ret - } -} - -__declspec(naked) __declspec(align(16)) -void YUY2ToUVRow_Unaligned_SSE2(const uint8* src_yuy2, int stride_yuy2, - uint8* dst_u, uint8* dst_v, int pix) { - __asm { - push esi - push edi - mov eax, [esp + 8 + 4] // src_yuy2 - mov esi, [esp + 8 + 8] // stride_yuy2 - mov edx, [esp + 8 + 12] // dst_u - mov edi, [esp + 8 + 16] // dst_v - mov ecx, [esp + 8 + 20] // pix - pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff - psrlw xmm5, 8 - sub edi, edx - - align 4 - convertloop: - movdqu xmm0, [eax] - movdqu xmm1, [eax + 16] - movdqu xmm2, [eax + esi] - movdqu xmm3, [eax + esi + 16] - lea eax, [eax + 32] - pavgb xmm0, xmm2 - pavgb xmm1, xmm3 - psrlw xmm0, 8 // YUYV -> UVUV - psrlw xmm1, 8 - packuswb xmm0, xmm1 - movdqa xmm1, xmm0 - pand xmm0, xmm5 // U - packuswb xmm0, xmm0 - psrlw xmm1, 8 // V - packuswb xmm1, xmm1 - movq qword ptr [edx], xmm0 - movq qword ptr [edx + edi], xmm1 - lea edx, [edx + 8] - sub ecx, 16 - jg convertloop - - pop edi - pop esi - ret - } -} - -__declspec(naked) __declspec(align(16)) -void YUY2ToUV422Row_Unaligned_SSE2(const uint8* src_yuy2, - uint8* dst_u, uint8* dst_v, int pix) { - __asm { - push edi - mov eax, [esp + 4 + 4] // src_yuy2 - mov edx, [esp + 4 + 8] // dst_u - mov edi, [esp + 4 + 12] // dst_v - mov ecx, [esp + 4 + 16] // pix - pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff - psrlw xmm5, 8 - sub edi, edx - - align 4 - convertloop: - movdqu xmm0, [eax] - movdqu xmm1, [eax + 16] - lea eax, [eax + 32] - psrlw xmm0, 8 // YUYV -> UVUV - psrlw xmm1, 8 - packuswb xmm0, xmm1 - movdqa xmm1, xmm0 - pand xmm0, xmm5 // U - packuswb xmm0, xmm0 - psrlw xmm1, 8 // V - packuswb xmm1, xmm1 - movq qword ptr [edx], xmm0 - movq qword ptr [edx + edi], xmm1 - lea edx, [edx + 8] - sub ecx, 16 - jg convertloop - - pop edi - ret - } -} - -__declspec(naked) __declspec(align(16)) -void UYVYToYRow_SSE2(const uint8* src_uyvy, - uint8* dst_y, int pix) { - 
__asm { - mov eax, [esp + 4] // src_uyvy - mov edx, [esp + 8] // dst_y - mov ecx, [esp + 12] // pix - - align 4 - convertloop: - movdqa xmm0, [eax] - movdqa xmm1, [eax + 16] - lea eax, [eax + 32] - psrlw xmm0, 8 // odd bytes are Y - psrlw xmm1, 8 - packuswb xmm0, xmm1 - sub ecx, 16 - movdqa [edx], xmm0 - lea edx, [edx + 16] - jg convertloop - ret - } -} - -__declspec(naked) __declspec(align(16)) -void UYVYToUVRow_SSE2(const uint8* src_uyvy, int stride_uyvy, - uint8* dst_u, uint8* dst_v, int pix) { - __asm { - push esi - push edi - mov eax, [esp + 8 + 4] // src_yuy2 - mov esi, [esp + 8 + 8] // stride_yuy2 - mov edx, [esp + 8 + 12] // dst_u - mov edi, [esp + 8 + 16] // dst_v - mov ecx, [esp + 8 + 20] // pix - pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff - psrlw xmm5, 8 - sub edi, edx - - align 4 - convertloop: - movdqa xmm0, [eax] - movdqa xmm1, [eax + 16] - movdqa xmm2, [eax + esi] - movdqa xmm3, [eax + esi + 16] - lea eax, [eax + 32] - pavgb xmm0, xmm2 - pavgb xmm1, xmm3 - pand xmm0, xmm5 // UYVY -> UVUV - pand xmm1, xmm5 - packuswb xmm0, xmm1 - movdqa xmm1, xmm0 - pand xmm0, xmm5 // U - packuswb xmm0, xmm0 - psrlw xmm1, 8 // V - packuswb xmm1, xmm1 - movq qword ptr [edx], xmm0 - movq qword ptr [edx + edi], xmm1 - lea edx, [edx + 8] - sub ecx, 16 - jg convertloop - - pop edi - pop esi - ret - } -} - -__declspec(naked) __declspec(align(16)) -void UYVYToUV422Row_SSE2(const uint8* src_uyvy, - uint8* dst_u, uint8* dst_v, int pix) { - __asm { - push edi - mov eax, [esp + 4 + 4] // src_yuy2 - mov edx, [esp + 4 + 8] // dst_u - mov edi, [esp + 4 + 12] // dst_v - mov ecx, [esp + 4 + 16] // pix - pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff - psrlw xmm5, 8 - sub edi, edx - - align 4 - convertloop: - movdqa xmm0, [eax] - movdqa xmm1, [eax + 16] - lea eax, [eax + 32] - pand xmm0, xmm5 // UYVY -> UVUV - pand xmm1, xmm5 - packuswb xmm0, xmm1 - movdqa xmm1, xmm0 - pand xmm0, xmm5 // U - packuswb xmm0, xmm0 - psrlw xmm1, 8 // V - packuswb xmm1, xmm1 - movq qword ptr [edx], 
xmm0 - movq qword ptr [edx + edi], xmm1 - lea edx, [edx + 8] - sub ecx, 16 - jg convertloop - - pop edi - ret - } -} - -__declspec(naked) __declspec(align(16)) -void UYVYToYRow_Unaligned_SSE2(const uint8* src_uyvy, - uint8* dst_y, int pix) { - __asm { - mov eax, [esp + 4] // src_uyvy - mov edx, [esp + 8] // dst_y - mov ecx, [esp + 12] // pix - - align 4 - convertloop: - movdqu xmm0, [eax] - movdqu xmm1, [eax + 16] - lea eax, [eax + 32] - psrlw xmm0, 8 // odd bytes are Y - psrlw xmm1, 8 - packuswb xmm0, xmm1 - sub ecx, 16 - movdqu [edx], xmm0 - lea edx, [edx + 16] - jg convertloop - ret - } -} - -__declspec(naked) __declspec(align(16)) -void UYVYToUVRow_Unaligned_SSE2(const uint8* src_uyvy, int stride_uyvy, - uint8* dst_u, uint8* dst_v, int pix) { - __asm { - push esi - push edi - mov eax, [esp + 8 + 4] // src_yuy2 - mov esi, [esp + 8 + 8] // stride_yuy2 - mov edx, [esp + 8 + 12] // dst_u - mov edi, [esp + 8 + 16] // dst_v - mov ecx, [esp + 8 + 20] // pix - pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff - psrlw xmm5, 8 - sub edi, edx - - align 4 - convertloop: - movdqu xmm0, [eax] - movdqu xmm1, [eax + 16] - movdqu xmm2, [eax + esi] - movdqu xmm3, [eax + esi + 16] - lea eax, [eax + 32] - pavgb xmm0, xmm2 - pavgb xmm1, xmm3 - pand xmm0, xmm5 // UYVY -> UVUV - pand xmm1, xmm5 - packuswb xmm0, xmm1 - movdqa xmm1, xmm0 - pand xmm0, xmm5 // U - packuswb xmm0, xmm0 - psrlw xmm1, 8 // V - packuswb xmm1, xmm1 - movq qword ptr [edx], xmm0 - movq qword ptr [edx + edi], xmm1 - lea edx, [edx + 8] - sub ecx, 16 - jg convertloop - - pop edi - pop esi - ret - } -} - -__declspec(naked) __declspec(align(16)) -void UYVYToUV422Row_Unaligned_SSE2(const uint8* src_uyvy, - uint8* dst_u, uint8* dst_v, int pix) { - __asm { - push edi - mov eax, [esp + 4 + 4] // src_yuy2 - mov edx, [esp + 4 + 8] // dst_u - mov edi, [esp + 4 + 12] // dst_v - mov ecx, [esp + 4 + 16] // pix - pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff - psrlw xmm5, 8 - sub edi, edx - - align 4 - convertloop: - movdqu xmm0, 
[eax] - movdqu xmm1, [eax + 16] - lea eax, [eax + 32] - pand xmm0, xmm5 // UYVY -> UVUV - pand xmm1, xmm5 - packuswb xmm0, xmm1 - movdqa xmm1, xmm0 - pand xmm0, xmm5 // U - packuswb xmm0, xmm0 - psrlw xmm1, 8 // V - packuswb xmm1, xmm1 - movq qword ptr [edx], xmm0 - movq qword ptr [edx + edi], xmm1 - lea edx, [edx + 8] - sub ecx, 16 - jg convertloop - - pop edi - ret - } -} -#endif // HAS_YUY2TOYROW_SSE2 - -#ifdef HAS_ARGBBLENDROW_SSE2 -// Blend 8 pixels at a time. -__declspec(naked) __declspec(align(16)) -void ARGBBlendRow_SSE2(const uint8* src_argb0, const uint8* src_argb1, - uint8* dst_argb, int width) { - __asm { - push esi - mov eax, [esp + 4 + 4] // src_argb0 - mov esi, [esp + 4 + 8] // src_argb1 - mov edx, [esp + 4 + 12] // dst_argb - mov ecx, [esp + 4 + 16] // width - pcmpeqb xmm7, xmm7 // generate constant 1 - psrlw xmm7, 15 - pcmpeqb xmm6, xmm6 // generate mask 0x00ff00ff - psrlw xmm6, 8 - pcmpeqb xmm5, xmm5 // generate mask 0xff00ff00 - psllw xmm5, 8 - pcmpeqb xmm4, xmm4 // generate mask 0xff000000 - pslld xmm4, 24 - - sub ecx, 1 - je convertloop1 // only 1 pixel? - jl convertloop1b - - // 1 pixel loop until destination pointer is aligned. - alignloop1: - test edx, 15 // aligned? 
- je alignloop1b - movd xmm3, [eax] - lea eax, [eax + 4] - movdqa xmm0, xmm3 // src argb - pxor xmm3, xmm4 // ~alpha - movd xmm2, [esi] // _r_b - psrlw xmm3, 8 // alpha - pshufhw xmm3, xmm3, 0F5h // 8 alpha words - pshuflw xmm3, xmm3, 0F5h - pand xmm2, xmm6 // _r_b - paddw xmm3, xmm7 // 256 - alpha - pmullw xmm2, xmm3 // _r_b * alpha - movd xmm1, [esi] // _a_g - lea esi, [esi + 4] - psrlw xmm1, 8 // _a_g - por xmm0, xmm4 // set alpha to 255 - pmullw xmm1, xmm3 // _a_g * alpha - psrlw xmm2, 8 // _r_b convert to 8 bits again - paddusb xmm0, xmm2 // + src argb - pand xmm1, xmm5 // a_g_ convert to 8 bits again - paddusb xmm0, xmm1 // + src argb - sub ecx, 1 - movd [edx], xmm0 - lea edx, [edx + 4] - jge alignloop1 - - alignloop1b: - add ecx, 1 - 4 - jl convertloop4b - - // 4 pixel loop. - convertloop4: - movdqu xmm3, [eax] // src argb - lea eax, [eax + 16] - movdqa xmm0, xmm3 // src argb - pxor xmm3, xmm4 // ~alpha - movdqu xmm2, [esi] // _r_b - psrlw xmm3, 8 // alpha - pshufhw xmm3, xmm3, 0F5h // 8 alpha words - pshuflw xmm3, xmm3, 0F5h - pand xmm2, xmm6 // _r_b - paddw xmm3, xmm7 // 256 - alpha - pmullw xmm2, xmm3 // _r_b * alpha - movdqu xmm1, [esi] // _a_g - lea esi, [esi + 16] - psrlw xmm1, 8 // _a_g - por xmm0, xmm4 // set alpha to 255 - pmullw xmm1, xmm3 // _a_g * alpha - psrlw xmm2, 8 // _r_b convert to 8 bits again - paddusb xmm0, xmm2 // + src argb - pand xmm1, xmm5 // a_g_ convert to 8 bits again - paddusb xmm0, xmm1 // + src argb - sub ecx, 4 - movdqa [edx], xmm0 - lea edx, [edx + 16] - jge convertloop4 - - convertloop4b: - add ecx, 4 - 1 - jl convertloop1b - - // 1 pixel loop. 
- convertloop1: - movd xmm3, [eax] // src argb - lea eax, [eax + 4] - movdqa xmm0, xmm3 // src argb - pxor xmm3, xmm4 // ~alpha - movd xmm2, [esi] // _r_b - psrlw xmm3, 8 // alpha - pshufhw xmm3, xmm3, 0F5h // 8 alpha words - pshuflw xmm3, xmm3, 0F5h - pand xmm2, xmm6 // _r_b - paddw xmm3, xmm7 // 256 - alpha - pmullw xmm2, xmm3 // _r_b * alpha - movd xmm1, [esi] // _a_g - lea esi, [esi + 4] - psrlw xmm1, 8 // _a_g - por xmm0, xmm4 // set alpha to 255 - pmullw xmm1, xmm3 // _a_g * alpha - psrlw xmm2, 8 // _r_b convert to 8 bits again - paddusb xmm0, xmm2 // + src argb - pand xmm1, xmm5 // a_g_ convert to 8 bits again - paddusb xmm0, xmm1 // + src argb - sub ecx, 1 - movd [edx], xmm0 - lea edx, [edx + 4] - jge convertloop1 - - convertloop1b: - pop esi - ret - } -} -#endif // HAS_ARGBBLENDROW_SSE2 - -#ifdef HAS_ARGBBLENDROW_SSSE3 -// Shuffle table for isolating alpha. -static const uvec8 kShuffleAlpha = { - 3u, 0x80, 3u, 0x80, 7u, 0x80, 7u, 0x80, - 11u, 0x80, 11u, 0x80, 15u, 0x80, 15u, 0x80 -}; -// Same as SSE2, but replaces: -// psrlw xmm3, 8 // alpha -// pshufhw xmm3, xmm3, 0F5h // 8 alpha words -// pshuflw xmm3, xmm3, 0F5h -// with.. -// pshufb xmm3, kShuffleAlpha // alpha -// Blend 8 pixels at a time. - -__declspec(naked) __declspec(align(16)) -void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1, - uint8* dst_argb, int width) { - __asm { - push esi - mov eax, [esp + 4 + 4] // src_argb0 - mov esi, [esp + 4 + 8] // src_argb1 - mov edx, [esp + 4 + 12] // dst_argb - mov ecx, [esp + 4 + 16] // width - pcmpeqb xmm7, xmm7 // generate constant 0x0001 - psrlw xmm7, 15 - pcmpeqb xmm6, xmm6 // generate mask 0x00ff00ff - psrlw xmm6, 8 - pcmpeqb xmm5, xmm5 // generate mask 0xff00ff00 - psllw xmm5, 8 - pcmpeqb xmm4, xmm4 // generate mask 0xff000000 - pslld xmm4, 24 - - sub ecx, 1 - je convertloop1 // only 1 pixel? - jl convertloop1b - - // 1 pixel loop until destination pointer is aligned. - alignloop1: - test edx, 15 // aligned? 
- je alignloop1b - movd xmm3, [eax] - lea eax, [eax + 4] - movdqa xmm0, xmm3 // src argb - pxor xmm3, xmm4 // ~alpha - movd xmm2, [esi] // _r_b - pshufb xmm3, kShuffleAlpha // alpha - pand xmm2, xmm6 // _r_b - paddw xmm3, xmm7 // 256 - alpha - pmullw xmm2, xmm3 // _r_b * alpha - movd xmm1, [esi] // _a_g - lea esi, [esi + 4] - psrlw xmm1, 8 // _a_g - por xmm0, xmm4 // set alpha to 255 - pmullw xmm1, xmm3 // _a_g * alpha - psrlw xmm2, 8 // _r_b convert to 8 bits again - paddusb xmm0, xmm2 // + src argb - pand xmm1, xmm5 // a_g_ convert to 8 bits again - paddusb xmm0, xmm1 // + src argb - sub ecx, 1 - movd [edx], xmm0 - lea edx, [edx + 4] - jge alignloop1 - - alignloop1b: - add ecx, 1 - 4 - jl convertloop4b - - test eax, 15 // unaligned? - jne convertuloop4 - test esi, 15 // unaligned? - jne convertuloop4 - - // 4 pixel loop. - convertloop4: - movdqa xmm3, [eax] // src argb - lea eax, [eax + 16] - movdqa xmm0, xmm3 // src argb - pxor xmm3, xmm4 // ~alpha - movdqa xmm2, [esi] // _r_b - pshufb xmm3, kShuffleAlpha // alpha - pand xmm2, xmm6 // _r_b - paddw xmm3, xmm7 // 256 - alpha - pmullw xmm2, xmm3 // _r_b * alpha - movdqa xmm1, [esi] // _a_g - lea esi, [esi + 16] - psrlw xmm1, 8 // _a_g - por xmm0, xmm4 // set alpha to 255 - pmullw xmm1, xmm3 // _a_g * alpha - psrlw xmm2, 8 // _r_b convert to 8 bits again - paddusb xmm0, xmm2 // + src argb - pand xmm1, xmm5 // a_g_ convert to 8 bits again - paddusb xmm0, xmm1 // + src argb - sub ecx, 4 - movdqa [edx], xmm0 - lea edx, [edx + 16] - jge convertloop4 - jmp convertloop4b - - // 4 pixel unaligned loop. 
- convertuloop4: - movdqu xmm3, [eax] // src argb - lea eax, [eax + 16] - movdqa xmm0, xmm3 // src argb - pxor xmm3, xmm4 // ~alpha - movdqu xmm2, [esi] // _r_b - pshufb xmm3, kShuffleAlpha // alpha - pand xmm2, xmm6 // _r_b - paddw xmm3, xmm7 // 256 - alpha - pmullw xmm2, xmm3 // _r_b * alpha - movdqu xmm1, [esi] // _a_g - lea esi, [esi + 16] - psrlw xmm1, 8 // _a_g - por xmm0, xmm4 // set alpha to 255 - pmullw xmm1, xmm3 // _a_g * alpha - psrlw xmm2, 8 // _r_b convert to 8 bits again - paddusb xmm0, xmm2 // + src argb - pand xmm1, xmm5 // a_g_ convert to 8 bits again - paddusb xmm0, xmm1 // + src argb - sub ecx, 4 - movdqa [edx], xmm0 - lea edx, [edx + 16] - jge convertuloop4 - - convertloop4b: - add ecx, 4 - 1 - jl convertloop1b - - // 1 pixel loop. - convertloop1: - movd xmm3, [eax] // src argb - lea eax, [eax + 4] - movdqa xmm0, xmm3 // src argb - pxor xmm3, xmm4 // ~alpha - movd xmm2, [esi] // _r_b - pshufb xmm3, kShuffleAlpha // alpha - pand xmm2, xmm6 // _r_b - paddw xmm3, xmm7 // 256 - alpha - pmullw xmm2, xmm3 // _r_b * alpha - movd xmm1, [esi] // _a_g - lea esi, [esi + 4] - psrlw xmm1, 8 // _a_g - por xmm0, xmm4 // set alpha to 255 - pmullw xmm1, xmm3 // _a_g * alpha - psrlw xmm2, 8 // _r_b convert to 8 bits again - paddusb xmm0, xmm2 // + src argb - pand xmm1, xmm5 // a_g_ convert to 8 bits again - paddusb xmm0, xmm1 // + src argb - sub ecx, 1 - movd [edx], xmm0 - lea edx, [edx + 4] - jge convertloop1 - - convertloop1b: - pop esi - ret - } -} -#endif // HAS_ARGBBLENDROW_SSSE3 - -#ifdef HAS_ARGBATTENUATEROW_SSE2 -// Attenuate 4 pixels at a time. -// Aligned to 16 bytes. 
-__declspec(naked) __declspec(align(16)) -void ARGBAttenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width) { - __asm { - mov eax, [esp + 4] // src_argb0 - mov edx, [esp + 8] // dst_argb - mov ecx, [esp + 12] // width - pcmpeqb xmm4, xmm4 // generate mask 0xff000000 - pslld xmm4, 24 - pcmpeqb xmm5, xmm5 // generate mask 0x00ffffff - psrld xmm5, 8 - - align 4 - convertloop: - movdqa xmm0, [eax] // read 4 pixels - punpcklbw xmm0, xmm0 // first 2 - pshufhw xmm2, xmm0, 0FFh // 8 alpha words - pshuflw xmm2, xmm2, 0FFh - pmulhuw xmm0, xmm2 // rgb * a - movdqa xmm1, [eax] // read 4 pixels - punpckhbw xmm1, xmm1 // next 2 pixels - pshufhw xmm2, xmm1, 0FFh // 8 alpha words - pshuflw xmm2, xmm2, 0FFh - pmulhuw xmm1, xmm2 // rgb * a - movdqa xmm2, [eax] // alphas - lea eax, [eax + 16] - psrlw xmm0, 8 - pand xmm2, xmm4 - psrlw xmm1, 8 - packuswb xmm0, xmm1 - pand xmm0, xmm5 // keep original alphas - por xmm0, xmm2 - sub ecx, 4 - movdqa [edx], xmm0 - lea edx, [edx + 16] - jg convertloop - - ret - } -} -#endif // HAS_ARGBATTENUATEROW_SSE2 - -#ifdef HAS_ARGBATTENUATEROW_SSSE3 -// Shuffle table duplicating alpha. 
-static const uvec8 kShuffleAlpha0 = { - 3u, 3u, 3u, 3u, 3u, 3u, 128u, 128u, 7u, 7u, 7u, 7u, 7u, 7u, 128u, 128u, -}; -static const uvec8 kShuffleAlpha1 = { - 11u, 11u, 11u, 11u, 11u, 11u, 128u, 128u, - 15u, 15u, 15u, 15u, 15u, 15u, 128u, 128u, -}; -__declspec(naked) __declspec(align(16)) -void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) { - __asm { - mov eax, [esp + 4] // src_argb0 - mov edx, [esp + 8] // dst_argb - mov ecx, [esp + 12] // width - pcmpeqb xmm3, xmm3 // generate mask 0xff000000 - pslld xmm3, 24 - movdqa xmm4, kShuffleAlpha0 - movdqa xmm5, kShuffleAlpha1 - - align 4 - convertloop: - movdqu xmm0, [eax] // read 4 pixels - pshufb xmm0, xmm4 // isolate first 2 alphas - movdqu xmm1, [eax] // read 4 pixels - punpcklbw xmm1, xmm1 // first 2 pixel rgbs - pmulhuw xmm0, xmm1 // rgb * a - movdqu xmm1, [eax] // read 4 pixels - pshufb xmm1, xmm5 // isolate next 2 alphas - movdqu xmm2, [eax] // read 4 pixels - punpckhbw xmm2, xmm2 // next 2 pixel rgbs - pmulhuw xmm1, xmm2 // rgb * a - movdqu xmm2, [eax] // mask original alpha - lea eax, [eax + 16] - pand xmm2, xmm3 - psrlw xmm0, 8 - psrlw xmm1, 8 - packuswb xmm0, xmm1 - por xmm0, xmm2 // copy original alpha - sub ecx, 4 - movdqu [edx], xmm0 - lea edx, [edx + 16] - jg convertloop - - ret - } -} -#endif // HAS_ARGBATTENUATEROW_SSSE3 - -#ifdef HAS_ARGBATTENUATEROW_AVX2 -// Shuffle table duplicating alpha. 
-static const ulvec8 kShuffleAlpha_AVX2 = { - 6u, 7u, 6u, 7u, 6u, 7u, 128u, 128u, - 14u, 15u, 14u, 15u, 14u, 15u, 128u, 128u, - 6u, 7u, 6u, 7u, 6u, 7u, 128u, 128u, - 14u, 15u, 14u, 15u, 14u, 15u, 128u, 128u, -}; -__declspec(naked) __declspec(align(16)) -void ARGBAttenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width) { - __asm { - mov eax, [esp + 4] // src_argb0 - mov edx, [esp + 8] // dst_argb - mov ecx, [esp + 12] // width - sub edx, eax - vmovdqa ymm4, kShuffleAlpha_AVX2 - vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0xff000000 - vpslld ymm5, ymm5, 24 - - align 4 - convertloop: - vmovdqu ymm6, [eax] // read 8 pixels. - vpunpcklbw ymm0, ymm6, ymm6 // low 4 pixels. mutated. - vpunpckhbw ymm1, ymm6, ymm6 // high 4 pixels. mutated. - vpshufb ymm2, ymm0, ymm4 // low 4 alphas - vpshufb ymm3, ymm1, ymm4 // high 4 alphas - vpmulhuw ymm0, ymm0, ymm2 // rgb * a - vpmulhuw ymm1, ymm1, ymm3 // rgb * a - vpand ymm6, ymm6, ymm5 // isolate alpha - vpsrlw ymm0, ymm0, 8 - vpsrlw ymm1, ymm1, 8 - vpackuswb ymm0, ymm0, ymm1 // unmutated. - vpor ymm0, ymm0, ymm6 // copy original alpha - sub ecx, 8 - vmovdqu [eax + edx], ymm0 - lea eax, [eax + 32] - jg convertloop - - vzeroupper - ret - } -} -#endif // HAS_ARGBATTENUATEROW_AVX2 - -#ifdef HAS_ARGBUNATTENUATEROW_SSE2 -// Unattenuate 4 pixels at a time. -// Aligned to 16 bytes. -__declspec(naked) __declspec(align(16)) -void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, - int width) { - __asm { - push esi - push edi - mov eax, [esp + 8 + 4] // src_argb0 - mov edx, [esp + 8 + 8] // dst_argb - mov ecx, [esp + 8 + 12] // width - - align 4 - convertloop: - movdqu xmm0, [eax] // read 4 pixels - movzx esi, byte ptr [eax + 3] // first alpha - movzx edi, byte ptr [eax + 7] // second alpha - punpcklbw xmm0, xmm0 // first 2 - movd xmm2, dword ptr fixed_invtbl8[esi * 4] - movd xmm3, dword ptr fixed_invtbl8[edi * 4] - pshuflw xmm2, xmm2, 040h // first 4 inv_alpha words. 
1, a, a, a - pshuflw xmm3, xmm3, 040h // next 4 inv_alpha words - movlhps xmm2, xmm3 - pmulhuw xmm0, xmm2 // rgb * a - - movdqu xmm1, [eax] // read 4 pixels - movzx esi, byte ptr [eax + 11] // third alpha - movzx edi, byte ptr [eax + 15] // forth alpha - punpckhbw xmm1, xmm1 // next 2 - movd xmm2, dword ptr fixed_invtbl8[esi * 4] - movd xmm3, dword ptr fixed_invtbl8[edi * 4] - pshuflw xmm2, xmm2, 040h // first 4 inv_alpha words - pshuflw xmm3, xmm3, 040h // next 4 inv_alpha words - movlhps xmm2, xmm3 - pmulhuw xmm1, xmm2 // rgb * a - lea eax, [eax + 16] - - packuswb xmm0, xmm1 - sub ecx, 4 - movdqu [edx], xmm0 - lea edx, [edx + 16] - jg convertloop - pop edi - pop esi - ret - } -} -#endif // HAS_ARGBUNATTENUATEROW_SSE2 - -#ifdef HAS_ARGBUNATTENUATEROW_AVX2 -// Shuffle table duplicating alpha. -static const ulvec8 kUnattenShuffleAlpha_AVX2 = { - 0u, 1u, 0u, 1u, 0u, 1u, 6u, 7u, 8u, 9u, 8u, 9u, 8u, 9u, 14u, 15, - 0u, 1u, 0u, 1u, 0u, 1u, 6u, 7u, 8u, 9u, 8u, 9u, 8u, 9u, 14u, 15, -}; -// TODO(fbarchard): Enable USE_GATHER for future hardware if faster. -// USE_GATHER is not on by default, due to being a slow instruction. -#ifdef USE_GATHER -__declspec(naked) __declspec(align(16)) -void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, - int width) { - __asm { - mov eax, [esp + 4] // src_argb0 - mov edx, [esp + 8] // dst_argb - mov ecx, [esp + 12] // width - sub edx, eax - vmovdqa ymm4, kUnattenShuffleAlpha_AVX2 - - align 4 - convertloop: - vmovdqu ymm6, [eax] // read 8 pixels. - vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0xffffffff for gather. - vpsrld ymm2, ymm6, 24 // alpha in low 8 bits. - vpunpcklbw ymm0, ymm6, ymm6 // low 4 pixels. mutated. - vpunpckhbw ymm1, ymm6, ymm6 // high 4 pixels. mutated. - vpgatherdd ymm3, [ymm2 * 4 + fixed_invtbl8], ymm5 // ymm5 cleared. 1, a - vpunpcklwd ymm2, ymm3, ymm3 // low 4 inverted alphas. mutated. 1, 1, a, a - vpunpckhwd ymm3, ymm3, ymm3 // high 4 inverted alphas. mutated. 
- vpshufb ymm2, ymm2, ymm4 // replicate low 4 alphas. 1, a, a, a - vpshufb ymm3, ymm3, ymm4 // replicate high 4 alphas - vpmulhuw ymm0, ymm0, ymm2 // rgb * ia - vpmulhuw ymm1, ymm1, ymm3 // rgb * ia - vpackuswb ymm0, ymm0, ymm1 // unmutated. - sub ecx, 8 - vmovdqu [eax + edx], ymm0 - lea eax, [eax + 32] - jg convertloop - - vzeroupper - ret - } -} -#else // USE_GATHER -__declspec(naked) __declspec(align(16)) -void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, - int width) { - __asm { - - mov eax, [esp + 4] // src_argb0 - mov edx, [esp + 8] // dst_argb - mov ecx, [esp + 12] // width - sub edx, eax - vmovdqa ymm5, kUnattenShuffleAlpha_AVX2 - - push esi - push edi - - align 4 - convertloop: - // replace VPGATHER - movzx esi, byte ptr [eax + 3] // alpha0 - movzx edi, byte ptr [eax + 7] // alpha1 - vmovd xmm0, dword ptr fixed_invtbl8[esi * 4] // [1,a0] - vmovd xmm1, dword ptr fixed_invtbl8[edi * 4] // [1,a1] - movzx esi, byte ptr [eax + 11] // alpha2 - movzx edi, byte ptr [eax + 15] // alpha3 - vpunpckldq xmm6, xmm0, xmm1 // [1,a1,1,a0] - vmovd xmm2, dword ptr fixed_invtbl8[esi * 4] // [1,a2] - vmovd xmm3, dword ptr fixed_invtbl8[edi * 4] // [1,a3] - movzx esi, byte ptr [eax + 19] // alpha4 - movzx edi, byte ptr [eax + 23] // alpha5 - vpunpckldq xmm7, xmm2, xmm3 // [1,a3,1,a2] - vmovd xmm0, dword ptr fixed_invtbl8[esi * 4] // [1,a4] - vmovd xmm1, dword ptr fixed_invtbl8[edi * 4] // [1,a5] - movzx esi, byte ptr [eax + 27] // alpha6 - movzx edi, byte ptr [eax + 31] // alpha7 - vpunpckldq xmm0, xmm0, xmm1 // [1,a5,1,a4] - vmovd xmm2, dword ptr fixed_invtbl8[esi * 4] // [1,a6] - vmovd xmm3, dword ptr fixed_invtbl8[edi * 4] // [1,a7] - vpunpckldq xmm2, xmm2, xmm3 // [1,a7,1,a6] - vpunpcklqdq xmm3, xmm6, xmm7 // [1,a3,1,a2,1,a1,1,a0] - vpunpcklqdq xmm0, xmm0, xmm2 // [1,a7,1,a6,1,a5,1,a4] - vinserti128 ymm3, ymm3, xmm0, 1 // [1,a7,1,a6,1,a5,1,a4,1,a3,1,a2,1,a1,1,a0] - // end of VPGATHER - - vmovdqu ymm6, [eax] // read 8 pixels. 
- vpunpcklbw ymm0, ymm6, ymm6 // low 4 pixels. mutated. - vpunpckhbw ymm1, ymm6, ymm6 // high 4 pixels. mutated. - vpunpcklwd ymm2, ymm3, ymm3 // low 4 inverted alphas. mutated. 1, 1, a, a - vpunpckhwd ymm3, ymm3, ymm3 // high 4 inverted alphas. mutated. - vpshufb ymm2, ymm2, ymm5 // replicate low 4 alphas. 1, a, a, a - vpshufb ymm3, ymm3, ymm5 // replicate high 4 alphas - vpmulhuw ymm0, ymm0, ymm2 // rgb * ia - vpmulhuw ymm1, ymm1, ymm3 // rgb * ia - vpackuswb ymm0, ymm0, ymm1 // unmutated. - sub ecx, 8 - vmovdqu [eax + edx], ymm0 - lea eax, [eax + 32] - jg convertloop - - pop edi - pop esi - vzeroupper - ret - } -} -#endif // USE_GATHER -#endif // HAS_ARGBATTENUATEROW_AVX2 - -#ifdef HAS_ARGBGRAYROW_SSSE3 -// Convert 8 ARGB pixels (64 bytes) to 8 Gray ARGB pixels. -__declspec(naked) __declspec(align(16)) -void ARGBGrayRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width) { - __asm { - mov eax, [esp + 4] /* src_argb */ - mov edx, [esp + 8] /* dst_argb */ - mov ecx, [esp + 12] /* width */ - movdqa xmm4, kARGBToYJ - movdqa xmm5, kAddYJ64 - - align 4 - convertloop: - movdqa xmm0, [eax] // G - movdqa xmm1, [eax + 16] - pmaddubsw xmm0, xmm4 - pmaddubsw xmm1, xmm4 - phaddw xmm0, xmm1 - paddw xmm0, xmm5 // Add .5 for rounding. 
- psrlw xmm0, 7 - packuswb xmm0, xmm0 // 8 G bytes - movdqa xmm2, [eax] // A - movdqa xmm3, [eax + 16] - lea eax, [eax + 32] - psrld xmm2, 24 - psrld xmm3, 24 - packuswb xmm2, xmm3 - packuswb xmm2, xmm2 // 8 A bytes - movdqa xmm3, xmm0 // Weave into GG, GA, then GGGA - punpcklbw xmm0, xmm0 // 8 GG words - punpcklbw xmm3, xmm2 // 8 GA words - movdqa xmm1, xmm0 - punpcklwd xmm0, xmm3 // GGGA first 4 - punpckhwd xmm1, xmm3 // GGGA next 4 - sub ecx, 8 - movdqa [edx], xmm0 - movdqa [edx + 16], xmm1 - lea edx, [edx + 32] - jg convertloop - ret - } -} -#endif // HAS_ARGBGRAYROW_SSSE3 - -#ifdef HAS_ARGBSEPIAROW_SSSE3 -// b = (r * 35 + g * 68 + b * 17) >> 7 -// g = (r * 45 + g * 88 + b * 22) >> 7 -// r = (r * 50 + g * 98 + b * 24) >> 7 -// Constant for ARGB color to sepia tone. -static const vec8 kARGBToSepiaB = { - 17, 68, 35, 0, 17, 68, 35, 0, 17, 68, 35, 0, 17, 68, 35, 0 -}; - -static const vec8 kARGBToSepiaG = { - 22, 88, 45, 0, 22, 88, 45, 0, 22, 88, 45, 0, 22, 88, 45, 0 -}; - -static const vec8 kARGBToSepiaR = { - 24, 98, 50, 0, 24, 98, 50, 0, 24, 98, 50, 0, 24, 98, 50, 0 -}; - -// Convert 8 ARGB pixels (32 bytes) to 8 Sepia ARGB pixels. 
-__declspec(naked) __declspec(align(16)) -void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width) { - __asm { - mov eax, [esp + 4] /* dst_argb */ - mov ecx, [esp + 8] /* width */ - movdqa xmm2, kARGBToSepiaB - movdqa xmm3, kARGBToSepiaG - movdqa xmm4, kARGBToSepiaR - - align 4 - convertloop: - movdqa xmm0, [eax] // B - movdqa xmm6, [eax + 16] - pmaddubsw xmm0, xmm2 - pmaddubsw xmm6, xmm2 - phaddw xmm0, xmm6 - psrlw xmm0, 7 - packuswb xmm0, xmm0 // 8 B values - movdqa xmm5, [eax] // G - movdqa xmm1, [eax + 16] - pmaddubsw xmm5, xmm3 - pmaddubsw xmm1, xmm3 - phaddw xmm5, xmm1 - psrlw xmm5, 7 - packuswb xmm5, xmm5 // 8 G values - punpcklbw xmm0, xmm5 // 8 BG values - movdqa xmm5, [eax] // R - movdqa xmm1, [eax + 16] - pmaddubsw xmm5, xmm4 - pmaddubsw xmm1, xmm4 - phaddw xmm5, xmm1 - psrlw xmm5, 7 - packuswb xmm5, xmm5 // 8 R values - movdqa xmm6, [eax] // A - movdqa xmm1, [eax + 16] - psrld xmm6, 24 - psrld xmm1, 24 - packuswb xmm6, xmm1 - packuswb xmm6, xmm6 // 8 A values - punpcklbw xmm5, xmm6 // 8 RA values - movdqa xmm1, xmm0 // Weave BG, RA together - punpcklwd xmm0, xmm5 // BGRA first 4 - punpckhwd xmm1, xmm5 // BGRA next 4 - sub ecx, 8 - movdqa [eax], xmm0 - movdqa [eax + 16], xmm1 - lea eax, [eax + 32] - jg convertloop - ret - } -} -#endif // HAS_ARGBSEPIAROW_SSSE3 - -#ifdef HAS_ARGBCOLORMATRIXROW_SSSE3 -// Tranform 8 ARGB pixels (32 bytes) with color matrix. -// Same as Sepia except matrix is provided. -// TODO(fbarchard): packuswbs only use half of the reg. To make RGBA, combine R -// and B into a high and low, then G/A, unpackl/hbw and then unpckl/hwd. 
-__declspec(naked) __declspec(align(16)) -void ARGBColorMatrixRow_SSSE3(const uint8* src_argb, uint8* dst_argb, - const int8* matrix_argb, int width) { - __asm { - mov eax, [esp + 4] /* src_argb */ - mov edx, [esp + 8] /* dst_argb */ - mov ecx, [esp + 12] /* matrix_argb */ - movdqu xmm5, [ecx] - pshufd xmm2, xmm5, 0x00 - pshufd xmm3, xmm5, 0x55 - pshufd xmm4, xmm5, 0xaa - pshufd xmm5, xmm5, 0xff - mov ecx, [esp + 16] /* width */ - - align 4 - convertloop: - movdqa xmm0, [eax] // B - movdqa xmm7, [eax + 16] - pmaddubsw xmm0, xmm2 - pmaddubsw xmm7, xmm2 - movdqa xmm6, [eax] // G - movdqa xmm1, [eax + 16] - pmaddubsw xmm6, xmm3 - pmaddubsw xmm1, xmm3 - phaddsw xmm0, xmm7 // B - phaddsw xmm6, xmm1 // G - psraw xmm0, 6 // B - psraw xmm6, 6 // G - packuswb xmm0, xmm0 // 8 B values - packuswb xmm6, xmm6 // 8 G values - punpcklbw xmm0, xmm6 // 8 BG values - movdqa xmm1, [eax] // R - movdqa xmm7, [eax + 16] - pmaddubsw xmm1, xmm4 - pmaddubsw xmm7, xmm4 - phaddsw xmm1, xmm7 // R - movdqa xmm6, [eax] // A - movdqa xmm7, [eax + 16] - pmaddubsw xmm6, xmm5 - pmaddubsw xmm7, xmm5 - phaddsw xmm6, xmm7 // A - psraw xmm1, 6 // R - psraw xmm6, 6 // A - packuswb xmm1, xmm1 // 8 R values - packuswb xmm6, xmm6 // 8 A values - punpcklbw xmm1, xmm6 // 8 RA values - movdqa xmm6, xmm0 // Weave BG, RA together - punpcklwd xmm0, xmm1 // BGRA first 4 - punpckhwd xmm6, xmm1 // BGRA next 4 - sub ecx, 8 - movdqa [edx], xmm0 - movdqa [edx + 16], xmm6 - lea eax, [eax + 32] - lea edx, [edx + 32] - jg convertloop - ret - } -} -#endif // HAS_ARGBCOLORMATRIXROW_SSSE3 - -#ifdef HAS_ARGBQUANTIZEROW_SSE2 -// Quantize 4 ARGB pixels (16 bytes). -// Aligned to 16 bytes. 
-__declspec(naked) __declspec(align(16)) -void ARGBQuantizeRow_SSE2(uint8* dst_argb, int scale, int interval_size, - int interval_offset, int width) { - __asm { - mov eax, [esp + 4] /* dst_argb */ - movd xmm2, [esp + 8] /* scale */ - movd xmm3, [esp + 12] /* interval_size */ - movd xmm4, [esp + 16] /* interval_offset */ - mov ecx, [esp + 20] /* width */ - pshuflw xmm2, xmm2, 040h - pshufd xmm2, xmm2, 044h - pshuflw xmm3, xmm3, 040h - pshufd xmm3, xmm3, 044h - pshuflw xmm4, xmm4, 040h - pshufd xmm4, xmm4, 044h - pxor xmm5, xmm5 // constant 0 - pcmpeqb xmm6, xmm6 // generate mask 0xff000000 - pslld xmm6, 24 - - align 4 - convertloop: - movdqa xmm0, [eax] // read 4 pixels - punpcklbw xmm0, xmm5 // first 2 pixels - pmulhuw xmm0, xmm2 // pixel * scale >> 16 - movdqa xmm1, [eax] // read 4 pixels - punpckhbw xmm1, xmm5 // next 2 pixels - pmulhuw xmm1, xmm2 - pmullw xmm0, xmm3 // * interval_size - movdqa xmm7, [eax] // read 4 pixels - pmullw xmm1, xmm3 - pand xmm7, xmm6 // mask alpha - paddw xmm0, xmm4 // + interval_size / 2 - paddw xmm1, xmm4 - packuswb xmm0, xmm1 - por xmm0, xmm7 - sub ecx, 4 - movdqa [eax], xmm0 - lea eax, [eax + 16] - jg convertloop - ret - } -} -#endif // HAS_ARGBQUANTIZEROW_SSE2 - -#ifdef HAS_ARGBSHADEROW_SSE2 -// Shade 4 pixels at a time by specified value. -// Aligned to 16 bytes. 
-__declspec(naked) __declspec(align(16)) -void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width, - uint32 value) { - __asm { - mov eax, [esp + 4] // src_argb - mov edx, [esp + 8] // dst_argb - mov ecx, [esp + 12] // width - movd xmm2, [esp + 16] // value - punpcklbw xmm2, xmm2 - punpcklqdq xmm2, xmm2 - - align 4 - convertloop: - movdqa xmm0, [eax] // read 4 pixels - lea eax, [eax + 16] - movdqa xmm1, xmm0 - punpcklbw xmm0, xmm0 // first 2 - punpckhbw xmm1, xmm1 // next 2 - pmulhuw xmm0, xmm2 // argb * value - pmulhuw xmm1, xmm2 // argb * value - psrlw xmm0, 8 - psrlw xmm1, 8 - packuswb xmm0, xmm1 - sub ecx, 4 - movdqa [edx], xmm0 - lea edx, [edx + 16] - jg convertloop - - ret - } -} -#endif // HAS_ARGBSHADEROW_SSE2 - -#ifdef HAS_ARGBMULTIPLYROW_SSE2 -// Multiply 2 rows of ARGB pixels together, 4 pixels at a time. -__declspec(naked) __declspec(align(16)) -void ARGBMultiplyRow_SSE2(const uint8* src_argb0, const uint8* src_argb1, - uint8* dst_argb, int width) { - __asm { - push esi - mov eax, [esp + 4 + 4] // src_argb0 - mov esi, [esp + 4 + 8] // src_argb1 - mov edx, [esp + 4 + 12] // dst_argb - mov ecx, [esp + 4 + 16] // width - pxor xmm5, xmm5 // constant 0 - - align 4 - convertloop: - movdqu xmm0, [eax] // read 4 pixels from src_argb0 - movdqu xmm2, [esi] // read 4 pixels from src_argb1 - movdqu xmm1, xmm0 - movdqu xmm3, xmm2 - punpcklbw xmm0, xmm0 // first 2 - punpckhbw xmm1, xmm1 // next 2 - punpcklbw xmm2, xmm5 // first 2 - punpckhbw xmm3, xmm5 // next 2 - pmulhuw xmm0, xmm2 // src_argb0 * src_argb1 first 2 - pmulhuw xmm1, xmm3 // src_argb0 * src_argb1 next 2 - lea eax, [eax + 16] - lea esi, [esi + 16] - packuswb xmm0, xmm1 - sub ecx, 4 - movdqu [edx], xmm0 - lea edx, [edx + 16] - jg convertloop - - pop esi - ret - } -} -#endif // HAS_ARGBMULTIPLYROW_SSE2 - -#ifdef HAS_ARGBADDROW_SSE2 -// Add 2 rows of ARGB pixels together, 4 pixels at a time. -// TODO(fbarchard): Port this to posix, neon and other math functions. 
-__declspec(naked) __declspec(align(16)) -void ARGBAddRow_SSE2(const uint8* src_argb0, const uint8* src_argb1, - uint8* dst_argb, int width) { - __asm { - push esi - mov eax, [esp + 4 + 4] // src_argb0 - mov esi, [esp + 4 + 8] // src_argb1 - mov edx, [esp + 4 + 12] // dst_argb - mov ecx, [esp + 4 + 16] // width - - sub ecx, 4 - jl convertloop49 - - align 4 - convertloop4: - movdqu xmm0, [eax] // read 4 pixels from src_argb0 - lea eax, [eax + 16] - movdqu xmm1, [esi] // read 4 pixels from src_argb1 - lea esi, [esi + 16] - paddusb xmm0, xmm1 // src_argb0 + src_argb1 - sub ecx, 4 - movdqu [edx], xmm0 - lea edx, [edx + 16] - jge convertloop4 - - convertloop49: - add ecx, 4 - 1 - jl convertloop19 - - convertloop1: - movd xmm0, [eax] // read 1 pixels from src_argb0 - lea eax, [eax + 4] - movd xmm1, [esi] // read 1 pixels from src_argb1 - lea esi, [esi + 4] - paddusb xmm0, xmm1 // src_argb0 + src_argb1 - sub ecx, 1 - movd [edx], xmm0 - lea edx, [edx + 4] - jge convertloop1 - - convertloop19: - pop esi - ret - } -} -#endif // HAS_ARGBADDROW_SSE2 - -#ifdef HAS_ARGBSUBTRACTROW_SSE2 -// Subtract 2 rows of ARGB pixels together, 4 pixels at a time. -__declspec(naked) __declspec(align(16)) -void ARGBSubtractRow_SSE2(const uint8* src_argb0, const uint8* src_argb1, - uint8* dst_argb, int width) { - __asm { - push esi - mov eax, [esp + 4 + 4] // src_argb0 - mov esi, [esp + 4 + 8] // src_argb1 - mov edx, [esp + 4 + 12] // dst_argb - mov ecx, [esp + 4 + 16] // width - - align 4 - convertloop: - movdqu xmm0, [eax] // read 4 pixels from src_argb0 - lea eax, [eax + 16] - movdqu xmm1, [esi] // read 4 pixels from src_argb1 - lea esi, [esi + 16] - psubusb xmm0, xmm1 // src_argb0 - src_argb1 - sub ecx, 4 - movdqu [edx], xmm0 - lea edx, [edx + 16] - jg convertloop - - pop esi - ret - } -} -#endif // HAS_ARGBSUBTRACTROW_SSE2 - -#ifdef HAS_ARGBMULTIPLYROW_AVX2 -// Multiply 2 rows of ARGB pixels together, 8 pixels at a time. 
-__declspec(naked) __declspec(align(16)) -void ARGBMultiplyRow_AVX2(const uint8* src_argb0, const uint8* src_argb1, - uint8* dst_argb, int width) { - __asm { - push esi - mov eax, [esp + 4 + 4] // src_argb0 - mov esi, [esp + 4 + 8] // src_argb1 - mov edx, [esp + 4 + 12] // dst_argb - mov ecx, [esp + 4 + 16] // width - vpxor ymm5, ymm5, ymm5 // constant 0 - - align 4 - convertloop: - vmovdqu ymm1, [eax] // read 8 pixels from src_argb0 - lea eax, [eax + 32] - vmovdqu ymm3, [esi] // read 8 pixels from src_argb1 - lea esi, [esi + 32] - vpunpcklbw ymm0, ymm1, ymm1 // low 4 - vpunpckhbw ymm1, ymm1, ymm1 // high 4 - vpunpcklbw ymm2, ymm3, ymm5 // low 4 - vpunpckhbw ymm3, ymm3, ymm5 // high 4 - vpmulhuw ymm0, ymm0, ymm2 // src_argb0 * src_argb1 low 4 - vpmulhuw ymm1, ymm1, ymm3 // src_argb0 * src_argb1 high 4 - vpackuswb ymm0, ymm0, ymm1 - vmovdqu [edx], ymm0 - lea edx, [edx + 32] - sub ecx, 8 - jg convertloop - - pop esi - vzeroupper - ret - } -} -#endif // HAS_ARGBMULTIPLYROW_AVX2 - -#ifdef HAS_ARGBADDROW_AVX2 -// Add 2 rows of ARGB pixels together, 8 pixels at a time. -__declspec(naked) __declspec(align(16)) -void ARGBAddRow_AVX2(const uint8* src_argb0, const uint8* src_argb1, - uint8* dst_argb, int width) { - __asm { - push esi - mov eax, [esp + 4 + 4] // src_argb0 - mov esi, [esp + 4 + 8] // src_argb1 - mov edx, [esp + 4 + 12] // dst_argb - mov ecx, [esp + 4 + 16] // width - - align 4 - convertloop: - vmovdqu ymm0, [eax] // read 8 pixels from src_argb0 - lea eax, [eax + 32] - vpaddusb ymm0, ymm0, [esi] // add 8 pixels from src_argb1 - lea esi, [esi + 32] - vmovdqu [edx], ymm0 - lea edx, [edx + 32] - sub ecx, 8 - jg convertloop - - pop esi - vzeroupper - ret - } -} -#endif // HAS_ARGBADDROW_AVX2 - -#ifdef HAS_ARGBSUBTRACTROW_AVX2 -// Subtract 2 rows of ARGB pixels together, 8 pixels at a time. 
-__declspec(naked) __declspec(align(16)) -void ARGBSubtractRow_AVX2(const uint8* src_argb0, const uint8* src_argb1, - uint8* dst_argb, int width) { - __asm { - push esi - mov eax, [esp + 4 + 4] // src_argb0 - mov esi, [esp + 4 + 8] // src_argb1 - mov edx, [esp + 4 + 12] // dst_argb - mov ecx, [esp + 4 + 16] // width - - align 4 - convertloop: - vmovdqu ymm0, [eax] // read 8 pixels from src_argb0 - lea eax, [eax + 32] - vpsubusb ymm0, ymm0, [esi] // src_argb0 - src_argb1 - lea esi, [esi + 32] - vmovdqu [edx], ymm0 - lea edx, [edx + 32] - sub ecx, 8 - jg convertloop - - pop esi - vzeroupper - ret - } -} -#endif // HAS_ARGBSUBTRACTROW_AVX2 - -#ifdef HAS_SOBELXROW_SSE2 -// SobelX as a matrix is -// -1 0 1 -// -2 0 2 -// -1 0 1 -__declspec(naked) __declspec(align(16)) -void SobelXRow_SSE2(const uint8* src_y0, const uint8* src_y1, - const uint8* src_y2, uint8* dst_sobelx, int width) { - __asm { - push esi - push edi - mov eax, [esp + 8 + 4] // src_y0 - mov esi, [esp + 8 + 8] // src_y1 - mov edi, [esp + 8 + 12] // src_y2 - mov edx, [esp + 8 + 16] // dst_sobelx - mov ecx, [esp + 8 + 20] // width - sub esi, eax - sub edi, eax - sub edx, eax - pxor xmm5, xmm5 // constant 0 - - align 4 - convertloop: - movq xmm0, qword ptr [eax] // read 8 pixels from src_y0[0] - movq xmm1, qword ptr [eax + 2] // read 8 pixels from src_y0[2] - punpcklbw xmm0, xmm5 - punpcklbw xmm1, xmm5 - psubw xmm0, xmm1 - movq xmm1, qword ptr [eax + esi] // read 8 pixels from src_y1[0] - movq xmm2, qword ptr [eax + esi + 2] // read 8 pixels from src_y1[2] - punpcklbw xmm1, xmm5 - punpcklbw xmm2, xmm5 - psubw xmm1, xmm2 - movq xmm2, qword ptr [eax + edi] // read 8 pixels from src_y2[0] - movq xmm3, qword ptr [eax + edi + 2] // read 8 pixels from src_y2[2] - punpcklbw xmm2, xmm5 - punpcklbw xmm3, xmm5 - psubw xmm2, xmm3 - paddw xmm0, xmm2 - paddw xmm0, xmm1 - paddw xmm0, xmm1 - pxor xmm1, xmm1 // abs = max(xmm0, -xmm0). 
SSSE3 could use pabsw - psubw xmm1, xmm0 - pmaxsw xmm0, xmm1 - packuswb xmm0, xmm0 - sub ecx, 8 - movq qword ptr [eax + edx], xmm0 - lea eax, [eax + 8] - jg convertloop - - pop edi - pop esi - ret - } -} -#endif // HAS_SOBELXROW_SSE2 - -#ifdef HAS_SOBELYROW_SSE2 -// SobelY as a matrix is -// -1 -2 -1 -// 0 0 0 -// 1 2 1 -__declspec(naked) __declspec(align(16)) -void SobelYRow_SSE2(const uint8* src_y0, const uint8* src_y1, - uint8* dst_sobely, int width) { - __asm { - push esi - mov eax, [esp + 4 + 4] // src_y0 - mov esi, [esp + 4 + 8] // src_y1 - mov edx, [esp + 4 + 12] // dst_sobely - mov ecx, [esp + 4 + 16] // width - sub esi, eax - sub edx, eax - pxor xmm5, xmm5 // constant 0 - - align 4 - convertloop: - movq xmm0, qword ptr [eax] // read 8 pixels from src_y0[0] - movq xmm1, qword ptr [eax + esi] // read 8 pixels from src_y1[0] - punpcklbw xmm0, xmm5 - punpcklbw xmm1, xmm5 - psubw xmm0, xmm1 - movq xmm1, qword ptr [eax + 1] // read 8 pixels from src_y0[1] - movq xmm2, qword ptr [eax + esi + 1] // read 8 pixels from src_y1[1] - punpcklbw xmm1, xmm5 - punpcklbw xmm2, xmm5 - psubw xmm1, xmm2 - movq xmm2, qword ptr [eax + 2] // read 8 pixels from src_y0[2] - movq xmm3, qword ptr [eax + esi + 2] // read 8 pixels from src_y1[2] - punpcklbw xmm2, xmm5 - punpcklbw xmm3, xmm5 - psubw xmm2, xmm3 - paddw xmm0, xmm2 - paddw xmm0, xmm1 - paddw xmm0, xmm1 - pxor xmm1, xmm1 // abs = max(xmm0, -xmm0). SSSE3 could use pabsw - psubw xmm1, xmm0 - pmaxsw xmm0, xmm1 - packuswb xmm0, xmm0 - sub ecx, 8 - movq qword ptr [eax + edx], xmm0 - lea eax, [eax + 8] - jg convertloop - - pop esi - ret - } -} -#endif // HAS_SOBELYROW_SSE2 - -#ifdef HAS_SOBELROW_SSE2 -// Adds Sobel X and Sobel Y and stores Sobel into ARGB. 
-// A = 255 -// R = Sobel -// G = Sobel -// B = Sobel -__declspec(naked) __declspec(align(16)) -void SobelRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_argb, int width) { - __asm { - push esi - mov eax, [esp + 4 + 4] // src_sobelx - mov esi, [esp + 4 + 8] // src_sobely - mov edx, [esp + 4 + 12] // dst_argb - mov ecx, [esp + 4 + 16] // width - sub esi, eax - pcmpeqb xmm5, xmm5 // alpha 255 - pslld xmm5, 24 // 0xff000000 - - align 4 - convertloop: - movdqa xmm0, [eax] // read 16 pixels src_sobelx - movdqa xmm1, [eax + esi] // read 16 pixels src_sobely - lea eax, [eax + 16] - paddusb xmm0, xmm1 // sobel = sobelx + sobely - movdqa xmm2, xmm0 // GG - punpcklbw xmm2, xmm0 // First 8 - punpckhbw xmm0, xmm0 // Next 8 - movdqa xmm1, xmm2 // GGGG - punpcklwd xmm1, xmm2 // First 4 - punpckhwd xmm2, xmm2 // Next 4 - por xmm1, xmm5 // GGGA - por xmm2, xmm5 - movdqa xmm3, xmm0 // GGGG - punpcklwd xmm3, xmm0 // Next 4 - punpckhwd xmm0, xmm0 // Last 4 - por xmm3, xmm5 // GGGA - por xmm0, xmm5 - sub ecx, 16 - movdqa [edx], xmm1 - movdqa [edx + 16], xmm2 - movdqa [edx + 32], xmm3 - movdqa [edx + 48], xmm0 - lea edx, [edx + 64] - jg convertloop - - pop esi - ret - } -} -#endif // HAS_SOBELROW_SSE2 - -#ifdef HAS_SOBELTOPLANEROW_SSE2 -// Adds Sobel X and Sobel Y and stores Sobel into a plane. 
-__declspec(naked) __declspec(align(16)) -void SobelToPlaneRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_y, int width) { - __asm { - push esi - mov eax, [esp + 4 + 4] // src_sobelx - mov esi, [esp + 4 + 8] // src_sobely - mov edx, [esp + 4 + 12] // dst_argb - mov ecx, [esp + 4 + 16] // width - sub esi, eax - - align 4 - convertloop: - movdqa xmm0, [eax] // read 16 pixels src_sobelx - movdqa xmm1, [eax + esi] // read 16 pixels src_sobely - lea eax, [eax + 16] - paddusb xmm0, xmm1 // sobel = sobelx + sobely - sub ecx, 16 - movdqa [edx], xmm0 - lea edx, [edx + 16] - jg convertloop - - pop esi - ret - } -} -#endif // HAS_SOBELTOPLANEROW_SSE2 - -#ifdef HAS_SOBELXYROW_SSE2 -// Mixes Sobel X, Sobel Y and Sobel into ARGB. -// A = 255 -// R = Sobel X -// G = Sobel -// B = Sobel Y -__declspec(naked) __declspec(align(16)) -void SobelXYRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely, - uint8* dst_argb, int width) { - __asm { - push esi - mov eax, [esp + 4 + 4] // src_sobelx - mov esi, [esp + 4 + 8] // src_sobely - mov edx, [esp + 4 + 12] // dst_argb - mov ecx, [esp + 4 + 16] // width - sub esi, eax - pcmpeqb xmm5, xmm5 // alpha 255 - - align 4 - convertloop: - movdqa xmm0, [eax] // read 16 pixels src_sobelx - movdqa xmm1, [eax + esi] // read 16 pixels src_sobely - lea eax, [eax + 16] - movdqa xmm2, xmm0 - paddusb xmm2, xmm1 // sobel = sobelx + sobely - movdqa xmm3, xmm0 // XA - punpcklbw xmm3, xmm5 - punpckhbw xmm0, xmm5 - movdqa xmm4, xmm1 // YS - punpcklbw xmm4, xmm2 - punpckhbw xmm1, xmm2 - movdqa xmm6, xmm4 // YSXA - punpcklwd xmm6, xmm3 // First 4 - punpckhwd xmm4, xmm3 // Next 4 - movdqa xmm7, xmm1 // YSXA - punpcklwd xmm7, xmm0 // Next 4 - punpckhwd xmm1, xmm0 // Last 4 - sub ecx, 16 - movdqa [edx], xmm6 - movdqa [edx + 16], xmm4 - movdqa [edx + 32], xmm7 - movdqa [edx + 48], xmm1 - lea edx, [edx + 64] - jg convertloop - - pop esi - ret - } -} -#endif // HAS_SOBELXYROW_SSE2 - -#ifdef HAS_CUMULATIVESUMTOAVERAGEROW_SSE2 -// Consider 
float CumulativeSum. -// Consider calling CumulativeSum one row at time as needed. -// Consider circular CumulativeSum buffer of radius * 2 + 1 height. -// Convert cumulative sum for an area to an average for 1 pixel. -// topleft is pointer to top left of CumulativeSum buffer for area. -// botleft is pointer to bottom left of CumulativeSum buffer. -// width is offset from left to right of area in CumulativeSum buffer measured -// in number of ints. -// area is the number of pixels in the area being averaged. -// dst points to pixel to store result to. -// count is number of averaged pixels to produce. -// Does 4 pixels at a time, requires CumulativeSum pointers to be 16 byte -// aligned. -void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft, - int width, int area, uint8* dst, - int count) { - __asm { - mov eax, topleft // eax topleft - mov esi, botleft // esi botleft - mov edx, width - movd xmm5, area - mov edi, dst - mov ecx, count - cvtdq2ps xmm5, xmm5 - rcpss xmm4, xmm5 // 1.0f / area - pshufd xmm4, xmm4, 0 - sub ecx, 4 - jl l4b - - cmp area, 128 // 128 pixels will not overflow 15 bits. - ja l4 - - pshufd xmm5, xmm5, 0 // area - pcmpeqb xmm6, xmm6 // constant of 65536.0 - 1 = 65535.0 - psrld xmm6, 16 - cvtdq2ps xmm6, xmm6 - addps xmm5, xmm6 // (65536.0 + area - 1) - mulps xmm5, xmm4 // (65536.0 + area - 1) * 1 / area - cvtps2dq xmm5, xmm5 // 0.16 fixed point - packssdw xmm5, xmm5 // 16 bit shorts - - // 4 pixel loop small blocks. 
- align 4 - s4: - // top left - movdqa xmm0, [eax] - movdqa xmm1, [eax + 16] - movdqa xmm2, [eax + 32] - movdqa xmm3, [eax + 48] - - // - top right - psubd xmm0, [eax + edx * 4] - psubd xmm1, [eax + edx * 4 + 16] - psubd xmm2, [eax + edx * 4 + 32] - psubd xmm3, [eax + edx * 4 + 48] - lea eax, [eax + 64] - - // - bottom left - psubd xmm0, [esi] - psubd xmm1, [esi + 16] - psubd xmm2, [esi + 32] - psubd xmm3, [esi + 48] - - // + bottom right - paddd xmm0, [esi + edx * 4] - paddd xmm1, [esi + edx * 4 + 16] - paddd xmm2, [esi + edx * 4 + 32] - paddd xmm3, [esi + edx * 4 + 48] - lea esi, [esi + 64] - - packssdw xmm0, xmm1 // pack 4 pixels into 2 registers - packssdw xmm2, xmm3 - - pmulhuw xmm0, xmm5 - pmulhuw xmm2, xmm5 - - packuswb xmm0, xmm2 - movdqu [edi], xmm0 - lea edi, [edi + 16] - sub ecx, 4 - jge s4 - - jmp l4b - - // 4 pixel loop - align 4 - l4: - // top left - movdqa xmm0, [eax] - movdqa xmm1, [eax + 16] - movdqa xmm2, [eax + 32] - movdqa xmm3, [eax + 48] - - // - top right - psubd xmm0, [eax + edx * 4] - psubd xmm1, [eax + edx * 4 + 16] - psubd xmm2, [eax + edx * 4 + 32] - psubd xmm3, [eax + edx * 4 + 48] - lea eax, [eax + 64] - - // - bottom left - psubd xmm0, [esi] - psubd xmm1, [esi + 16] - psubd xmm2, [esi + 32] - psubd xmm3, [esi + 48] - - // + bottom right - paddd xmm0, [esi + edx * 4] - paddd xmm1, [esi + edx * 4 + 16] - paddd xmm2, [esi + edx * 4 + 32] - paddd xmm3, [esi + edx * 4 + 48] - lea esi, [esi + 64] - - cvtdq2ps xmm0, xmm0 // Average = Sum * 1 / Area - cvtdq2ps xmm1, xmm1 - mulps xmm0, xmm4 - mulps xmm1, xmm4 - cvtdq2ps xmm2, xmm2 - cvtdq2ps xmm3, xmm3 - mulps xmm2, xmm4 - mulps xmm3, xmm4 - cvtps2dq xmm0, xmm0 - cvtps2dq xmm1, xmm1 - cvtps2dq xmm2, xmm2 - cvtps2dq xmm3, xmm3 - packssdw xmm0, xmm1 - packssdw xmm2, xmm3 - packuswb xmm0, xmm2 - movdqu [edi], xmm0 - lea edi, [edi + 16] - sub ecx, 4 - jge l4 - - l4b: - add ecx, 4 - 1 - jl l1b - - // 1 pixel loop - align 4 - l1: - movdqa xmm0, [eax] - psubd xmm0, [eax + edx * 4] - lea eax, [eax + 
16] - psubd xmm0, [esi] - paddd xmm0, [esi + edx * 4] - lea esi, [esi + 16] - cvtdq2ps xmm0, xmm0 - mulps xmm0, xmm4 - cvtps2dq xmm0, xmm0 - packssdw xmm0, xmm0 - packuswb xmm0, xmm0 - movd dword ptr [edi], xmm0 - lea edi, [edi + 4] - sub ecx, 1 - jge l1 - l1b: - } -} -#endif // HAS_CUMULATIVESUMTOAVERAGEROW_SSE2 - -#ifdef HAS_COMPUTECUMULATIVESUMROW_SSE2 -// Creates a table of cumulative sums where each value is a sum of all values -// above and to the left of the value. -void ComputeCumulativeSumRow_SSE2(const uint8* row, int32* cumsum, - const int32* previous_cumsum, int width) { - __asm { - mov eax, row - mov edx, cumsum - mov esi, previous_cumsum - mov ecx, width - pxor xmm0, xmm0 - pxor xmm1, xmm1 - - sub ecx, 4 - jl l4b - test edx, 15 - jne l4b - - // 4 pixel loop - align 4 - l4: - movdqu xmm2, [eax] // 4 argb pixels 16 bytes. - lea eax, [eax + 16] - movdqa xmm4, xmm2 - - punpcklbw xmm2, xmm1 - movdqa xmm3, xmm2 - punpcklwd xmm2, xmm1 - punpckhwd xmm3, xmm1 - - punpckhbw xmm4, xmm1 - movdqa xmm5, xmm4 - punpcklwd xmm4, xmm1 - punpckhwd xmm5, xmm1 - - paddd xmm0, xmm2 - movdqa xmm2, [esi] // previous row above. - paddd xmm2, xmm0 - - paddd xmm0, xmm3 - movdqa xmm3, [esi + 16] - paddd xmm3, xmm0 - - paddd xmm0, xmm4 - movdqa xmm4, [esi + 32] - paddd xmm4, xmm0 - - paddd xmm0, xmm5 - movdqa xmm5, [esi + 48] - lea esi, [esi + 64] - paddd xmm5, xmm0 - - movdqa [edx], xmm2 - movdqa [edx + 16], xmm3 - movdqa [edx + 32], xmm4 - movdqa [edx + 48], xmm5 - - lea edx, [edx + 64] - sub ecx, 4 - jge l4 - - l4b: - add ecx, 4 - 1 - jl l1b - - // 1 pixel loop - align 4 - l1: - movd xmm2, dword ptr [eax] // 1 argb pixel 4 bytes. 
- lea eax, [eax + 4] - punpcklbw xmm2, xmm1 - punpcklwd xmm2, xmm1 - paddd xmm0, xmm2 - movdqu xmm2, [esi] - lea esi, [esi + 16] - paddd xmm2, xmm0 - movdqu [edx], xmm2 - lea edx, [edx + 16] - sub ecx, 1 - jge l1 - - l1b: - } -} -#endif // HAS_COMPUTECUMULATIVESUMROW_SSE2 - -#ifdef HAS_ARGBAFFINEROW_SSE2 -// Copy ARGB pixels from source image with slope to a row of destination. -__declspec(naked) __declspec(align(16)) -LIBYUV_API -void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride, - uint8* dst_argb, const float* uv_dudv, int width) { - __asm { - push esi - push edi - mov eax, [esp + 12] // src_argb - mov esi, [esp + 16] // stride - mov edx, [esp + 20] // dst_argb - mov ecx, [esp + 24] // pointer to uv_dudv - movq xmm2, qword ptr [ecx] // uv - movq xmm7, qword ptr [ecx + 8] // dudv - mov ecx, [esp + 28] // width - shl esi, 16 // 4, stride - add esi, 4 - movd xmm5, esi - sub ecx, 4 - jl l4b - - // setup for 4 pixel loop - pshufd xmm7, xmm7, 0x44 // dup dudv - pshufd xmm5, xmm5, 0 // dup 4, stride - movdqa xmm0, xmm2 // x0, y0, x1, y1 - addps xmm0, xmm7 - movlhps xmm2, xmm0 - movdqa xmm4, xmm7 - addps xmm4, xmm4 // dudv *= 2 - movdqa xmm3, xmm2 // x2, y2, x3, y3 - addps xmm3, xmm4 - addps xmm4, xmm4 // dudv *= 4 - - // 4 pixel loop - align 4 - l4: - cvttps2dq xmm0, xmm2 // x, y float to int first 2 - cvttps2dq xmm1, xmm3 // x, y float to int next 2 - packssdw xmm0, xmm1 // x, y as 8 shorts - pmaddwd xmm0, xmm5 // offsets = x * 4 + y * stride. 
- movd esi, xmm0 - pshufd xmm0, xmm0, 0x39 // shift right - movd edi, xmm0 - pshufd xmm0, xmm0, 0x39 // shift right - movd xmm1, [eax + esi] // read pixel 0 - movd xmm6, [eax + edi] // read pixel 1 - punpckldq xmm1, xmm6 // combine pixel 0 and 1 - addps xmm2, xmm4 // x, y += dx, dy first 2 - movq qword ptr [edx], xmm1 - movd esi, xmm0 - pshufd xmm0, xmm0, 0x39 // shift right - movd edi, xmm0 - movd xmm6, [eax + esi] // read pixel 2 - movd xmm0, [eax + edi] // read pixel 3 - punpckldq xmm6, xmm0 // combine pixel 2 and 3 - addps xmm3, xmm4 // x, y += dx, dy next 2 - sub ecx, 4 - movq qword ptr 8[edx], xmm6 - lea edx, [edx + 16] - jge l4 - - l4b: - add ecx, 4 - 1 - jl l1b - - // 1 pixel loop - align 4 - l1: - cvttps2dq xmm0, xmm2 // x, y float to int - packssdw xmm0, xmm0 // x, y as shorts - pmaddwd xmm0, xmm5 // offset = x * 4 + y * stride - addps xmm2, xmm7 // x, y += dx, dy - movd esi, xmm0 - movd xmm0, [eax + esi] // copy a pixel - sub ecx, 1 - movd [edx], xmm0 - lea edx, [edx + 4] - jge l1 - l1b: - pop edi - pop esi - ret - } -} -#endif // HAS_ARGBAFFINEROW_SSE2 - -#ifdef HAS_INTERPOLATEROW_AVX2 -// Bilinear filter 16x2 -> 16x1 -__declspec(naked) __declspec(align(16)) -void InterpolateRow_AVX2(uint8* dst_ptr, const uint8* src_ptr, - ptrdiff_t src_stride, int dst_width, - int source_y_fraction) { - __asm { - push esi - push edi - mov edi, [esp + 8 + 4] // dst_ptr - mov esi, [esp + 8 + 8] // src_ptr - mov edx, [esp + 8 + 12] // src_stride - mov ecx, [esp + 8 + 16] // dst_width - mov eax, [esp + 8 + 20] // source_y_fraction (0..255) - shr eax, 1 - // Dispatch to specialized filters if applicable. - cmp eax, 0 - je xloop100 // 0 / 128. Blend 100 / 0. - sub edi, esi - cmp eax, 32 - je xloop75 // 32 / 128 is 0.25. Blend 75 / 25. - cmp eax, 64 - je xloop50 // 64 / 128 is 0.50. Blend 50 / 50. - cmp eax, 96 - je xloop25 // 96 / 128 is 0.75. Blend 25 / 75. 
- - vmovd xmm0, eax // high fraction 0..127 - neg eax - add eax, 128 - vmovd xmm5, eax // low fraction 128..1 - vpunpcklbw xmm5, xmm5, xmm0 - vpunpcklwd xmm5, xmm5, xmm5 - vpxor ymm0, ymm0, ymm0 - vpermd ymm5, ymm0, ymm5 - - align 4 - xloop: - vmovdqu ymm0, [esi] - vmovdqu ymm2, [esi + edx] - vpunpckhbw ymm1, ymm0, ymm2 // mutates - vpunpcklbw ymm0, ymm0, ymm2 // mutates - vpmaddubsw ymm0, ymm0, ymm5 - vpmaddubsw ymm1, ymm1, ymm5 - vpsrlw ymm0, ymm0, 7 - vpsrlw ymm1, ymm1, 7 - vpackuswb ymm0, ymm0, ymm1 // unmutates - sub ecx, 32 - vmovdqu [esi + edi], ymm0 - lea esi, [esi + 32] - jg xloop - jmp xloop99 - - // Blend 25 / 75. - align 4 - xloop25: - vmovdqu ymm0, [esi] - vpavgb ymm0, ymm0, [esi + edx] - vpavgb ymm0, ymm0, [esi + edx] - sub ecx, 32 - vmovdqu [esi + edi], ymm0 - lea esi, [esi + 32] - jg xloop25 - jmp xloop99 - - // Blend 50 / 50. - align 4 - xloop50: - vmovdqu ymm0, [esi] - vpavgb ymm0, ymm0, [esi + edx] - sub ecx, 32 - vmovdqu [esi + edi], ymm0 - lea esi, [esi + 32] - jg xloop50 - jmp xloop99 - - // Blend 75 / 25. - align 4 - xloop75: - vmovdqu ymm0, [esi + edx] - vpavgb ymm0, ymm0, [esi] - vpavgb ymm0, ymm0, [esi] - sub ecx, 32 - vmovdqu [esi + edi], ymm0 - lea esi, [esi + 32] - jg xloop75 - jmp xloop99 - - // Blend 100 / 0 - Copy row unchanged. - align 4 - xloop100: - rep movsb - - xloop99: - pop edi - pop esi - vzeroupper - ret - } -} -#endif // HAS_INTERPOLATEROW_AVX2 - -#ifdef HAS_INTERPOLATEROW_SSSE3 -// Bilinear filter 16x2 -> 16x1 -__declspec(naked) __declspec(align(16)) -void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr, - ptrdiff_t src_stride, int dst_width, - int source_y_fraction) { - __asm { - push esi - push edi - mov edi, [esp + 8 + 4] // dst_ptr - mov esi, [esp + 8 + 8] // src_ptr - mov edx, [esp + 8 + 12] // src_stride - mov ecx, [esp + 8 + 16] // dst_width - mov eax, [esp + 8 + 20] // source_y_fraction (0..255) - sub edi, esi - shr eax, 1 - // Dispatch to specialized filters if applicable. 
- cmp eax, 0 - je xloop100 // 0 / 128. Blend 100 / 0. - cmp eax, 32 - je xloop75 // 32 / 128 is 0.25. Blend 75 / 25. - cmp eax, 64 - je xloop50 // 64 / 128 is 0.50. Blend 50 / 50. - cmp eax, 96 - je xloop25 // 96 / 128 is 0.75. Blend 25 / 75. - - movd xmm0, eax // high fraction 0..127 - neg eax - add eax, 128 - movd xmm5, eax // low fraction 128..1 - punpcklbw xmm5, xmm0 - punpcklwd xmm5, xmm5 - pshufd xmm5, xmm5, 0 - - align 4 - xloop: - movdqa xmm0, [esi] - movdqa xmm2, [esi + edx] - movdqa xmm1, xmm0 - punpcklbw xmm0, xmm2 - punpckhbw xmm1, xmm2 - pmaddubsw xmm0, xmm5 - pmaddubsw xmm1, xmm5 - psrlw xmm0, 7 - psrlw xmm1, 7 - packuswb xmm0, xmm1 - sub ecx, 16 - movdqa [esi + edi], xmm0 - lea esi, [esi + 16] - jg xloop - jmp xloop99 - - // Blend 25 / 75. - align 4 - xloop25: - movdqa xmm0, [esi] - movdqa xmm1, [esi + edx] - pavgb xmm0, xmm1 - pavgb xmm0, xmm1 - sub ecx, 16 - movdqa [esi + edi], xmm0 - lea esi, [esi + 16] - jg xloop25 - jmp xloop99 - - // Blend 50 / 50. - align 4 - xloop50: - movdqa xmm0, [esi] - movdqa xmm1, [esi + edx] - pavgb xmm0, xmm1 - sub ecx, 16 - movdqa [esi + edi], xmm0 - lea esi, [esi + 16] - jg xloop50 - jmp xloop99 - - // Blend 75 / 25. - align 4 - xloop75: - movdqa xmm1, [esi] - movdqa xmm0, [esi + edx] - pavgb xmm0, xmm1 - pavgb xmm0, xmm1 - sub ecx, 16 - movdqa [esi + edi], xmm0 - lea esi, [esi + 16] - jg xloop75 - jmp xloop99 - - // Blend 100 / 0 - Copy row unchanged. 
- align 4 - xloop100: - movdqa xmm0, [esi] - sub ecx, 16 - movdqa [esi + edi], xmm0 - lea esi, [esi + 16] - jg xloop100 - - xloop99: - pop edi - pop esi - ret - } -} -#endif // HAS_INTERPOLATEROW_SSSE3 - -#ifdef HAS_INTERPOLATEROW_SSE2 -// Bilinear filter 16x2 -> 16x1 -__declspec(naked) __declspec(align(16)) -void InterpolateRow_SSE2(uint8* dst_ptr, const uint8* src_ptr, - ptrdiff_t src_stride, int dst_width, - int source_y_fraction) { - __asm { - push esi - push edi - mov edi, [esp + 8 + 4] // dst_ptr - mov esi, [esp + 8 + 8] // src_ptr - mov edx, [esp + 8 + 12] // src_stride - mov ecx, [esp + 8 + 16] // dst_width - mov eax, [esp + 8 + 20] // source_y_fraction (0..255) - sub edi, esi - // Dispatch to specialized filters if applicable. - cmp eax, 0 - je xloop100 // 0 / 256. Blend 100 / 0. - cmp eax, 64 - je xloop75 // 64 / 256 is 0.25. Blend 75 / 25. - cmp eax, 128 - je xloop50 // 128 / 256 is 0.50. Blend 50 / 50. - cmp eax, 192 - je xloop25 // 192 / 256 is 0.75. Blend 25 / 75. - - movd xmm5, eax // xmm5 = y fraction - punpcklbw xmm5, xmm5 - psrlw xmm5, 1 - punpcklwd xmm5, xmm5 - punpckldq xmm5, xmm5 - punpcklqdq xmm5, xmm5 - pxor xmm4, xmm4 - - align 4 - xloop: - movdqa xmm0, [esi] // row0 - movdqa xmm2, [esi + edx] // row1 - movdqa xmm1, xmm0 - movdqa xmm3, xmm2 - punpcklbw xmm2, xmm4 - punpckhbw xmm3, xmm4 - punpcklbw xmm0, xmm4 - punpckhbw xmm1, xmm4 - psubw xmm2, xmm0 // row1 - row0 - psubw xmm3, xmm1 - paddw xmm2, xmm2 // 9 bits * 15 bits = 8.16 - paddw xmm3, xmm3 - pmulhw xmm2, xmm5 // scale diff - pmulhw xmm3, xmm5 - paddw xmm0, xmm2 // sum rows - paddw xmm1, xmm3 - packuswb xmm0, xmm1 - sub ecx, 16 - movdqa [esi + edi], xmm0 - lea esi, [esi + 16] - jg xloop - jmp xloop99 - - // Blend 25 / 75. - align 4 - xloop25: - movdqa xmm0, [esi] - movdqa xmm1, [esi + edx] - pavgb xmm0, xmm1 - pavgb xmm0, xmm1 - sub ecx, 16 - movdqa [esi + edi], xmm0 - lea esi, [esi + 16] - jg xloop25 - jmp xloop99 - - // Blend 50 / 50. 
- align 4 - xloop50: - movdqa xmm0, [esi] - movdqa xmm1, [esi + edx] - pavgb xmm0, xmm1 - sub ecx, 16 - movdqa [esi + edi], xmm0 - lea esi, [esi + 16] - jg xloop50 - jmp xloop99 - - // Blend 75 / 25. - align 4 - xloop75: - movdqa xmm1, [esi] - movdqa xmm0, [esi + edx] - pavgb xmm0, xmm1 - pavgb xmm0, xmm1 - sub ecx, 16 - movdqa [esi + edi], xmm0 - lea esi, [esi + 16] - jg xloop75 - jmp xloop99 - - // Blend 100 / 0 - Copy row unchanged. - align 4 - xloop100: - movdqa xmm0, [esi] - sub ecx, 16 - movdqa [esi + edi], xmm0 - lea esi, [esi + 16] - jg xloop100 - - xloop99: - pop edi - pop esi - ret - } -} -#endif // HAS_INTERPOLATEROW_SSE2 - -// Bilinear filter 16x2 -> 16x1 -__declspec(naked) __declspec(align(16)) -void InterpolateRow_Unaligned_SSSE3(uint8* dst_ptr, const uint8* src_ptr, - ptrdiff_t src_stride, int dst_width, - int source_y_fraction) { - __asm { - push esi - push edi - mov edi, [esp + 8 + 4] // dst_ptr - mov esi, [esp + 8 + 8] // src_ptr - mov edx, [esp + 8 + 12] // src_stride - mov ecx, [esp + 8 + 16] // dst_width - mov eax, [esp + 8 + 20] // source_y_fraction (0..255) - sub edi, esi - shr eax, 1 - // Dispatch to specialized filters if applicable. - cmp eax, 0 - je xloop100 // 0 / 128. Blend 100 / 0. - cmp eax, 32 - je xloop75 // 32 / 128 is 0.25. Blend 75 / 25. - cmp eax, 64 - je xloop50 // 64 / 128 is 0.50. Blend 50 / 50. - cmp eax, 96 - je xloop25 // 96 / 128 is 0.75. Blend 25 / 75. - - movd xmm0, eax // high fraction 0..127 - neg eax - add eax, 128 - movd xmm5, eax // low fraction 128..1 - punpcklbw xmm5, xmm0 - punpcklwd xmm5, xmm5 - pshufd xmm5, xmm5, 0 - - align 4 - xloop: - movdqu xmm0, [esi] - movdqu xmm2, [esi + edx] - movdqu xmm1, xmm0 - punpcklbw xmm0, xmm2 - punpckhbw xmm1, xmm2 - pmaddubsw xmm0, xmm5 - pmaddubsw xmm1, xmm5 - psrlw xmm0, 7 - psrlw xmm1, 7 - packuswb xmm0, xmm1 - sub ecx, 16 - movdqu [esi + edi], xmm0 - lea esi, [esi + 16] - jg xloop - jmp xloop99 - - // Blend 25 / 75. 
- align 4 - xloop25: - movdqu xmm0, [esi] - movdqu xmm1, [esi + edx] - pavgb xmm0, xmm1 - pavgb xmm0, xmm1 - sub ecx, 16 - movdqu [esi + edi], xmm0 - lea esi, [esi + 16] - jg xloop25 - jmp xloop99 - - // Blend 50 / 50. - align 4 - xloop50: - movdqu xmm0, [esi] - movdqu xmm1, [esi + edx] - pavgb xmm0, xmm1 - sub ecx, 16 - movdqu [esi + edi], xmm0 - lea esi, [esi + 16] - jg xloop50 - jmp xloop99 - - // Blend 75 / 25. - align 4 - xloop75: - movdqu xmm1, [esi] - movdqu xmm0, [esi + edx] - pavgb xmm0, xmm1 - pavgb xmm0, xmm1 - sub ecx, 16 - movdqu [esi + edi], xmm0 - lea esi, [esi + 16] - jg xloop75 - jmp xloop99 - - // Blend 100 / 0 - Copy row unchanged. - align 4 - xloop100: - movdqu xmm0, [esi] - sub ecx, 16 - movdqu [esi + edi], xmm0 - lea esi, [esi + 16] - jg xloop100 - - xloop99: - pop edi - pop esi - ret - } -} - -#ifdef HAS_INTERPOLATEROW_SSE2 -// Bilinear filter 16x2 -> 16x1 -__declspec(naked) __declspec(align(16)) -void InterpolateRow_Unaligned_SSE2(uint8* dst_ptr, const uint8* src_ptr, - ptrdiff_t src_stride, int dst_width, - int source_y_fraction) { - __asm { - push esi - push edi - mov edi, [esp + 8 + 4] // dst_ptr - mov esi, [esp + 8 + 8] // src_ptr - mov edx, [esp + 8 + 12] // src_stride - mov ecx, [esp + 8 + 16] // dst_width - mov eax, [esp + 8 + 20] // source_y_fraction (0..255) - sub edi, esi - // Dispatch to specialized filters if applicable. - cmp eax, 0 - je xloop100 // 0 / 256. Blend 100 / 0. - cmp eax, 64 - je xloop75 // 64 / 256 is 0.25. Blend 75 / 25. - cmp eax, 128 - je xloop50 // 128 / 256 is 0.50. Blend 50 / 50. - cmp eax, 192 - je xloop25 // 192 / 256 is 0.75. Blend 25 / 75. 
- - movd xmm5, eax // xmm5 = y fraction - punpcklbw xmm5, xmm5 - psrlw xmm5, 1 - punpcklwd xmm5, xmm5 - punpckldq xmm5, xmm5 - punpcklqdq xmm5, xmm5 - pxor xmm4, xmm4 - - align 4 - xloop: - movdqu xmm0, [esi] // row0 - movdqu xmm2, [esi + edx] // row1 - movdqu xmm1, xmm0 - movdqu xmm3, xmm2 - punpcklbw xmm2, xmm4 - punpckhbw xmm3, xmm4 - punpcklbw xmm0, xmm4 - punpckhbw xmm1, xmm4 - psubw xmm2, xmm0 // row1 - row0 - psubw xmm3, xmm1 - paddw xmm2, xmm2 // 9 bits * 15 bits = 8.16 - paddw xmm3, xmm3 - pmulhw xmm2, xmm5 // scale diff - pmulhw xmm3, xmm5 - paddw xmm0, xmm2 // sum rows - paddw xmm1, xmm3 - packuswb xmm0, xmm1 - sub ecx, 16 - movdqu [esi + edi], xmm0 - lea esi, [esi + 16] - jg xloop - jmp xloop99 - - // Blend 25 / 75. - align 4 - xloop25: - movdqu xmm0, [esi] - movdqu xmm1, [esi + edx] - pavgb xmm0, xmm1 - pavgb xmm0, xmm1 - sub ecx, 16 - movdqu [esi + edi], xmm0 - lea esi, [esi + 16] - jg xloop25 - jmp xloop99 - - // Blend 50 / 50. - align 4 - xloop50: - movdqu xmm0, [esi] - movdqu xmm1, [esi + edx] - pavgb xmm0, xmm1 - sub ecx, 16 - movdqu [esi + edi], xmm0 - lea esi, [esi + 16] - jg xloop50 - jmp xloop99 - - // Blend 75 / 25. - align 4 - xloop75: - movdqu xmm1, [esi] - movdqu xmm0, [esi + edx] - pavgb xmm0, xmm1 - pavgb xmm0, xmm1 - sub ecx, 16 - movdqu [esi + edi], xmm0 - lea esi, [esi + 16] - jg xloop75 - jmp xloop99 - - // Blend 100 / 0 - Copy row unchanged. 
- align 4 - xloop100: - movdqu xmm0, [esi] - sub ecx, 16 - movdqu [esi + edi], xmm0 - lea esi, [esi + 16] - jg xloop100 - - xloop99: - pop edi - pop esi - ret - } -} -#endif // HAS_INTERPOLATEROW_SSE2 - -__declspec(naked) __declspec(align(16)) -void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride, - uint8* dst_uv, int pix) { - __asm { - push edi - mov eax, [esp + 4 + 4] // src_uv - mov edx, [esp + 4 + 8] // src_uv_stride - mov edi, [esp + 4 + 12] // dst_v - mov ecx, [esp + 4 + 16] // pix - sub edi, eax - - align 4 - convertloop: - movdqa xmm0, [eax] - pavgb xmm0, [eax + edx] - sub ecx, 16 - movdqa [eax + edi], xmm0 - lea eax, [eax + 16] - jg convertloop - pop edi - ret - } -} - -#ifdef HAS_HALFROW_AVX2 -__declspec(naked) __declspec(align(16)) -void HalfRow_AVX2(const uint8* src_uv, int src_uv_stride, - uint8* dst_uv, int pix) { - __asm { - push edi - mov eax, [esp + 4 + 4] // src_uv - mov edx, [esp + 4 + 8] // src_uv_stride - mov edi, [esp + 4 + 12] // dst_v - mov ecx, [esp + 4 + 16] // pix - sub edi, eax - - align 4 - convertloop: - vmovdqu ymm0, [eax] - vpavgb ymm0, ymm0, [eax + edx] - sub ecx, 32 - vmovdqu [eax + edi], ymm0 - lea eax, [eax + 32] - jg convertloop - - pop edi - vzeroupper - ret - } -} -#endif // HAS_HALFROW_AVX2 - -__declspec(naked) __declspec(align(16)) -void ARGBToBayerRow_SSSE3(const uint8* src_argb, uint8* dst_bayer, - uint32 selector, int pix) { - __asm { - mov eax, [esp + 4] // src_argb - mov edx, [esp + 8] // dst_bayer - movd xmm5, [esp + 12] // selector - mov ecx, [esp + 16] // pix - pshufd xmm5, xmm5, 0 - - align 4 - wloop: - movdqa xmm0, [eax] - movdqa xmm1, [eax + 16] - lea eax, [eax + 32] - pshufb xmm0, xmm5 - pshufb xmm1, xmm5 - punpckldq xmm0, xmm1 - sub ecx, 8 - movq qword ptr [edx], xmm0 - lea edx, [edx + 8] - jg wloop - ret - } -} - -// Specialized ARGB to Bayer that just isolates G channel. 
-__declspec(naked) __declspec(align(16)) -void ARGBToBayerGGRow_SSE2(const uint8* src_argb, uint8* dst_bayer, - uint32 selector, int pix) { - __asm { - mov eax, [esp + 4] // src_argb - mov edx, [esp + 8] // dst_bayer - // selector - mov ecx, [esp + 16] // pix - pcmpeqb xmm5, xmm5 // generate mask 0x000000ff - psrld xmm5, 24 - - align 4 - wloop: - movdqa xmm0, [eax] - movdqa xmm1, [eax + 16] - lea eax, [eax + 32] - psrld xmm0, 8 // Move green to bottom. - psrld xmm1, 8 - pand xmm0, xmm5 - pand xmm1, xmm5 - packssdw xmm0, xmm1 - packuswb xmm0, xmm1 - sub ecx, 8 - movq qword ptr [edx], xmm0 - lea edx, [edx + 8] - jg wloop - ret - } -} - -// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA. -__declspec(naked) __declspec(align(16)) -void ARGBShuffleRow_SSSE3(const uint8* src_argb, uint8* dst_argb, - const uint8* shuffler, int pix) { - __asm { - mov eax, [esp + 4] // src_argb - mov edx, [esp + 8] // dst_argb - mov ecx, [esp + 12] // shuffler - movdqa xmm5, [ecx] - mov ecx, [esp + 16] // pix - - align 4 - wloop: - movdqa xmm0, [eax] - movdqa xmm1, [eax + 16] - lea eax, [eax + 32] - pshufb xmm0, xmm5 - pshufb xmm1, xmm5 - sub ecx, 8 - movdqa [edx], xmm0 - movdqa [edx + 16], xmm1 - lea edx, [edx + 32] - jg wloop - ret - } -} - -__declspec(naked) __declspec(align(16)) -void ARGBShuffleRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_argb, - const uint8* shuffler, int pix) { - __asm { - mov eax, [esp + 4] // src_argb - mov edx, [esp + 8] // dst_argb - mov ecx, [esp + 12] // shuffler - movdqa xmm5, [ecx] - mov ecx, [esp + 16] // pix - - align 4 - wloop: - movdqu xmm0, [eax] - movdqu xmm1, [eax + 16] - lea eax, [eax + 32] - pshufb xmm0, xmm5 - pshufb xmm1, xmm5 - sub ecx, 8 - movdqu [edx], xmm0 - movdqu [edx + 16], xmm1 - lea edx, [edx + 32] - jg wloop - ret - } -} - -#ifdef HAS_ARGBSHUFFLEROW_AVX2 -__declspec(naked) __declspec(align(16)) -void ARGBShuffleRow_AVX2(const uint8* src_argb, uint8* dst_argb, - const uint8* shuffler, int pix) { - __asm { - mov eax, [esp + 
4] // src_argb - mov edx, [esp + 8] // dst_argb - mov ecx, [esp + 12] // shuffler - vbroadcastf128 ymm5, [ecx] // same shuffle in high as low. - mov ecx, [esp + 16] // pix - - align 4 - wloop: - vmovdqu ymm0, [eax] - vmovdqu ymm1, [eax + 32] - lea eax, [eax + 64] - vpshufb ymm0, ymm0, ymm5 - vpshufb ymm1, ymm1, ymm5 - sub ecx, 16 - vmovdqu [edx], ymm0 - vmovdqu [edx + 32], ymm1 - lea edx, [edx + 64] - jg wloop - - vzeroupper - ret - } -} -#endif // HAS_ARGBSHUFFLEROW_AVX2 - -__declspec(naked) __declspec(align(16)) -void ARGBShuffleRow_SSE2(const uint8* src_argb, uint8* dst_argb, - const uint8* shuffler, int pix) { - __asm { - push ebx - push esi - mov eax, [esp + 8 + 4] // src_argb - mov edx, [esp + 8 + 8] // dst_argb - mov esi, [esp + 8 + 12] // shuffler - mov ecx, [esp + 8 + 16] // pix - pxor xmm5, xmm5 - - mov ebx, [esi] // shuffler - cmp ebx, 0x03000102 - je shuf_3012 - cmp ebx, 0x00010203 - je shuf_0123 - cmp ebx, 0x00030201 - je shuf_0321 - cmp ebx, 0x02010003 - je shuf_2103 - - // TODO(fbarchard): Use one source pointer and 3 offsets. 
- shuf_any1: - movzx ebx, byte ptr [esi] - movzx ebx, byte ptr [eax + ebx] - mov [edx], bl - movzx ebx, byte ptr [esi + 1] - movzx ebx, byte ptr [eax + ebx] - mov [edx + 1], bl - movzx ebx, byte ptr [esi + 2] - movzx ebx, byte ptr [eax + ebx] - mov [edx + 2], bl - movzx ebx, byte ptr [esi + 3] - movzx ebx, byte ptr [eax + ebx] - mov [edx + 3], bl - lea eax, [eax + 4] - lea edx, [edx + 4] - sub ecx, 1 - jg shuf_any1 - jmp shuf99 - - align 4 - shuf_0123: - movdqu xmm0, [eax] - lea eax, [eax + 16] - movdqa xmm1, xmm0 - punpcklbw xmm0, xmm5 - punpckhbw xmm1, xmm5 - pshufhw xmm0, xmm0, 01Bh // 1B = 00011011 = 0x0123 = BGRAToARGB - pshuflw xmm0, xmm0, 01Bh - pshufhw xmm1, xmm1, 01Bh - pshuflw xmm1, xmm1, 01Bh - packuswb xmm0, xmm1 - sub ecx, 4 - movdqu [edx], xmm0 - lea edx, [edx + 16] - jg shuf_0123 - jmp shuf99 - - align 4 - shuf_0321: - movdqu xmm0, [eax] - lea eax, [eax + 16] - movdqa xmm1, xmm0 - punpcklbw xmm0, xmm5 - punpckhbw xmm1, xmm5 - pshufhw xmm0, xmm0, 039h // 39 = 00111001 = 0x0321 = RGBAToARGB - pshuflw xmm0, xmm0, 039h - pshufhw xmm1, xmm1, 039h - pshuflw xmm1, xmm1, 039h - packuswb xmm0, xmm1 - sub ecx, 4 - movdqu [edx], xmm0 - lea edx, [edx + 16] - jg shuf_0321 - jmp shuf99 - - align 4 - shuf_2103: - movdqu xmm0, [eax] - lea eax, [eax + 16] - movdqa xmm1, xmm0 - punpcklbw xmm0, xmm5 - punpckhbw xmm1, xmm5 - pshufhw xmm0, xmm0, 093h // 93 = 10010011 = 0x2103 = ARGBToRGBA - pshuflw xmm0, xmm0, 093h - pshufhw xmm1, xmm1, 093h - pshuflw xmm1, xmm1, 093h - packuswb xmm0, xmm1 - sub ecx, 4 - movdqu [edx], xmm0 - lea edx, [edx + 16] - jg shuf_2103 - jmp shuf99 - - align 4 - shuf_3012: - movdqu xmm0, [eax] - lea eax, [eax + 16] - movdqa xmm1, xmm0 - punpcklbw xmm0, xmm5 - punpckhbw xmm1, xmm5 - pshufhw xmm0, xmm0, 0C6h // C6 = 11000110 = 0x3012 = ABGRToARGB - pshuflw xmm0, xmm0, 0C6h - pshufhw xmm1, xmm1, 0C6h - pshuflw xmm1, xmm1, 0C6h - packuswb xmm0, xmm1 - sub ecx, 4 - movdqu [edx], xmm0 - lea edx, [edx + 16] - jg shuf_3012 - - shuf99: - pop esi - pop ebx 
- ret - } -} - -// YUY2 - Macro-pixel = 2 image pixels -// Y0U0Y1V0....Y2U2Y3V2...Y4U4Y5V4.... - -// UYVY - Macro-pixel = 2 image pixels -// U0Y0V0Y1 - -__declspec(naked) __declspec(align(16)) -void I422ToYUY2Row_SSE2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_frame, int width) { - __asm { - push esi - push edi - mov eax, [esp + 8 + 4] // src_y - mov esi, [esp + 8 + 8] // src_u - mov edx, [esp + 8 + 12] // src_v - mov edi, [esp + 8 + 16] // dst_frame - mov ecx, [esp + 8 + 20] // width - sub edx, esi - - align 4 - convertloop: - movq xmm2, qword ptr [esi] // U - movq xmm3, qword ptr [esi + edx] // V - lea esi, [esi + 8] - punpcklbw xmm2, xmm3 // UV - movdqu xmm0, [eax] // Y - lea eax, [eax + 16] - movdqa xmm1, xmm0 - punpcklbw xmm0, xmm2 // YUYV - punpckhbw xmm1, xmm2 - movdqu [edi], xmm0 - movdqu [edi + 16], xmm1 - lea edi, [edi + 32] - sub ecx, 16 - jg convertloop - - pop edi - pop esi - ret - } -} - -__declspec(naked) __declspec(align(16)) -void I422ToUYVYRow_SSE2(const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_frame, int width) { - __asm { - push esi - push edi - mov eax, [esp + 8 + 4] // src_y - mov esi, [esp + 8 + 8] // src_u - mov edx, [esp + 8 + 12] // src_v - mov edi, [esp + 8 + 16] // dst_frame - mov ecx, [esp + 8 + 20] // width - sub edx, esi - - align 4 - convertloop: - movq xmm2, qword ptr [esi] // U - movq xmm3, qword ptr [esi + edx] // V - lea esi, [esi + 8] - punpcklbw xmm2, xmm3 // UV - movdqu xmm0, [eax] // Y - movdqa xmm1, xmm2 - lea eax, [eax + 16] - punpcklbw xmm1, xmm0 // UYVY - punpckhbw xmm2, xmm0 - movdqu [edi], xmm1 - movdqu [edi + 16], xmm2 - lea edi, [edi + 32] - sub ecx, 16 - jg convertloop - - pop edi - pop esi - ret - } -} - -#ifdef HAS_ARGBPOLYNOMIALROW_SSE2 -__declspec(naked) __declspec(align(16)) -void ARGBPolynomialRow_SSE2(const uint8* src_argb, - uint8* dst_argb, const float* poly, - int width) { - __asm { - push esi - mov eax, [esp + 4 + 4] /* src_argb */ - mov edx, 
[esp + 4 + 8] /* dst_argb */ - mov esi, [esp + 4 + 12] /* poly */ - mov ecx, [esp + 4 + 16] /* width */ - pxor xmm3, xmm3 // 0 constant for zero extending bytes to ints. - - // 2 pixel loop. - align 4 - convertloop: -// pmovzxbd xmm0, dword ptr [eax] // BGRA pixel -// pmovzxbd xmm4, dword ptr [eax + 4] // BGRA pixel - movq xmm0, qword ptr [eax] // BGRABGRA - lea eax, [eax + 8] - punpcklbw xmm0, xmm3 - movdqa xmm4, xmm0 - punpcklwd xmm0, xmm3 // pixel 0 - punpckhwd xmm4, xmm3 // pixel 1 - cvtdq2ps xmm0, xmm0 // 4 floats - cvtdq2ps xmm4, xmm4 - movdqa xmm1, xmm0 // X - movdqa xmm5, xmm4 - mulps xmm0, [esi + 16] // C1 * X - mulps xmm4, [esi + 16] - addps xmm0, [esi] // result = C0 + C1 * X - addps xmm4, [esi] - movdqa xmm2, xmm1 - movdqa xmm6, xmm5 - mulps xmm2, xmm1 // X * X - mulps xmm6, xmm5 - mulps xmm1, xmm2 // X * X * X - mulps xmm5, xmm6 - mulps xmm2, [esi + 32] // C2 * X * X - mulps xmm6, [esi + 32] - mulps xmm1, [esi + 48] // C3 * X * X * X - mulps xmm5, [esi + 48] - addps xmm0, xmm2 // result += C2 * X * X - addps xmm4, xmm6 - addps xmm0, xmm1 // result += C3 * X * X * X - addps xmm4, xmm5 - cvttps2dq xmm0, xmm0 - cvttps2dq xmm4, xmm4 - packuswb xmm0, xmm4 - packuswb xmm0, xmm0 - sub ecx, 2 - movq qword ptr [edx], xmm0 - lea edx, [edx + 8] - jg convertloop - pop esi - ret - } -} -#endif // HAS_ARGBPOLYNOMIALROW_SSE2 - -#ifdef HAS_ARGBPOLYNOMIALROW_AVX2 -__declspec(naked) __declspec(align(16)) -void ARGBPolynomialRow_AVX2(const uint8* src_argb, - uint8* dst_argb, const float* poly, - int width) { - __asm { - mov eax, [esp + 4] /* src_argb */ - mov edx, [esp + 8] /* dst_argb */ - mov ecx, [esp + 12] /* poly */ - vbroadcastf128 ymm4, [ecx] // C0 - vbroadcastf128 ymm5, [ecx + 16] // C1 - vbroadcastf128 ymm6, [ecx + 32] // C2 - vbroadcastf128 ymm7, [ecx + 48] // C3 - mov ecx, [esp + 16] /* width */ - - // 2 pixel loop. 
- align 4 - convertloop: - vpmovzxbd ymm0, qword ptr [eax] // 2 BGRA pixels - lea eax, [eax + 8] - vcvtdq2ps ymm0, ymm0 // X 8 floats - vmulps ymm2, ymm0, ymm0 // X * X - vmulps ymm3, ymm0, ymm7 // C3 * X - vfmadd132ps ymm0, ymm4, ymm5 // result = C0 + C1 * X - vfmadd231ps ymm0, ymm2, ymm6 // result += C2 * X * X - vfmadd231ps ymm0, ymm2, ymm3 // result += C3 * X * X * X - vcvttps2dq ymm0, ymm0 - vpackusdw ymm0, ymm0, ymm0 // b0g0r0a0_00000000_b0g0r0a0_00000000 - vpermq ymm0, ymm0, 0xd8 // b0g0r0a0_b0g0r0a0_00000000_00000000 - vpackuswb xmm0, xmm0, xmm0 // bgrabgra_00000000_00000000_00000000 - sub ecx, 2 - vmovq qword ptr [edx], xmm0 - lea edx, [edx + 8] - jg convertloop - vzeroupper - ret - } -} -#endif // HAS_ARGBPOLYNOMIALROW_AVX2 - -#ifdef HAS_ARGBCOLORTABLEROW_X86 -// Tranform ARGB pixels with color table. -__declspec(naked) __declspec(align(16)) -void ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, - int width) { - __asm { - push esi - mov eax, [esp + 4 + 4] /* dst_argb */ - mov esi, [esp + 4 + 8] /* table_argb */ - mov ecx, [esp + 4 + 12] /* width */ - - // 1 pixel loop. - align 4 - convertloop: - movzx edx, byte ptr [eax] - lea eax, [eax + 4] - movzx edx, byte ptr [esi + edx * 4] - mov byte ptr [eax - 4], dl - movzx edx, byte ptr [eax - 4 + 1] - movzx edx, byte ptr [esi + edx * 4 + 1] - mov byte ptr [eax - 4 + 1], dl - movzx edx, byte ptr [eax - 4 + 2] - movzx edx, byte ptr [esi + edx * 4 + 2] - mov byte ptr [eax - 4 + 2], dl - movzx edx, byte ptr [eax - 4 + 3] - movzx edx, byte ptr [esi + edx * 4 + 3] - mov byte ptr [eax - 4 + 3], dl - dec ecx - jg convertloop - pop esi - ret - } -} -#endif // HAS_ARGBCOLORTABLEROW_X86 - -#ifdef HAS_RGBCOLORTABLEROW_X86 -// Tranform RGB pixels with color table. 
-__declspec(naked) __declspec(align(16)) -void RGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width) { - __asm { - push esi - mov eax, [esp + 4 + 4] /* dst_argb */ - mov esi, [esp + 4 + 8] /* table_argb */ - mov ecx, [esp + 4 + 12] /* width */ - - // 1 pixel loop. - align 4 - convertloop: - movzx edx, byte ptr [eax] - lea eax, [eax + 4] - movzx edx, byte ptr [esi + edx * 4] - mov byte ptr [eax - 4], dl - movzx edx, byte ptr [eax - 4 + 1] - movzx edx, byte ptr [esi + edx * 4 + 1] - mov byte ptr [eax - 4 + 1], dl - movzx edx, byte ptr [eax - 4 + 2] - movzx edx, byte ptr [esi + edx * 4 + 2] - mov byte ptr [eax - 4 + 2], dl - dec ecx - jg convertloop - - pop esi - ret - } -} -#endif // HAS_RGBCOLORTABLEROW_X86 - -#ifdef HAS_ARGBLUMACOLORTABLEROW_SSSE3 -// Tranform RGB pixels with luma table. -__declspec(naked) __declspec(align(16)) -void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb, uint8* dst_argb, - int width, - const uint8* luma, uint32 lumacoeff) { - __asm { - push esi - push edi - mov eax, [esp + 8 + 4] /* src_argb */ - mov edi, [esp + 8 + 8] /* dst_argb */ - mov ecx, [esp + 8 + 12] /* width */ - movd xmm2, dword ptr [esp + 8 + 16] // luma table - movd xmm3, dword ptr [esp + 8 + 20] // lumacoeff - pshufd xmm2, xmm2, 0 - pshufd xmm3, xmm3, 0 - pcmpeqb xmm4, xmm4 // generate mask 0xff00ff00 - psllw xmm4, 8 - pxor xmm5, xmm5 - - // 4 pixel loop. 
- align 4 - convertloop: - movdqu xmm0, qword ptr [eax] // generate luma ptr - pmaddubsw xmm0, xmm3 - phaddw xmm0, xmm0 - pand xmm0, xmm4 // mask out low bits - punpcklwd xmm0, xmm5 - paddd xmm0, xmm2 // add table base - movd esi, xmm0 - pshufd xmm0, xmm0, 0x39 // 00111001 to rotate right 32 - - movzx edx, byte ptr [eax] - movzx edx, byte ptr [esi + edx] - mov byte ptr [edi], dl - movzx edx, byte ptr [eax + 1] - movzx edx, byte ptr [esi + edx] - mov byte ptr [edi + 1], dl - movzx edx, byte ptr [eax + 2] - movzx edx, byte ptr [esi + edx] - mov byte ptr [edi + 2], dl - movzx edx, byte ptr [eax + 3] // copy alpha. - mov byte ptr [edi + 3], dl - - movd esi, xmm0 - pshufd xmm0, xmm0, 0x39 // 00111001 to rotate right 32 - - movzx edx, byte ptr [eax + 4] - movzx edx, byte ptr [esi + edx] - mov byte ptr [edi + 4], dl - movzx edx, byte ptr [eax + 5] - movzx edx, byte ptr [esi + edx] - mov byte ptr [edi + 5], dl - movzx edx, byte ptr [eax + 6] - movzx edx, byte ptr [esi + edx] - mov byte ptr [edi + 6], dl - movzx edx, byte ptr [eax + 7] // copy alpha. - mov byte ptr [edi + 7], dl - - movd esi, xmm0 - pshufd xmm0, xmm0, 0x39 // 00111001 to rotate right 32 - - movzx edx, byte ptr [eax + 8] - movzx edx, byte ptr [esi + edx] - mov byte ptr [edi + 8], dl - movzx edx, byte ptr [eax + 9] - movzx edx, byte ptr [esi + edx] - mov byte ptr [edi + 9], dl - movzx edx, byte ptr [eax + 10] - movzx edx, byte ptr [esi + edx] - mov byte ptr [edi + 10], dl - movzx edx, byte ptr [eax + 11] // copy alpha. - mov byte ptr [edi + 11], dl - - movd esi, xmm0 - - movzx edx, byte ptr [eax + 12] - movzx edx, byte ptr [esi + edx] - mov byte ptr [edi + 12], dl - movzx edx, byte ptr [eax + 13] - movzx edx, byte ptr [esi + edx] - mov byte ptr [edi + 13], dl - movzx edx, byte ptr [eax + 14] - movzx edx, byte ptr [esi + edx] - mov byte ptr [edi + 14], dl - movzx edx, byte ptr [eax + 15] // copy alpha. 
- mov byte ptr [edi + 15], dl - - sub ecx, 4 - lea eax, [eax + 16] - lea edi, [edi + 16] - jg convertloop - - pop edi - pop esi - ret - } -} -#endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 - -#endif // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER) - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/row_x86.asm b/drivers/theoraplayer/src/YUV/libyuv/src/row_x86.asm deleted file mode 100755 index 0cb326f8e5..0000000000 --- a/drivers/theoraplayer/src/YUV/libyuv/src/row_x86.asm +++ /dev/null @@ -1,146 +0,0 @@ -; -; Copyright 2012 The LibYuv Project Authors. All rights reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - -%ifdef __YASM_VERSION_ID__ -%if __YASM_VERSION_ID__ < 01020000h -%error AVX2 is supported only by yasm 1.2.0 or later. -%endif -%endif -%include "x86inc.asm" - -SECTION .text - -; cglobal numeric constants are parameters, gpr regs, mm regs - -; void YUY2ToYRow_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix) - -%macro YUY2TOYROW 2-3 -cglobal %1ToYRow%3, 3, 3, 3, src_yuy2, dst_y, pix -%ifidn %1,YUY2 - pcmpeqb m2, m2, m2 ; generate mask 0x00ff00ff - psrlw m2, m2, 8 -%endif - - ALIGN 4 -.convertloop: - mov%2 m0, [src_yuy2q] - mov%2 m1, [src_yuy2q + mmsize] - lea src_yuy2q, [src_yuy2q + mmsize * 2] -%ifidn %1,YUY2 - pand m0, m0, m2 ; YUY2 even bytes are Y - pand m1, m1, m2 -%else - psrlw m0, m0, 8 ; UYVY odd bytes are Y - psrlw m1, m1, 8 -%endif - packuswb m0, m0, m1 -%if cpuflag(AVX2) - vpermq m0, m0, 0xd8 -%endif - sub pixd, mmsize - mov%2 [dst_yq], m0 - lea dst_yq, [dst_yq + mmsize] - jg .convertloop - REP_RET -%endmacro - -; TODO(fbarchard): Remove MMX. Add SSSE3 pshufb version. 
-INIT_MMX MMX -YUY2TOYROW YUY2,a, -YUY2TOYROW YUY2,u,_Unaligned -YUY2TOYROW UYVY,a, -YUY2TOYROW UYVY,u,_Unaligned -INIT_XMM SSE2 -YUY2TOYROW YUY2,a, -YUY2TOYROW YUY2,u,_Unaligned -YUY2TOYROW UYVY,a, -YUY2TOYROW UYVY,u,_Unaligned -INIT_YMM AVX2 -YUY2TOYROW YUY2,a, -YUY2TOYROW UYVY,a, - -; void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) - -%macro SplitUVRow 1-2 -cglobal SplitUVRow%2, 4, 4, 5, src_uv, dst_u, dst_v, pix - pcmpeqb m4, m4, m4 ; generate mask 0x00ff00ff - psrlw m4, m4, 8 - sub dst_vq, dst_uq - - ALIGN 4 -.convertloop: - mov%1 m0, [src_uvq] - mov%1 m1, [src_uvq + mmsize] - lea src_uvq, [src_uvq + mmsize * 2] - psrlw m2, m0, 8 ; odd bytes - psrlw m3, m1, 8 - pand m0, m0, m4 ; even bytes - pand m1, m1, m4 - packuswb m0, m0, m1 - packuswb m2, m2, m3 -%if cpuflag(AVX2) - vpermq m0, m0, 0xd8 - vpermq m2, m2, 0xd8 -%endif - mov%1 [dst_uq], m0 - mov%1 [dst_uq + dst_vq], m2 - lea dst_uq, [dst_uq + mmsize] - sub pixd, mmsize - jg .convertloop - REP_RET -%endmacro - -INIT_MMX MMX -SplitUVRow a, -SplitUVRow u,_Unaligned -INIT_XMM SSE2 -SplitUVRow a, -SplitUVRow u,_Unaligned -INIT_YMM AVX2 -SplitUVRow a, - -; void MergeUVRow_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv, -; int width); - -%macro MergeUVRow_ 1-2 -cglobal MergeUVRow_%2, 4, 4, 3, src_u, src_v, dst_uv, pix - sub src_vq, src_uq - - ALIGN 4 -.convertloop: - mov%1 m0, [src_uq] - mov%1 m1, [src_vq] - lea src_uq, [src_uq + mmsize] - punpcklbw m2, m0, m1 // first 8 UV pairs - punpckhbw m0, m0, m1 // next 8 UV pairs -%if cpuflag(AVX2) - vperm2i128 m1, m2, m0, 0x20 // low 128 of ymm2 and low 128 of ymm0 - vperm2i128 m2, m2, m0, 0x31 // high 128 of ymm2 and high 128 of ymm0 - mov%1 [dst_uvq], m1 - mov%1 [dst_uvq + mmsize], m2 -%else - mov%1 [dst_uvq], m2 - mov%1 [dst_uvq + mmsize], m0 -%endif - lea dst_uvq, [dst_uvq + mmsize * 2] - sub pixd, mmsize - jg .convertloop - REP_RET -%endmacro - -INIT_MMX MMX -MergeUVRow_ a, -MergeUVRow_ u,_Unaligned -INIT_XMM SSE2 -MergeUVRow_ 
a, -MergeUVRow_ u,_Unaligned -INIT_YMM AVX2 -MergeUVRow_ a, - diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/scale.cc b/drivers/theoraplayer/src/YUV/libyuv/src/scale.cc deleted file mode 100755 index b3893cc00c..0000000000 --- a/drivers/theoraplayer/src/YUV/libyuv/src/scale.cc +++ /dev/null @@ -1,926 +0,0 @@ -/* - * Copyright 2011 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "libyuv/scale.h" - -#include <assert.h> -#include <string.h> - -#include "libyuv/cpu_id.h" -#include "libyuv/planar_functions.h" // For CopyPlane -#include "libyuv/row.h" -#include "libyuv/scale_row.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -// Remove this macro if OVERREAD is safe. -#define AVOID_OVERREAD 1 - -static __inline int Abs(int v) { - return v >= 0 ? v : -v; -} - -#define SUBSAMPLE(v, a, s) (v < 0) ? (-((-v + a) >> s)) : ((v + a) >> s) - -// Scale plane, 1/2 -// This is an optimized version for scaling down a plane to 1/2 of -// its original size. - -static void ScalePlaneDown2(int src_width, int src_height, - int dst_width, int dst_height, - int src_stride, int dst_stride, - const uint8* src_ptr, uint8* dst_ptr, - enum FilterMode filtering) { - int y; - void (*ScaleRowDown2)(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) = - filtering == kFilterNone ? ScaleRowDown2_C : - (filtering == kFilterLinear ? ScaleRowDown2Linear_C : - ScaleRowDown2Box_C); - int row_stride = src_stride << 1; - if (!filtering) { - src_ptr += src_stride; // Point to odd rows. 
- src_stride = 0; - } - -#if defined(HAS_SCALEROWDOWN2_NEON) - if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 16)) { - ScaleRowDown2 = filtering ? ScaleRowDown2Box_NEON : ScaleRowDown2_NEON; - } -#elif defined(HAS_SCALEROWDOWN2_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 16)) { - ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_Unaligned_SSE2 : - (filtering == kFilterLinear ? ScaleRowDown2Linear_Unaligned_SSE2 : - ScaleRowDown2Box_Unaligned_SSE2); - if (IS_ALIGNED(src_ptr, 16) && - IS_ALIGNED(src_stride, 16) && IS_ALIGNED(row_stride, 16) && - IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) { - ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_SSE2 : - (filtering == kFilterLinear ? ScaleRowDown2Linear_SSE2 : - ScaleRowDown2Box_SSE2); - } - } -#elif defined(HAS_SCALEROWDOWN2_MIPS_DSPR2) - if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(src_ptr, 4) && - IS_ALIGNED(src_stride, 4) && IS_ALIGNED(row_stride, 4) && - IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) { - ScaleRowDown2 = filtering ? - ScaleRowDown2Box_MIPS_DSPR2 : ScaleRowDown2_MIPS_DSPR2; - } -#endif - - if (filtering == kFilterLinear) { - src_stride = 0; - } - // TODO(fbarchard): Loop through source height to allow odd height. - for (y = 0; y < dst_height; ++y) { - ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width); - src_ptr += row_stride; - dst_ptr += dst_stride; - } -} - -// Scale plane, 1/4 -// This is an optimized version for scaling down a plane to 1/4 of -// its original size. - -static void ScalePlaneDown4(int src_width, int src_height, - int dst_width, int dst_height, - int src_stride, int dst_stride, - const uint8* src_ptr, uint8* dst_ptr, - enum FilterMode filtering) { - int y; - void (*ScaleRowDown4)(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) = - filtering ? ScaleRowDown4Box_C : ScaleRowDown4_C; - int row_stride = src_stride << 2; - if (!filtering) { - src_ptr += src_stride * 2; // Point to row 2. 
- src_stride = 0; - } -#if defined(HAS_SCALEROWDOWN4_NEON) - if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8)) { - ScaleRowDown4 = filtering ? ScaleRowDown4Box_NEON : ScaleRowDown4_NEON; - } -#elif defined(HAS_SCALEROWDOWN4_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && - IS_ALIGNED(dst_width, 8) && IS_ALIGNED(row_stride, 16) && - IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) { - ScaleRowDown4 = filtering ? ScaleRowDown4Box_SSE2 : ScaleRowDown4_SSE2; - } -#elif defined(HAS_SCALEROWDOWN4_MIPS_DSPR2) - if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(row_stride, 4) && - IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) && - IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) { - ScaleRowDown4 = filtering ? - ScaleRowDown4Box_MIPS_DSPR2 : ScaleRowDown4_MIPS_DSPR2; - } -#endif - - if (filtering == kFilterLinear) { - src_stride = 0; - } - for (y = 0; y < dst_height; ++y) { - ScaleRowDown4(src_ptr, src_stride, dst_ptr, dst_width); - src_ptr += row_stride; - dst_ptr += dst_stride; - } -} - -// Scale plane down, 3/4 - -static void ScalePlaneDown34(int src_width, int src_height, - int dst_width, int dst_height, - int src_stride, int dst_stride, - const uint8* src_ptr, uint8* dst_ptr, - enum FilterMode filtering) { - int y; - void (*ScaleRowDown34_0)(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); - void (*ScaleRowDown34_1)(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); - const int filter_stride = (filtering == kFilterLinear) ? 
0 : src_stride; - assert(dst_width % 3 == 0); - if (!filtering) { - ScaleRowDown34_0 = ScaleRowDown34_C; - ScaleRowDown34_1 = ScaleRowDown34_C; - } else { - ScaleRowDown34_0 = ScaleRowDown34_0_Box_C; - ScaleRowDown34_1 = ScaleRowDown34_1_Box_C; - } -#if defined(HAS_SCALEROWDOWN34_NEON) - if (TestCpuFlag(kCpuHasNEON) && (dst_width % 24 == 0)) { - if (!filtering) { - ScaleRowDown34_0 = ScaleRowDown34_NEON; - ScaleRowDown34_1 = ScaleRowDown34_NEON; - } else { - ScaleRowDown34_0 = ScaleRowDown34_0_Box_NEON; - ScaleRowDown34_1 = ScaleRowDown34_1_Box_NEON; - } - } -#endif -#if defined(HAS_SCALEROWDOWN34_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0) && - IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) { - if (!filtering) { - ScaleRowDown34_0 = ScaleRowDown34_SSSE3; - ScaleRowDown34_1 = ScaleRowDown34_SSSE3; - } else { - ScaleRowDown34_0 = ScaleRowDown34_0_Box_SSSE3; - ScaleRowDown34_1 = ScaleRowDown34_1_Box_SSSE3; - } - } -#endif -#if defined(HAS_SCALEROWDOWN34_MIPS_DSPR2) - if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 24 == 0) && - IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) && - IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) { - if (!filtering) { - ScaleRowDown34_0 = ScaleRowDown34_MIPS_DSPR2; - ScaleRowDown34_1 = ScaleRowDown34_MIPS_DSPR2; - } else { - ScaleRowDown34_0 = ScaleRowDown34_0_Box_MIPS_DSPR2; - ScaleRowDown34_1 = ScaleRowDown34_1_Box_MIPS_DSPR2; - } - } -#endif - - for (y = 0; y < dst_height - 2; y += 3) { - ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width); - src_ptr += src_stride; - dst_ptr += dst_stride; - ScaleRowDown34_1(src_ptr, filter_stride, dst_ptr, dst_width); - src_ptr += src_stride; - dst_ptr += dst_stride; - ScaleRowDown34_0(src_ptr + src_stride, -filter_stride, - dst_ptr, dst_width); - src_ptr += src_stride * 2; - dst_ptr += dst_stride; - } - - // Remainder 1 or 2 rows with last row vertically unfiltered - if ((dst_height % 3) == 2) { - ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, 
dst_width); - src_ptr += src_stride; - dst_ptr += dst_stride; - ScaleRowDown34_1(src_ptr, 0, dst_ptr, dst_width); - } else if ((dst_height % 3) == 1) { - ScaleRowDown34_0(src_ptr, 0, dst_ptr, dst_width); - } -} - - -// Scale plane, 3/8 -// This is an optimized version for scaling down a plane to 3/8 -// of its original size. -// -// Uses box filter arranges like this -// aaabbbcc -> abc -// aaabbbcc def -// aaabbbcc ghi -// dddeeeff -// dddeeeff -// dddeeeff -// ggghhhii -// ggghhhii -// Boxes are 3x3, 2x3, 3x2 and 2x2 - -static void ScalePlaneDown38(int src_width, int src_height, - int dst_width, int dst_height, - int src_stride, int dst_stride, - const uint8* src_ptr, uint8* dst_ptr, - enum FilterMode filtering) { - int y; - void (*ScaleRowDown38_3)(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); - void (*ScaleRowDown38_2)(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width); - const int filter_stride = (filtering == kFilterLinear) ? 
0 : src_stride; - assert(dst_width % 3 == 0); - if (!filtering) { - ScaleRowDown38_3 = ScaleRowDown38_C; - ScaleRowDown38_2 = ScaleRowDown38_C; - } else { - ScaleRowDown38_3 = ScaleRowDown38_3_Box_C; - ScaleRowDown38_2 = ScaleRowDown38_2_Box_C; - } -#if defined(HAS_SCALEROWDOWN38_NEON) - if (TestCpuFlag(kCpuHasNEON) && (dst_width % 12 == 0)) { - if (!filtering) { - ScaleRowDown38_3 = ScaleRowDown38_NEON; - ScaleRowDown38_2 = ScaleRowDown38_NEON; - } else { - ScaleRowDown38_3 = ScaleRowDown38_3_Box_NEON; - ScaleRowDown38_2 = ScaleRowDown38_2_Box_NEON; - } - } -#elif defined(HAS_SCALEROWDOWN38_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0) && - IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) { - if (!filtering) { - ScaleRowDown38_3 = ScaleRowDown38_SSSE3; - ScaleRowDown38_2 = ScaleRowDown38_SSSE3; - } else { - ScaleRowDown38_3 = ScaleRowDown38_3_Box_SSSE3; - ScaleRowDown38_2 = ScaleRowDown38_2_Box_SSSE3; - } - } -#elif defined(HAS_SCALEROWDOWN38_MIPS_DSPR2) - if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 12 == 0) && - IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) && - IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) { - if (!filtering) { - ScaleRowDown38_3 = ScaleRowDown38_MIPS_DSPR2; - ScaleRowDown38_2 = ScaleRowDown38_MIPS_DSPR2; - } else { - ScaleRowDown38_3 = ScaleRowDown38_3_Box_MIPS_DSPR2; - ScaleRowDown38_2 = ScaleRowDown38_2_Box_MIPS_DSPR2; - } - } -#endif - - for (y = 0; y < dst_height - 2; y += 3) { - ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width); - src_ptr += src_stride * 3; - dst_ptr += dst_stride; - ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width); - src_ptr += src_stride * 3; - dst_ptr += dst_stride; - ScaleRowDown38_2(src_ptr, filter_stride, dst_ptr, dst_width); - src_ptr += src_stride * 2; - dst_ptr += dst_stride; - } - - // Remainder 1 or 2 rows with last row vertically unfiltered - if ((dst_height % 3) == 2) { - ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width); - src_ptr 
+= src_stride * 3; - dst_ptr += dst_stride; - ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width); - } else if ((dst_height % 3) == 1) { - ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width); - } -} - -static __inline uint32 SumBox(int iboxwidth, int iboxheight, - ptrdiff_t src_stride, const uint8* src_ptr) { - uint32 sum = 0u; - int y; - assert(iboxwidth > 0); - assert(iboxheight > 0); - for (y = 0; y < iboxheight; ++y) { - int x; - for (x = 0; x < iboxwidth; ++x) { - sum += src_ptr[x]; - } - src_ptr += src_stride; - } - return sum; -} - -static void ScalePlaneBoxRow_C(int dst_width, int boxheight, - int x, int dx, ptrdiff_t src_stride, - const uint8* src_ptr, uint8* dst_ptr) { - int i; - int boxwidth; - for (i = 0; i < dst_width; ++i) { - int ix = x >> 16; - x += dx; - boxwidth = (x >> 16) - ix; - *dst_ptr++ = SumBox(boxwidth, boxheight, src_stride, src_ptr + ix) / - (boxwidth * boxheight); - } -} - -static __inline uint32 SumPixels(int iboxwidth, const uint16* src_ptr) { - uint32 sum = 0u; - int x; - assert(iboxwidth > 0); - for (x = 0; x < iboxwidth; ++x) { - sum += src_ptr[x]; - } - return sum; -} - -static void ScaleAddCols2_C(int dst_width, int boxheight, int x, int dx, - const uint16* src_ptr, uint8* dst_ptr) { - int i; - int scaletbl[2]; - int minboxwidth = (dx >> 16); - int* scaleptr = scaletbl - minboxwidth; - int boxwidth; - scaletbl[0] = 65536 / (minboxwidth * boxheight); - scaletbl[1] = 65536 / ((minboxwidth + 1) * boxheight); - for (i = 0; i < dst_width; ++i) { - int ix = x >> 16; - x += dx; - boxwidth = (x >> 16) - ix; - *dst_ptr++ = SumPixels(boxwidth, src_ptr + ix) * scaleptr[boxwidth] >> 16; - } -} - -static void ScaleAddCols1_C(int dst_width, int boxheight, int x, int dx, - const uint16* src_ptr, uint8* dst_ptr) { - int boxwidth = (dx >> 16); - int scaleval = 65536 / (boxwidth * boxheight); - int i; - for (i = 0; i < dst_width; ++i) { - *dst_ptr++ = SumPixels(boxwidth, src_ptr + x) * scaleval >> 16; - x += boxwidth; - } -} - -// Scale plane down to 
any dimensions, with interpolation. -// (boxfilter). -// -// Same method as SimpleScale, which is fixed point, outputting -// one pixel of destination using fixed point (16.16) to step -// through source, sampling a box of pixel with simple -// averaging. -static void ScalePlaneBox(int src_width, int src_height, - int dst_width, int dst_height, - int src_stride, int dst_stride, - const uint8* src_ptr, uint8* dst_ptr) { - int j; - // Initial source x/y coordinate and step values as 16.16 fixed point. - int x = 0; - int y = 0; - int dx = 0; - int dy = 0; - const int max_y = (src_height << 16); - ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox, - &x, &y, &dx, &dy); - src_width = Abs(src_width); - // TODO(fbarchard): Remove this and make AddRows handle boxheight 1. - if (!IS_ALIGNED(src_width, 16) || dst_height * 2 > src_height) { - uint8* dst = dst_ptr; - int j; - for (j = 0; j < dst_height; ++j) { - int boxheight; - int iy = y >> 16; - const uint8* src = src_ptr + iy * src_stride; - y += dy; - if (y > max_y) { - y = max_y; - } - boxheight = (y >> 16) - iy; - ScalePlaneBoxRow_C(dst_width, boxheight, - x, dx, src_stride, - src, dst); - dst += dst_stride; - } - return; - } - { - // Allocate a row buffer of uint16. - align_buffer_64(row16, src_width * 2); - void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx, - const uint16* src_ptr, uint8* dst_ptr) = - (dx & 0xffff) ? 
ScaleAddCols2_C: ScaleAddCols1_C; - void (*ScaleAddRows)(const uint8* src_ptr, ptrdiff_t src_stride, - uint16* dst_ptr, int src_width, int src_height) = ScaleAddRows_C; - -#if defined(HAS_SCALEADDROWS_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && -#ifdef AVOID_OVERREAD - IS_ALIGNED(src_width, 16) && -#endif - IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) { - ScaleAddRows = ScaleAddRows_SSE2; - } -#endif - - for (j = 0; j < dst_height; ++j) { - int boxheight; - int iy = y >> 16; - const uint8* src = src_ptr + iy * src_stride; - y += dy; - if (y > (src_height << 16)) { - y = (src_height << 16); - } - boxheight = (y >> 16) - iy; - ScaleAddRows(src, src_stride, (uint16*)(row16), - src_width, boxheight); - ScaleAddCols(dst_width, boxheight, x, dx, (uint16*)(row16), - dst_ptr); - dst_ptr += dst_stride; - } - free_aligned_buffer_64(row16); - } -} - -// Scale plane down with bilinear interpolation. -void ScalePlaneBilinearDown(int src_width, int src_height, - int dst_width, int dst_height, - int src_stride, int dst_stride, - const uint8* src_ptr, uint8* dst_ptr, - enum FilterMode filtering) { - // Initial source x/y coordinate and step values as 16.16 fixed point. - int x = 0; - int y = 0; - int dx = 0; - int dy = 0; - // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear. - // Allocate a row buffer. - align_buffer_64(row, src_width); - - const int max_y = (src_height - 1) << 16; - int j; - void (*ScaleFilterCols)(uint8* dst_ptr, const uint8* src_ptr, - int dst_width, int x, int dx) = - (src_width >= 32768) ? 
ScaleFilterCols64_C : ScaleFilterCols_C; - void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr, - ptrdiff_t src_stride, int dst_width, int source_y_fraction) = - InterpolateRow_C; - ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, - &x, &y, &dx, &dy); - src_width = Abs(src_width); - -#if defined(HAS_INTERPOLATEROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && src_width >= 16) { - InterpolateRow = InterpolateRow_Any_SSE2; - if (IS_ALIGNED(src_width, 16)) { - InterpolateRow = InterpolateRow_Unaligned_SSE2; - if (IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) { - InterpolateRow = InterpolateRow_SSE2; - } - } - } -#endif -#if defined(HAS_INTERPOLATEROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && src_width >= 16) { - InterpolateRow = InterpolateRow_Any_SSSE3; - if (IS_ALIGNED(src_width, 16)) { - InterpolateRow = InterpolateRow_Unaligned_SSSE3; - if (IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) { - InterpolateRow = InterpolateRow_SSSE3; - } - } - } -#endif -#if defined(HAS_INTERPOLATEROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2) && src_width >= 32) { - InterpolateRow = InterpolateRow_Any_AVX2; - if (IS_ALIGNED(src_width, 32)) { - InterpolateRow = InterpolateRow_AVX2; - } - } -#endif -#if defined(HAS_INTERPOLATEROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && src_width >= 16) { - InterpolateRow = InterpolateRow_Any_NEON; - if (IS_ALIGNED(src_width, 16)) { - InterpolateRow = InterpolateRow_NEON; - } - } -#endif -#if defined(HAS_INTERPOLATEROW_MIPS_DSPR2) - if (TestCpuFlag(kCpuHasMIPS_DSPR2) && src_width >= 4) { - InterpolateRow = InterpolateRow_Any_MIPS_DSPR2; - if (IS_ALIGNED(src_width, 4)) { - InterpolateRow = InterpolateRow_MIPS_DSPR2; - } - } -#endif - - -#if defined(HAS_SCALEFILTERCOLS_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) { - ScaleFilterCols = ScaleFilterCols_SSSE3; - } -#endif - if (y > max_y) { - y = max_y; - } - - for (j = 0; j < dst_height; ++j) { - int yi = y >> 16; - const uint8* src = src_ptr + yi * src_stride; 
- if (filtering == kFilterLinear) { - ScaleFilterCols(dst_ptr, src, dst_width, x, dx); - } else { - int yf = (y >> 8) & 255; - InterpolateRow(row, src, src_stride, src_width, yf); - ScaleFilterCols(dst_ptr, row, dst_width, x, dx); - } - dst_ptr += dst_stride; - y += dy; - if (y > max_y) { - y = max_y; - } - } - free_aligned_buffer_64(row); -} - -// Scale up down with bilinear interpolation. -void ScalePlaneBilinearUp(int src_width, int src_height, - int dst_width, int dst_height, - int src_stride, int dst_stride, - const uint8* src_ptr, uint8* dst_ptr, - enum FilterMode filtering) { - int j; - // Initial source x/y coordinate and step values as 16.16 fixed point. - int x = 0; - int y = 0; - int dx = 0; - int dy = 0; - const int max_y = (src_height - 1) << 16; - void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr, - ptrdiff_t src_stride, int dst_width, int source_y_fraction) = - InterpolateRow_C; - void (*ScaleFilterCols)(uint8* dst_ptr, const uint8* src_ptr, - int dst_width, int x, int dx) = - filtering ? 
ScaleFilterCols_C : ScaleCols_C; - ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, - &x, &y, &dx, &dy); - src_width = Abs(src_width); - -#if defined(HAS_INTERPOLATEROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && dst_width >= 16) { - InterpolateRow = InterpolateRow_Any_SSE2; - if (IS_ALIGNED(dst_width, 16)) { - InterpolateRow = InterpolateRow_Unaligned_SSE2; - if (IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) { - InterpolateRow = InterpolateRow_SSE2; - } - } - } -#endif -#if defined(HAS_INTERPOLATEROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && dst_width >= 16) { - InterpolateRow = InterpolateRow_Any_SSSE3; - if (IS_ALIGNED(dst_width, 16)) { - InterpolateRow = InterpolateRow_Unaligned_SSSE3; - if (IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) { - InterpolateRow = InterpolateRow_SSSE3; - } - } - } -#endif -#if defined(HAS_INTERPOLATEROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2) && dst_width >= 32) { - InterpolateRow = InterpolateRow_Any_AVX2; - if (IS_ALIGNED(dst_width, 32)) { - InterpolateRow = InterpolateRow_AVX2; - } - } -#endif -#if defined(HAS_INTERPOLATEROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && dst_width >= 16) { - InterpolateRow = InterpolateRow_Any_NEON; - if (IS_ALIGNED(dst_width, 16)) { - InterpolateRow = InterpolateRow_NEON; - } - } -#endif -#if defined(HAS_INTERPOLATEROW_MIPS_DSPR2) - if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width >= 4) { - InterpolateRow = InterpolateRow_Any_MIPS_DSPR2; - if (IS_ALIGNED(dst_width, 4)) { - InterpolateRow = InterpolateRow_MIPS_DSPR2; - } - } -#endif - - if (filtering && src_width >= 32768) { - ScaleFilterCols = ScaleFilterCols64_C; - } -#if defined(HAS_SCALEFILTERCOLS_SSSE3) - if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) { - ScaleFilterCols = ScaleFilterCols_SSSE3; - } -#endif - if (!filtering && src_width * 2 == dst_width && x < 0x8000) { - ScaleFilterCols = ScaleColsUp2_C; -#if defined(HAS_SCALECOLS_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) && - 
IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) && - IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) { - ScaleFilterCols = ScaleColsUp2_SSE2; - } -#endif - } - - if (y > max_y) { - y = max_y; - } - { - int yi = y >> 16; - const uint8* src = src_ptr + yi * src_stride; - - // Allocate 2 row buffers. - const int kRowSize = (dst_width + 15) & ~15; - align_buffer_64(row, kRowSize * 2); - - uint8* rowptr = row; - int rowstride = kRowSize; - int lasty = yi; - - ScaleFilterCols(rowptr, src, dst_width, x, dx); - if (src_height > 1) { - src += src_stride; - } - ScaleFilterCols(rowptr + rowstride, src, dst_width, x, dx); - src += src_stride; - - for (j = 0; j < dst_height; ++j) { - yi = y >> 16; - if (yi != lasty) { - if (y > max_y) { - y = max_y; - yi = y >> 16; - src = src_ptr + yi * src_stride; - } - if (yi != lasty) { - ScaleFilterCols(rowptr, src, dst_width, x, dx); - rowptr += rowstride; - rowstride = -rowstride; - lasty = yi; - src += src_stride; - } - } - if (filtering == kFilterLinear) { - InterpolateRow(dst_ptr, rowptr, 0, dst_width, 0); - } else { - int yf = (y >> 8) & 255; - InterpolateRow(dst_ptr, rowptr, rowstride, dst_width, yf); - } - dst_ptr += dst_stride; - y += dy; - } - free_aligned_buffer_64(row); - } -} - -// Scale Plane to/from any dimensions, without interpolation. -// Fixed point math is used for performance: The upper 16 bits -// of x and dx is the integer part of the source position and -// the lower 16 bits are the fixed decimal part. - -static void ScalePlaneSimple(int src_width, int src_height, - int dst_width, int dst_height, - int src_stride, int dst_stride, - const uint8* src_ptr, uint8* dst_ptr) { - int i; - void (*ScaleCols)(uint8* dst_ptr, const uint8* src_ptr, - int dst_width, int x, int dx) = ScaleCols_C; - // Initial source x/y coordinate and step values as 16.16 fixed point. 
- int x = 0; - int y = 0; - int dx = 0; - int dy = 0; - ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterNone, - &x, &y, &dx, &dy); - src_width = Abs(src_width); - - if (src_width * 2 == dst_width && x < 0x8000) { - ScaleCols = ScaleColsUp2_C; -#if defined(HAS_SCALECOLS_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) && - IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) && - IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) { - ScaleCols = ScaleColsUp2_SSE2; - } -#endif - } - - for (i = 0; i < dst_height; ++i) { - ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride, - dst_width, x, dx); - dst_ptr += dst_stride; - y += dy; - } -} - -// Scale a plane. -// This function dispatches to a specialized scaler based on scale factor. - -LIBYUV_API -void ScalePlane(const uint8* src, int src_stride, - int src_width, int src_height, - uint8* dst, int dst_stride, - int dst_width, int dst_height, - enum FilterMode filtering) { - // Simplify filtering when possible. - filtering = ScaleFilterReduce(src_width, src_height, - dst_width, dst_height, - filtering); - - // Negative height means invert the image. - if (src_height < 0) { - src_height = -src_height; - src = src + (src_height - 1) * src_stride; - src_stride = -src_stride; - } - - // Use specialized scales to improve performance for common resolutions. - // For example, all the 1/2 scalings will use ScalePlaneDown2() - if (dst_width == src_width && dst_height == src_height) { - // Straight copy. - CopyPlane(src, src_stride, dst, dst_stride, dst_width, dst_height); - return; - } - if (dst_width == src_width) { - int dy = FixedDiv(src_height, dst_height); - // Arbitrary scale vertically, but unscaled vertically. - ScalePlaneVertical(src_height, - dst_width, dst_height, - src_stride, dst_stride, src, dst, - 0, 0, dy, 1, filtering); - return; - } - if (dst_width <= Abs(src_width) && dst_height <= src_height) { - // Scale down. 
- if (4 * dst_width == 3 * src_width && - 4 * dst_height == 3 * src_height) { - // optimized, 3/4 - ScalePlaneDown34(src_width, src_height, dst_width, dst_height, - src_stride, dst_stride, src, dst, filtering); - return; - } - if (2 * dst_width == src_width && 2 * dst_height == src_height) { - // optimized, 1/2 - ScalePlaneDown2(src_width, src_height, dst_width, dst_height, - src_stride, dst_stride, src, dst, filtering); - return; - } - // 3/8 rounded up for odd sized chroma height. - if (8 * dst_width == 3 * src_width && - dst_height == ((src_height * 3 + 7) / 8)) { - // optimized, 3/8 - ScalePlaneDown38(src_width, src_height, dst_width, dst_height, - src_stride, dst_stride, src, dst, filtering); - return; - } - if (4 * dst_width == src_width && 4 * dst_height == src_height && - filtering != kFilterBilinear) { - // optimized, 1/4 - ScalePlaneDown4(src_width, src_height, dst_width, dst_height, - src_stride, dst_stride, src, dst, filtering); - return; - } - } - if (filtering == kFilterBox && dst_height * 2 < src_height) { - ScalePlaneBox(src_width, src_height, dst_width, dst_height, - src_stride, dst_stride, src, dst); - return; - } - if (filtering && dst_height > src_height) { - ScalePlaneBilinearUp(src_width, src_height, dst_width, dst_height, - src_stride, dst_stride, src, dst, filtering); - return; - } - if (filtering) { - ScalePlaneBilinearDown(src_width, src_height, dst_width, dst_height, - src_stride, dst_stride, src, dst, filtering); - return; - } - ScalePlaneSimple(src_width, src_height, dst_width, dst_height, - src_stride, dst_stride, src, dst); -} - -// Scale an I420 image. -// This function in turn calls a scaling function for each plane. 
- -LIBYUV_API -int I420Scale(const uint8* src_y, int src_stride_y, - const uint8* src_u, int src_stride_u, - const uint8* src_v, int src_stride_v, - int src_width, int src_height, - uint8* dst_y, int dst_stride_y, - uint8* dst_u, int dst_stride_u, - uint8* dst_v, int dst_stride_v, - int dst_width, int dst_height, - enum FilterMode filtering) { - int src_halfwidth = SUBSAMPLE(src_width, 1, 1); - int src_halfheight = SUBSAMPLE(src_height, 1, 1); - int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1); - int dst_halfheight = SUBSAMPLE(dst_height, 1, 1); - if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 || - !dst_y || !dst_u || !dst_v || dst_width <= 0 || dst_height <= 0) { - return -1; - } - - ScalePlane(src_y, src_stride_y, src_width, src_height, - dst_y, dst_stride_y, dst_width, dst_height, - filtering); - ScalePlane(src_u, src_stride_u, src_halfwidth, src_halfheight, - dst_u, dst_stride_u, dst_halfwidth, dst_halfheight, - filtering); - ScalePlane(src_v, src_stride_v, src_halfwidth, src_halfheight, - dst_v, dst_stride_v, dst_halfwidth, dst_halfheight, - filtering); - return 0; -} - -// Deprecated api -LIBYUV_API -int Scale(const uint8* src_y, const uint8* src_u, const uint8* src_v, - int src_stride_y, int src_stride_u, int src_stride_v, - int src_width, int src_height, - uint8* dst_y, uint8* dst_u, uint8* dst_v, - int dst_stride_y, int dst_stride_u, int dst_stride_v, - int dst_width, int dst_height, - LIBYUV_BOOL interpolate) { - return I420Scale(src_y, src_stride_y, - src_u, src_stride_u, - src_v, src_stride_v, - src_width, src_height, - dst_y, dst_stride_y, - dst_u, dst_stride_u, - dst_v, dst_stride_v, - dst_width, dst_height, - interpolate ? kFilterBox : kFilterNone); -} - -// Deprecated api -LIBYUV_API -int ScaleOffset(const uint8* src, int src_width, int src_height, - uint8* dst, int dst_width, int dst_height, int dst_yoffset, - LIBYUV_BOOL interpolate) { - // Chroma requires offset to multiple of 2. 
- int dst_yoffset_even = dst_yoffset & ~1; - int src_halfwidth = SUBSAMPLE(src_width, 1, 1); - int src_halfheight = SUBSAMPLE(src_height, 1, 1); - int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1); - int dst_halfheight = SUBSAMPLE(dst_height, 1, 1); - int aheight = dst_height - dst_yoffset_even * 2; // actual output height - const uint8* src_y = src; - const uint8* src_u = src + src_width * src_height; - const uint8* src_v = src + src_width * src_height + - src_halfwidth * src_halfheight; - uint8* dst_y = dst + dst_yoffset_even * dst_width; - uint8* dst_u = dst + dst_width * dst_height + - (dst_yoffset_even >> 1) * dst_halfwidth; - uint8* dst_v = dst + dst_width * dst_height + dst_halfwidth * dst_halfheight + - (dst_yoffset_even >> 1) * dst_halfwidth; - if (!src || src_width <= 0 || src_height <= 0 || - !dst || dst_width <= 0 || dst_height <= 0 || dst_yoffset_even < 0 || - dst_yoffset_even >= dst_height) { - return -1; - } - return I420Scale(src_y, src_width, - src_u, src_halfwidth, - src_v, src_halfwidth, - src_width, src_height, - dst_y, dst_width, - dst_u, dst_halfwidth, - dst_v, dst_halfwidth, - dst_width, aheight, - interpolate ? kFilterBox : kFilterNone); -} - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/scale_argb.cc b/drivers/theoraplayer/src/YUV/libyuv/src/scale_argb.cc deleted file mode 100755 index e339cd7c79..0000000000 --- a/drivers/theoraplayer/src/YUV/libyuv/src/scale_argb.cc +++ /dev/null @@ -1,809 +0,0 @@ -/* - * Copyright 2011 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include "libyuv/scale.h" - -#include <assert.h> -#include <string.h> - -#include "libyuv/cpu_id.h" -#include "libyuv/planar_functions.h" // For CopyARGB -#include "libyuv/row.h" -#include "libyuv/scale_row.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -static __inline int Abs(int v) { - return v >= 0 ? v : -v; -} - -// ScaleARGB ARGB, 1/2 -// This is an optimized version for scaling down a ARGB to 1/2 of -// its original size. -static void ScaleARGBDown2(int src_width, int src_height, - int dst_width, int dst_height, - int src_stride, int dst_stride, - const uint8* src_argb, uint8* dst_argb, - int x, int dx, int y, int dy, - enum FilterMode filtering) { - int j; - int row_stride = src_stride * (dy >> 16); - void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride, - uint8* dst_argb, int dst_width) = - filtering == kFilterNone ? ScaleARGBRowDown2_C : - (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_C : - ScaleARGBRowDown2Box_C); - assert(dx == 65536 * 2); // Test scale factor of 2. - assert((dy & 0x1ffff) == 0); // Test vertical scale is multiple of 2. - // Advance to odd row, even column. - if (filtering == kFilterBilinear) { - src_argb += (y >> 16) * src_stride + (x >> 16) * 4; - } else { - src_argb += (y >> 16) * src_stride + ((x >> 16) - 1) * 4; - } - -#if defined(HAS_SCALEARGBROWDOWN2_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 4) && - IS_ALIGNED(src_argb, 16) && IS_ALIGNED(row_stride, 16) && - IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) { - ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_SSE2 : - (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_SSE2 : - ScaleARGBRowDown2Box_SSE2); - } -#elif defined(HAS_SCALEARGBROWDOWN2_NEON) - if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8) && - IS_ALIGNED(src_argb, 4) && IS_ALIGNED(row_stride, 4)) { - ScaleARGBRowDown2 = filtering ? 
ScaleARGBRowDown2Box_NEON : - ScaleARGBRowDown2_NEON; - } -#endif - - if (filtering == kFilterLinear) { - src_stride = 0; - } - for (j = 0; j < dst_height; ++j) { - ScaleARGBRowDown2(src_argb, src_stride, dst_argb, dst_width); - src_argb += row_stride; - dst_argb += dst_stride; - } -} - -// ScaleARGB ARGB, 1/4 -// This is an optimized version for scaling down a ARGB to 1/4 of -// its original size. -static void ScaleARGBDown4Box(int src_width, int src_height, - int dst_width, int dst_height, - int src_stride, int dst_stride, - const uint8* src_argb, uint8* dst_argb, - int x, int dx, int y, int dy) { - int j; - // Allocate 2 rows of ARGB. - const int kRowSize = (dst_width * 2 * 4 + 15) & ~15; - align_buffer_64(row, kRowSize * 2); - int row_stride = src_stride * (dy >> 16); - void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride, - uint8* dst_argb, int dst_width) = ScaleARGBRowDown2Box_C; - // Advance to odd row, even column. - src_argb += (y >> 16) * src_stride + (x >> 16) * 4; - assert(dx == 65536 * 4); // Test scale factor of 4. - assert((dy & 0x3ffff) == 0); // Test vertical scale is multiple of 4. 
-#if defined(HAS_SCALEARGBROWDOWN2_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 4) && - IS_ALIGNED(src_argb, 16) && IS_ALIGNED(row_stride, 16) && - IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) { - ScaleARGBRowDown2 = ScaleARGBRowDown2Box_SSE2; - } -#elif defined(HAS_SCALEARGBROWDOWN2_NEON) - if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8) && - IS_ALIGNED(src_argb, 4) && IS_ALIGNED(row_stride, 4)) { - ScaleARGBRowDown2 = ScaleARGBRowDown2Box_NEON; - } -#endif - for (j = 0; j < dst_height; ++j) { - ScaleARGBRowDown2(src_argb, src_stride, row, dst_width * 2); - ScaleARGBRowDown2(src_argb + src_stride * 2, src_stride, - row + kRowSize, dst_width * 2); - ScaleARGBRowDown2(row, kRowSize, dst_argb, dst_width); - src_argb += row_stride; - dst_argb += dst_stride; - } - free_aligned_buffer_64(row); -} - -// ScaleARGB ARGB Even -// This is an optimized version for scaling down a ARGB to even -// multiple of its original size. -static void ScaleARGBDownEven(int src_width, int src_height, - int dst_width, int dst_height, - int src_stride, int dst_stride, - const uint8* src_argb, uint8* dst_argb, - int x, int dx, int y, int dy, - enum FilterMode filtering) { - int j; - int col_step = dx >> 16; - int row_stride = (dy >> 16) * src_stride; - void (*ScaleARGBRowDownEven)(const uint8* src_argb, ptrdiff_t src_stride, - int src_step, uint8* dst_argb, int dst_width) = - filtering ? ScaleARGBRowDownEvenBox_C : ScaleARGBRowDownEven_C; - assert(IS_ALIGNED(src_width, 2)); - assert(IS_ALIGNED(src_height, 2)); - src_argb += (y >> 16) * src_stride + (x >> 16) * 4; -#if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 4) && - IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) { - ScaleARGBRowDownEven = filtering ? 
ScaleARGBRowDownEvenBox_SSE2 : - ScaleARGBRowDownEven_SSE2; - } -#elif defined(HAS_SCALEARGBROWDOWNEVEN_NEON) - if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 4) && - IS_ALIGNED(src_argb, 4)) { - ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_NEON : - ScaleARGBRowDownEven_NEON; - } -#endif - - if (filtering == kFilterLinear) { - src_stride = 0; - } - for (j = 0; j < dst_height; ++j) { - ScaleARGBRowDownEven(src_argb, src_stride, col_step, dst_argb, dst_width); - src_argb += row_stride; - dst_argb += dst_stride; - } -} - -// Scale ARGB down with bilinear interpolation. -static void ScaleARGBBilinearDown(int src_width, int src_height, - int dst_width, int dst_height, - int src_stride, int dst_stride, - const uint8* src_argb, uint8* dst_argb, - int x, int dx, int y, int dy, - enum FilterMode filtering) { - int j; - void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb, - ptrdiff_t src_stride, int dst_width, int source_y_fraction) = - InterpolateRow_C; - void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb, - int dst_width, int x, int dx) = - (src_width >= 32768) ? ScaleARGBFilterCols64_C : ScaleARGBFilterCols_C; - int64 xlast = x + (int64)(dst_width - 1) * dx; - int64 xl = (dx >= 0) ? x : xlast; - int64 xr = (dx >= 0) ? xlast : x; - int clip_src_width; - xl = (xl >> 16) & ~3; // Left edge aligned. - xr = (xr >> 16) + 1; // Right most pixel used. Bilinear uses 2 pixels. - xr = (xr + 1 + 3) & ~3; // 1 beyond 4 pixel aligned right most pixel. - if (xr > src_width) { - xr = src_width; - } - clip_src_width = (int)(xr - xl) * 4; // Width aligned to 4. 
- src_argb += xl * 4; - x -= (int)(xl << 16); -#if defined(HAS_INTERPOLATEROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && clip_src_width >= 16) { - InterpolateRow = InterpolateRow_Any_SSE2; - if (IS_ALIGNED(clip_src_width, 16)) { - InterpolateRow = InterpolateRow_Unaligned_SSE2; - if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16)) { - InterpolateRow = InterpolateRow_SSE2; - } - } - } -#endif -#if defined(HAS_INTERPOLATEROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && clip_src_width >= 16) { - InterpolateRow = InterpolateRow_Any_SSSE3; - if (IS_ALIGNED(clip_src_width, 16)) { - InterpolateRow = InterpolateRow_Unaligned_SSSE3; - if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16)) { - InterpolateRow = InterpolateRow_SSSE3; - } - } - } -#endif -#if defined(HAS_INTERPOLATEROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2) && clip_src_width >= 32) { - InterpolateRow = InterpolateRow_Any_AVX2; - if (IS_ALIGNED(clip_src_width, 32)) { - InterpolateRow = InterpolateRow_AVX2; - } - } -#endif -#if defined(HAS_INTERPOLATEROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && clip_src_width >= 16) { - InterpolateRow = InterpolateRow_Any_NEON; - if (IS_ALIGNED(clip_src_width, 16)) { - InterpolateRow = InterpolateRow_NEON; - } - } -#endif -#if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2) - if (TestCpuFlag(kCpuHasMIPS_DSPR2) && clip_src_width >= 4 && - IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4)) { - InterpolateRow = InterpolateRow_Any_MIPS_DSPR2; - if (IS_ALIGNED(clip_src_width, 4)) { - InterpolateRow = InterpolateRow_MIPS_DSPR2; - } - } -#endif -#if defined(HAS_SCALEARGBFILTERCOLS_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) { - ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3; - } -#endif - // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear. - // Allocate a row of ARGB. 
- { - align_buffer_64(row, clip_src_width * 4); - - const int max_y = (src_height - 1) << 16; - if (y > max_y) { - y = max_y; - } - for (j = 0; j < dst_height; ++j) { - int yi = y >> 16; - const uint8* src = src_argb + yi * src_stride; - if (filtering == kFilterLinear) { - ScaleARGBFilterCols(dst_argb, src, dst_width, x, dx); - } else { - int yf = (y >> 8) & 255; - InterpolateRow(row, src, src_stride, clip_src_width, yf); - ScaleARGBFilterCols(dst_argb, row, dst_width, x, dx); - } - dst_argb += dst_stride; - y += dy; - if (y > max_y) { - y = max_y; - } - } - free_aligned_buffer_64(row); - } -} - -// Scale ARGB up with bilinear interpolation. -static void ScaleARGBBilinearUp(int src_width, int src_height, - int dst_width, int dst_height, - int src_stride, int dst_stride, - const uint8* src_argb, uint8* dst_argb, - int x, int dx, int y, int dy, - enum FilterMode filtering) { - int j; - void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb, - ptrdiff_t src_stride, int dst_width, int source_y_fraction) = - InterpolateRow_C; - void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb, - int dst_width, int x, int dx) = - filtering ? 
ScaleARGBFilterCols_C : ScaleARGBCols_C; - const int max_y = (src_height - 1) << 16; -#if defined(HAS_INTERPOLATEROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && dst_width >= 4) { - InterpolateRow = InterpolateRow_Any_SSE2; - if (IS_ALIGNED(dst_width, 4)) { - InterpolateRow = InterpolateRow_Unaligned_SSE2; - if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) { - InterpolateRow = InterpolateRow_SSE2; - } - } - } -#endif -#if defined(HAS_INTERPOLATEROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && dst_width >= 4) { - InterpolateRow = InterpolateRow_Any_SSSE3; - if (IS_ALIGNED(dst_width, 4)) { - InterpolateRow = InterpolateRow_Unaligned_SSSE3; - if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) { - InterpolateRow = InterpolateRow_SSSE3; - } - } - } -#endif -#if defined(HAS_INTERPOLATEROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2) && dst_width >= 8) { - InterpolateRow = InterpolateRow_Any_AVX2; - if (IS_ALIGNED(dst_width, 8)) { - InterpolateRow = InterpolateRow_AVX2; - } - } -#endif -#if defined(HAS_INTERPOLATEROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && dst_width >= 4) { - InterpolateRow = InterpolateRow_Any_NEON; - if (IS_ALIGNED(dst_width, 4)) { - InterpolateRow = InterpolateRow_NEON; - } - } -#endif -#if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2) - if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width >= 1 && - IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride, 4)) { - InterpolateRow = InterpolateRow_MIPS_DSPR2; - } -#endif - if (src_width >= 32768) { - ScaleARGBFilterCols = filtering ? 
- ScaleARGBFilterCols64_C : ScaleARGBCols64_C; - } -#if defined(HAS_SCALEARGBFILTERCOLS_SSSE3) - if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) { - ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3; - } -#endif -#if defined(HAS_SCALEARGBCOLS_SSE2) - if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) { - ScaleARGBFilterCols = ScaleARGBCols_SSE2; - } -#endif - if (!filtering && src_width * 2 == dst_width && x < 0x8000) { - ScaleARGBFilterCols = ScaleARGBColsUp2_C; -#if defined(HAS_SCALEARGBCOLSUP2_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) && - IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) && - IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) { - ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2; - } -#endif - } - - if (y > max_y) { - y = max_y; - } - - { - int yi = y >> 16; - const uint8* src = src_argb + yi * src_stride; - - // Allocate 2 rows of ARGB. - const int kRowSize = (dst_width * 4 + 15) & ~15; - align_buffer_64(row, kRowSize * 2); - - uint8* rowptr = row; - int rowstride = kRowSize; - int lasty = yi; - - ScaleARGBFilterCols(rowptr, src, dst_width, x, dx); - if (src_height > 1) { - src += src_stride; - } - ScaleARGBFilterCols(rowptr + rowstride, src, dst_width, x, dx); - src += src_stride; - - for (j = 0; j < dst_height; ++j) { - yi = y >> 16; - if (yi != lasty) { - if (y > max_y) { - y = max_y; - yi = y >> 16; - src = src_argb + yi * src_stride; - } - if (yi != lasty) { - ScaleARGBFilterCols(rowptr, src, dst_width, x, dx); - rowptr += rowstride; - rowstride = -rowstride; - lasty = yi; - src += src_stride; - } - } - if (filtering == kFilterLinear) { - InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0); - } else { - int yf = (y >> 8) & 255; - InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf); - } - dst_argb += dst_stride; - y += dy; - } - free_aligned_buffer_64(row); - } -} - -#ifdef YUVSCALEUP -// Scale YUV to ARGB up with bilinear interpolation. 
-static void ScaleYUVToARGBBilinearUp(int src_width, int src_height, - int dst_width, int dst_height, - int src_stride_y, - int src_stride_u, - int src_stride_v, - int dst_stride_argb, - const uint8* src_y, - const uint8* src_u, - const uint8* src_v, - uint8* dst_argb, - int x, int dx, int y, int dy, - enum FilterMode filtering) { - int j; - void (*I422ToARGBRow)(const uint8* y_buf, - const uint8* u_buf, - const uint8* v_buf, - uint8* rgb_buf, - int width) = I422ToARGBRow_C; -#if defined(HAS_I422TOARGBROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && src_width >= 8) { - I422ToARGBRow = I422ToARGBRow_Any_SSSE3; - if (IS_ALIGNED(src_width, 8)) { - I422ToARGBRow = I422ToARGBRow_Unaligned_SSSE3; - if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { - I422ToARGBRow = I422ToARGBRow_SSSE3; - } - } - } -#endif -#if defined(HAS_I422TOARGBROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2) && src_width >= 16) { - I422ToARGBRow = I422ToARGBRow_Any_AVX2; - if (IS_ALIGNED(src_width, 16)) { - I422ToARGBRow = I422ToARGBRow_AVX2; - } - } -#endif -#if defined(HAS_I422TOARGBROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && src_width >= 8) { - I422ToARGBRow = I422ToARGBRow_Any_NEON; - if (IS_ALIGNED(src_width, 8)) { - I422ToARGBRow = I422ToARGBRow_NEON; - } - } -#endif -#if defined(HAS_I422TOARGBROW_MIPS_DSPR2) - if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(src_width, 4) && - IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) && - IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) && - IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) && - IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) { - I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2; - } -#endif - - void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb, - ptrdiff_t src_stride, int dst_width, int source_y_fraction) = - InterpolateRow_C; -#if defined(HAS_INTERPOLATEROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && dst_width >= 4) { - InterpolateRow = InterpolateRow_Any_SSE2; - if (IS_ALIGNED(dst_width, 4)) { - 
InterpolateRow = InterpolateRow_Unaligned_SSE2; - if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { - InterpolateRow = InterpolateRow_SSE2; - } - } - } -#endif -#if defined(HAS_INTERPOLATEROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && dst_width >= 4) { - InterpolateRow = InterpolateRow_Any_SSSE3; - if (IS_ALIGNED(dst_width, 4)) { - InterpolateRow = InterpolateRow_Unaligned_SSSE3; - if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { - InterpolateRow = InterpolateRow_SSSE3; - } - } - } -#endif -#if defined(HAS_INTERPOLATEROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2) && dst_width >= 8) { - InterpolateRow = InterpolateRow_Any_AVX2; - if (IS_ALIGNED(dst_width, 8)) { - InterpolateRow = InterpolateRow_AVX2; - } - } -#endif -#if defined(HAS_INTERPOLATEROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && dst_width >= 4) { - InterpolateRow = InterpolateRow_Any_NEON; - if (IS_ALIGNED(dst_width, 4)) { - InterpolateRow = InterpolateRow_NEON; - } - } -#endif -#if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2) - if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width >= 1 && - IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) { - InterpolateRow = InterpolateRow_MIPS_DSPR2; - } -#endif - - void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb, - int dst_width, int x, int dx) = - filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C; - if (src_width >= 32768) { - ScaleARGBFilterCols = filtering ? 
- ScaleARGBFilterCols64_C : ScaleARGBCols64_C; - } -#if defined(HAS_SCALEARGBFILTERCOLS_SSSE3) - if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) { - ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3; - } -#endif -#if defined(HAS_SCALEARGBCOLS_SSE2) - if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) { - ScaleARGBFilterCols = ScaleARGBCols_SSE2; - } -#endif - if (!filtering && src_width * 2 == dst_width && x < 0x8000) { - ScaleARGBFilterCols = ScaleARGBColsUp2_C; -#if defined(HAS_SCALEARGBCOLSUP2_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) && - IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) && - IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) { - ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2; - } -#endif - } - - const int max_y = (src_height - 1) << 16; - if (y > max_y) { - y = max_y; - } - const int kYShift = 1; // Shift Y by 1 to convert Y plane to UV coordinate. - int yi = y >> 16; - int uv_yi = yi >> kYShift; - const uint8* src_row_y = src_y + yi * src_stride_y; - const uint8* src_row_u = src_u + uv_yi * src_stride_u; - const uint8* src_row_v = src_v + uv_yi * src_stride_v; - - // Allocate 2 rows of ARGB. - const int kRowSize = (dst_width * 4 + 15) & ~15; - align_buffer_64(row, kRowSize * 2); - - // Allocate 1 row of ARGB for source conversion. - align_buffer_64(argb_row, src_width * 4); - - uint8* rowptr = row; - int rowstride = kRowSize; - int lasty = yi; - - // TODO(fbarchard): Convert first 2 rows of YUV to ARGB. 
- ScaleARGBFilterCols(rowptr, src_row_y, dst_width, x, dx); - if (src_height > 1) { - src_row_y += src_stride_y; - if (yi & 1) { - src_row_u += src_stride_u; - src_row_v += src_stride_v; - } - } - ScaleARGBFilterCols(rowptr + rowstride, src_row_y, dst_width, x, dx); - if (src_height > 2) { - src_row_y += src_stride_y; - if (!(yi & 1)) { - src_row_u += src_stride_u; - src_row_v += src_stride_v; - } - } - - for (j = 0; j < dst_height; ++j) { - yi = y >> 16; - if (yi != lasty) { - if (y > max_y) { - y = max_y; - yi = y >> 16; - uv_yi = yi >> kYShift; - src_row_y = src_y + yi * src_stride_y; - src_row_u = src_u + uv_yi * src_stride_u; - src_row_v = src_v + uv_yi * src_stride_v; - } - if (yi != lasty) { - // TODO(fbarchard): Convert the clipped region of row. - I422ToARGBRow(src_row_y, src_row_u, src_row_v, argb_row, src_width); - ScaleARGBFilterCols(rowptr, argb_row, dst_width, x, dx); - rowptr += rowstride; - rowstride = -rowstride; - lasty = yi; - src_row_y += src_stride_y; - if (yi & 1) { - src_row_u += src_stride_u; - src_row_v += src_stride_v; - } - } - } - if (filtering == kFilterLinear) { - InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0); - } else { - int yf = (y >> 8) & 255; - InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf); - } - dst_argb += dst_stride_argb; - y += dy; - } - free_aligned_buffer_64(row); - free_aligned_buffer_64(row_argb); -} -#endif - -// Scale ARGB to/from any dimensions, without interpolation. -// Fixed point math is used for performance: The upper 16 bits -// of x and dx is the integer part of the source position and -// the lower 16 bits are the fixed decimal part. - -static void ScaleARGBSimple(int src_width, int src_height, - int dst_width, int dst_height, - int src_stride, int dst_stride, - const uint8* src_argb, uint8* dst_argb, - int x, int dx, int y, int dy) { - int j; - void (*ScaleARGBCols)(uint8* dst_argb, const uint8* src_argb, - int dst_width, int x, int dx) = - (src_width >= 32768) ? 
ScaleARGBCols64_C : ScaleARGBCols_C; -#if defined(HAS_SCALEARGBCOLS_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && src_width < 32768) { - ScaleARGBCols = ScaleARGBCols_SSE2; - } -#endif - if (src_width * 2 == dst_width && x < 0x8000) { - ScaleARGBCols = ScaleARGBColsUp2_C; -#if defined(HAS_SCALEARGBCOLSUP2_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) && - IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) && - IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) { - ScaleARGBCols = ScaleARGBColsUp2_SSE2; - } -#endif - } - - for (j = 0; j < dst_height; ++j) { - ScaleARGBCols(dst_argb, src_argb + (y >> 16) * src_stride, - dst_width, x, dx); - dst_argb += dst_stride; - y += dy; - } -} - -// ScaleARGB a ARGB. -// This function in turn calls a scaling function -// suitable for handling the desired resolutions. -static void ScaleARGB(const uint8* src, int src_stride, - int src_width, int src_height, - uint8* dst, int dst_stride, - int dst_width, int dst_height, - int clip_x, int clip_y, int clip_width, int clip_height, - enum FilterMode filtering) { - // Initial source x/y coordinate and step values as 16.16 fixed point. - int x = 0; - int y = 0; - int dx = 0; - int dy = 0; - // ARGB does not support box filter yet, but allow the user to pass it. - // Simplify filtering when possible. - filtering = ScaleFilterReduce(src_width, src_height, - dst_width, dst_height, - filtering); - - // Negative src_height means invert the image. 
- if (src_height < 0) { - src_height = -src_height; - src = src + (src_height - 1) * src_stride; - src_stride = -src_stride; - } - ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, - &x, &y, &dx, &dy); - src_width = Abs(src_width); - if (clip_x) { - int64 clipf = (int64)(clip_x) * dx; - x += (clipf & 0xffff); - src += (clipf >> 16) * 4; - dst += clip_x * 4; - } - if (clip_y) { - int64 clipf = (int64)(clip_y) * dy; - y += (clipf & 0xffff); - src += (clipf >> 16) * src_stride; - dst += clip_y * dst_stride; - } - - // Special case for integer step values. - if (((dx | dy) & 0xffff) == 0) { - if (!dx || !dy) { // 1 pixel wide and/or tall. - filtering = kFilterNone; - } else { - // Optimized even scale down. ie 2, 4, 6, 8, 10x. - if (!(dx & 0x10000) && !(dy & 0x10000)) { - if (dx == 0x20000) { - // Optimized 1/2 downsample. - ScaleARGBDown2(src_width, src_height, - clip_width, clip_height, - src_stride, dst_stride, src, dst, - x, dx, y, dy, filtering); - return; - } - if (dx == 0x40000 && filtering == kFilterBox) { - // Optimized 1/4 box downsample. - ScaleARGBDown4Box(src_width, src_height, - clip_width, clip_height, - src_stride, dst_stride, src, dst, - x, dx, y, dy); - return; - } - ScaleARGBDownEven(src_width, src_height, - clip_width, clip_height, - src_stride, dst_stride, src, dst, - x, dx, y, dy, filtering); - return; - } - // Optimized odd scale down. ie 3, 5, 7, 9x. - if ((dx & 0x10000) && (dy & 0x10000)) { - filtering = kFilterNone; - if (dx == 0x10000 && dy == 0x10000) { - // Straight copy. - ARGBCopy(src + (y >> 16) * src_stride + (x >> 16) * 4, src_stride, - dst, dst_stride, clip_width, clip_height); - return; - } - } - } - } - if (dx == 0x10000 && (x & 0xffff) == 0) { - // Arbitrary scale vertically, but unscaled vertically. 
- ScalePlaneVertical(src_height, - clip_width, clip_height, - src_stride, dst_stride, src, dst, - x, y, dy, 4, filtering); - return; - } - if (filtering && dy < 65536) { - ScaleARGBBilinearUp(src_width, src_height, - clip_width, clip_height, - src_stride, dst_stride, src, dst, - x, dx, y, dy, filtering); - return; - } - if (filtering) { - ScaleARGBBilinearDown(src_width, src_height, - clip_width, clip_height, - src_stride, dst_stride, src, dst, - x, dx, y, dy, filtering); - return; - } - ScaleARGBSimple(src_width, src_height, clip_width, clip_height, - src_stride, dst_stride, src, dst, - x, dx, y, dy); -} - -LIBYUV_API -int ARGBScaleClip(const uint8* src_argb, int src_stride_argb, - int src_width, int src_height, - uint8* dst_argb, int dst_stride_argb, - int dst_width, int dst_height, - int clip_x, int clip_y, int clip_width, int clip_height, - enum FilterMode filtering) { - if (!src_argb || src_width == 0 || src_height == 0 || - !dst_argb || dst_width <= 0 || dst_height <= 0 || - clip_x < 0 || clip_y < 0 || - (clip_x + clip_width) > dst_width || - (clip_y + clip_height) > dst_height) { - return -1; - } - ScaleARGB(src_argb, src_stride_argb, src_width, src_height, - dst_argb, dst_stride_argb, dst_width, dst_height, - clip_x, clip_y, clip_width, clip_height, filtering); - return 0; -} - -// Scale an ARGB image. 
-LIBYUV_API -int ARGBScale(const uint8* src_argb, int src_stride_argb, - int src_width, int src_height, - uint8* dst_argb, int dst_stride_argb, - int dst_width, int dst_height, - enum FilterMode filtering) { - if (!src_argb || src_width == 0 || src_height == 0 || - !dst_argb || dst_width <= 0 || dst_height <= 0) { - return -1; - } - ScaleARGB(src_argb, src_stride_argb, src_width, src_height, - dst_argb, dst_stride_argb, dst_width, dst_height, - 0, 0, dst_width, dst_height, filtering); - return 0; -} - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/scale_argb_neon.cc b/drivers/theoraplayer/src/YUV/libyuv/src/scale_argb_neon.cc deleted file mode 100755 index c0b5433239..0000000000 --- a/drivers/theoraplayer/src/YUV/libyuv/src/scale_argb_neon.cc +++ /dev/null @@ -1,145 +0,0 @@ -/* - * Copyright 2012 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "libyuv/basic_types.h" -#include "libyuv/row.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -// This module is for GCC Neon -#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__) - -void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t /* src_stride */, - uint8* dst, int dst_width) { - asm volatile ( -#ifdef _ANDROID
- ".fpu neon\n"
-#endif - "1: \n" - // load even pixels into q0, odd into q1 - "vld2.32 {q0, q1}, [%0]! \n" - "vld2.32 {q2, q3}, [%0]! \n" - "subs %2, %2, #8 \n" // 8 processed per loop - "vst1.8 {q1}, [%1]! \n" // store odd pixels - "vst1.8 {q3}, [%1]! \n" - "bgt 1b \n" - : "+r"(src_ptr), // %0 - "+r"(dst), // %1 - "+r"(dst_width) // %2 - : - : "memory", "cc", "q0", "q1", "q2", "q3" // Clobber List - ); -} - -void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width) { - asm volatile ( - // change the stride to row 2 pointer - "add %1, %1, %0 \n" - "1: \n" - "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels. - "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB pixels. - "subs %3, %3, #8 \n" // 8 processed per loop. - "vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts. - "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts. - "vpaddl.u8 q2, q2 \n" // R 16 bytes -> 8 shorts. - "vpaddl.u8 q3, q3 \n" // A 16 bytes -> 8 shorts. - "vld4.8 {d16, d18, d20, d22}, [%1]! \n" // load 8 more ARGB pixels. - "vld4.8 {d17, d19, d21, d23}, [%1]! \n" // load last 8 ARGB pixels. - "vpadal.u8 q0, q8 \n" // B 16 bytes -> 8 shorts. - "vpadal.u8 q1, q9 \n" // G 16 bytes -> 8 shorts. - "vpadal.u8 q2, q10 \n" // R 16 bytes -> 8 shorts. - "vpadal.u8 q3, q11 \n" // A 16 bytes -> 8 shorts. - "vrshrn.u16 d0, q0, #2 \n" // downshift, round and pack - "vrshrn.u16 d1, q1, #2 \n" - "vrshrn.u16 d2, q2, #2 \n" - "vrshrn.u16 d3, q3, #2 \n" - "vst4.8 {d0, d1, d2, d3}, [%2]! \n" - "bgt 1b \n" - : "+r"(src_ptr), // %0 - "+r"(src_stride), // %1 - "+r"(dst), // %2 - "+r"(dst_width) // %3 - : - : "memory", "cc", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11" - ); -} - -// Reads 4 pixels at a time. -// Alignment requirement: src_argb 4 byte aligned. 
-void ScaleARGBRowDownEven_NEON(const uint8* src_argb, ptrdiff_t, int src_stepx, - uint8* dst_argb, int dst_width) { - asm volatile ( - "mov r12, %3, lsl #2 \n" - ".p2align 2 \n" - "1: \n" - "vld1.32 {d0[0]}, [%0], r12 \n" - "vld1.32 {d0[1]}, [%0], r12 \n" - "vld1.32 {d1[0]}, [%0], r12 \n" - "vld1.32 {d1[1]}, [%0], r12 \n" - "subs %2, %2, #4 \n" // 4 pixels per loop. - "vst1.8 {q0}, [%1]! \n" - "bgt 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_argb), // %1 - "+r"(dst_width) // %2 - : "r"(src_stepx) // %3 - : "memory", "cc", "r12", "q0" - ); -} - -// Reads 4 pixels at a time. -// Alignment requirement: src_argb 4 byte aligned. -void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, ptrdiff_t src_stride, - int src_stepx, - uint8* dst_argb, int dst_width) { - asm volatile ( - "mov r12, %4, lsl #2 \n" - "add %1, %1, %0 \n" - ".p2align 2 \n" - "1: \n" - "vld1.8 {d0}, [%0], r12 \n" // Read 4 2x2 blocks -> 2x1 - "vld1.8 {d1}, [%1], r12 \n" - "vld1.8 {d2}, [%0], r12 \n" - "vld1.8 {d3}, [%1], r12 \n" - "vld1.8 {d4}, [%0], r12 \n" - "vld1.8 {d5}, [%1], r12 \n" - "vld1.8 {d6}, [%0], r12 \n" - "vld1.8 {d7}, [%1], r12 \n" - "vaddl.u8 q0, d0, d1 \n" - "vaddl.u8 q1, d2, d3 \n" - "vaddl.u8 q2, d4, d5 \n" - "vaddl.u8 q3, d6, d7 \n" - "vswp.8 d1, d2 \n" // ab_cd -> ac_bd - "vswp.8 d5, d6 \n" // ef_gh -> eg_fh - "vadd.u16 q0, q0, q1 \n" // (a+b)_(c+d) - "vadd.u16 q2, q2, q3 \n" // (e+f)_(g+h) - "vrshrn.u16 d0, q0, #2 \n" // first 2 pixels. - "vrshrn.u16 d1, q2, #2 \n" // next 2 pixels. - "subs %3, %3, #4 \n" // 4 pixels per loop. - "vst1.8 {q0}, [%2]! 
\n" - "bgt 1b \n" - : "+r"(src_argb), // %0 - "+r"(src_stride), // %1 - "+r"(dst_argb), // %2 - "+r"(dst_width) // %3 - : "r"(src_stepx) // %4 - : "memory", "cc", "r12", "q0", "q1", "q2", "q3" - ); -} -#endif // __ARM_NEON__ - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/scale_common.cc b/drivers/theoraplayer/src/YUV/libyuv/src/scale_common.cc deleted file mode 100644 index 6ed8bfaf97..0000000000 --- a/drivers/theoraplayer/src/YUV/libyuv/src/scale_common.cc +++ /dev/null @@ -1,772 +0,0 @@ -/* - * Copyright 2013 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "libyuv/scale.h" - -#include <assert.h> -#include <string.h> - -#include "libyuv/cpu_id.h" -#include "libyuv/planar_functions.h" // For CopyARGB -#include "libyuv/row.h" -#include "libyuv/scale_row.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -static __inline int Abs(int v) { - return v >= 0 ? 
v : -v; -} - -// CPU agnostic row functions -void ScaleRowDown2_C(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width) { - int x; - for (x = 0; x < dst_width - 1; x += 2) { - dst[0] = src_ptr[1]; - dst[1] = src_ptr[3]; - dst += 2; - src_ptr += 4; - } - if (dst_width & 1) { - dst[0] = src_ptr[1]; - } -} - -void ScaleRowDown2Linear_C(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width) { - const uint8* s = src_ptr; - int x; - for (x = 0; x < dst_width - 1; x += 2) { - dst[0] = (s[0] + s[1] + 1) >> 1; - dst[1] = (s[2] + s[3] + 1) >> 1; - dst += 2; - s += 4; - } - if (dst_width & 1) { - dst[0] = (s[0] + s[1] + 1) >> 1; - } -} - -void ScaleRowDown2Box_C(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width) { - const uint8* s = src_ptr; - const uint8* t = src_ptr + src_stride; - int x; - for (x = 0; x < dst_width - 1; x += 2) { - dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2; - dst[1] = (s[2] + s[3] + t[2] + t[3] + 2) >> 2; - dst += 2; - s += 4; - t += 4; - } - if (dst_width & 1) { - dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2; - } -} - -void ScaleRowDown4_C(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width) { - int x; - for (x = 0; x < dst_width - 1; x += 2) { - dst[0] = src_ptr[2]; - dst[1] = src_ptr[6]; - dst += 2; - src_ptr += 8; - } - if (dst_width & 1) { - dst[0] = src_ptr[2]; - } -} - -void ScaleRowDown4Box_C(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width) { - intptr_t stride = src_stride; - int x; - for (x = 0; x < dst_width - 1; x += 2) { - dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] + - src_ptr[stride + 0] + src_ptr[stride + 1] + - src_ptr[stride + 2] + src_ptr[stride + 3] + - src_ptr[stride * 2 + 0] + src_ptr[stride * 2 + 1] + - src_ptr[stride * 2 + 2] + src_ptr[stride * 2 + 3] + - src_ptr[stride * 3 + 0] + src_ptr[stride * 3 + 1] + - src_ptr[stride * 3 + 2] + src_ptr[stride * 3 + 3] + - 8) >> 4; - dst[1] = (src_ptr[4] + src_ptr[5] + 
src_ptr[6] + src_ptr[7] + - src_ptr[stride + 4] + src_ptr[stride + 5] + - src_ptr[stride + 6] + src_ptr[stride + 7] + - src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5] + - src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7] + - src_ptr[stride * 3 + 4] + src_ptr[stride * 3 + 5] + - src_ptr[stride * 3 + 6] + src_ptr[stride * 3 + 7] + - 8) >> 4; - dst += 2; - src_ptr += 8; - } - if (dst_width & 1) { - dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] + - src_ptr[stride + 0] + src_ptr[stride + 1] + - src_ptr[stride + 2] + src_ptr[stride + 3] + - src_ptr[stride * 2 + 0] + src_ptr[stride * 2 + 1] + - src_ptr[stride * 2 + 2] + src_ptr[stride * 2 + 3] + - src_ptr[stride * 3 + 0] + src_ptr[stride * 3 + 1] + - src_ptr[stride * 3 + 2] + src_ptr[stride * 3 + 3] + - 8) >> 4; - } -} - -void ScaleRowDown34_C(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width) { - int x; - assert((dst_width % 3 == 0) && (dst_width > 0)); - for (x = 0; x < dst_width; x += 3) { - dst[0] = src_ptr[0]; - dst[1] = src_ptr[1]; - dst[2] = src_ptr[3]; - dst += 3; - src_ptr += 4; - } -} - -// Filter rows 0 and 1 together, 3 : 1 -void ScaleRowDown34_0_Box_C(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* d, int dst_width) { - const uint8* s = src_ptr; - const uint8* t = src_ptr + src_stride; - int x; - assert((dst_width % 3 == 0) && (dst_width > 0)); - for (x = 0; x < dst_width; x += 3) { - uint8 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2; - uint8 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1; - uint8 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2; - uint8 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2; - uint8 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1; - uint8 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2; - d[0] = (a0 * 3 + b0 + 2) >> 2; - d[1] = (a1 * 3 + b1 + 2) >> 2; - d[2] = (a2 * 3 + b2 + 2) >> 2; - d += 3; - s += 4; - t += 4; - } -} - -// Filter rows 1 and 2 together, 1 : 1 -void ScaleRowDown34_1_Box_C(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* d, int dst_width) { - const uint8* s = src_ptr; - const uint8* t 
= src_ptr + src_stride; - int x; - assert((dst_width % 3 == 0) && (dst_width > 0)); - for (x = 0; x < dst_width; x += 3) { - uint8 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2; - uint8 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1; - uint8 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2; - uint8 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2; - uint8 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1; - uint8 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2; - d[0] = (a0 + b0 + 1) >> 1; - d[1] = (a1 + b1 + 1) >> 1; - d[2] = (a2 + b2 + 1) >> 1; - d += 3; - s += 4; - t += 4; - } -} - -// Scales a single row of pixels using point sampling. -void ScaleCols_C(uint8* dst_ptr, const uint8* src_ptr, - int dst_width, int x, int dx) { - int j; - for (j = 0; j < dst_width - 1; j += 2) { - dst_ptr[0] = src_ptr[x >> 16]; - x += dx; - dst_ptr[1] = src_ptr[x >> 16]; - x += dx; - dst_ptr += 2; - } - if (dst_width & 1) { - dst_ptr[0] = src_ptr[x >> 16]; - } -} - -// Scales a single row of pixels up by 2x using point sampling. -void ScaleColsUp2_C(uint8* dst_ptr, const uint8* src_ptr, - int dst_width, int x, int dx) { - int j; - for (j = 0; j < dst_width - 1; j += 2) { - dst_ptr[1] = dst_ptr[0] = src_ptr[0]; - src_ptr += 1; - dst_ptr += 2; - } - if (dst_width & 1) { - dst_ptr[0] = src_ptr[0]; - } -} - -// (1-f)a + fb can be replaced with a + f(b-a) -#define BLENDER(a, b, f) (uint8)((int)(a) + \ - ((int)(f) * ((int)(b) - (int)(a)) >> 16)) - -void ScaleFilterCols_C(uint8* dst_ptr, const uint8* src_ptr, - int dst_width, int x, int dx) { - int j; - for (j = 0; j < dst_width - 1; j += 2) { - int xi = x >> 16; - int a = src_ptr[xi]; - int b = src_ptr[xi + 1]; - dst_ptr[0] = BLENDER(a, b, x & 0xffff); - x += dx; - xi = x >> 16; - a = src_ptr[xi]; - b = src_ptr[xi + 1]; - dst_ptr[1] = BLENDER(a, b, x & 0xffff); - x += dx; - dst_ptr += 2; - } - if (dst_width & 1) { - int xi = x >> 16; - int a = src_ptr[xi]; - int b = src_ptr[xi + 1]; - dst_ptr[0] = BLENDER(a, b, x & 0xffff); - } -} - -void ScaleFilterCols64_C(uint8* dst_ptr, const uint8* src_ptr, - int 
dst_width, int x32, int dx) { - int64 x = (int64)(x32); - int j; - for (j = 0; j < dst_width - 1; j += 2) { - int64 xi = x >> 16; - int a = src_ptr[xi]; - int b = src_ptr[xi + 1]; - dst_ptr[0] = BLENDER(a, b, x & 0xffff); - x += dx; - xi = x >> 16; - a = src_ptr[xi]; - b = src_ptr[xi + 1]; - dst_ptr[1] = BLENDER(a, b, x & 0xffff); - x += dx; - dst_ptr += 2; - } - if (dst_width & 1) { - int64 xi = x >> 16; - int a = src_ptr[xi]; - int b = src_ptr[xi + 1]; - dst_ptr[0] = BLENDER(a, b, x & 0xffff); - } -} -#undef BLENDER - -void ScaleRowDown38_C(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width) { - int x; - assert(dst_width % 3 == 0); - for (x = 0; x < dst_width; x += 3) { - dst[0] = src_ptr[0]; - dst[1] = src_ptr[3]; - dst[2] = src_ptr[6]; - dst += 3; - src_ptr += 8; - } -} - -// 8x3 -> 3x1 -void ScaleRowDown38_3_Box_C(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { - intptr_t stride = src_stride; - int i; - assert((dst_width % 3 == 0) && (dst_width > 0)); - for (i = 0; i < dst_width; i += 3) { - dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + - src_ptr[stride + 0] + src_ptr[stride + 1] + - src_ptr[stride + 2] + src_ptr[stride * 2 + 0] + - src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2]) * - (65536 / 9) >> 16; - dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] + - src_ptr[stride + 3] + src_ptr[stride + 4] + - src_ptr[stride + 5] + src_ptr[stride * 2 + 3] + - src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5]) * - (65536 / 9) >> 16; - dst_ptr[2] = (src_ptr[6] + src_ptr[7] + - src_ptr[stride + 6] + src_ptr[stride + 7] + - src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7]) * - (65536 / 6) >> 16; - src_ptr += 8; - dst_ptr += 3; - } -} - -// 8x2 -> 3x1 -void ScaleRowDown38_2_Box_C(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { - intptr_t stride = src_stride; - int i; - assert((dst_width % 3 == 0) && (dst_width > 0)); - for (i = 0; i < dst_width; i += 3) { - dst_ptr[0] = 
(src_ptr[0] + src_ptr[1] + src_ptr[2] + - src_ptr[stride + 0] + src_ptr[stride + 1] + - src_ptr[stride + 2]) * (65536 / 6) >> 16; - dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] + - src_ptr[stride + 3] + src_ptr[stride + 4] + - src_ptr[stride + 5]) * (65536 / 6) >> 16; - dst_ptr[2] = (src_ptr[6] + src_ptr[7] + - src_ptr[stride + 6] + src_ptr[stride + 7]) * - (65536 / 4) >> 16; - src_ptr += 8; - dst_ptr += 3; - } -} - -void ScaleAddRows_C(const uint8* src_ptr, ptrdiff_t src_stride, - uint16* dst_ptr, int src_width, int src_height) { - int x; - assert(src_width > 0); - assert(src_height > 0); - for (x = 0; x < src_width; ++x) { - const uint8* s = src_ptr + x; - unsigned int sum = 0u; - int y; - for (y = 0; y < src_height; ++y) { - sum += s[0]; - s += src_stride; - } - // TODO(fbarchard): Consider limitting height to 256 to avoid overflow. - dst_ptr[x] = sum < 65535u ? sum : 65535u; - } -} - -void ScaleARGBRowDown2_C(const uint8* src_argb, - ptrdiff_t src_stride, - uint8* dst_argb, int dst_width) { - const uint32* src = (const uint32*)(src_argb); - uint32* dst = (uint32*)(dst_argb); - - int x; - for (x = 0; x < dst_width - 1; x += 2) { - dst[0] = src[1]; - dst[1] = src[3]; - src += 4; - dst += 2; - } - if (dst_width & 1) { - dst[0] = src[1]; - } -} - -void ScaleARGBRowDown2Linear_C(const uint8* src_argb, - ptrdiff_t src_stride, - uint8* dst_argb, int dst_width) { - int x; - for (x = 0; x < dst_width; ++x) { - dst_argb[0] = (src_argb[0] + src_argb[4] + 1) >> 1; - dst_argb[1] = (src_argb[1] + src_argb[5] + 1) >> 1; - dst_argb[2] = (src_argb[2] + src_argb[6] + 1) >> 1; - dst_argb[3] = (src_argb[3] + src_argb[7] + 1) >> 1; - src_argb += 8; - dst_argb += 4; - } -} - -void ScaleARGBRowDown2Box_C(const uint8* src_argb, ptrdiff_t src_stride, - uint8* dst_argb, int dst_width) { - int x; - for (x = 0; x < dst_width; ++x) { - dst_argb[0] = (src_argb[0] + src_argb[4] + - src_argb[src_stride] + src_argb[src_stride + 4] + 2) >> 2; - dst_argb[1] = (src_argb[1] + src_argb[5] + - 
src_argb[src_stride + 1] + src_argb[src_stride + 5] + 2) >> 2; - dst_argb[2] = (src_argb[2] + src_argb[6] + - src_argb[src_stride + 2] + src_argb[src_stride + 6] + 2) >> 2; - dst_argb[3] = (src_argb[3] + src_argb[7] + - src_argb[src_stride + 3] + src_argb[src_stride + 7] + 2) >> 2; - src_argb += 8; - dst_argb += 4; - } -} - -void ScaleARGBRowDownEven_C(const uint8* src_argb, ptrdiff_t src_stride, - int src_stepx, - uint8* dst_argb, int dst_width) { - const uint32* src = (const uint32*)(src_argb); - uint32* dst = (uint32*)(dst_argb); - - int x; - for (x = 0; x < dst_width - 1; x += 2) { - dst[0] = src[0]; - dst[1] = src[src_stepx]; - src += src_stepx * 2; - dst += 2; - } - if (dst_width & 1) { - dst[0] = src[0]; - } -} - -void ScaleARGBRowDownEvenBox_C(const uint8* src_argb, - ptrdiff_t src_stride, - int src_stepx, - uint8* dst_argb, int dst_width) { - int x; - for (x = 0; x < dst_width; ++x) { - dst_argb[0] = (src_argb[0] + src_argb[4] + - src_argb[src_stride] + src_argb[src_stride + 4] + 2) >> 2; - dst_argb[1] = (src_argb[1] + src_argb[5] + - src_argb[src_stride + 1] + src_argb[src_stride + 5] + 2) >> 2; - dst_argb[2] = (src_argb[2] + src_argb[6] + - src_argb[src_stride + 2] + src_argb[src_stride + 6] + 2) >> 2; - dst_argb[3] = (src_argb[3] + src_argb[7] + - src_argb[src_stride + 3] + src_argb[src_stride + 7] + 2) >> 2; - src_argb += src_stepx * 4; - dst_argb += 4; - } -} - -// Scales a single row of pixels using point sampling. 
-void ScaleARGBCols_C(uint8* dst_argb, const uint8* src_argb, - int dst_width, int x, int dx) { - const uint32* src = (const uint32*)(src_argb); - uint32* dst = (uint32*)(dst_argb); - int j; - for (j = 0; j < dst_width - 1; j += 2) { - dst[0] = src[x >> 16]; - x += dx; - dst[1] = src[x >> 16]; - x += dx; - dst += 2; - } - if (dst_width & 1) { - dst[0] = src[x >> 16]; - } -} - -void ScaleARGBCols64_C(uint8* dst_argb, const uint8* src_argb, - int dst_width, int x32, int dx) { - int64 x = (int64)(x32); - const uint32* src = (const uint32*)(src_argb); - uint32* dst = (uint32*)(dst_argb); - int j; - for (j = 0; j < dst_width - 1; j += 2) { - dst[0] = src[x >> 16]; - x += dx; - dst[1] = src[x >> 16]; - x += dx; - dst += 2; - } - if (dst_width & 1) { - dst[0] = src[x >> 16]; - } -} - -// Scales a single row of pixels up by 2x using point sampling. -void ScaleARGBColsUp2_C(uint8* dst_argb, const uint8* src_argb, - int dst_width, int x, int dx) { - const uint32* src = (const uint32*)(src_argb); - uint32* dst = (uint32*)(dst_argb); - int j; - for (j = 0; j < dst_width - 1; j += 2) { - dst[1] = dst[0] = src[0]; - src += 1; - dst += 2; - } - if (dst_width & 1) { - dst[0] = src[0]; - } -} - -// Mimics SSSE3 blender -#define BLENDER1(a, b, f) ((a) * (0x7f ^ f) + (b) * f) >> 7 -#define BLENDERC(a, b, f, s) (uint32)( \ - BLENDER1(((a) >> s) & 255, ((b) >> s) & 255, f) << s) -#define BLENDER(a, b, f) \ - BLENDERC(a, b, f, 24) | BLENDERC(a, b, f, 16) | \ - BLENDERC(a, b, f, 8) | BLENDERC(a, b, f, 0) - -void ScaleARGBFilterCols_C(uint8* dst_argb, const uint8* src_argb, - int dst_width, int x, int dx) { - const uint32* src = (const uint32*)(src_argb); - uint32* dst = (uint32*)(dst_argb); - int j; - for (j = 0; j < dst_width - 1; j += 2) { - int xi = x >> 16; - int xf = (x >> 9) & 0x7f; - uint32 a = src[xi]; - uint32 b = src[xi + 1]; - dst[0] = BLENDER(a, b, xf); - x += dx; - xi = x >> 16; - xf = (x >> 9) & 0x7f; - a = src[xi]; - b = src[xi + 1]; - dst[1] = BLENDER(a, b, xf); - x += 
dx; - dst += 2; - } - if (dst_width & 1) { - int xi = x >> 16; - int xf = (x >> 9) & 0x7f; - uint32 a = src[xi]; - uint32 b = src[xi + 1]; - dst[0] = BLENDER(a, b, xf); - } -} - -void ScaleARGBFilterCols64_C(uint8* dst_argb, const uint8* src_argb, - int dst_width, int x32, int dx) { - int64 x = (int64)(x32); - const uint32* src = (const uint32*)(src_argb); - uint32* dst = (uint32*)(dst_argb); - int j; - for (j = 0; j < dst_width - 1; j += 2) { - int64 xi = x >> 16; - int xf = (x >> 9) & 0x7f; - uint32 a = src[xi]; - uint32 b = src[xi + 1]; - dst[0] = BLENDER(a, b, xf); - x += dx; - xi = x >> 16; - xf = (x >> 9) & 0x7f; - a = src[xi]; - b = src[xi + 1]; - dst[1] = BLENDER(a, b, xf); - x += dx; - dst += 2; - } - if (dst_width & 1) { - int64 xi = x >> 16; - int xf = (x >> 9) & 0x7f; - uint32 a = src[xi]; - uint32 b = src[xi + 1]; - dst[0] = BLENDER(a, b, xf); - } -} -#undef BLENDER1 -#undef BLENDERC -#undef BLENDER - -// Scale plane vertically with bilinear interpolation. -void ScalePlaneVertical(int src_height, - int dst_width, int dst_height, - int src_stride, int dst_stride, - const uint8* src_argb, uint8* dst_argb, - int x, int y, int dy, - int bpp, enum FilterMode filtering) { - // TODO(fbarchard): Allow higher bpp. - int dst_width_bytes = dst_width * bpp; - void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb, - ptrdiff_t src_stride, int dst_width, int source_y_fraction) = - InterpolateRow_C; - const int max_y = (src_height > 1) ? 
((src_height - 1) << 16) - 1 : 0; - int j; - assert(bpp >= 1 && bpp <= 4); - assert(src_height != 0); - assert(dst_width > 0); - assert(dst_height > 0); - src_argb += (x >> 16) * bpp; -#if defined(HAS_INTERPOLATEROW_SSE2) - if (TestCpuFlag(kCpuHasSSE2) && dst_width_bytes >= 16) { - InterpolateRow = InterpolateRow_Any_SSE2; - if (IS_ALIGNED(dst_width_bytes, 16)) { - InterpolateRow = InterpolateRow_Unaligned_SSE2; - if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) && - IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) { - InterpolateRow = InterpolateRow_SSE2; - } - } - } -#endif -#if defined(HAS_INTERPOLATEROW_SSSE3) - if (TestCpuFlag(kCpuHasSSSE3) && dst_width_bytes >= 16) { - InterpolateRow = InterpolateRow_Any_SSSE3; - if (IS_ALIGNED(dst_width_bytes, 16)) { - InterpolateRow = InterpolateRow_Unaligned_SSSE3; - if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) && - IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) { - InterpolateRow = InterpolateRow_SSSE3; - } - } - } -#endif -#if defined(HAS_INTERPOLATEROW_AVX2) - if (TestCpuFlag(kCpuHasAVX2) && dst_width_bytes >= 32) { - InterpolateRow = InterpolateRow_Any_AVX2; - if (IS_ALIGNED(dst_width_bytes, 32)) { - InterpolateRow = InterpolateRow_AVX2; - } - } -#endif -#if defined(HAS_INTERPOLATEROW_NEON) - if (TestCpuFlag(kCpuHasNEON) && dst_width_bytes >= 16) { - InterpolateRow = InterpolateRow_Any_NEON; - if (IS_ALIGNED(dst_width_bytes, 16)) { - InterpolateRow = InterpolateRow_NEON; - } - } -#endif -#if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2) - if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width_bytes >= 4 && - IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4) && - IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride, 4)) { - InterpolateRow = InterpolateRow_Any_MIPS_DSPR2; - if (IS_ALIGNED(dst_width_bytes, 4)) { - InterpolateRow = InterpolateRow_MIPS_DSPR2; - } - } -#endif - for (j = 0; j < dst_height; ++j) { - int yi; - int yf; - if (y > max_y) { - y = max_y; - } - yi = y >> 16; - yf = filtering 
? ((y >> 8) & 255) : 0; - InterpolateRow(dst_argb, src_argb + yi * src_stride, - src_stride, dst_width_bytes, yf); - dst_argb += dst_stride; - y += dy; - } -} - -// Simplify the filtering based on scale factors. -enum FilterMode ScaleFilterReduce(int src_width, int src_height, - int dst_width, int dst_height, - enum FilterMode filtering) { - if (src_width < 0) { - src_width = -src_width; - } - if (src_height < 0) { - src_height = -src_height; - } - if (filtering == kFilterBox) { - // If scaling both axis to 0.5 or larger, switch from Box to Bilinear. - if (dst_width * 2 >= src_width && dst_height * 2 >= src_height) { - filtering = kFilterBilinear; - } - // If scaling to larger, switch from Box to Bilinear. - if (dst_width >= src_width || dst_height >= src_height) { - filtering = kFilterBilinear; - } - } - if (filtering == kFilterBilinear) { - if (src_height == 1) { - filtering = kFilterLinear; - } - // TODO(fbarchard): Detect any odd scale factor and reduce to Linear. - if (dst_height == src_height || dst_height * 3 == src_height) { - filtering = kFilterLinear; - } - // TODO(fbarchard): Remove 1 pixel wide filter restriction, which is to - // avoid reading 2 pixels horizontally that causes memory exception. - if (src_width == 1) { - filtering = kFilterNone; - } - } - if (filtering == kFilterLinear) { - if (src_width == 1) { - filtering = kFilterNone; - } - // TODO(fbarchard): Detect any odd scale factor and reduce to None. - if (dst_width == src_width || dst_width * 3 == src_width) { - filtering = kFilterNone; - } - } - return filtering; -} - -// Divide num by div and return as 16.16 fixed point result. -int FixedDiv_C(int num, int div) { - return (int)(((int64)(num) << 16) / div); -} - -// Divide num by div and return as 16.16 fixed point result. -int FixedDiv1_C(int num, int div) { - return (int)((((int64)(num) << 16) - 0x00010001) / - (div - 1)); -} - -#define CENTERSTART(dx, s) (dx < 0) ? 
-((-dx >> 1) + s) : ((dx >> 1) + s) - -// Compute slope values for stepping. -void ScaleSlope(int src_width, int src_height, - int dst_width, int dst_height, - enum FilterMode filtering, - int* x, int* y, int* dx, int* dy) { - assert(x != NULL); - assert(y != NULL); - assert(dx != NULL); - assert(dy != NULL); - assert(src_width != 0); - assert(src_height != 0); - assert(dst_width > 0); - assert(dst_height > 0); - // Check for 1 pixel and avoid FixedDiv overflow. - if (dst_width == 1 && src_width >= 32768) { - dst_width = src_width; - } - if (dst_height == 1 && src_height >= 32768) { - dst_height = src_height; - } - if (filtering == kFilterBox) { - // Scale step for point sampling duplicates all pixels equally. - *dx = FixedDiv(Abs(src_width), dst_width); - *dy = FixedDiv(src_height, dst_height); - *x = 0; - *y = 0; - } else if (filtering == kFilterBilinear) { - // Scale step for bilinear sampling renders last pixel once for upsample. - if (dst_width <= Abs(src_width)) { - *dx = FixedDiv(Abs(src_width), dst_width); - *x = CENTERSTART(*dx, -32768); // Subtract 0.5 (32768) to center filter. - } else if (dst_width > 1) { - *dx = FixedDiv1(Abs(src_width), dst_width); - *x = 0; - } - if (dst_height <= src_height) { - *dy = FixedDiv(src_height, dst_height); - *y = CENTERSTART(*dy, -32768); // Subtract 0.5 (32768) to center filter. - } else if (dst_height > 1) { - *dy = FixedDiv1(src_height, dst_height); - *y = 0; - } - } else if (filtering == kFilterLinear) { - // Scale step for bilinear sampling renders last pixel once for upsample. - if (dst_width <= Abs(src_width)) { - *dx = FixedDiv(Abs(src_width), dst_width); - *x = CENTERSTART(*dx, -32768); // Subtract 0.5 (32768) to center filter. - } else if (dst_width > 1) { - *dx = FixedDiv1(Abs(src_width), dst_width); - *x = 0; - } - *dy = FixedDiv(src_height, dst_height); - *y = *dy >> 1; - } else { - // Scale step for point sampling duplicates all pixels equally. 
- *dx = FixedDiv(Abs(src_width), dst_width); - *dy = FixedDiv(src_height, dst_height); - *x = CENTERSTART(*dx, 0); - *y = CENTERSTART(*dy, 0); - } - // Negative src_width means horizontally mirror. - if (src_width < 0) { - *x += (dst_width - 1) * *dx; - *dx = -*dx; - // src_width = -src_width; // Caller must do this. - } -} -#undef CENTERSTART - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/scale_mips.cc b/drivers/theoraplayer/src/YUV/libyuv/src/scale_mips.cc deleted file mode 100755 index 4572f4504e..0000000000 --- a/drivers/theoraplayer/src/YUV/libyuv/src/scale_mips.cc +++ /dev/null @@ -1,653 +0,0 @@ -/* - * Copyright 2012 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include "libyuv/basic_types.h" -#include "libyuv/row.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -// This module is for GCC MIPS DSPR2 -#if !defined(LIBYUV_DISABLE_MIPS) && \ - defined(__mips_dsp) && (__mips_dsp_rev >= 2) - -void ScaleRowDown2_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width) { - __asm__ __volatile__( - ".set push \n" - ".set noreorder \n" - - "srl $t9, %[dst_width], 4 \n" // iterations -> by 16 - "beqz $t9, 2f \n" - " nop \n" - - ".p2align 2 \n" - "1: \n" - "lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0| - "lw $t1, 4(%[src_ptr]) \n" // |7|6|5|4| - "lw $t2, 8(%[src_ptr]) \n" // |11|10|9|8| - "lw $t3, 12(%[src_ptr]) \n" // |15|14|13|12| - "lw $t4, 16(%[src_ptr]) \n" // |19|18|17|16| - "lw $t5, 20(%[src_ptr]) \n" // |23|22|21|20| - "lw $t6, 24(%[src_ptr]) \n" // |27|26|25|24| - "lw $t7, 28(%[src_ptr]) \n" // |31|30|29|28| - // TODO(fbarchard): Use odd pixels instead of even. - "precr.qb.ph $t8, $t1, $t0 \n" // |6|4|2|0| - "precr.qb.ph $t0, $t3, $t2 \n" // |14|12|10|8| - "precr.qb.ph $t1, $t5, $t4 \n" // |22|20|18|16| - "precr.qb.ph $t2, $t7, $t6 \n" // |30|28|26|24| - "addiu %[src_ptr], %[src_ptr], 32 \n" - "addiu $t9, $t9, -1 \n" - "sw $t8, 0(%[dst]) \n" - "sw $t0, 4(%[dst]) \n" - "sw $t1, 8(%[dst]) \n" - "sw $t2, 12(%[dst]) \n" - "bgtz $t9, 1b \n" - " addiu %[dst], %[dst], 16 \n" - - "2: \n" - "andi $t9, %[dst_width], 0xf \n" // residue - "beqz $t9, 3f \n" - " nop \n" - - "21: \n" - "lbu $t0, 0(%[src_ptr]) \n" - "addiu %[src_ptr], %[src_ptr], 2 \n" - "addiu $t9, $t9, -1 \n" - "sb $t0, 0(%[dst]) \n" - "bgtz $t9, 21b \n" - " addiu %[dst], %[dst], 1 \n" - - "3: \n" - ".set pop \n" - : [src_ptr] "+r" (src_ptr), - [dst] "+r" (dst) - : [dst_width] "r" (dst_width) - : "t0", "t1", "t2", "t3", "t4", "t5", - "t6", "t7", "t8", "t9" - ); -} - -void ScaleRowDown2Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width) { - const uint8* t = src_ptr + src_stride; - - __asm__ 
__volatile__ ( - ".set push \n" - ".set noreorder \n" - - "srl $t9, %[dst_width], 3 \n" // iterations -> step 8 - "bltz $t9, 2f \n" - " nop \n" - - ".p2align 2 \n" - "1: \n" - "lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0| - "lw $t1, 4(%[src_ptr]) \n" // |7|6|5|4| - "lw $t2, 8(%[src_ptr]) \n" // |11|10|9|8| - "lw $t3, 12(%[src_ptr]) \n" // |15|14|13|12| - "lw $t4, 0(%[t]) \n" // |19|18|17|16| - "lw $t5, 4(%[t]) \n" // |23|22|21|20| - "lw $t6, 8(%[t]) \n" // |27|26|25|24| - "lw $t7, 12(%[t]) \n" // |31|30|29|28| - "addiu $t9, $t9, -1 \n" - "srl $t8, $t0, 16 \n" // |X|X|3|2| - "ins $t0, $t4, 16, 16 \n" // |17|16|1|0| - "ins $t4, $t8, 0, 16 \n" // |19|18|3|2| - "raddu.w.qb $t0, $t0 \n" // |17+16+1+0| - "raddu.w.qb $t4, $t4 \n" // |19+18+3+2| - "shra_r.w $t0, $t0, 2 \n" // |t0+2|>>2 - "shra_r.w $t4, $t4, 2 \n" // |t4+2|>>2 - "srl $t8, $t1, 16 \n" // |X|X|7|6| - "ins $t1, $t5, 16, 16 \n" // |21|20|5|4| - "ins $t5, $t8, 0, 16 \n" // |22|23|7|6| - "raddu.w.qb $t1, $t1 \n" // |21+20+5+4| - "raddu.w.qb $t5, $t5 \n" // |23+22+7+6| - "shra_r.w $t1, $t1, 2 \n" // |t1+2|>>2 - "shra_r.w $t5, $t5, 2 \n" // |t5+2|>>2 - "srl $t8, $t2, 16 \n" // |X|X|11|10| - "ins $t2, $t6, 16, 16 \n" // |25|24|9|8| - "ins $t6, $t8, 0, 16 \n" // |27|26|11|10| - "raddu.w.qb $t2, $t2 \n" // |25+24+9+8| - "raddu.w.qb $t6, $t6 \n" // |27+26+11+10| - "shra_r.w $t2, $t2, 2 \n" // |t2+2|>>2 - "shra_r.w $t6, $t6, 2 \n" // |t5+2|>>2 - "srl $t8, $t3, 16 \n" // |X|X|15|14| - "ins $t3, $t7, 16, 16 \n" // |29|28|13|12| - "ins $t7, $t8, 0, 16 \n" // |31|30|15|14| - "raddu.w.qb $t3, $t3 \n" // |29+28+13+12| - "raddu.w.qb $t7, $t7 \n" // |31+30+15+14| - "shra_r.w $t3, $t3, 2 \n" // |t3+2|>>2 - "shra_r.w $t7, $t7, 2 \n" // |t7+2|>>2 - "addiu %[src_ptr], %[src_ptr], 16 \n" - "addiu %[t], %[t], 16 \n" - "sb $t0, 0(%[dst]) \n" - "sb $t4, 1(%[dst]) \n" - "sb $t1, 2(%[dst]) \n" - "sb $t5, 3(%[dst]) \n" - "sb $t2, 4(%[dst]) \n" - "sb $t6, 5(%[dst]) \n" - "sb $t3, 6(%[dst]) \n" - "sb $t7, 7(%[dst]) \n" - "bgtz $t9, 1b \n" - " 
addiu %[dst], %[dst], 8 \n" - - "2: \n" - "andi $t9, %[dst_width], 0x7 \n" // x = residue - "beqz $t9, 3f \n" - " nop \n" - - "21: \n" - "lwr $t1, 0(%[src_ptr]) \n" - "lwl $t1, 3(%[src_ptr]) \n" - "lwr $t2, 0(%[t]) \n" - "lwl $t2, 3(%[t]) \n" - "srl $t8, $t1, 16 \n" - "ins $t1, $t2, 16, 16 \n" - "ins $t2, $t8, 0, 16 \n" - "raddu.w.qb $t1, $t1 \n" - "raddu.w.qb $t2, $t2 \n" - "shra_r.w $t1, $t1, 2 \n" - "shra_r.w $t2, $t2, 2 \n" - "sb $t1, 0(%[dst]) \n" - "sb $t2, 1(%[dst]) \n" - "addiu %[src_ptr], %[src_ptr], 4 \n" - "addiu $t9, $t9, -2 \n" - "addiu %[t], %[t], 4 \n" - "bgtz $t9, 21b \n" - " addiu %[dst], %[dst], 2 \n" - - "3: \n" - ".set pop \n" - - : [src_ptr] "+r" (src_ptr), - [dst] "+r" (dst), [t] "+r" (t) - : [dst_width] "r" (dst_width) - : "t0", "t1", "t2", "t3", "t4", "t5", - "t6", "t7", "t8", "t9" - ); -} - -void ScaleRowDown4_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width) { - __asm__ __volatile__ ( - ".set push \n" - ".set noreorder \n" - - "srl $t9, %[dst_width], 3 \n" - "beqz $t9, 2f \n" - " nop \n" - - ".p2align 2 \n" - "1: \n" - "lw $t1, 0(%[src_ptr]) \n" // |3|2|1|0| - "lw $t2, 4(%[src_ptr]) \n" // |7|6|5|4| - "lw $t3, 8(%[src_ptr]) \n" // |11|10|9|8| - "lw $t4, 12(%[src_ptr]) \n" // |15|14|13|12| - "lw $t5, 16(%[src_ptr]) \n" // |19|18|17|16| - "lw $t6, 20(%[src_ptr]) \n" // |23|22|21|20| - "lw $t7, 24(%[src_ptr]) \n" // |27|26|25|24| - "lw $t8, 28(%[src_ptr]) \n" // |31|30|29|28| - "precr.qb.ph $t1, $t2, $t1 \n" // |6|4|2|0| - "precr.qb.ph $t2, $t4, $t3 \n" // |14|12|10|8| - "precr.qb.ph $t5, $t6, $t5 \n" // |22|20|18|16| - "precr.qb.ph $t6, $t8, $t7 \n" // |30|28|26|24| - "precr.qb.ph $t1, $t2, $t1 \n" // |12|8|4|0| - "precr.qb.ph $t5, $t6, $t5 \n" // |28|24|20|16| - "addiu %[src_ptr], %[src_ptr], 32 \n" - "addiu $t9, $t9, -1 \n" - "sw $t1, 0(%[dst]) \n" - "sw $t5, 4(%[dst]) \n" - "bgtz $t9, 1b \n" - " addiu %[dst], %[dst], 8 \n" - - "2: \n" - "andi $t9, %[dst_width], 7 \n" // residue - "beqz $t9, 3f \n" - " nop 
\n" - - "21: \n" - "lbu $t1, 0(%[src_ptr]) \n" - "addiu %[src_ptr], %[src_ptr], 4 \n" - "addiu $t9, $t9, -1 \n" - "sb $t1, 0(%[dst]) \n" - "bgtz $t9, 21b \n" - " addiu %[dst], %[dst], 1 \n" - - "3: \n" - ".set pop \n" - : [src_ptr] "+r" (src_ptr), - [dst] "+r" (dst) - : [dst_width] "r" (dst_width) - : "t1", "t2", "t3", "t4", "t5", - "t6", "t7", "t8", "t9" - ); -} - -void ScaleRowDown4Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width) { - intptr_t stride = src_stride; - const uint8* s1 = src_ptr + stride; - const uint8* s2 = s1 + stride; - const uint8* s3 = s2 + stride; - - __asm__ __volatile__ ( - ".set push \n" - ".set noreorder \n" - - "srl $t9, %[dst_width], 1 \n" - "andi $t8, %[dst_width], 1 \n" - - ".p2align 2 \n" - "1: \n" - "lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0| - "lw $t1, 0(%[s1]) \n" // |7|6|5|4| - "lw $t2, 0(%[s2]) \n" // |11|10|9|8| - "lw $t3, 0(%[s3]) \n" // |15|14|13|12| - "lw $t4, 4(%[src_ptr]) \n" // |19|18|17|16| - "lw $t5, 4(%[s1]) \n" // |23|22|21|20| - "lw $t6, 4(%[s2]) \n" // |27|26|25|24| - "lw $t7, 4(%[s3]) \n" // |31|30|29|28| - "raddu.w.qb $t0, $t0 \n" // |3 + 2 + 1 + 0| - "raddu.w.qb $t1, $t1 \n" // |7 + 6 + 5 + 4| - "raddu.w.qb $t2, $t2 \n" // |11 + 10 + 9 + 8| - "raddu.w.qb $t3, $t3 \n" // |15 + 14 + 13 + 12| - "raddu.w.qb $t4, $t4 \n" // |19 + 18 + 17 + 16| - "raddu.w.qb $t5, $t5 \n" // |23 + 22 + 21 + 20| - "raddu.w.qb $t6, $t6 \n" // |27 + 26 + 25 + 24| - "raddu.w.qb $t7, $t7 \n" // |31 + 30 + 29 + 28| - "add $t0, $t0, $t1 \n" - "add $t1, $t2, $t3 \n" - "add $t0, $t0, $t1 \n" - "add $t4, $t4, $t5 \n" - "add $t6, $t6, $t7 \n" - "add $t4, $t4, $t6 \n" - "shra_r.w $t0, $t0, 4 \n" - "shra_r.w $t4, $t4, 4 \n" - "sb $t0, 0(%[dst]) \n" - "sb $t4, 1(%[dst]) \n" - "addiu %[src_ptr], %[src_ptr], 8 \n" - "addiu %[s1], %[s1], 8 \n" - "addiu %[s2], %[s2], 8 \n" - "addiu %[s3], %[s3], 8 \n" - "addiu $t9, $t9, -1 \n" - "bgtz $t9, 1b \n" - " addiu %[dst], %[dst], 2 \n" - "beqz $t8, 2f \n" - " nop \n" - - "lw $t0, 
0(%[src_ptr]) \n" // |3|2|1|0| - "lw $t1, 0(%[s1]) \n" // |7|6|5|4| - "lw $t2, 0(%[s2]) \n" // |11|10|9|8| - "lw $t3, 0(%[s3]) \n" // |15|14|13|12| - "raddu.w.qb $t0, $t0 \n" // |3 + 2 + 1 + 0| - "raddu.w.qb $t1, $t1 \n" // |7 + 6 + 5 + 4| - "raddu.w.qb $t2, $t2 \n" // |11 + 10 + 9 + 8| - "raddu.w.qb $t3, $t3 \n" // |15 + 14 + 13 + 12| - "add $t0, $t0, $t1 \n" - "add $t1, $t2, $t3 \n" - "add $t0, $t0, $t1 \n" - "shra_r.w $t0, $t0, 4 \n" - "sb $t0, 0(%[dst]) \n" - - "2: \n" - ".set pop \n" - - : [src_ptr] "+r" (src_ptr), - [dst] "+r" (dst), - [s1] "+r" (s1), - [s2] "+r" (s2), - [s3] "+r" (s3) - : [dst_width] "r" (dst_width) - : "t0", "t1", "t2", "t3", "t4", "t5", - "t6","t7", "t8", "t9" - ); -} - -void ScaleRowDown34_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width) { - __asm__ __volatile__ ( - ".set push \n" - ".set noreorder \n" - ".p2align 2 \n" - "1: \n" - "lw $t1, 0(%[src_ptr]) \n" // |3|2|1|0| - "lw $t2, 4(%[src_ptr]) \n" // |7|6|5|4| - "lw $t3, 8(%[src_ptr]) \n" // |11|10|9|8| - "lw $t4, 12(%[src_ptr]) \n" // |15|14|13|12| - "lw $t5, 16(%[src_ptr]) \n" // |19|18|17|16| - "lw $t6, 20(%[src_ptr]) \n" // |23|22|21|20| - "lw $t7, 24(%[src_ptr]) \n" // |27|26|25|24| - "lw $t8, 28(%[src_ptr]) \n" // |31|30|29|28| - "precrq.qb.ph $t0, $t2, $t4 \n" // |7|5|15|13| - "precrq.qb.ph $t9, $t6, $t8 \n" // |23|21|31|30| - "addiu %[dst_width], %[dst_width], -24 \n" - "ins $t1, $t1, 8, 16 \n" // |3|1|0|X| - "ins $t4, $t0, 8, 16 \n" // |X|15|13|12| - "ins $t5, $t5, 8, 16 \n" // |19|17|16|X| - "ins $t8, $t9, 8, 16 \n" // |X|31|29|28| - "addiu %[src_ptr], %[src_ptr], 32 \n" - "packrl.ph $t0, $t3, $t0 \n" // |9|8|7|5| - "packrl.ph $t9, $t7, $t9 \n" // |25|24|23|21| - "prepend $t1, $t2, 8 \n" // |4|3|1|0| - "prepend $t3, $t4, 24 \n" // |15|13|12|11| - "prepend $t5, $t6, 8 \n" // |20|19|17|16| - "prepend $t7, $t8, 24 \n" // |31|29|28|27| - "sw $t1, 0(%[dst]) \n" - "sw $t0, 4(%[dst]) \n" - "sw $t3, 8(%[dst]) \n" - "sw $t5, 12(%[dst]) \n" - "sw $t9, 
16(%[dst]) \n" - "sw $t7, 20(%[dst]) \n" - "bnez %[dst_width], 1b \n" - " addiu %[dst], %[dst], 24 \n" - ".set pop \n" - : [src_ptr] "+r" (src_ptr), - [dst] "+r" (dst), - [dst_width] "+r" (dst_width) - : - : "t0", "t1", "t2", "t3", "t4", "t5", - "t6","t7", "t8", "t9" - ); -} - -void ScaleRowDown34_0_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* d, int dst_width) { - __asm__ __volatile__ ( - ".set push \n" - ".set noreorder \n" - "repl.ph $t3, 3 \n" // 0x00030003 - - ".p2align 2 \n" - "1: \n" - "lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0| - "lwx $t1, %[src_stride](%[src_ptr]) \n" // |T3|T2|T1|T0| - "rotr $t2, $t0, 8 \n" // |S0|S3|S2|S1| - "rotr $t6, $t1, 8 \n" // |T0|T3|T2|T1| - "muleu_s.ph.qbl $t4, $t2, $t3 \n" // |S0*3|S3*3| - "muleu_s.ph.qbl $t5, $t6, $t3 \n" // |T0*3|T3*3| - "andi $t0, $t2, 0xFFFF \n" // |0|0|S2|S1| - "andi $t1, $t6, 0xFFFF \n" // |0|0|T2|T1| - "raddu.w.qb $t0, $t0 \n" - "raddu.w.qb $t1, $t1 \n" - "shra_r.w $t0, $t0, 1 \n" - "shra_r.w $t1, $t1, 1 \n" - "preceu.ph.qbr $t2, $t2 \n" // |0|S2|0|S1| - "preceu.ph.qbr $t6, $t6 \n" // |0|T2|0|T1| - "rotr $t2, $t2, 16 \n" // |0|S1|0|S2| - "rotr $t6, $t6, 16 \n" // |0|T1|0|T2| - "addu.ph $t2, $t2, $t4 \n" - "addu.ph $t6, $t6, $t5 \n" - "sll $t5, $t0, 1 \n" - "add $t0, $t5, $t0 \n" - "shra_r.ph $t2, $t2, 2 \n" - "shra_r.ph $t6, $t6, 2 \n" - "shll.ph $t4, $t2, 1 \n" - "addq.ph $t4, $t4, $t2 \n" - "addu $t0, $t0, $t1 \n" - "addiu %[src_ptr], %[src_ptr], 4 \n" - "shra_r.w $t0, $t0, 2 \n" - "addu.ph $t6, $t6, $t4 \n" - "shra_r.ph $t6, $t6, 2 \n" - "srl $t1, $t6, 16 \n" - "addiu %[dst_width], %[dst_width], -3 \n" - "sb $t1, 0(%[d]) \n" - "sb $t0, 1(%[d]) \n" - "sb $t6, 2(%[d]) \n" - "bgtz %[dst_width], 1b \n" - " addiu %[d], %[d], 3 \n" - "3: \n" - ".set pop \n" - : [src_ptr] "+r" (src_ptr), - [src_stride] "+r" (src_stride), - [d] "+r" (d), - [dst_width] "+r" (dst_width) - : - : "t0", "t1", "t2", "t3", - "t4", "t5", "t6" - ); -} - -void ScaleRowDown34_1_Box_MIPS_DSPR2(const uint8* src_ptr, 
ptrdiff_t src_stride, - uint8* d, int dst_width) { - __asm__ __volatile__ ( - ".set push \n" - ".set noreorder \n" - "repl.ph $t2, 3 \n" // 0x00030003 - - ".p2align 2 \n" - "1: \n" - "lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0| - "lwx $t1, %[src_stride](%[src_ptr]) \n" // |T3|T2|T1|T0| - "rotr $t4, $t0, 8 \n" // |S0|S3|S2|S1| - "rotr $t6, $t1, 8 \n" // |T0|T3|T2|T1| - "muleu_s.ph.qbl $t3, $t4, $t2 \n" // |S0*3|S3*3| - "muleu_s.ph.qbl $t5, $t6, $t2 \n" // |T0*3|T3*3| - "andi $t0, $t4, 0xFFFF \n" // |0|0|S2|S1| - "andi $t1, $t6, 0xFFFF \n" // |0|0|T2|T1| - "raddu.w.qb $t0, $t0 \n" - "raddu.w.qb $t1, $t1 \n" - "shra_r.w $t0, $t0, 1 \n" - "shra_r.w $t1, $t1, 1 \n" - "preceu.ph.qbr $t4, $t4 \n" // |0|S2|0|S1| - "preceu.ph.qbr $t6, $t6 \n" // |0|T2|0|T1| - "rotr $t4, $t4, 16 \n" // |0|S1|0|S2| - "rotr $t6, $t6, 16 \n" // |0|T1|0|T2| - "addu.ph $t4, $t4, $t3 \n" - "addu.ph $t6, $t6, $t5 \n" - "shra_r.ph $t6, $t6, 2 \n" - "shra_r.ph $t4, $t4, 2 \n" - "addu.ph $t6, $t6, $t4 \n" - "addiu %[src_ptr], %[src_ptr], 4 \n" - "shra_r.ph $t6, $t6, 1 \n" - "addu $t0, $t0, $t1 \n" - "addiu %[dst_width], %[dst_width], -3 \n" - "shra_r.w $t0, $t0, 1 \n" - "srl $t1, $t6, 16 \n" - "sb $t1, 0(%[d]) \n" - "sb $t0, 1(%[d]) \n" - "sb $t6, 2(%[d]) \n" - "bgtz %[dst_width], 1b \n" - " addiu %[d], %[d], 3 \n" - "3: \n" - ".set pop \n" - : [src_ptr] "+r" (src_ptr), - [src_stride] "+r" (src_stride), - [d] "+r" (d), - [dst_width] "+r" (dst_width) - : - : "t0", "t1", "t2", "t3", - "t4", "t5", "t6" - ); -} - -void ScaleRowDown38_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width) { - __asm__ __volatile__ ( - ".set push \n" - ".set noreorder \n" - - ".p2align 2 \n" - "1: \n" - "lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0| - "lw $t1, 4(%[src_ptr]) \n" // |7|6|5|4| - "lw $t2, 8(%[src_ptr]) \n" // |11|10|9|8| - "lw $t3, 12(%[src_ptr]) \n" // |15|14|13|12| - "lw $t4, 16(%[src_ptr]) \n" // |19|18|17|16| - "lw $t5, 20(%[src_ptr]) \n" // |23|22|21|20| - "lw $t6, 24(%[src_ptr]) \n" // 
|27|26|25|24| - "lw $t7, 28(%[src_ptr]) \n" // |31|30|29|28| - "wsbh $t0, $t0 \n" // |2|3|0|1| - "wsbh $t6, $t6 \n" // |26|27|24|25| - "srl $t0, $t0, 8 \n" // |X|2|3|0| - "srl $t3, $t3, 16 \n" // |X|X|15|14| - "srl $t5, $t5, 16 \n" // |X|X|23|22| - "srl $t7, $t7, 16 \n" // |X|X|31|30| - "ins $t1, $t2, 24, 8 \n" // |8|6|5|4| - "ins $t6, $t5, 0, 8 \n" // |26|27|24|22| - "ins $t1, $t0, 0, 16 \n" // |8|6|3|0| - "ins $t6, $t7, 24, 8 \n" // |30|27|24|22| - "prepend $t2, $t3, 24 \n" // |X|15|14|11| - "ins $t4, $t4, 16, 8 \n" // |19|16|17|X| - "ins $t4, $t2, 0, 16 \n" // |19|16|14|11| - "addiu %[src_ptr], %[src_ptr], 32 \n" - "addiu %[dst_width], %[dst_width], -12 \n" - "addiu $t8,%[dst_width], -12 \n" - "sw $t1, 0(%[dst]) \n" - "sw $t4, 4(%[dst]) \n" - "sw $t6, 8(%[dst]) \n" - "bgez $t8, 1b \n" - " addiu %[dst], %[dst], 12 \n" - ".set pop \n" - : [src_ptr] "+r" (src_ptr), - [dst] "+r" (dst), - [dst_width] "+r" (dst_width) - : - : "t0", "t1", "t2", "t3", "t4", - "t5", "t6", "t7", "t8" - ); -} - -void ScaleRowDown38_2_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { - intptr_t stride = src_stride; - const uint8* t = src_ptr + stride; - const int c = 0x2AAA; - - __asm__ __volatile__ ( - ".set push \n" - ".set noreorder \n" - - ".p2align 2 \n" - "1: \n" - "lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0| - "lw $t1, 4(%[src_ptr]) \n" // |S7|S6|S5|S4| - "lw $t2, 0(%[t]) \n" // |T3|T2|T1|T0| - "lw $t3, 4(%[t]) \n" // |T7|T6|T5|T4| - "rotr $t1, $t1, 16 \n" // |S5|S4|S7|S6| - "packrl.ph $t4, $t1, $t3 \n" // |S7|S6|T7|T6| - "packrl.ph $t5, $t3, $t1 \n" // |T5|T4|S5|S4| - "raddu.w.qb $t4, $t4 \n" // S7+S6+T7+T6 - "raddu.w.qb $t5, $t5 \n" // T5+T4+S5+S4 - "precrq.qb.ph $t6, $t0, $t2 \n" // |S3|S1|T3|T1| - "precrq.qb.ph $t6, $t6, $t6 \n" // |S3|T3|S3|T3| - "srl $t4, $t4, 2 \n" // t4 / 4 - "srl $t6, $t6, 16 \n" // |0|0|S3|T3| - "raddu.w.qb $t6, $t6 \n" // 0+0+S3+T3 - "addu $t6, $t5, $t6 \n" - "mul $t6, $t6, %[c] \n" // t6 * 0x2AAA - "sll $t0, $t0, 
8 \n" // |S2|S1|S0|0| - "sll $t2, $t2, 8 \n" // |T2|T1|T0|0| - "raddu.w.qb $t0, $t0 \n" // S2+S1+S0+0 - "raddu.w.qb $t2, $t2 \n" // T2+T1+T0+0 - "addu $t0, $t0, $t2 \n" - "mul $t0, $t0, %[c] \n" // t0 * 0x2AAA - "addiu %[src_ptr], %[src_ptr], 8 \n" - "addiu %[t], %[t], 8 \n" - "addiu %[dst_width], %[dst_width], -3 \n" - "addiu %[dst_ptr], %[dst_ptr], 3 \n" - "srl $t6, $t6, 16 \n" - "srl $t0, $t0, 16 \n" - "sb $t4, -1(%[dst_ptr]) \n" - "sb $t6, -2(%[dst_ptr]) \n" - "bgtz %[dst_width], 1b \n" - " sb $t0, -3(%[dst_ptr]) \n" - ".set pop \n" - : [src_ptr] "+r" (src_ptr), - [dst_ptr] "+r" (dst_ptr), - [t] "+r" (t), - [dst_width] "+r" (dst_width) - : [c] "r" (c) - : "t0", "t1", "t2", "t3", "t4", "t5", "t6" - ); -} - -void ScaleRowDown38_3_Box_MIPS_DSPR2(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { - intptr_t stride = src_stride; - const uint8* s1 = src_ptr + stride; - stride += stride; - const uint8* s2 = src_ptr + stride; - const int c1 = 0x1C71; - const int c2 = 0x2AAA; - - __asm__ __volatile__ ( - ".set push \n" - ".set noreorder \n" - - ".p2align 2 \n" - "1: \n" - "lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0| - "lw $t1, 4(%[src_ptr]) \n" // |S7|S6|S5|S4| - "lw $t2, 0(%[s1]) \n" // |T3|T2|T1|T0| - "lw $t3, 4(%[s1]) \n" // |T7|T6|T5|T4| - "lw $t4, 0(%[s2]) \n" // |R3|R2|R1|R0| - "lw $t5, 4(%[s2]) \n" // |R7|R6|R5|R4| - "rotr $t1, $t1, 16 \n" // |S5|S4|S7|S6| - "packrl.ph $t6, $t1, $t3 \n" // |S7|S6|T7|T6| - "raddu.w.qb $t6, $t6 \n" // S7+S6+T7+T6 - "packrl.ph $t7, $t3, $t1 \n" // |T5|T4|S5|S4| - "raddu.w.qb $t7, $t7 \n" // T5+T4+S5+S4 - "sll $t8, $t5, 16 \n" // |R5|R4|0|0| - "raddu.w.qb $t8, $t8 \n" // R5+R4 - "addu $t7, $t7, $t8 \n" - "srl $t8, $t5, 16 \n" // |0|0|R7|R6| - "raddu.w.qb $t8, $t8 \n" // R7 + R6 - "addu $t6, $t6, $t8 \n" - "mul $t6, $t6, %[c2] \n" // t6 * 0x2AAA - "precrq.qb.ph $t8, $t0, $t2 \n" // |S3|S1|T3|T1| - "precrq.qb.ph $t8, $t8, $t4 \n" // |S3|T3|R3|R1| - "srl $t8, $t8, 8 \n" // |0|S3|T3|R3| - "raddu.w.qb $t8, $t8 
\n" // S3 + T3 + R3 - "addu $t7, $t7, $t8 \n" - "mul $t7, $t7, %[c1] \n" // t7 * 0x1C71 - "sll $t0, $t0, 8 \n" // |S2|S1|S0|0| - "sll $t2, $t2, 8 \n" // |T2|T1|T0|0| - "sll $t4, $t4, 8 \n" // |R2|R1|R0|0| - "raddu.w.qb $t0, $t0 \n" - "raddu.w.qb $t2, $t2 \n" - "raddu.w.qb $t4, $t4 \n" - "addu $t0, $t0, $t2 \n" - "addu $t0, $t0, $t4 \n" - "mul $t0, $t0, %[c1] \n" // t0 * 0x1C71 - "addiu %[src_ptr], %[src_ptr], 8 \n" - "addiu %[s1], %[s1], 8 \n" - "addiu %[s2], %[s2], 8 \n" - "addiu %[dst_width], %[dst_width], -3 \n" - "addiu %[dst_ptr], %[dst_ptr], 3 \n" - "srl $t6, $t6, 16 \n" - "srl $t7, $t7, 16 \n" - "srl $t0, $t0, 16 \n" - "sb $t6, -1(%[dst_ptr]) \n" - "sb $t7, -2(%[dst_ptr]) \n" - "bgtz %[dst_width], 1b \n" - " sb $t0, -3(%[dst_ptr]) \n" - ".set pop \n" - : [src_ptr] "+r" (src_ptr), - [dst_ptr] "+r" (dst_ptr), - [s1] "+r" (s1), - [s2] "+r" (s2), - [dst_width] "+r" (dst_width) - : [c1] "r" (c1), [c2] "r" (c2) - : "t0", "t1", "t2", "t3", "t4", - "t5", "t6", "t7", "t8" - ); -} - -#endif // defined(__mips_dsp) && (__mips_dsp_rev >= 2) - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif - diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/scale_neon.cc b/drivers/theoraplayer/src/YUV/libyuv/src/scale_neon.cc deleted file mode 100755 index a9df93c055..0000000000 --- a/drivers/theoraplayer/src/YUV/libyuv/src/scale_neon.cc +++ /dev/null @@ -1,699 +0,0 @@ -/* - * Copyright 2011 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "libyuv/row.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -// This module is for GCC Neon. 
-#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__) - -// NEON downscalers with interpolation. -// Provided by Fritz Koenig - -// Read 32x1 throw away even pixels, and write 16x1. -void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width) { - asm volatile ( -#ifdef _ANDROID - ".fpu neon\n" -#endif - ".p2align 2 \n" - "1: \n" - // load even pixels into q0, odd into q1 - "vld2.8 {q0, q1}, [%0]! \n" - "subs %2, %2, #16 \n" // 16 processed per loop - "vst1.8 {q1}, [%1]! \n" // store odd pixels - "bgt 1b \n" - : "+r"(src_ptr), // %0 - "+r"(dst), // %1 - "+r"(dst_width) // %2 - : - : "q0", "q1" // Clobber List - ); -} - -// Read 32x2 average down and write 16x1. -void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width) { - asm volatile ( -#ifdef _ANDROID - ".fpu neon\n" -#endif - // change the stride to row 2 pointer - "add %1, %0 \n" - ".p2align 2 \n" - "1: \n" - "vld1.8 {q0, q1}, [%0]! \n" // load row 1 and post inc - "vld1.8 {q2, q3}, [%1]! \n" // load row 2 and post inc - "subs %3, %3, #16 \n" // 16 processed per loop - "vpaddl.u8 q0, q0 \n" // row 1 add adjacent - "vpaddl.u8 q1, q1 \n" - "vpadal.u8 q0, q2 \n" // row 2 add adjacent + row1 - "vpadal.u8 q1, q3 \n" - "vrshrn.u16 d0, q0, #2 \n" // downshift, round and pack - "vrshrn.u16 d1, q1, #2 \n" - "vst1.8 {q0}, [%2]! \n" - "bgt 1b \n" - : "+r"(src_ptr), // %0 - "+r"(src_stride), // %1 - "+r"(dst), // %2 - "+r"(dst_width) // %3 - : - : "q0", "q1", "q2", "q3" // Clobber List - ); -} - -void ScaleRowDown4_NEON(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { - asm volatile ( -#ifdef _ANDROID - ".fpu neon\n" -#endif - ".p2align 2 \n" - "1: \n" - "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // src line 0 - "subs %2, %2, #8 \n" // 8 processed per loop - "vst1.8 {d2}, [%1]! 
\n" - "bgt 1b \n" - : "+r"(src_ptr), // %0 - "+r"(dst_ptr), // %1 - "+r"(dst_width) // %2 - : - : "q0", "q1", "memory", "cc" - ); -} - -void ScaleRowDown4Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { - asm volatile ( -#ifdef _ANDROID - ".fpu neon\n" -#endif - "add r4, %0, %3 \n" - "add r5, r4, %3 \n" - "add %3, r5, %3 \n" - ".p2align 2 \n" - "1: \n" - "vld1.8 {q0}, [%0]! \n" // load up 16x4 - "vld1.8 {q1}, [r4]! \n" - "vld1.8 {q2}, [r5]! \n" - "vld1.8 {q3}, [%3]! \n" - "subs %2, %2, #4 \n" - "vpaddl.u8 q0, q0 \n" - "vpadal.u8 q0, q1 \n" - "vpadal.u8 q0, q2 \n" - "vpadal.u8 q0, q3 \n" - "vpaddl.u16 q0, q0 \n" - "vrshrn.u32 d0, q0, #4 \n" // divide by 16 w/rounding - "vmovn.u16 d0, q0 \n" - "vst1.32 {d0[0]}, [%1]! \n" - "bgt 1b \n" - : "+r"(src_ptr), // %0 - "+r"(dst_ptr), // %1 - "+r"(dst_width) // %2 - : "r"(src_stride) // %3 - : "r4", "r5", "q0", "q1", "q2", "q3", "memory", "cc" - ); -} - -// Down scale from 4 to 3 pixels. Use the neon multilane read/write -// to load up the every 4th pixel into a 4 different registers. -// Point samples 32 pixels to 24 pixels. -void ScaleRowDown34_NEON(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { - asm volatile ( -#ifdef _ANDROID - ".fpu neon\n" -#endif - ".p2align 2 \n" - "1: \n" - "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // src line 0 - "subs %2, %2, #24 \n" - "vmov d2, d3 \n" // order d0, d1, d2 - "vst3.8 {d0, d1, d2}, [%1]! \n" - "bgt 1b \n" - : "+r"(src_ptr), // %0 - "+r"(dst_ptr), // %1 - "+r"(dst_width) // %2 - : - : "d0", "d1", "d2", "d3", "memory", "cc" - ); -} - -void ScaleRowDown34_0_Box_NEON(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { - asm volatile ( - "vmov.u8 d24, #3 \n" - "add %3, %0 \n" - ".p2align 2 \n" - "1: \n" - "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // src line 0 - "vld4.8 {d4, d5, d6, d7}, [%3]! 
\n" // src line 1 - "subs %2, %2, #24 \n" - - // filter src line 0 with src line 1 - // expand chars to shorts to allow for room - // when adding lines together - "vmovl.u8 q8, d4 \n" - "vmovl.u8 q9, d5 \n" - "vmovl.u8 q10, d6 \n" - "vmovl.u8 q11, d7 \n" - - // 3 * line_0 + line_1 - "vmlal.u8 q8, d0, d24 \n" - "vmlal.u8 q9, d1, d24 \n" - "vmlal.u8 q10, d2, d24 \n" - "vmlal.u8 q11, d3, d24 \n" - - // (3 * line_0 + line_1) >> 2 - "vqrshrn.u16 d0, q8, #2 \n" - "vqrshrn.u16 d1, q9, #2 \n" - "vqrshrn.u16 d2, q10, #2 \n" - "vqrshrn.u16 d3, q11, #2 \n" - - // a0 = (src[0] * 3 + s[1] * 1) >> 2 - "vmovl.u8 q8, d1 \n" - "vmlal.u8 q8, d0, d24 \n" - "vqrshrn.u16 d0, q8, #2 \n" - - // a1 = (src[1] * 1 + s[2] * 1) >> 1 - "vrhadd.u8 d1, d1, d2 \n" - - // a2 = (src[2] * 1 + s[3] * 3) >> 2 - "vmovl.u8 q8, d2 \n" - "vmlal.u8 q8, d3, d24 \n" - "vqrshrn.u16 d2, q8, #2 \n" - - "vst3.8 {d0, d1, d2}, [%1]! \n" - - "bgt 1b \n" - : "+r"(src_ptr), // %0 - "+r"(dst_ptr), // %1 - "+r"(dst_width), // %2 - "+r"(src_stride) // %3 - : - : "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11", "d24", "memory", "cc" - ); -} - -void ScaleRowDown34_1_Box_NEON(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { - asm volatile ( - "vmov.u8 d24, #3 \n" - "add %3, %0 \n" - ".p2align 2 \n" - "1: \n" - "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // src line 0 - "vld4.8 {d4, d5, d6, d7}, [%3]! \n" // src line 1 - "subs %2, %2, #24 \n" - // average src line 0 with src line 1 - "vrhadd.u8 q0, q0, q2 \n" - "vrhadd.u8 q1, q1, q3 \n" - - // a0 = (src[0] * 3 + s[1] * 1) >> 2 - "vmovl.u8 q3, d1 \n" - "vmlal.u8 q3, d0, d24 \n" - "vqrshrn.u16 d0, q3, #2 \n" - - // a1 = (src[1] * 1 + s[2] * 1) >> 1 - "vrhadd.u8 d1, d1, d2 \n" - - // a2 = (src[2] * 1 + s[3] * 3) >> 2 - "vmovl.u8 q3, d2 \n" - "vmlal.u8 q3, d3, d24 \n" - "vqrshrn.u16 d2, q3, #2 \n" - - "vst3.8 {d0, d1, d2}, [%1]! 
\n" - "bgt 1b \n" - : "+r"(src_ptr), // %0 - "+r"(dst_ptr), // %1 - "+r"(dst_width), // %2 - "+r"(src_stride) // %3 - : - : "r4", "q0", "q1", "q2", "q3", "d24", "memory", "cc" - ); -} - -#define HAS_SCALEROWDOWN38_NEON -static uvec8 kShuf38 = - { 0, 3, 6, 8, 11, 14, 16, 19, 22, 24, 27, 30, 0, 0, 0, 0 }; -static uvec8 kShuf38_2 = - { 0, 8, 16, 2, 10, 17, 4, 12, 18, 6, 14, 19, 0, 0, 0, 0 }; -static vec16 kMult38_Div6 = - { 65536 / 12, 65536 / 12, 65536 / 12, 65536 / 12, - 65536 / 12, 65536 / 12, 65536 / 12, 65536 / 12 }; -static vec16 kMult38_Div9 = - { 65536 / 18, 65536 / 18, 65536 / 18, 65536 / 18, - 65536 / 18, 65536 / 18, 65536 / 18, 65536 / 18 }; - -// 32 -> 12 -void ScaleRowDown38_NEON(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { - asm volatile ( - "vld1.8 {q3}, [%3] \n" - ".p2align 2 \n" - "1: \n" - "vld1.8 {d0, d1, d2, d3}, [%0]! \n" - "subs %2, %2, #12 \n" - "vtbl.u8 d4, {d0, d1, d2, d3}, d6 \n" - "vtbl.u8 d5, {d0, d1, d2, d3}, d7 \n" - "vst1.8 {d4}, [%1]! \n" - "vst1.32 {d5[0]}, [%1]! \n" - "bgt 1b \n" - : "+r"(src_ptr), // %0 - "+r"(dst_ptr), // %1 - "+r"(dst_width) // %2 - : "r"(&kShuf38) // %3 - : "d0", "d1", "d2", "d3", "d4", "d5", "memory", "cc" - ); -} - -// 32x3 -> 12x1 -void OMITFP ScaleRowDown38_3_Box_NEON(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { - asm volatile ( - "vld1.16 {q13}, [%4] \n" - "vld1.8 {q14}, [%5] \n" - "vld1.8 {q15}, [%6] \n" - "add r4, %0, %3, lsl #1 \n" - "add %3, %0 \n" - ".p2align 2 \n" - "1: \n" - - // d0 = 00 40 01 41 02 42 03 43 - // d1 = 10 50 11 51 12 52 13 53 - // d2 = 20 60 21 61 22 62 23 63 - // d3 = 30 70 31 71 32 72 33 73 - "vld4.8 {d0, d1, d2, d3}, [%0]! \n" - "vld4.8 {d4, d5, d6, d7}, [%3]! \n" - "vld4.8 {d16, d17, d18, d19}, [r4]! \n" - "subs %2, %2, #12 \n" - - // Shuffle the input data around to get align the data - // so adjacent data can be added. 
0,1 - 2,3 - 4,5 - 6,7 - // d0 = 00 10 01 11 02 12 03 13 - // d1 = 40 50 41 51 42 52 43 53 - "vtrn.u8 d0, d1 \n" - "vtrn.u8 d4, d5 \n" - "vtrn.u8 d16, d17 \n" - - // d2 = 20 30 21 31 22 32 23 33 - // d3 = 60 70 61 71 62 72 63 73 - "vtrn.u8 d2, d3 \n" - "vtrn.u8 d6, d7 \n" - "vtrn.u8 d18, d19 \n" - - // d0 = 00+10 01+11 02+12 03+13 - // d2 = 40+50 41+51 42+52 43+53 - "vpaddl.u8 q0, q0 \n" - "vpaddl.u8 q2, q2 \n" - "vpaddl.u8 q8, q8 \n" - - // d3 = 60+70 61+71 62+72 63+73 - "vpaddl.u8 d3, d3 \n" - "vpaddl.u8 d7, d7 \n" - "vpaddl.u8 d19, d19 \n" - - // combine source lines - "vadd.u16 q0, q2 \n" - "vadd.u16 q0, q8 \n" - "vadd.u16 d4, d3, d7 \n" - "vadd.u16 d4, d19 \n" - - // dst_ptr[3] = (s[6 + st * 0] + s[7 + st * 0] - // + s[6 + st * 1] + s[7 + st * 1] - // + s[6 + st * 2] + s[7 + st * 2]) / 6 - "vqrdmulh.s16 q2, q2, q13 \n" - "vmovn.u16 d4, q2 \n" - - // Shuffle 2,3 reg around so that 2 can be added to the - // 0,1 reg and 3 can be added to the 4,5 reg. This - // requires expanding from u8 to u16 as the 0,1 and 4,5 - // registers are already expanded. Then do transposes - // to get aligned. - // q2 = xx 20 xx 30 xx 21 xx 31 xx 22 xx 32 xx 23 xx 33 - "vmovl.u8 q1, d2 \n" - "vmovl.u8 q3, d6 \n" - "vmovl.u8 q9, d18 \n" - - // combine source lines - "vadd.u16 q1, q3 \n" - "vadd.u16 q1, q9 \n" - - // d4 = xx 20 xx 30 xx 22 xx 32 - // d5 = xx 21 xx 31 xx 23 xx 33 - "vtrn.u32 d2, d3 \n" - - // d4 = xx 20 xx 21 xx 22 xx 23 - // d5 = xx 30 xx 31 xx 32 xx 33 - "vtrn.u16 d2, d3 \n" - - // 0+1+2, 3+4+5 - "vadd.u16 q0, q1 \n" - - // Need to divide, but can't downshift as the the value - // isn't a power of 2. So multiply by 65536 / n - // and take the upper 16 bits. - "vqrdmulh.s16 q0, q0, q15 \n" - - // Align for table lookup, vtbl requires registers to - // be adjacent - "vmov.u8 d2, d4 \n" - - "vtbl.u8 d3, {d0, d1, d2}, d28 \n" - "vtbl.u8 d4, {d0, d1, d2}, d29 \n" - - "vst1.8 {d3}, [%1]! \n" - "vst1.32 {d4[0]}, [%1]! 
\n" - "bgt 1b \n" - : "+r"(src_ptr), // %0 - "+r"(dst_ptr), // %1 - "+r"(dst_width), // %2 - "+r"(src_stride) // %3 - : "r"(&kMult38_Div6), // %4 - "r"(&kShuf38_2), // %5 - "r"(&kMult38_Div9) // %6 - : "r4", "q0", "q1", "q2", "q3", "q8", "q9", - "q13", "q14", "q15", "memory", "cc" - ); -} - -// 32x2 -> 12x1 -void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { - asm volatile ( - "vld1.16 {q13}, [%4] \n" - "vld1.8 {q14}, [%5] \n" - "add %3, %0 \n" - ".p2align 2 \n" - "1: \n" - - // d0 = 00 40 01 41 02 42 03 43 - // d1 = 10 50 11 51 12 52 13 53 - // d2 = 20 60 21 61 22 62 23 63 - // d3 = 30 70 31 71 32 72 33 73 - "vld4.8 {d0, d1, d2, d3}, [%0]! \n" - "vld4.8 {d4, d5, d6, d7}, [%3]! \n" - "subs %2, %2, #12 \n" - - // Shuffle the input data around to get align the data - // so adjacent data can be added. 0,1 - 2,3 - 4,5 - 6,7 - // d0 = 00 10 01 11 02 12 03 13 - // d1 = 40 50 41 51 42 52 43 53 - "vtrn.u8 d0, d1 \n" - "vtrn.u8 d4, d5 \n" - - // d2 = 20 30 21 31 22 32 23 33 - // d3 = 60 70 61 71 62 72 63 73 - "vtrn.u8 d2, d3 \n" - "vtrn.u8 d6, d7 \n" - - // d0 = 00+10 01+11 02+12 03+13 - // d2 = 40+50 41+51 42+52 43+53 - "vpaddl.u8 q0, q0 \n" - "vpaddl.u8 q2, q2 \n" - - // d3 = 60+70 61+71 62+72 63+73 - "vpaddl.u8 d3, d3 \n" - "vpaddl.u8 d7, d7 \n" - - // combine source lines - "vadd.u16 q0, q2 \n" - "vadd.u16 d4, d3, d7 \n" - - // dst_ptr[3] = (s[6] + s[7] + s[6+st] + s[7+st]) / 4 - "vqrshrn.u16 d4, q2, #2 \n" - - // Shuffle 2,3 reg around so that 2 can be added to the - // 0,1 reg and 3 can be added to the 4,5 reg. This - // requires expanding from u8 to u16 as the 0,1 and 4,5 - // registers are already expanded. Then do transposes - // to get aligned. 
- // q2 = xx 20 xx 30 xx 21 xx 31 xx 22 xx 32 xx 23 xx 33 - "vmovl.u8 q1, d2 \n" - "vmovl.u8 q3, d6 \n" - - // combine source lines - "vadd.u16 q1, q3 \n" - - // d4 = xx 20 xx 30 xx 22 xx 32 - // d5 = xx 21 xx 31 xx 23 xx 33 - "vtrn.u32 d2, d3 \n" - - // d4 = xx 20 xx 21 xx 22 xx 23 - // d5 = xx 30 xx 31 xx 32 xx 33 - "vtrn.u16 d2, d3 \n" - - // 0+1+2, 3+4+5 - "vadd.u16 q0, q1 \n" - - // Need to divide, but can't downshift as the the value - // isn't a power of 2. So multiply by 65536 / n - // and take the upper 16 bits. - "vqrdmulh.s16 q0, q0, q13 \n" - - // Align for table lookup, vtbl requires registers to - // be adjacent - "vmov.u8 d2, d4 \n" - - "vtbl.u8 d3, {d0, d1, d2}, d28 \n" - "vtbl.u8 d4, {d0, d1, d2}, d29 \n" - - "vst1.8 {d3}, [%1]! \n" - "vst1.32 {d4[0]}, [%1]! \n" - "bgt 1b \n" - : "+r"(src_ptr), // %0 - "+r"(dst_ptr), // %1 - "+r"(dst_width), // %2 - "+r"(src_stride) // %3 - : "r"(&kMult38_Div6), // %4 - "r"(&kShuf38_2) // %5 - : "q0", "q1", "q2", "q3", "q13", "q14", "memory", "cc" - ); -} - -// 16x2 -> 16x1 -void ScaleFilterRows_NEON(uint8* dst_ptr, - const uint8* src_ptr, ptrdiff_t src_stride, - int dst_width, int source_y_fraction) { - asm volatile ( - "cmp %4, #0 \n" - "beq 100f \n" - "add %2, %1 \n" - "cmp %4, #64 \n" - "beq 75f \n" - "cmp %4, #128 \n" - "beq 50f \n" - "cmp %4, #192 \n" - "beq 25f \n" - - "vdup.8 d5, %4 \n" - "rsb %4, #256 \n" - "vdup.8 d4, %4 \n" - // General purpose row blend. - "1: \n" - "vld1.8 {q0}, [%1]! \n" - "vld1.8 {q1}, [%2]! \n" - "subs %3, %3, #16 \n" - "vmull.u8 q13, d0, d4 \n" - "vmull.u8 q14, d1, d4 \n" - "vmlal.u8 q13, d2, d5 \n" - "vmlal.u8 q14, d3, d5 \n" - "vrshrn.u16 d0, q13, #8 \n" - "vrshrn.u16 d1, q14, #8 \n" - "vst1.8 {q0}, [%0]! \n" - "bgt 1b \n" - "b 99f \n" - - // Blend 25 / 75. - "25: \n" - "vld1.8 {q0}, [%1]! \n" - "vld1.8 {q1}, [%2]! \n" - "subs %3, %3, #16 \n" - "vrhadd.u8 q0, q1 \n" - "vrhadd.u8 q0, q1 \n" - "vst1.8 {q0}, [%0]! \n" - "bgt 25b \n" - "b 99f \n" - - // Blend 50 / 50. 
- "50: \n" - "vld1.8 {q0}, [%1]! \n" - "vld1.8 {q1}, [%2]! \n" - "subs %3, %3, #16 \n" - "vrhadd.u8 q0, q1 \n" - "vst1.8 {q0}, [%0]! \n" - "bgt 50b \n" - "b 99f \n" - - // Blend 75 / 25. - "75: \n" - "vld1.8 {q1}, [%1]! \n" - "vld1.8 {q0}, [%2]! \n" - "subs %3, %3, #16 \n" - "vrhadd.u8 q0, q1 \n" - "vrhadd.u8 q0, q1 \n" - "vst1.8 {q0}, [%0]! \n" - "bgt 75b \n" - "b 99f \n" - - // Blend 100 / 0 - Copy row unchanged. - "100: \n" - "vld1.8 {q0}, [%1]! \n" - "subs %3, %3, #16 \n" - "vst1.8 {q0}, [%0]! \n" - "bgt 100b \n" - - "99: \n" - "vst1.8 {d1[7]}, [%0] \n" - : "+r"(dst_ptr), // %0 - "+r"(src_ptr), // %1 - "+r"(src_stride), // %2 - "+r"(dst_width), // %3 - "+r"(source_y_fraction) // %4 - : - : "q0", "q1", "d4", "d5", "q13", "q14", "memory", "cc" - ); -} - -void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width) { - asm volatile ( - ".p2align 2 \n" - "1: \n" - // load even pixels into q0, odd into q1 - "vld2.32 {q0, q1}, [%0]! \n" - "vld2.32 {q2, q3}, [%0]! \n" - "subs %2, %2, #8 \n" // 8 processed per loop - "vst1.8 {q1}, [%1]! \n" // store odd pixels - "vst1.8 {q3}, [%1]! \n" - "bgt 1b \n" - : "+r"(src_ptr), // %0 - "+r"(dst), // %1 - "+r"(dst_width) // %2 - : - : "memory", "cc", "q0", "q1", "q2", "q3" // Clobber List - ); -} - -void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst, int dst_width) { - asm volatile ( - // change the stride to row 2 pointer - "add %1, %1, %0 \n" - ".p2align 2 \n" - "1: \n" - "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels. - "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB pixels. - "subs %3, %3, #8 \n" // 8 processed per loop. - "vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts. - "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts. - "vpaddl.u8 q2, q2 \n" // R 16 bytes -> 8 shorts. - "vpaddl.u8 q3, q3 \n" // A 16 bytes -> 8 shorts. - "vld4.8 {d16, d18, d20, d22}, [%1]! \n" // load 8 more ARGB pixels. - "vld4.8 {d17, d19, d21, d23}, [%1]! 
\n" // load last 8 ARGB pixels. - "vpadal.u8 q0, q8 \n" // B 16 bytes -> 8 shorts. - "vpadal.u8 q1, q9 \n" // G 16 bytes -> 8 shorts. - "vpadal.u8 q2, q10 \n" // R 16 bytes -> 8 shorts. - "vpadal.u8 q3, q11 \n" // A 16 bytes -> 8 shorts. - "vrshrn.u16 d0, q0, #2 \n" // downshift, round and pack - "vrshrn.u16 d1, q1, #2 \n" - "vrshrn.u16 d2, q2, #2 \n" - "vrshrn.u16 d3, q3, #2 \n" - "vst4.8 {d0, d1, d2, d3}, [%2]! \n" - "bgt 1b \n" - : "+r"(src_ptr), // %0 - "+r"(src_stride), // %1 - "+r"(dst), // %2 - "+r"(dst_width) // %3 - : - : "memory", "cc", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11" - ); -} - -// Reads 4 pixels at a time. -// Alignment requirement: src_argb 4 byte aligned. -void ScaleARGBRowDownEven_NEON(const uint8* src_argb, ptrdiff_t src_stride, - int src_stepx, uint8* dst_argb, int dst_width) { - asm volatile ( - "mov r12, %3, lsl #2 \n" - ".p2align 2 \n" - "1: \n" - "vld1.32 {d0[0]}, [%0], r12 \n" - "vld1.32 {d0[1]}, [%0], r12 \n" - "vld1.32 {d1[0]}, [%0], r12 \n" - "vld1.32 {d1[1]}, [%0], r12 \n" - "subs %2, %2, #4 \n" // 4 pixels per loop. - "vst1.8 {q0}, [%1]! \n" - "bgt 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_argb), // %1 - "+r"(dst_width) // %2 - : "r"(src_stepx) // %3 - : "memory", "cc", "r12", "q0" - ); -} - -// Reads 4 pixels at a time. -// Alignment requirement: src_argb 4 byte aligned. 
-void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, ptrdiff_t src_stride, - int src_stepx, - uint8* dst_argb, int dst_width) { - asm volatile ( - "mov r12, %4, lsl #2 \n" - "add %1, %1, %0 \n" - ".p2align 2 \n" - "1: \n" - "vld1.8 {d0}, [%0], r12 \n" // Read 4 2x2 blocks -> 2x1 - "vld1.8 {d1}, [%1], r12 \n" - "vld1.8 {d2}, [%0], r12 \n" - "vld1.8 {d3}, [%1], r12 \n" - "vld1.8 {d4}, [%0], r12 \n" - "vld1.8 {d5}, [%1], r12 \n" - "vld1.8 {d6}, [%0], r12 \n" - "vld1.8 {d7}, [%1], r12 \n" - "vaddl.u8 q0, d0, d1 \n" - "vaddl.u8 q1, d2, d3 \n" - "vaddl.u8 q2, d4, d5 \n" - "vaddl.u8 q3, d6, d7 \n" - "vswp.8 d1, d2 \n" // ab_cd -> ac_bd - "vswp.8 d5, d6 \n" // ef_gh -> eg_fh - "vadd.u16 q0, q0, q1 \n" // (a+b)_(c+d) - "vadd.u16 q2, q2, q3 \n" // (e+f)_(g+h) - "vrshrn.u16 d0, q0, #2 \n" // first 2 pixels. - "vrshrn.u16 d1, q2, #2 \n" // next 2 pixels. - "subs %3, %3, #4 \n" // 4 pixels per loop. - "vst1.8 {q0}, [%2]! \n" - "bgt 1b \n" - : "+r"(src_argb), // %0 - "+r"(src_stride), // %1 - "+r"(dst_argb), // %2 - "+r"(dst_width) // %3 - : "r"(src_stepx) // %4 - : "memory", "cc", "r12", "q0", "q1", "q2", "q3" - ); -} - -#endif // __ARM_NEON__ - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/scale_posix.cc b/drivers/theoraplayer/src/YUV/libyuv/src/scale_posix.cc deleted file mode 100644 index 352e667822..0000000000 --- a/drivers/theoraplayer/src/YUV/libyuv/src/scale_posix.cc +++ /dev/null @@ -1,1315 +0,0 @@ -/* - * Copyright 2013 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include "libyuv/row.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -// This module is for GCC x86 and x64. -#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__)) - -// Offsets for source bytes 0 to 9 -static uvec8 kShuf0 = - { 0, 1, 3, 4, 5, 7, 8, 9, 128, 128, 128, 128, 128, 128, 128, 128 }; - -// Offsets for source bytes 11 to 20 with 8 subtracted = 3 to 12. -static uvec8 kShuf1 = - { 3, 4, 5, 7, 8, 9, 11, 12, 128, 128, 128, 128, 128, 128, 128, 128 }; - -// Offsets for source bytes 21 to 31 with 16 subtracted = 5 to 31. -static uvec8 kShuf2 = - { 5, 7, 8, 9, 11, 12, 13, 15, 128, 128, 128, 128, 128, 128, 128, 128 }; - -// Offsets for source bytes 0 to 10 -static uvec8 kShuf01 = - { 0, 1, 1, 2, 2, 3, 4, 5, 5, 6, 6, 7, 8, 9, 9, 10 }; - -// Offsets for source bytes 10 to 21 with 8 subtracted = 3 to 13. -static uvec8 kShuf11 = - { 2, 3, 4, 5, 5, 6, 6, 7, 8, 9, 9, 10, 10, 11, 12, 13 }; - -// Offsets for source bytes 21 to 31 with 16 subtracted = 5 to 31. 
-static uvec8 kShuf21 = - { 5, 6, 6, 7, 8, 9, 9, 10, 10, 11, 12, 13, 13, 14, 14, 15 }; - -// Coefficients for source bytes 0 to 10 -static uvec8 kMadd01 = - { 3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2 }; - -// Coefficients for source bytes 10 to 21 -static uvec8 kMadd11 = - { 1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1 }; - -// Coefficients for source bytes 21 to 31 -static uvec8 kMadd21 = - { 2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3 }; - -// Coefficients for source bytes 21 to 31 -static vec16 kRound34 = - { 2, 2, 2, 2, 2, 2, 2, 2 }; - -static uvec8 kShuf38a = - { 0, 3, 6, 8, 11, 14, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; - -static uvec8 kShuf38b = - { 128, 128, 128, 128, 128, 128, 0, 3, 6, 8, 11, 14, 128, 128, 128, 128 }; - -// Arrange words 0,3,6 into 0,1,2 -static uvec8 kShufAc = - { 0, 1, 6, 7, 12, 13, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; - -// Arrange words 0,3,6 into 3,4,5 -static uvec8 kShufAc3 = - { 128, 128, 128, 128, 128, 128, 0, 1, 6, 7, 12, 13, 128, 128, 128, 128 }; - -// Scaling values for boxes of 3x3 and 2x3 -static uvec16 kScaleAc33 = - { 65536 / 9, 65536 / 9, 65536 / 6, 65536 / 9, 65536 / 9, 65536 / 6, 0, 0 }; - -// Arrange first value for pixels 0,1,2,3,4,5 -static uvec8 kShufAb0 = - { 0, 128, 3, 128, 6, 128, 8, 128, 11, 128, 14, 128, 128, 128, 128, 128 }; - -// Arrange second value for pixels 0,1,2,3,4,5 -static uvec8 kShufAb1 = - { 1, 128, 4, 128, 7, 128, 9, 128, 12, 128, 15, 128, 128, 128, 128, 128 }; - -// Arrange third value for pixels 0,1,2,3,4,5 -static uvec8 kShufAb2 = - { 2, 128, 5, 128, 128, 128, 10, 128, 13, 128, 128, 128, 128, 128, 128, 128 }; - -// Scaling values for boxes of 3x2 and 2x2 -static uvec16 kScaleAb2 = - { 65536 / 3, 65536 / 3, 65536 / 2, 65536 / 3, 65536 / 3, 65536 / 2, 0, 0 }; - -// GCC versions of row functions are verbatim conversions from Visual C. 
-// Generated using gcc disassembly on Visual C object file: -// objdump -D yuvscaler.obj >yuvscaler.txt - -void ScaleRowDown2_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { - asm volatile ( - LABELALIGN - "1: \n" - "movdqa " MEMACCESS(0) ",%%xmm0 \n" - "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "lea " MEMLEA(0x20,0) ",%0 \n" - "psrlw $0x8,%%xmm0 \n" - "psrlw $0x8,%%xmm1 \n" - "packuswb %%xmm1,%%xmm0 \n" - "movdqa %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x10,%2 \n" - "jg 1b \n" - : "+r"(src_ptr), // %0 - "+r"(dst_ptr), // %1 - "+r"(dst_width) // %2 - : - : "memory", "cc" -#if defined(__SSE2__) - , "xmm0", "xmm1" -#endif - ); -} - -void ScaleRowDown2Linear_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { - asm volatile ( - "pcmpeqb %%xmm5,%%xmm5 \n" - "psrlw $0x8,%%xmm5 \n" - - LABELALIGN - "1: \n" - "movdqa " MEMACCESS(0) ",%%xmm0 \n" - "movdqa " MEMACCESS2(0x10, 0) ",%%xmm1 \n" - "lea " MEMLEA(0x20,0) ",%0 \n" - "movdqa %%xmm0,%%xmm2 \n" - "psrlw $0x8,%%xmm0 \n" - "movdqa %%xmm1,%%xmm3 \n" - "psrlw $0x8,%%xmm1 \n" - "pand %%xmm5,%%xmm2 \n" - "pand %%xmm5,%%xmm3 \n" - "pavgw %%xmm2,%%xmm0 \n" - "pavgw %%xmm3,%%xmm1 \n" - "packuswb %%xmm1,%%xmm0 \n" - "movdqa %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x10,%2 \n" - "jg 1b \n" - : "+r"(src_ptr), // %0 - "+r"(dst_ptr), // %1 - "+r"(dst_width) // %2 - : - : "memory", "cc" -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm5" -#endif - ); -} - -void ScaleRowDown2Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { - asm volatile ( - "pcmpeqb %%xmm5,%%xmm5 \n" - "psrlw $0x8,%%xmm5 \n" - - LABELALIGN - "1: \n" - "movdqa " MEMACCESS(0) ",%%xmm0 \n" - "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n" - MEMOPREG(movdqa,0x00,0,3,1,xmm2) // movdqa (%0,%3,1),%%xmm2 - BUNDLEALIGN - MEMOPREG(movdqa,0x10,0,3,1,xmm3) // movdqa 0x10(%0,%3,1),%%xmm3 - "lea " MEMLEA(0x20,0) ",%0 \n" - 
"pavgb %%xmm2,%%xmm0 \n" - "pavgb %%xmm3,%%xmm1 \n" - "movdqa %%xmm0,%%xmm2 \n" - "psrlw $0x8,%%xmm0 \n" - "movdqa %%xmm1,%%xmm3 \n" - "psrlw $0x8,%%xmm1 \n" - "pand %%xmm5,%%xmm2 \n" - "pand %%xmm5,%%xmm3 \n" - "pavgw %%xmm2,%%xmm0 \n" - "pavgw %%xmm3,%%xmm1 \n" - "packuswb %%xmm1,%%xmm0 \n" - "movdqa %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x10,%2 \n" - "jg 1b \n" - : "+r"(src_ptr), // %0 - "+r"(dst_ptr), // %1 - "+r"(dst_width) // %2 - : "r"((intptr_t)(src_stride)) // %3 - : "memory", "cc" -#if defined(__native_client__) && defined(__x86_64__) - , "r14" -#endif -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" -#endif - ); -} - -void ScaleRowDown2_Unaligned_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { - asm volatile ( - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "lea " MEMLEA(0x20,0) ",%0 \n" - "psrlw $0x8,%%xmm0 \n" - "psrlw $0x8,%%xmm1 \n" - "packuswb %%xmm1,%%xmm0 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x10,%2 \n" - "jg 1b \n" - : "+r"(src_ptr), // %0 - "+r"(dst_ptr), // %1 - "+r"(dst_width) // %2 - : - : "memory", "cc" -#if defined(__SSE2__) - , "xmm0", "xmm1" -#endif - ); -} - -void ScaleRowDown2Linear_Unaligned_SSE2(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { - asm volatile ( - "pcmpeqb %%xmm5,%%xmm5 \n" - "psrlw $0x8,%%xmm5 \n" - - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "lea " MEMLEA(0x20,0) ",%0 \n" - "movdqa %%xmm0,%%xmm2 \n" - "psrlw $0x8,%%xmm0 \n" - "movdqa %%xmm1,%%xmm3 \n" - "psrlw $0x8,%%xmm1 \n" - "pand %%xmm5,%%xmm2 \n" - "pand %%xmm5,%%xmm3 \n" - "pavgw %%xmm2,%%xmm0 \n" - "pavgw %%xmm3,%%xmm1 \n" - "packuswb %%xmm1,%%xmm0 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x10,%2 \n" - "jg 1b \n" - : "+r"(src_ptr), // %0 - 
"+r"(dst_ptr), // %1 - "+r"(dst_width) // %2 - : - : "memory", "cc" -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm5" -#endif - ); -} - -void ScaleRowDown2Box_Unaligned_SSE2(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { - asm volatile ( - "pcmpeqb %%xmm5,%%xmm5 \n" - "psrlw $0x8,%%xmm5 \n" - - LABELALIGN - "1: \n" - "movdqu " MEMACCESS(0) ",%%xmm0 \n" - "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" - MEMOPREG(movdqu,0x00,0,3,1,xmm2) // movdqu (%0,%3,1),%%xmm2 - BUNDLEALIGN - MEMOPREG(movdqu,0x10,0,3,1,xmm3) // movdqu 0x10(%0,%3,1),%%xmm3 - "lea " MEMLEA(0x20,0) ",%0 \n" - "pavgb %%xmm2,%%xmm0 \n" - "pavgb %%xmm3,%%xmm1 \n" - "movdqa %%xmm0,%%xmm2 \n" - "psrlw $0x8,%%xmm0 \n" - "movdqa %%xmm1,%%xmm3 \n" - "psrlw $0x8,%%xmm1 \n" - "pand %%xmm5,%%xmm2 \n" - "pand %%xmm5,%%xmm3 \n" - "pavgw %%xmm2,%%xmm0 \n" - "pavgw %%xmm3,%%xmm1 \n" - "packuswb %%xmm1,%%xmm0 \n" - "movdqu %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "sub $0x10,%2 \n" - "jg 1b \n" - : "+r"(src_ptr), // %0 - "+r"(dst_ptr), // %1 - "+r"(dst_width) // %2 - : "r"((intptr_t)(src_stride)) // %3 - : "memory", "cc" -#if defined(__native_client__) && defined(__x86_64__) - , "r14" -#endif -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" -#endif - ); -} - -void ScaleRowDown4_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { - asm volatile ( - "pcmpeqb %%xmm5,%%xmm5 \n" - "psrld $0x18,%%xmm5 \n" - "pslld $0x10,%%xmm5 \n" - - LABELALIGN - "1: \n" - "movdqa " MEMACCESS(0) ",%%xmm0 \n" - "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "lea " MEMLEA(0x20,0) ",%0 \n" - "pand %%xmm5,%%xmm0 \n" - "pand %%xmm5,%%xmm1 \n" - "packuswb %%xmm1,%%xmm0 \n" - "psrlw $0x8,%%xmm0 \n" - "packuswb %%xmm0,%%xmm0 \n" - "movq %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x8,1) ",%1 \n" - "sub $0x8,%2 \n" - "jg 1b \n" - : "+r"(src_ptr), // %0 - "+r"(dst_ptr), // %1 - "+r"(dst_width) // %2 - : - : "memory", "cc" -#if defined(__SSE2__) - , 
"xmm0", "xmm1", "xmm5" -#endif - ); -} - -void ScaleRowDown4Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { - intptr_t stridex3 = 0; - asm volatile ( - "pcmpeqb %%xmm7,%%xmm7 \n" - "psrlw $0x8,%%xmm7 \n" - "lea " MEMLEA4(0x00,4,4,2) ",%3 \n" - - LABELALIGN - "1: \n" - "movdqa " MEMACCESS(0) ",%%xmm0 \n" - "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n" - MEMOPREG(movdqa,0x00,0,4,1,xmm2) // movdqa (%0,%4,1),%%xmm2 - BUNDLEALIGN - MEMOPREG(movdqa,0x10,0,4,1,xmm3) // movdqa 0x10(%0,%4,1),%%xmm3 - "pavgb %%xmm2,%%xmm0 \n" - "pavgb %%xmm3,%%xmm1 \n" - MEMOPREG(movdqa,0x00,0,4,2,xmm2) // movdqa (%0,%4,2),%%xmm2 - BUNDLEALIGN - MEMOPREG(movdqa,0x10,0,4,2,xmm3) // movdqa 0x10(%0,%4,2),%%xmm3 - MEMOPREG(movdqa,0x00,0,3,1,xmm4) // movdqa (%0,%3,1),%%xmm4 - MEMOPREG(movdqa,0x10,0,3,1,xmm5) // movdqa 0x10(%0,%3,1),%%xmm5 - "lea " MEMLEA(0x20,0) ",%0 \n" - "pavgb %%xmm4,%%xmm2 \n" - "pavgb %%xmm2,%%xmm0 \n" - "pavgb %%xmm5,%%xmm3 \n" - "pavgb %%xmm3,%%xmm1 \n" - "movdqa %%xmm0,%%xmm2 \n" - "psrlw $0x8,%%xmm0 \n" - "movdqa %%xmm1,%%xmm3 \n" - "psrlw $0x8,%%xmm1 \n" - "pand %%xmm7,%%xmm2 \n" - "pand %%xmm7,%%xmm3 \n" - "pavgw %%xmm2,%%xmm0 \n" - "pavgw %%xmm3,%%xmm1 \n" - "packuswb %%xmm1,%%xmm0 \n" - "movdqa %%xmm0,%%xmm2 \n" - "psrlw $0x8,%%xmm0 \n" - "pand %%xmm7,%%xmm2 \n" - "pavgw %%xmm2,%%xmm0 \n" - "packuswb %%xmm0,%%xmm0 \n" - "movq %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x8,1) ",%1 \n" - "sub $0x8,%2 \n" - "jg 1b \n" - : "+r"(src_ptr), // %0 - "+r"(dst_ptr), // %1 - "+r"(dst_width), // %2 - "+r"(stridex3) // %3 - : "r"((intptr_t)(src_stride)) // %4 - : "memory", "cc" -#if defined(__native_client__) && defined(__x86_64__) - , "r14" -#endif -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm7" -#endif - ); -} - -void ScaleRowDown34_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { - asm volatile ( - "movdqa %0,%%xmm3 \n" - "movdqa %1,%%xmm4 \n" - "movdqa %2,%%xmm5 \n" - : 
- : "m"(kShuf0), // %0 - "m"(kShuf1), // %1 - "m"(kShuf2) // %2 - ); - asm volatile ( - LABELALIGN - "1: \n" - "movdqa " MEMACCESS(0) ",%%xmm0 \n" - "movdqa " MEMACCESS2(0x10,0) ",%%xmm2 \n" - "lea " MEMLEA(0x20,0) ",%0 \n" - "movdqa %%xmm2,%%xmm1 \n" - "palignr $0x8,%%xmm0,%%xmm1 \n" - "pshufb %%xmm3,%%xmm0 \n" - "pshufb %%xmm4,%%xmm1 \n" - "pshufb %%xmm5,%%xmm2 \n" - "movq %%xmm0," MEMACCESS(1) " \n" - "movq %%xmm1," MEMACCESS2(0x8,1) " \n" - "movq %%xmm2," MEMACCESS2(0x10,1) " \n" - "lea " MEMLEA(0x18,1) ",%1 \n" - "sub $0x18,%2 \n" - "jg 1b \n" - : "+r"(src_ptr), // %0 - "+r"(dst_ptr), // %1 - "+r"(dst_width) // %2 - : - : "memory", "cc" -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" -#endif - ); -} - -void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { - asm volatile ( - "movdqa %0,%%xmm2 \n" // kShuf01 - "movdqa %1,%%xmm3 \n" // kShuf11 - "movdqa %2,%%xmm4 \n" // kShuf21 - : - : "m"(kShuf01), // %0 - "m"(kShuf11), // %1 - "m"(kShuf21) // %2 - ); - asm volatile ( - "movdqa %0,%%xmm5 \n" // kMadd01 - "movdqa %1,%%xmm0 \n" // kMadd11 - "movdqa %2,%%xmm1 \n" // kRound34 - : - : "m"(kMadd01), // %0 - "m"(kMadd11), // %1 - "m"(kRound34) // %2 - ); - asm volatile ( - LABELALIGN - "1: \n" - "movdqa " MEMACCESS(0) ",%%xmm6 \n" - MEMOPREG(movdqa,0x00,0,3,1,xmm7) // movdqa (%0,%3),%%xmm7 - "pavgb %%xmm7,%%xmm6 \n" - "pshufb %%xmm2,%%xmm6 \n" - "pmaddubsw %%xmm5,%%xmm6 \n" - "paddsw %%xmm1,%%xmm6 \n" - "psrlw $0x2,%%xmm6 \n" - "packuswb %%xmm6,%%xmm6 \n" - "movq %%xmm6," MEMACCESS(1) " \n" - "movdqu " MEMACCESS2(0x8,0) ",%%xmm6 \n" - MEMOPREG(movdqu,0x8,0,3,1,xmm7) // movdqu 0x8(%0,%3),%%xmm7 - "pavgb %%xmm7,%%xmm6 \n" - "pshufb %%xmm3,%%xmm6 \n" - "pmaddubsw %%xmm0,%%xmm6 \n" - "paddsw %%xmm1,%%xmm6 \n" - "psrlw $0x2,%%xmm6 \n" - "packuswb %%xmm6,%%xmm6 \n" - "movq %%xmm6," MEMACCESS2(0x8,1) " \n" - "movdqa " MEMACCESS2(0x10,0) ",%%xmm6 \n" - BUNDLEALIGN - 
MEMOPREG(movdqa,0x10,0,3,1,xmm7) // movdqa 0x10(%0,%3),%%xmm7 - "lea " MEMLEA(0x20,0) ",%0 \n" - "pavgb %%xmm7,%%xmm6 \n" - "pshufb %%xmm4,%%xmm6 \n" - "pmaddubsw %4,%%xmm6 \n" - "paddsw %%xmm1,%%xmm6 \n" - "psrlw $0x2,%%xmm6 \n" - "packuswb %%xmm6,%%xmm6 \n" - "movq %%xmm6," MEMACCESS2(0x10,1) " \n" - "lea " MEMLEA(0x18,1) ",%1 \n" - "sub $0x18,%2 \n" - "jg 1b \n" - : "+r"(src_ptr), // %0 - "+r"(dst_ptr), // %1 - "+r"(dst_width) // %2 - : "r"((intptr_t)(src_stride)), // %3 - "m"(kMadd21) // %4 - : "memory", "cc" -#if defined(__native_client__) && defined(__x86_64__) - , "r14" -#endif -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" -#endif - ); -} - -void ScaleRowDown34_0_Box_SSSE3(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { - asm volatile ( - "movdqa %0,%%xmm2 \n" // kShuf01 - "movdqa %1,%%xmm3 \n" // kShuf11 - "movdqa %2,%%xmm4 \n" // kShuf21 - : - : "m"(kShuf01), // %0 - "m"(kShuf11), // %1 - "m"(kShuf21) // %2 - ); - asm volatile ( - "movdqa %0,%%xmm5 \n" // kMadd01 - "movdqa %1,%%xmm0 \n" // kMadd11 - "movdqa %2,%%xmm1 \n" // kRound34 - : - : "m"(kMadd01), // %0 - "m"(kMadd11), // %1 - "m"(kRound34) // %2 - ); - - asm volatile ( - LABELALIGN - "1: \n" - "movdqa " MEMACCESS(0) ",%%xmm6 \n" - MEMOPREG(movdqa,0x00,0,3,1,xmm7) // movdqa (%0,%3,1),%%xmm7 - "pavgb %%xmm6,%%xmm7 \n" - "pavgb %%xmm7,%%xmm6 \n" - "pshufb %%xmm2,%%xmm6 \n" - "pmaddubsw %%xmm5,%%xmm6 \n" - "paddsw %%xmm1,%%xmm6 \n" - "psrlw $0x2,%%xmm6 \n" - "packuswb %%xmm6,%%xmm6 \n" - "movq %%xmm6," MEMACCESS(1) " \n" - "movdqu " MEMACCESS2(0x8,0) ",%%xmm6 \n" - MEMOPREG(movdqu,0x8,0,3,1,xmm7) // movdqu 0x8(%0,%3,1),%%xmm7 - "pavgb %%xmm6,%%xmm7 \n" - "pavgb %%xmm7,%%xmm6 \n" - "pshufb %%xmm3,%%xmm6 \n" - "pmaddubsw %%xmm0,%%xmm6 \n" - "paddsw %%xmm1,%%xmm6 \n" - "psrlw $0x2,%%xmm6 \n" - "packuswb %%xmm6,%%xmm6 \n" - "movq %%xmm6," MEMACCESS2(0x8,1) " \n" - "movdqa " MEMACCESS2(0x10,0) ",%%xmm6 \n" - 
MEMOPREG(movdqa,0x10,0,3,1,xmm7) // movdqa 0x10(%0,%3,1),%%xmm7 - "lea " MEMLEA(0x20,0) ",%0 \n" - "pavgb %%xmm6,%%xmm7 \n" - "pavgb %%xmm7,%%xmm6 \n" - "pshufb %%xmm4,%%xmm6 \n" - "pmaddubsw %4,%%xmm6 \n" - "paddsw %%xmm1,%%xmm6 \n" - "psrlw $0x2,%%xmm6 \n" - "packuswb %%xmm6,%%xmm6 \n" - "movq %%xmm6," MEMACCESS2(0x10,1) " \n" - "lea " MEMLEA(0x18,1) ",%1 \n" - "sub $0x18,%2 \n" - "jg 1b \n" - : "+r"(src_ptr), // %0 - "+r"(dst_ptr), // %1 - "+r"(dst_width) // %2 - : "r"((intptr_t)(src_stride)), // %3 - "m"(kMadd21) // %4 - : "memory", "cc" -#if defined(__native_client__) && defined(__x86_64__) - , "r14" -#endif -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" -#endif - ); -} - -void ScaleRowDown38_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { - asm volatile ( - "movdqa %3,%%xmm4 \n" - "movdqa %4,%%xmm5 \n" - - LABELALIGN - "1: \n" - "movdqa " MEMACCESS(0) ",%%xmm0 \n" - "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "lea " MEMLEA(0x20,0) ",%0 \n" - "pshufb %%xmm4,%%xmm0 \n" - "pshufb %%xmm5,%%xmm1 \n" - "paddusb %%xmm1,%%xmm0 \n" - "movq %%xmm0," MEMACCESS(1) " \n" - "movhlps %%xmm0,%%xmm1 \n" - "movd %%xmm1," MEMACCESS2(0x8,1) " \n" - "lea " MEMLEA(0xc,1) ",%1 \n" - "sub $0xc,%2 \n" - "jg 1b \n" - : "+r"(src_ptr), // %0 - "+r"(dst_ptr), // %1 - "+r"(dst_width) // %2 - : "m"(kShuf38a), // %3 - "m"(kShuf38b) // %4 - : "memory", "cc" -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm4", "xmm5" -#endif - ); -} - -void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { - asm volatile ( - "movdqa %0,%%xmm2 \n" - "movdqa %1,%%xmm3 \n" - "movdqa %2,%%xmm4 \n" - "movdqa %3,%%xmm5 \n" - : - : "m"(kShufAb0), // %0 - "m"(kShufAb1), // %1 - "m"(kShufAb2), // %2 - "m"(kScaleAb2) // %3 - ); - asm volatile ( - LABELALIGN - "1: \n" - "movdqa " MEMACCESS(0) ",%%xmm0 \n" - MEMOPREG(pavgb,0x00,0,3,1,xmm0) // pavgb (%0,%3,1),%%xmm0 - "lea " 
MEMLEA(0x10,0) ",%0 \n" - "movdqa %%xmm0,%%xmm1 \n" - "pshufb %%xmm2,%%xmm1 \n" - "movdqa %%xmm0,%%xmm6 \n" - "pshufb %%xmm3,%%xmm6 \n" - "paddusw %%xmm6,%%xmm1 \n" - "pshufb %%xmm4,%%xmm0 \n" - "paddusw %%xmm0,%%xmm1 \n" - "pmulhuw %%xmm5,%%xmm1 \n" - "packuswb %%xmm1,%%xmm1 \n" - "sub $0x6,%2 \n" - "movd %%xmm1," MEMACCESS(1) " \n" - "psrlq $0x10,%%xmm1 \n" - "movd %%xmm1," MEMACCESS2(0x2,1) " \n" - "lea " MEMLEA(0x6,1) ",%1 \n" - "jg 1b \n" - : "+r"(src_ptr), // %0 - "+r"(dst_ptr), // %1 - "+r"(dst_width) // %2 - : "r"((intptr_t)(src_stride)) // %3 - : "memory", "cc" -#if defined(__native_client__) && defined(__x86_64__) - , "r14" -#endif -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6" -#endif - ); -} - -void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { - asm volatile ( - "movdqa %0,%%xmm2 \n" - "movdqa %1,%%xmm3 \n" - "movdqa %2,%%xmm4 \n" - "pxor %%xmm5,%%xmm5 \n" - : - : "m"(kShufAc), // %0 - "m"(kShufAc3), // %1 - "m"(kScaleAc33) // %2 - ); - asm volatile ( - LABELALIGN - "1: \n" - "movdqa " MEMACCESS(0) ",%%xmm0 \n" - MEMOPREG(movdqa,0x00,0,3,1,xmm6) // movdqa (%0,%3,1),%%xmm6 - "movhlps %%xmm0,%%xmm1 \n" - "movhlps %%xmm6,%%xmm7 \n" - "punpcklbw %%xmm5,%%xmm0 \n" - "punpcklbw %%xmm5,%%xmm1 \n" - "punpcklbw %%xmm5,%%xmm6 \n" - "punpcklbw %%xmm5,%%xmm7 \n" - "paddusw %%xmm6,%%xmm0 \n" - "paddusw %%xmm7,%%xmm1 \n" - MEMOPREG(movdqa,0x00,0,3,2,xmm6) // movdqa (%0,%3,2),%%xmm6 - "lea " MEMLEA(0x10,0) ",%0 \n" - "movhlps %%xmm6,%%xmm7 \n" - "punpcklbw %%xmm5,%%xmm6 \n" - "punpcklbw %%xmm5,%%xmm7 \n" - "paddusw %%xmm6,%%xmm0 \n" - "paddusw %%xmm7,%%xmm1 \n" - "movdqa %%xmm0,%%xmm6 \n" - "psrldq $0x2,%%xmm0 \n" - "paddusw %%xmm0,%%xmm6 \n" - "psrldq $0x2,%%xmm0 \n" - "paddusw %%xmm0,%%xmm6 \n" - "pshufb %%xmm2,%%xmm6 \n" - "movdqa %%xmm1,%%xmm7 \n" - "psrldq $0x2,%%xmm1 \n" - "paddusw %%xmm1,%%xmm7 \n" - "psrldq $0x2,%%xmm1 \n" - "paddusw %%xmm1,%%xmm7 \n" - "pshufb 
%%xmm3,%%xmm7 \n" - "paddusw %%xmm7,%%xmm6 \n" - "pmulhuw %%xmm4,%%xmm6 \n" - "packuswb %%xmm6,%%xmm6 \n" - "sub $0x6,%2 \n" - "movd %%xmm6," MEMACCESS(1) " \n" - "psrlq $0x10,%%xmm6 \n" - "movd %%xmm6," MEMACCESS2(0x2,1) " \n" - "lea " MEMLEA(0x6,1) ",%1 \n" - "jg 1b \n" - : "+r"(src_ptr), // %0 - "+r"(dst_ptr), // %1 - "+r"(dst_width) // %2 - : "r"((intptr_t)(src_stride)) // %3 - : "memory", "cc" -#if defined(__native_client__) && defined(__x86_64__) - , "r14" -#endif -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" -#endif - ); -} - -void ScaleAddRows_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, - uint16* dst_ptr, int src_width, int src_height) { - int tmp_height = 0; - intptr_t tmp_src = 0; - asm volatile ( - "pxor %%xmm4,%%xmm4 \n" - "sub $0x1,%5 \n" - - LABELALIGN - "1: \n" - "movdqa " MEMACCESS(0) ",%%xmm0 \n" - "mov %0,%3 \n" - "add %6,%0 \n" - "movdqa %%xmm0,%%xmm1 \n" - "punpcklbw %%xmm4,%%xmm0 \n" - "punpckhbw %%xmm4,%%xmm1 \n" - "mov %5,%2 \n" - "test %2,%2 \n" - "je 3f \n" - - LABELALIGN - "2: \n" - "movdqa " MEMACCESS(0) ",%%xmm2 \n" - "add %6,%0 \n" - "movdqa %%xmm2,%%xmm3 \n" - "punpcklbw %%xmm4,%%xmm2 \n" - "punpckhbw %%xmm4,%%xmm3 \n" - "paddusw %%xmm2,%%xmm0 \n" - "paddusw %%xmm3,%%xmm1 \n" - "sub $0x1,%2 \n" - "jg 2b \n" - - LABELALIGN - "3: \n" - "movdqa %%xmm0," MEMACCESS(1) " \n" - "movdqa %%xmm1," MEMACCESS2(0x10,1) " \n" - "lea " MEMLEA(0x10,3) ",%0 \n" - "lea " MEMLEA(0x20,1) ",%1 \n" - "sub $0x10,%4 \n" - "jg 1b \n" - : "+r"(src_ptr), // %0 - "+r"(dst_ptr), // %1 - "+r"(tmp_height), // %2 - "+r"(tmp_src), // %3 - "+r"(src_width), // %4 - "+rm"(src_height) // %5 - : "rm"((intptr_t)(src_stride)) // %6 - : "memory", "cc" -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4" -#endif - ); -} - -// Bilinear column filtering. SSSE3 version. 
-void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr, - int dst_width, int x, int dx) { - intptr_t x0 = 0, x1 = 0, temp_pixel = 0; - asm volatile ( - "movd %6,%%xmm2 \n" - "movd %7,%%xmm3 \n" - "movl $0x04040000,%k2 \n" - "movd %k2,%%xmm5 \n" - "pcmpeqb %%xmm6,%%xmm6 \n" - "psrlw $0x9,%%xmm6 \n" - "pextrw $0x1,%%xmm2,%k3 \n" - "subl $0x2,%5 \n" - "jl 29f \n" - "movdqa %%xmm2,%%xmm0 \n" - "paddd %%xmm3,%%xmm0 \n" - "punpckldq %%xmm0,%%xmm2 \n" - "punpckldq %%xmm3,%%xmm3 \n" - "paddd %%xmm3,%%xmm3 \n" - "pextrw $0x3,%%xmm2,%k4 \n" - - LABELALIGN - "2: \n" - "movdqa %%xmm2,%%xmm1 \n" - "paddd %%xmm3,%%xmm2 \n" - MEMOPARG(movzwl,0x00,1,3,1,k2) // movzwl (%1,%3,1),%k2 - "movd %k2,%%xmm0 \n" - "psrlw $0x9,%%xmm1 \n" - BUNDLEALIGN - MEMOPARG(movzwl,0x00,1,4,1,k2) // movzwl (%1,%4,1),%k2 - "movd %k2,%%xmm4 \n" - "pshufb %%xmm5,%%xmm1 \n" - "punpcklwd %%xmm4,%%xmm0 \n" - "pxor %%xmm6,%%xmm1 \n" - "pmaddubsw %%xmm1,%%xmm0 \n" - "pextrw $0x1,%%xmm2,%k3 \n" - "pextrw $0x3,%%xmm2,%k4 \n" - "psrlw $0x7,%%xmm0 \n" - "packuswb %%xmm0,%%xmm0 \n" - "movd %%xmm0,%k2 \n" - "mov %w2," MEMACCESS(0) " \n" - "lea " MEMLEA(0x2,0) ",%0 \n" - "sub $0x2,%5 \n" - "jge 2b \n" - - LABELALIGN - "29: \n" - "addl $0x1,%5 \n" - "jl 99f \n" - MEMOPARG(movzwl,0x00,1,3,1,k2) // movzwl (%1,%3,1),%k2 - "movd %k2,%%xmm0 \n" - "psrlw $0x9,%%xmm2 \n" - "pshufb %%xmm5,%%xmm2 \n" - "pxor %%xmm6,%%xmm2 \n" - "pmaddubsw %%xmm2,%%xmm0 \n" - "psrlw $0x7,%%xmm0 \n" - "packuswb %%xmm0,%%xmm0 \n" - "movd %%xmm0,%k2 \n" - "mov %b2," MEMACCESS(0) " \n" - "99: \n" - : "+r"(dst_ptr), // %0 - "+r"(src_ptr), // %1 - "+a"(temp_pixel), // %2 - "+r"(x0), // %3 - "+r"(x1), // %4 - "+rm"(dst_width) // %5 - : "rm"(x), // %6 - "rm"(dx) // %7 - : "memory", "cc" -#if defined(__native_client__) && defined(__x86_64__) - , "r14" -#endif -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6" -#endif - ); -} - -// Reads 4 pixels, duplicates them and writes 8 pixels. 
-// Alignment requirement: src_argb 16 byte aligned, dst_argb 16 byte aligned. -void ScaleColsUp2_SSE2(uint8* dst_ptr, const uint8* src_ptr, - int dst_width, int x, int dx) { - asm volatile ( - LABELALIGN - "1: \n" - "movdqa " MEMACCESS(1) ",%%xmm0 \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "movdqa %%xmm0,%%xmm1 \n" - "punpcklbw %%xmm0,%%xmm0 \n" - "punpckhbw %%xmm1,%%xmm1 \n" - "sub $0x20,%2 \n" - "movdqa %%xmm0," MEMACCESS(0) " \n" - "movdqa %%xmm1," MEMACCESS2(0x10,0) " \n" - "lea " MEMLEA(0x20,0) ",%0 \n" - "jg 1b \n" - - : "+r"(dst_ptr), // %0 - "+r"(src_ptr), // %1 - "+r"(dst_width) // %2 - : - : "memory", "cc" -#if defined(__SSE2__) - , "xmm0", "xmm1" -#endif - ); -} - -void ScaleARGBRowDown2_SSE2(const uint8* src_argb, - ptrdiff_t src_stride, - uint8* dst_argb, int dst_width) { - asm volatile ( - LABELALIGN - "1: \n" - "movdqa " MEMACCESS(0) ",%%xmm0 \n" - "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "lea " MEMLEA(0x20,0) ",%0 \n" - "shufps $0xdd,%%xmm1,%%xmm0 \n" - "sub $0x4,%2 \n" - "movdqa %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "jg 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_argb), // %1 - "+r"(dst_width) // %2 - : - : "memory", "cc" -#if defined(__SSE2__) - , "xmm0", "xmm1" -#endif - ); -} - -void ScaleARGBRowDown2Linear_SSE2(const uint8* src_argb, - ptrdiff_t src_stride, - uint8* dst_argb, int dst_width) { - asm volatile ( - LABELALIGN - "1: \n" - "movdqa " MEMACCESS(0) ",%%xmm0 \n" - "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n" - "lea " MEMLEA(0x20,0) ",%0 \n" - "movdqa %%xmm0,%%xmm2 \n" - "shufps $0x88,%%xmm1,%%xmm0 \n" - "shufps $0xdd,%%xmm1,%%xmm2 \n" - "pavgb %%xmm2,%%xmm0 \n" - "sub $0x4,%2 \n" - "movdqa %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "jg 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_argb), // %1 - "+r"(dst_width) // %2 - : - : "memory", "cc" -#if defined(__SSE2__) - , "xmm0", "xmm1" -#endif - ); -} - -void ScaleARGBRowDown2Box_SSE2(const uint8* src_argb, - ptrdiff_t src_stride, - uint8* dst_argb, int 
dst_width) { - asm volatile ( - LABELALIGN - "1: \n" - "movdqa " MEMACCESS(0) ",%%xmm0 \n" - "movdqa " MEMACCESS2(0x10,0) ",%%xmm1 \n" - BUNDLEALIGN - MEMOPREG(movdqa,0x00,0,3,1,xmm2) // movdqa (%0,%3,1),%%xmm2 - MEMOPREG(movdqa,0x10,0,3,1,xmm3) // movdqa 0x10(%0,%3,1),%%xmm3 - "lea " MEMLEA(0x20,0) ",%0 \n" - "pavgb %%xmm2,%%xmm0 \n" - "pavgb %%xmm3,%%xmm1 \n" - "movdqa %%xmm0,%%xmm2 \n" - "shufps $0x88,%%xmm1,%%xmm0 \n" - "shufps $0xdd,%%xmm1,%%xmm2 \n" - "pavgb %%xmm2,%%xmm0 \n" - "sub $0x4,%2 \n" - "movdqa %%xmm0," MEMACCESS(1) " \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "jg 1b \n" - : "+r"(src_argb), // %0 - "+r"(dst_argb), // %1 - "+r"(dst_width) // %2 - : "r"((intptr_t)(src_stride)) // %3 - : "memory", "cc" -#if defined(__native_client__) && defined(__x86_64__) - , "r14" -#endif -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3" -#endif - ); -} - -// Reads 4 pixels at a time. -// Alignment requirement: dst_argb 16 byte aligned. -void ScaleARGBRowDownEven_SSE2(const uint8* src_argb, ptrdiff_t src_stride, - int src_stepx, - uint8* dst_argb, int dst_width) { - intptr_t src_stepx_x4 = (intptr_t)(src_stepx); - intptr_t src_stepx_x12 = 0; - asm volatile ( - "lea " MEMLEA3(0x00,1,4) ",%1 \n" - "lea " MEMLEA4(0x00,1,1,2) ",%4 \n" - LABELALIGN - "1: \n" - "movd " MEMACCESS(0) ",%%xmm0 \n" - MEMOPREG(movd,0x00,0,1,1,xmm1) // movd (%0,%1,1),%%xmm1 - "punpckldq %%xmm1,%%xmm0 \n" - BUNDLEALIGN - MEMOPREG(movd,0x00,0,1,2,xmm2) // movd (%0,%1,2),%%xmm2 - MEMOPREG(movd,0x00,0,4,1,xmm3) // movd (%0,%4,1),%%xmm3 - "lea " MEMLEA4(0x00,0,1,4) ",%0 \n" - "punpckldq %%xmm3,%%xmm2 \n" - "punpcklqdq %%xmm2,%%xmm0 \n" - "sub $0x4,%3 \n" - "movdqa %%xmm0," MEMACCESS(2) " \n" - "lea " MEMLEA(0x10,2) ",%2 \n" - "jg 1b \n" - : "+r"(src_argb), // %0 - "+r"(src_stepx_x4), // %1 - "+r"(dst_argb), // %2 - "+r"(dst_width), // %3 - "+r"(src_stepx_x12) // %4 - : - : "memory", "cc" -#if defined(__native_client__) && defined(__x86_64__) - , "r14" -#endif -#if defined(__SSE2__) - , "xmm0", 
"xmm1", "xmm2", "xmm3" -#endif - ); -} - -// Blends four 2x2 to 4x1. -// Alignment requirement: dst_argb 16 byte aligned. -void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb, - ptrdiff_t src_stride, int src_stepx, - uint8* dst_argb, int dst_width) { - intptr_t src_stepx_x4 = (intptr_t)(src_stepx); - intptr_t src_stepx_x12 = 0; - intptr_t row1 = (intptr_t)(src_stride); - asm volatile ( - "lea " MEMLEA3(0x00,1,4) ",%1 \n" - "lea " MEMLEA4(0x00,1,1,2) ",%4 \n" - "lea " MEMLEA4(0x00,0,5,1) ",%5 \n" - - LABELALIGN - "1: \n" - "movq " MEMACCESS(0) ",%%xmm0 \n" - MEMOPREG(movhps,0x00,0,1,1,xmm0) // movhps (%0,%1,1),%%xmm0 - MEMOPREG(movq,0x00,0,1,2,xmm1) // movq (%0,%1,2),%%xmm1 - BUNDLEALIGN - MEMOPREG(movhps,0x00,0,4,1,xmm1) // movhps (%0,%4,1),%%xmm1 - "lea " MEMLEA4(0x00,0,1,4) ",%0 \n" - "movq " MEMACCESS(5) ",%%xmm2 \n" - BUNDLEALIGN - MEMOPREG(movhps,0x00,5,1,1,xmm2) // movhps (%5,%1,1),%%xmm2 - MEMOPREG(movq,0x00,5,1,2,xmm3) // movq (%5,%1,2),%%xmm3 - MEMOPREG(movhps,0x00,5,4,1,xmm3) // movhps (%5,%4,1),%%xmm3 - "lea " MEMLEA4(0x00,5,1,4) ",%5 \n" - "pavgb %%xmm2,%%xmm0 \n" - "pavgb %%xmm3,%%xmm1 \n" - "movdqa %%xmm0,%%xmm2 \n" - "shufps $0x88,%%xmm1,%%xmm0 \n" - "shufps $0xdd,%%xmm1,%%xmm2 \n" - "pavgb %%xmm2,%%xmm0 \n" - "sub $0x4,%3 \n" - "movdqa %%xmm0," MEMACCESS(2) " \n" - "lea " MEMLEA(0x10,2) ",%2 \n" - "jg 1b \n" - : "+r"(src_argb), // %0 - "+r"(src_stepx_x4), // %1 - "+r"(dst_argb), // %2 - "+rm"(dst_width), // %3 - "+r"(src_stepx_x12), // %4 - "+r"(row1) // %5 - : - : "memory", "cc" -#if defined(__native_client__) && defined(__x86_64__) - , "r14" -#endif -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3" -#endif - ); -} - -void ScaleARGBCols_SSE2(uint8* dst_argb, const uint8* src_argb, - int dst_width, int x, int dx) { - intptr_t x0 = 0, x1 = 0; - asm volatile ( - "movd %5,%%xmm2 \n" - "movd %6,%%xmm3 \n" - "pshufd $0x0,%%xmm2,%%xmm2 \n" - "pshufd $0x11,%%xmm3,%%xmm0 \n" - "paddd %%xmm0,%%xmm2 \n" - "paddd %%xmm3,%%xmm3 \n" - "pshufd 
$0x5,%%xmm3,%%xmm0 \n" - "paddd %%xmm0,%%xmm2 \n" - "paddd %%xmm3,%%xmm3 \n" - "pshufd $0x0,%%xmm3,%%xmm3 \n" - "pextrw $0x1,%%xmm2,%k0 \n" - "pextrw $0x3,%%xmm2,%k1 \n" - "cmp $0x0,%4 \n" - "jl 99f \n" - "sub $0x4,%4 \n" - "jl 49f \n" - - LABELALIGN - "40: \n" - MEMOPREG(movd,0x00,3,0,4,xmm0) // movd (%3,%0,4),%%xmm0 - MEMOPREG(movd,0x00,3,1,4,xmm1) // movd (%3,%1,4),%%xmm1 - "pextrw $0x5,%%xmm2,%k0 \n" - "pextrw $0x7,%%xmm2,%k1 \n" - "paddd %%xmm3,%%xmm2 \n" - "punpckldq %%xmm1,%%xmm0 \n" - MEMOPREG(movd,0x00,3,0,4,xmm1) // movd (%3,%0,4),%%xmm1 - MEMOPREG(movd,0x00,3,1,4,xmm4) // movd (%3,%1,4),%%xmm4 - "pextrw $0x1,%%xmm2,%k0 \n" - "pextrw $0x3,%%xmm2,%k1 \n" - "punpckldq %%xmm4,%%xmm1 \n" - "punpcklqdq %%xmm1,%%xmm0 \n" - "sub $0x4,%4 \n" - "movdqu %%xmm0," MEMACCESS(2) " \n" - "lea " MEMLEA(0x10,2) ",%2 \n" - "jge 40b \n" - - "49: \n" - "test $0x2,%4 \n" - "je 29f \n" - BUNDLEALIGN - MEMOPREG(movd,0x00,3,0,4,xmm0) // movd (%3,%0,4),%%xmm0 - MEMOPREG(movd,0x00,3,1,4,xmm1) // movd (%3,%1,4),%%xmm1 - "pextrw $0x5,%%xmm2,%k0 \n" - "punpckldq %%xmm1,%%xmm0 \n" - "movq %%xmm0," MEMACCESS(2) " \n" - "lea " MEMLEA(0x8,2) ",%2 \n" - "29: \n" - "test $0x1,%4 \n" - "je 99f \n" - MEMOPREG(movd,0x00,3,0,4,xmm0) // movd (%3,%0,4),%%xmm0 - "movd %%xmm0," MEMACCESS(2) " \n" - "99: \n" - : "+a"(x0), // %0 - "+d"(x1), // %1 - "+r"(dst_argb), // %2 - "+r"(src_argb), // %3 - "+r"(dst_width) // %4 - : "rm"(x), // %5 - "rm"(dx) // %6 - : "memory", "cc" -#if defined(__native_client__) && defined(__x86_64__) - , "r14" -#endif -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4" -#endif - ); -} - -// Reads 4 pixels, duplicates them and writes 8 pixels. -// Alignment requirement: src_argb 16 byte aligned, dst_argb 16 byte aligned. 
-void ScaleARGBColsUp2_SSE2(uint8* dst_argb, const uint8* src_argb, - int dst_width, int x, int dx) { - asm volatile ( - LABELALIGN - "1: \n" - "movdqa " MEMACCESS(1) ",%%xmm0 \n" - "lea " MEMLEA(0x10,1) ",%1 \n" - "movdqa %%xmm0,%%xmm1 \n" - "punpckldq %%xmm0,%%xmm0 \n" - "punpckhdq %%xmm1,%%xmm1 \n" - "sub $0x8,%2 \n" - "movdqa %%xmm0," MEMACCESS(0) " \n" - "movdqa %%xmm1," MEMACCESS2(0x10,0) " \n" - "lea " MEMLEA(0x20,0) ",%0 \n" - "jg 1b \n" - - : "+r"(dst_argb), // %0 - "+r"(src_argb), // %1 - "+r"(dst_width) // %2 - : - : "memory", "cc" -#if defined(__native_client__) && defined(__x86_64__) - , "r14" -#endif -#if defined(__SSE2__) - , "xmm0", "xmm1" -#endif - ); -} - -// Shuffle table for arranging 2 pixels into pairs for pmaddubsw -static uvec8 kShuffleColARGB = { - 0u, 4u, 1u, 5u, 2u, 6u, 3u, 7u, // bbggrraa 1st pixel - 8u, 12u, 9u, 13u, 10u, 14u, 11u, 15u // bbggrraa 2nd pixel -}; - -// Shuffle table for duplicating 2 fractions into 8 bytes each -static uvec8 kShuffleFractions = { - 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 4u, 4u, 4u, 4u, 4u, 4u, 4u, 4u, -}; - -// Bilinear row filtering combines 4x2 -> 4x1. 
SSSE3 version -void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb, - int dst_width, int x, int dx) { - intptr_t x0 = 0, x1 = 0; - asm volatile ( - "movdqa %0,%%xmm4 \n" - "movdqa %1,%%xmm5 \n" - : - : "m"(kShuffleColARGB), // %0 - "m"(kShuffleFractions) // %1 - ); - - asm volatile ( - "movd %5,%%xmm2 \n" - "movd %6,%%xmm3 \n" - "pcmpeqb %%xmm6,%%xmm6 \n" - "psrlw $0x9,%%xmm6 \n" - "pextrw $0x1,%%xmm2,%k3 \n" - "sub $0x2,%2 \n" - "jl 29f \n" - "movdqa %%xmm2,%%xmm0 \n" - "paddd %%xmm3,%%xmm0 \n" - "punpckldq %%xmm0,%%xmm2 \n" - "punpckldq %%xmm3,%%xmm3 \n" - "paddd %%xmm3,%%xmm3 \n" - "pextrw $0x3,%%xmm2,%k4 \n" - - LABELALIGN - "2: \n" - "movdqa %%xmm2,%%xmm1 \n" - "paddd %%xmm3,%%xmm2 \n" - MEMOPREG(movq,0x00,1,3,4,xmm0) // movq (%1,%3,4),%%xmm0 - "psrlw $0x9,%%xmm1 \n" - BUNDLEALIGN - MEMOPREG(movhps,0x00,1,4,4,xmm0) // movhps (%1,%4,4),%%xmm0 - "pshufb %%xmm5,%%xmm1 \n" - "pshufb %%xmm4,%%xmm0 \n" - "pxor %%xmm6,%%xmm1 \n" - "pmaddubsw %%xmm1,%%xmm0 \n" - "psrlw $0x7,%%xmm0 \n" - "pextrw $0x1,%%xmm2,%k3 \n" - "pextrw $0x3,%%xmm2,%k4 \n" - "packuswb %%xmm0,%%xmm0 \n" - "movq %%xmm0," MEMACCESS(0) " \n" - "lea " MEMLEA(0x8,0) ",%0 \n" - "sub $0x2,%2 \n" - "jge 2b \n" - - LABELALIGN - "29: \n" - "add $0x1,%2 \n" - "jl 99f \n" - "psrlw $0x9,%%xmm2 \n" - BUNDLEALIGN - MEMOPREG(movq,0x00,1,3,4,xmm0) // movq (%1,%3,4),%%xmm0 - "pshufb %%xmm5,%%xmm2 \n" - "pshufb %%xmm4,%%xmm0 \n" - "pxor %%xmm6,%%xmm2 \n" - "pmaddubsw %%xmm2,%%xmm0 \n" - "psrlw $0x7,%%xmm0 \n" - "packuswb %%xmm0,%%xmm0 \n" - "movd %%xmm0," MEMACCESS(0) " \n" - - LABELALIGN - "99: \n" - : "+r"(dst_argb), // %0 - "+r"(src_argb), // %1 - "+rm"(dst_width), // %2 - "+r"(x0), // %3 - "+r"(x1) // %4 - : "rm"(x), // %5 - "rm"(dx) // %6 - : "memory", "cc" -#if defined(__native_client__) && defined(__x86_64__) - , "r14" -#endif -#if defined(__SSE2__) - , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6" -#endif - ); -} - -// Divide num by div and return as 16.16 fixed point result. 
-int FixedDiv_X86(int num, int div) { - asm volatile ( - "cdq \n" - "shld $0x10,%%eax,%%edx \n" - "shl $0x10,%%eax \n" - "idiv %1 \n" - "mov %0, %%eax \n" - : "+a"(num) // %0 - : "c"(div) // %1 - : "memory", "cc", "edx" - ); - return num; -} - -// Divide num - 1 by div - 1 and return as 16.16 fixed point result. -int FixedDiv1_X86(int num, int div) { - asm volatile ( - "cdq \n" - "shld $0x10,%%eax,%%edx \n" - "shl $0x10,%%eax \n" - "sub $0x10001,%%eax \n" - "sbb $0x0,%%edx \n" - "sub $0x1,%1 \n" - "idiv %1 \n" - "mov %0, %%eax \n" - : "+a"(num) // %0 - : "c"(div) // %1 - : "memory", "cc", "edx" - ); - return num; -} - -#endif // defined(__x86_64__) || defined(__i386__) - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/scale_win.cc b/drivers/theoraplayer/src/YUV/libyuv/src/scale_win.cc deleted file mode 100644 index 840b9738da..0000000000 --- a/drivers/theoraplayer/src/YUV/libyuv/src/scale_win.cc +++ /dev/null @@ -1,1320 +0,0 @@ -/* - * Copyright 2013 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "libyuv/row.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -// This module is for Visual C x86. -#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER) - -// Offsets for source bytes 0 to 9 -static uvec8 kShuf0 = - { 0, 1, 3, 4, 5, 7, 8, 9, 128, 128, 128, 128, 128, 128, 128, 128 }; - -// Offsets for source bytes 11 to 20 with 8 subtracted = 3 to 12. -static uvec8 kShuf1 = - { 3, 4, 5, 7, 8, 9, 11, 12, 128, 128, 128, 128, 128, 128, 128, 128 }; - -// Offsets for source bytes 21 to 31 with 16 subtracted = 5 to 31. 
-static uvec8 kShuf2 = - { 5, 7, 8, 9, 11, 12, 13, 15, 128, 128, 128, 128, 128, 128, 128, 128 }; - -// Offsets for source bytes 0 to 10 -static uvec8 kShuf01 = - { 0, 1, 1, 2, 2, 3, 4, 5, 5, 6, 6, 7, 8, 9, 9, 10 }; - -// Offsets for source bytes 10 to 21 with 8 subtracted = 3 to 13. -static uvec8 kShuf11 = - { 2, 3, 4, 5, 5, 6, 6, 7, 8, 9, 9, 10, 10, 11, 12, 13 }; - -// Offsets for source bytes 21 to 31 with 16 subtracted = 5 to 31. -static uvec8 kShuf21 = - { 5, 6, 6, 7, 8, 9, 9, 10, 10, 11, 12, 13, 13, 14, 14, 15 }; - -// Coefficients for source bytes 0 to 10 -static uvec8 kMadd01 = - { 3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2 }; - -// Coefficients for source bytes 10 to 21 -static uvec8 kMadd11 = - { 1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1 }; - -// Coefficients for source bytes 21 to 31 -static uvec8 kMadd21 = - { 2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3 }; - -// Coefficients for source bytes 21 to 31 -static vec16 kRound34 = - { 2, 2, 2, 2, 2, 2, 2, 2 }; - -static uvec8 kShuf38a = - { 0, 3, 6, 8, 11, 14, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; - -static uvec8 kShuf38b = - { 128, 128, 128, 128, 128, 128, 0, 3, 6, 8, 11, 14, 128, 128, 128, 128 }; - -// Arrange words 0,3,6 into 0,1,2 -static uvec8 kShufAc = - { 0, 1, 6, 7, 12, 13, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; - -// Arrange words 0,3,6 into 3,4,5 -static uvec8 kShufAc3 = - { 128, 128, 128, 128, 128, 128, 0, 1, 6, 7, 12, 13, 128, 128, 128, 128 }; - -// Scaling values for boxes of 3x3 and 2x3 -static uvec16 kScaleAc33 = - { 65536 / 9, 65536 / 9, 65536 / 6, 65536 / 9, 65536 / 9, 65536 / 6, 0, 0 }; - -// Arrange first value for pixels 0,1,2,3,4,5 -static uvec8 kShufAb0 = - { 0, 128, 3, 128, 6, 128, 8, 128, 11, 128, 14, 128, 128, 128, 128, 128 }; - -// Arrange second value for pixels 0,1,2,3,4,5 -static uvec8 kShufAb1 = - { 1, 128, 4, 128, 7, 128, 9, 128, 12, 128, 15, 128, 128, 128, 128, 128 }; - -// Arrange third value for pixels 0,1,2,3,4,5 -static uvec8 kShufAb2 = 
- { 2, 128, 5, 128, 128, 128, 10, 128, 13, 128, 128, 128, 128, 128, 128, 128 }; - -// Scaling values for boxes of 3x2 and 2x2 -static uvec16 kScaleAb2 = - { 65536 / 3, 65536 / 3, 65536 / 2, 65536 / 3, 65536 / 3, 65536 / 2, 0, 0 }; - -// Reads 32 pixels, throws half away and writes 16 pixels. -// Alignment requirement: src_ptr 16 byte aligned, dst_ptr 16 byte aligned. -__declspec(naked) __declspec(align(16)) -void ScaleRowDown2_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { - __asm { - mov eax, [esp + 4] // src_ptr - // src_stride ignored - mov edx, [esp + 12] // dst_ptr - mov ecx, [esp + 16] // dst_width - - align 4 - wloop: - movdqa xmm0, [eax] - movdqa xmm1, [eax + 16] - lea eax, [eax + 32] - psrlw xmm0, 8 // isolate odd pixels. - psrlw xmm1, 8 - packuswb xmm0, xmm1 - sub ecx, 16 - movdqa [edx], xmm0 - lea edx, [edx + 16] - jg wloop - - ret - } -} - -// Blends 32x1 rectangle to 16x1. -// Alignment requirement: src_ptr 16 byte aligned, dst_ptr 16 byte aligned. -__declspec(naked) __declspec(align(16)) -void ScaleRowDown2Linear_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { - __asm { - mov eax, [esp + 4] // src_ptr - // src_stride - mov edx, [esp + 12] // dst_ptr - mov ecx, [esp + 16] // dst_width - pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff - psrlw xmm5, 8 - - align 4 - wloop: - movdqa xmm0, [eax] - movdqa xmm1, [eax + 16] - lea eax, [eax + 32] - - movdqa xmm2, xmm0 // average columns (32 to 16 pixels) - psrlw xmm0, 8 - movdqa xmm3, xmm1 - psrlw xmm1, 8 - pand xmm2, xmm5 - pand xmm3, xmm5 - pavgw xmm0, xmm2 - pavgw xmm1, xmm3 - packuswb xmm0, xmm1 - - sub ecx, 16 - movdqa [edx], xmm0 - lea edx, [edx + 16] - jg wloop - - ret - } -} - -// Blends 32x2 rectangle to 16x1. -// Alignment requirement: src_ptr 16 byte aligned, dst_ptr 16 byte aligned. 
-__declspec(naked) __declspec(align(16)) -void ScaleRowDown2Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { - __asm { - push esi - mov eax, [esp + 4 + 4] // src_ptr - mov esi, [esp + 4 + 8] // src_stride - mov edx, [esp + 4 + 12] // dst_ptr - mov ecx, [esp + 4 + 16] // dst_width - pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff - psrlw xmm5, 8 - - align 4 - wloop: - movdqa xmm0, [eax] - movdqa xmm1, [eax + 16] - movdqa xmm2, [eax + esi] - movdqa xmm3, [eax + esi + 16] - lea eax, [eax + 32] - pavgb xmm0, xmm2 // average rows - pavgb xmm1, xmm3 - - movdqa xmm2, xmm0 // average columns (32 to 16 pixels) - psrlw xmm0, 8 - movdqa xmm3, xmm1 - psrlw xmm1, 8 - pand xmm2, xmm5 - pand xmm3, xmm5 - pavgw xmm0, xmm2 - pavgw xmm1, xmm3 - packuswb xmm0, xmm1 - - sub ecx, 16 - movdqa [edx], xmm0 - lea edx, [edx + 16] - jg wloop - - pop esi - ret - } -} - -// Reads 32 pixels, throws half away and writes 16 pixels. -// Alignment requirement: src_ptr 16 byte aligned, dst_ptr 16 byte aligned. -__declspec(naked) __declspec(align(16)) -void ScaleRowDown2_Unaligned_SSE2(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { - __asm { - mov eax, [esp + 4] // src_ptr - // src_stride ignored - mov edx, [esp + 12] // dst_ptr - mov ecx, [esp + 16] // dst_width - - align 4 - wloop: - movdqu xmm0, [eax] - movdqu xmm1, [eax + 16] - lea eax, [eax + 32] - psrlw xmm0, 8 // isolate odd pixels. - psrlw xmm1, 8 - packuswb xmm0, xmm1 - sub ecx, 16 - movdqu [edx], xmm0 - lea edx, [edx + 16] - jg wloop - - ret - } -} - -// Blends 32x1 rectangle to 16x1. -// Alignment requirement: src_ptr 16 byte aligned, dst_ptr 16 byte aligned. 
-__declspec(naked) __declspec(align(16)) -void ScaleRowDown2Linear_Unaligned_SSE2(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { - __asm { - mov eax, [esp + 4] // src_ptr - // src_stride - mov edx, [esp + 12] // dst_ptr - mov ecx, [esp + 16] // dst_width - pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff - psrlw xmm5, 8 - - align 4 - wloop: - movdqu xmm0, [eax] - movdqu xmm1, [eax + 16] - lea eax, [eax + 32] - - movdqa xmm2, xmm0 // average columns (32 to 16 pixels) - psrlw xmm0, 8 - movdqa xmm3, xmm1 - psrlw xmm1, 8 - pand xmm2, xmm5 - pand xmm3, xmm5 - pavgw xmm0, xmm2 - pavgw xmm1, xmm3 - packuswb xmm0, xmm1 - - sub ecx, 16 - movdqu [edx], xmm0 - lea edx, [edx + 16] - jg wloop - - ret - } -} - -// Blends 32x2 rectangle to 16x1. -// Alignment requirement: src_ptr 16 byte aligned, dst_ptr 16 byte aligned. -__declspec(naked) __declspec(align(16)) -void ScaleRowDown2Box_Unaligned_SSE2(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { - __asm { - push esi - mov eax, [esp + 4 + 4] // src_ptr - mov esi, [esp + 4 + 8] // src_stride - mov edx, [esp + 4 + 12] // dst_ptr - mov ecx, [esp + 4 + 16] // dst_width - pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff - psrlw xmm5, 8 - - align 4 - wloop: - movdqu xmm0, [eax] - movdqu xmm1, [eax + 16] - movdqu xmm2, [eax + esi] - movdqu xmm3, [eax + esi + 16] - lea eax, [eax + 32] - pavgb xmm0, xmm2 // average rows - pavgb xmm1, xmm3 - - movdqa xmm2, xmm0 // average columns (32 to 16 pixels) - psrlw xmm0, 8 - movdqa xmm3, xmm1 - psrlw xmm1, 8 - pand xmm2, xmm5 - pand xmm3, xmm5 - pavgw xmm0, xmm2 - pavgw xmm1, xmm3 - packuswb xmm0, xmm1 - - sub ecx, 16 - movdqu [edx], xmm0 - lea edx, [edx + 16] - jg wloop - - pop esi - ret - } -} - -// Point samples 32 pixels to 8 pixels. -// Alignment requirement: src_ptr 16 byte aligned, dst_ptr 8 byte aligned. 
-__declspec(naked) __declspec(align(16)) -void ScaleRowDown4_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { - __asm { - mov eax, [esp + 4] // src_ptr - // src_stride ignored - mov edx, [esp + 12] // dst_ptr - mov ecx, [esp + 16] // dst_width - pcmpeqb xmm5, xmm5 // generate mask 0x00ff0000 - psrld xmm5, 24 - pslld xmm5, 16 - - align 4 - wloop: - movdqa xmm0, [eax] - movdqa xmm1, [eax + 16] - lea eax, [eax + 32] - pand xmm0, xmm5 - pand xmm1, xmm5 - packuswb xmm0, xmm1 - psrlw xmm0, 8 - packuswb xmm0, xmm0 - sub ecx, 8 - movq qword ptr [edx], xmm0 - lea edx, [edx + 8] - jg wloop - - ret - } -} - -// Blends 32x4 rectangle to 8x1. -// Alignment requirement: src_ptr 16 byte aligned, dst_ptr 8 byte aligned. -__declspec(naked) __declspec(align(16)) -void ScaleRowDown4Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { - __asm { - push esi - push edi - mov eax, [esp + 8 + 4] // src_ptr - mov esi, [esp + 8 + 8] // src_stride - mov edx, [esp + 8 + 12] // dst_ptr - mov ecx, [esp + 8 + 16] // dst_width - lea edi, [esi + esi * 2] // src_stride * 3 - pcmpeqb xmm7, xmm7 // generate mask 0x00ff00ff - psrlw xmm7, 8 - - align 4 - wloop: - movdqa xmm0, [eax] - movdqa xmm1, [eax + 16] - movdqa xmm2, [eax + esi] - movdqa xmm3, [eax + esi + 16] - pavgb xmm0, xmm2 // average rows - pavgb xmm1, xmm3 - movdqa xmm2, [eax + esi * 2] - movdqa xmm3, [eax + esi * 2 + 16] - movdqa xmm4, [eax + edi] - movdqa xmm5, [eax + edi + 16] - lea eax, [eax + 32] - pavgb xmm2, xmm4 - pavgb xmm3, xmm5 - pavgb xmm0, xmm2 - pavgb xmm1, xmm3 - - movdqa xmm2, xmm0 // average columns (32 to 16 pixels) - psrlw xmm0, 8 - movdqa xmm3, xmm1 - psrlw xmm1, 8 - pand xmm2, xmm7 - pand xmm3, xmm7 - pavgw xmm0, xmm2 - pavgw xmm1, xmm3 - packuswb xmm0, xmm1 - - movdqa xmm2, xmm0 // average columns (16 to 8 pixels) - psrlw xmm0, 8 - pand xmm2, xmm7 - pavgw xmm0, xmm2 - packuswb xmm0, xmm0 - - sub ecx, 8 - movq qword ptr [edx], xmm0 - lea edx, [edx + 8] 
- jg wloop - - pop edi - pop esi - ret - } -} - -// Point samples 32 pixels to 24 pixels. -// Produces three 8 byte values. For each 8 bytes, 16 bytes are read. -// Then shuffled to do the scaling. - -// Note that movdqa+palign may be better than movdqu. -// Alignment requirement: src_ptr 16 byte aligned, dst_ptr 8 byte aligned. -__declspec(naked) __declspec(align(16)) -void ScaleRowDown34_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { - __asm { - mov eax, [esp + 4] // src_ptr - // src_stride ignored - mov edx, [esp + 12] // dst_ptr - mov ecx, [esp + 16] // dst_width - movdqa xmm3, kShuf0 - movdqa xmm4, kShuf1 - movdqa xmm5, kShuf2 - - align 4 - wloop: - movdqa xmm0, [eax] - movdqa xmm1, [eax + 16] - lea eax, [eax + 32] - movdqa xmm2, xmm1 - palignr xmm1, xmm0, 8 - pshufb xmm0, xmm3 - pshufb xmm1, xmm4 - pshufb xmm2, xmm5 - movq qword ptr [edx], xmm0 - movq qword ptr [edx + 8], xmm1 - movq qword ptr [edx + 16], xmm2 - lea edx, [edx + 24] - sub ecx, 24 - jg wloop - - ret - } -} - -// Blends 32x2 rectangle to 24x1 -// Produces three 8 byte values. For each 8 bytes, 16 bytes are read. -// Then shuffled to do the scaling. - -// Register usage: -// xmm0 src_row 0 -// xmm1 src_row 1 -// xmm2 shuf 0 -// xmm3 shuf 1 -// xmm4 shuf 2 -// xmm5 madd 0 -// xmm6 madd 1 -// xmm7 kRound34 - -// Note that movdqa+palign may be better than movdqu. -// Alignment requirement: src_ptr 16 byte aligned, dst_ptr 8 byte aligned. 
-__declspec(naked) __declspec(align(16)) -void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { - __asm { - push esi - mov eax, [esp + 4 + 4] // src_ptr - mov esi, [esp + 4 + 8] // src_stride - mov edx, [esp + 4 + 12] // dst_ptr - mov ecx, [esp + 4 + 16] // dst_width - movdqa xmm2, kShuf01 - movdqa xmm3, kShuf11 - movdqa xmm4, kShuf21 - movdqa xmm5, kMadd01 - movdqa xmm6, kMadd11 - movdqa xmm7, kRound34 - - align 4 - wloop: - movdqa xmm0, [eax] // pixels 0..7 - movdqa xmm1, [eax + esi] - pavgb xmm0, xmm1 - pshufb xmm0, xmm2 - pmaddubsw xmm0, xmm5 - paddsw xmm0, xmm7 - psrlw xmm0, 2 - packuswb xmm0, xmm0 - movq qword ptr [edx], xmm0 - movdqu xmm0, [eax + 8] // pixels 8..15 - movdqu xmm1, [eax + esi + 8] - pavgb xmm0, xmm1 - pshufb xmm0, xmm3 - pmaddubsw xmm0, xmm6 - paddsw xmm0, xmm7 - psrlw xmm0, 2 - packuswb xmm0, xmm0 - movq qword ptr [edx + 8], xmm0 - movdqa xmm0, [eax + 16] // pixels 16..23 - movdqa xmm1, [eax + esi + 16] - lea eax, [eax + 32] - pavgb xmm0, xmm1 - pshufb xmm0, xmm4 - movdqa xmm1, kMadd21 - pmaddubsw xmm0, xmm1 - paddsw xmm0, xmm7 - psrlw xmm0, 2 - packuswb xmm0, xmm0 - sub ecx, 24 - movq qword ptr [edx + 16], xmm0 - lea edx, [edx + 24] - jg wloop - - pop esi - ret - } -} - -// Note that movdqa+palign may be better than movdqu. -// Alignment requirement: src_ptr 16 byte aligned, dst_ptr 8 byte aligned. 
-__declspec(naked) __declspec(align(16)) -void ScaleRowDown34_0_Box_SSSE3(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { - __asm { - push esi - mov eax, [esp + 4 + 4] // src_ptr - mov esi, [esp + 4 + 8] // src_stride - mov edx, [esp + 4 + 12] // dst_ptr - mov ecx, [esp + 4 + 16] // dst_width - movdqa xmm2, kShuf01 - movdqa xmm3, kShuf11 - movdqa xmm4, kShuf21 - movdqa xmm5, kMadd01 - movdqa xmm6, kMadd11 - movdqa xmm7, kRound34 - - align 4 - wloop: - movdqa xmm0, [eax] // pixels 0..7 - movdqa xmm1, [eax + esi] - pavgb xmm1, xmm0 - pavgb xmm0, xmm1 - pshufb xmm0, xmm2 - pmaddubsw xmm0, xmm5 - paddsw xmm0, xmm7 - psrlw xmm0, 2 - packuswb xmm0, xmm0 - movq qword ptr [edx], xmm0 - movdqu xmm0, [eax + 8] // pixels 8..15 - movdqu xmm1, [eax + esi + 8] - pavgb xmm1, xmm0 - pavgb xmm0, xmm1 - pshufb xmm0, xmm3 - pmaddubsw xmm0, xmm6 - paddsw xmm0, xmm7 - psrlw xmm0, 2 - packuswb xmm0, xmm0 - movq qword ptr [edx + 8], xmm0 - movdqa xmm0, [eax + 16] // pixels 16..23 - movdqa xmm1, [eax + esi + 16] - lea eax, [eax + 32] - pavgb xmm1, xmm0 - pavgb xmm0, xmm1 - pshufb xmm0, xmm4 - movdqa xmm1, kMadd21 - pmaddubsw xmm0, xmm1 - paddsw xmm0, xmm7 - psrlw xmm0, 2 - packuswb xmm0, xmm0 - sub ecx, 24 - movq qword ptr [edx + 16], xmm0 - lea edx, [edx+24] - jg wloop - - pop esi - ret - } -} - -// 3/8 point sampler - -// Scale 32 pixels to 12 -__declspec(naked) __declspec(align(16)) -void ScaleRowDown38_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { - __asm { - mov eax, [esp + 4] // src_ptr - // src_stride ignored - mov edx, [esp + 12] // dst_ptr - mov ecx, [esp + 16] // dst_width - movdqa xmm4, kShuf38a - movdqa xmm5, kShuf38b - - align 4 - xloop: - movdqa xmm0, [eax] // 16 pixels -> 0,1,2,3,4,5 - movdqa xmm1, [eax + 16] // 16 pixels -> 6,7,8,9,10,11 - lea eax, [eax + 32] - pshufb xmm0, xmm4 - pshufb xmm1, xmm5 - paddusb xmm0, xmm1 - - sub ecx, 12 - movq qword ptr [edx], xmm0 // write 12 pixels - movhlps xmm1, xmm0 
- movd [edx + 8], xmm1 - lea edx, [edx + 12] - jg xloop - - ret - } -} - -// Scale 16x3 pixels to 6x1 with interpolation -__declspec(naked) __declspec(align(16)) -void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { - __asm { - push esi - mov eax, [esp + 4 + 4] // src_ptr - mov esi, [esp + 4 + 8] // src_stride - mov edx, [esp + 4 + 12] // dst_ptr - mov ecx, [esp + 4 + 16] // dst_width - movdqa xmm2, kShufAc - movdqa xmm3, kShufAc3 - movdqa xmm4, kScaleAc33 - pxor xmm5, xmm5 - - align 4 - xloop: - movdqa xmm0, [eax] // sum up 3 rows into xmm0/1 - movdqa xmm6, [eax + esi] - movhlps xmm1, xmm0 - movhlps xmm7, xmm6 - punpcklbw xmm0, xmm5 - punpcklbw xmm1, xmm5 - punpcklbw xmm6, xmm5 - punpcklbw xmm7, xmm5 - paddusw xmm0, xmm6 - paddusw xmm1, xmm7 - movdqa xmm6, [eax + esi * 2] - lea eax, [eax + 16] - movhlps xmm7, xmm6 - punpcklbw xmm6, xmm5 - punpcklbw xmm7, xmm5 - paddusw xmm0, xmm6 - paddusw xmm1, xmm7 - - movdqa xmm6, xmm0 // 8 pixels -> 0,1,2 of xmm6 - psrldq xmm0, 2 - paddusw xmm6, xmm0 - psrldq xmm0, 2 - paddusw xmm6, xmm0 - pshufb xmm6, xmm2 - - movdqa xmm7, xmm1 // 8 pixels -> 3,4,5 of xmm6 - psrldq xmm1, 2 - paddusw xmm7, xmm1 - psrldq xmm1, 2 - paddusw xmm7, xmm1 - pshufb xmm7, xmm3 - paddusw xmm6, xmm7 - - pmulhuw xmm6, xmm4 // divide by 9,9,6, 9,9,6 - packuswb xmm6, xmm6 - - sub ecx, 6 - movd [edx], xmm6 // write 6 pixels - psrlq xmm6, 16 - movd [edx + 2], xmm6 - lea edx, [edx + 6] - jg xloop - - pop esi - ret - } -} - -// Scale 16x2 pixels to 6x1 with interpolation -__declspec(naked) __declspec(align(16)) -void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr, - ptrdiff_t src_stride, - uint8* dst_ptr, int dst_width) { - __asm { - push esi - mov eax, [esp + 4 + 4] // src_ptr - mov esi, [esp + 4 + 8] // src_stride - mov edx, [esp + 4 + 12] // dst_ptr - mov ecx, [esp + 4 + 16] // dst_width - movdqa xmm2, kShufAb0 - movdqa xmm3, kShufAb1 - movdqa xmm4, kShufAb2 - movdqa xmm5, kScaleAb2 - - align 4 - 
xloop: - movdqa xmm0, [eax] // average 2 rows into xmm0 - pavgb xmm0, [eax + esi] - lea eax, [eax + 16] - - movdqa xmm1, xmm0 // 16 pixels -> 0,1,2,3,4,5 of xmm1 - pshufb xmm1, xmm2 - movdqa xmm6, xmm0 - pshufb xmm6, xmm3 - paddusw xmm1, xmm6 - pshufb xmm0, xmm4 - paddusw xmm1, xmm0 - - pmulhuw xmm1, xmm5 // divide by 3,3,2, 3,3,2 - packuswb xmm1, xmm1 - - sub ecx, 6 - movd [edx], xmm1 // write 6 pixels - psrlq xmm1, 16 - movd [edx + 2], xmm1 - lea edx, [edx + 6] - jg xloop - - pop esi - ret - } -} - -// Reads 16xN bytes and produces 16 shorts at a time. -// TODO(fbarchard): Make this handle 4xN bytes for any width ARGB. -__declspec(naked) __declspec(align(16)) -void ScaleAddRows_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, - uint16* dst_ptr, int src_width, - int src_height) { - __asm { - push esi - push edi - push ebx - push ebp - mov esi, [esp + 16 + 4] // src_ptr - mov edx, [esp + 16 + 8] // src_stride - mov edi, [esp + 16 + 12] // dst_ptr - mov ecx, [esp + 16 + 16] // dst_width - mov ebx, [esp + 16 + 20] // height - pxor xmm4, xmm4 - dec ebx - - align 4 - xloop: - // first row - movdqa xmm0, [esi] - lea eax, [esi + edx] - movdqa xmm1, xmm0 - punpcklbw xmm0, xmm4 - punpckhbw xmm1, xmm4 - lea esi, [esi + 16] - mov ebp, ebx - test ebp, ebp - je ydone - - // sum remaining rows - align 4 - yloop: - movdqa xmm2, [eax] // read 16 pixels - lea eax, [eax + edx] // advance to next row - movdqa xmm3, xmm2 - punpcklbw xmm2, xmm4 - punpckhbw xmm3, xmm4 - paddusw xmm0, xmm2 // sum 16 words - paddusw xmm1, xmm3 - sub ebp, 1 - jg yloop - - align 4 - ydone: - movdqa [edi], xmm0 - movdqa [edi + 16], xmm1 - lea edi, [edi + 32] - - sub ecx, 16 - jg xloop - - pop ebp - pop ebx - pop edi - pop esi - ret - } -} - -// Bilinear column filtering. SSSE3 version. 
-// TODO(fbarchard): Port to Neon -// TODO(fbarchard): Switch the following: -// xor ebx, ebx -// mov bx, word ptr [esi + eax] // 2 source x0 pixels -// To -// movzx ebx, word ptr [esi + eax] // 2 source x0 pixels -// when drmemory bug fixed. -// https://code.google.com/p/drmemory/issues/detail?id=1396 - -__declspec(naked) __declspec(align(16)) -void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr, - int dst_width, int x, int dx) { - __asm { - push ebx - push esi - push edi - mov edi, [esp + 12 + 4] // dst_ptr - mov esi, [esp + 12 + 8] // src_ptr - mov ecx, [esp + 12 + 12] // dst_width - movd xmm2, [esp + 12 + 16] // x - movd xmm3, [esp + 12 + 20] // dx - mov eax, 0x04040000 // shuffle to line up fractions with pixel. - movd xmm5, eax - pcmpeqb xmm6, xmm6 // generate 0x007f for inverting fraction. - psrlw xmm6, 9 - pextrw eax, xmm2, 1 // get x0 integer. preroll - sub ecx, 2 - jl xloop29 - - movdqa xmm0, xmm2 // x1 = x0 + dx - paddd xmm0, xmm3 - punpckldq xmm2, xmm0 // x0 x1 - punpckldq xmm3, xmm3 // dx dx - paddd xmm3, xmm3 // dx * 2, dx * 2 - pextrw edx, xmm2, 3 // get x1 integer. preroll - - // 2 Pixel loop. - align 4 - xloop2: - movdqa xmm1, xmm2 // x0, x1 fractions. - paddd xmm2, xmm3 // x += dx - movzx ebx, word ptr [esi + eax] // 2 source x0 pixels - movd xmm0, ebx - psrlw xmm1, 9 // 7 bit fractions. - movzx ebx, word ptr [esi + edx] // 2 source x1 pixels - movd xmm4, ebx - pshufb xmm1, xmm5 // 0011 - punpcklwd xmm0, xmm4 - pxor xmm1, xmm6 // 0..7f and 7f..0 - pmaddubsw xmm0, xmm1 // 16 bit, 2 pixels. - pextrw eax, xmm2, 1 // get x0 integer. next iteration. - pextrw edx, xmm2, 3 // get x1 integer. next iteration. - psrlw xmm0, 7 // 8.7 fixed point to low 8 bits. - packuswb xmm0, xmm0 // 8 bits, 2 pixels. 
- movd ebx, xmm0 - mov [edi], bx - lea edi, [edi + 2] - sub ecx, 2 // 2 pixels - jge xloop2 - - align 4 - xloop29: - - add ecx, 2 - 1 - jl xloop99 - - // 1 pixel remainder - movzx ebx, word ptr [esi + eax] // 2 source x0 pixels - movd xmm0, ebx - psrlw xmm2, 9 // 7 bit fractions. - pshufb xmm2, xmm5 // 0011 - pxor xmm2, xmm6 // 0..7f and 7f..0 - pmaddubsw xmm0, xmm2 // 16 bit - psrlw xmm0, 7 // 8.7 fixed point to low 8 bits. - packuswb xmm0, xmm0 // 8 bits - movd ebx, xmm0 - mov [edi], bl - - align 4 - xloop99: - - pop edi - pop esi - pop ebx - ret - } -} - -// Reads 16 pixels, duplicates them and writes 32 pixels. -// Alignment requirement: src_argb 16 byte aligned, dst_argb 16 byte aligned. -__declspec(naked) __declspec(align(16)) -void ScaleColsUp2_SSE2(uint8* dst_ptr, const uint8* src_ptr, - int dst_width, int x, int dx) { - __asm { - mov edx, [esp + 4] // dst_ptr - mov eax, [esp + 8] // src_ptr - mov ecx, [esp + 12] // dst_width - - align 4 - wloop: - movdqa xmm0, [eax] - lea eax, [eax + 16] - movdqa xmm1, xmm0 - punpcklbw xmm0, xmm0 - punpckhbw xmm1, xmm1 - sub ecx, 32 - movdqa [edx], xmm0 - movdqa [edx + 16], xmm1 - lea edx, [edx + 32] - jg wloop - - ret - } -} - -// Reads 8 pixels, throws half away and writes 4 even pixels (0, 2, 4, 6) -// Alignment requirement: src_argb 16 byte aligned, dst_argb 16 byte aligned. -__declspec(naked) __declspec(align(16)) -void ScaleARGBRowDown2_SSE2(const uint8* src_argb, - ptrdiff_t src_stride, - uint8* dst_argb, int dst_width) { - __asm { - mov eax, [esp + 4] // src_argb - // src_stride ignored - mov edx, [esp + 12] // dst_argb - mov ecx, [esp + 16] // dst_width - - align 4 - wloop: - movdqa xmm0, [eax] - movdqa xmm1, [eax + 16] - lea eax, [eax + 32] - shufps xmm0, xmm1, 0xdd - sub ecx, 4 - movdqa [edx], xmm0 - lea edx, [edx + 16] - jg wloop - - ret - } -} - -// Blends 8x1 rectangle to 4x1. -// Alignment requirement: src_argb 16 byte aligned, dst_argb 16 byte aligned. 
-__declspec(naked) __declspec(align(16)) -void ScaleARGBRowDown2Linear_SSE2(const uint8* src_argb, - ptrdiff_t src_stride, - uint8* dst_argb, int dst_width) { - __asm { - mov eax, [esp + 4] // src_argb - // src_stride ignored - mov edx, [esp + 12] // dst_argb - mov ecx, [esp + 16] // dst_width - - align 4 - wloop: - movdqa xmm0, [eax] - movdqa xmm1, [eax + 16] - lea eax, [eax + 32] - movdqa xmm2, xmm0 - shufps xmm0, xmm1, 0x88 // even pixels - shufps xmm2, xmm1, 0xdd // odd pixels - pavgb xmm0, xmm2 - sub ecx, 4 - movdqa [edx], xmm0 - lea edx, [edx + 16] - jg wloop - - ret - } -} - -// Blends 8x2 rectangle to 4x1. -// Alignment requirement: src_argb 16 byte aligned, dst_argb 16 byte aligned. -__declspec(naked) __declspec(align(16)) -void ScaleARGBRowDown2Box_SSE2(const uint8* src_argb, - ptrdiff_t src_stride, - uint8* dst_argb, int dst_width) { - __asm { - push esi - mov eax, [esp + 4 + 4] // src_argb - mov esi, [esp + 4 + 8] // src_stride - mov edx, [esp + 4 + 12] // dst_argb - mov ecx, [esp + 4 + 16] // dst_width - - align 4 - wloop: - movdqa xmm0, [eax] - movdqa xmm1, [eax + 16] - movdqa xmm2, [eax + esi] - movdqa xmm3, [eax + esi + 16] - lea eax, [eax + 32] - pavgb xmm0, xmm2 // average rows - pavgb xmm1, xmm3 - movdqa xmm2, xmm0 // average columns (8 to 4 pixels) - shufps xmm0, xmm1, 0x88 // even pixels - shufps xmm2, xmm1, 0xdd // odd pixels - pavgb xmm0, xmm2 - sub ecx, 4 - movdqa [edx], xmm0 - lea edx, [edx + 16] - jg wloop - - pop esi - ret - } -} - -// Reads 4 pixels at a time. -// Alignment requirement: dst_argb 16 byte aligned. 
-__declspec(naked) __declspec(align(16)) -void ScaleARGBRowDownEven_SSE2(const uint8* src_argb, ptrdiff_t src_stride, - int src_stepx, - uint8* dst_argb, int dst_width) { - __asm { - push ebx - push edi - mov eax, [esp + 8 + 4] // src_argb - // src_stride ignored - mov ebx, [esp + 8 + 12] // src_stepx - mov edx, [esp + 8 + 16] // dst_argb - mov ecx, [esp + 8 + 20] // dst_width - lea ebx, [ebx * 4] - lea edi, [ebx + ebx * 2] - - align 4 - wloop: - movd xmm0, [eax] - movd xmm1, [eax + ebx] - punpckldq xmm0, xmm1 - movd xmm2, [eax + ebx * 2] - movd xmm3, [eax + edi] - lea eax, [eax + ebx * 4] - punpckldq xmm2, xmm3 - punpcklqdq xmm0, xmm2 - sub ecx, 4 - movdqa [edx], xmm0 - lea edx, [edx + 16] - jg wloop - - pop edi - pop ebx - ret - } -} - -// Blends four 2x2 to 4x1. -// Alignment requirement: dst_argb 16 byte aligned. -__declspec(naked) __declspec(align(16)) -void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb, - ptrdiff_t src_stride, - int src_stepx, - uint8* dst_argb, int dst_width) { - __asm { - push ebx - push esi - push edi - mov eax, [esp + 12 + 4] // src_argb - mov esi, [esp + 12 + 8] // src_stride - mov ebx, [esp + 12 + 12] // src_stepx - mov edx, [esp + 12 + 16] // dst_argb - mov ecx, [esp + 12 + 20] // dst_width - lea esi, [eax + esi] // row1 pointer - lea ebx, [ebx * 4] - lea edi, [ebx + ebx * 2] - - align 4 - wloop: - movq xmm0, qword ptr [eax] // row0 4 pairs - movhps xmm0, qword ptr [eax + ebx] - movq xmm1, qword ptr [eax + ebx * 2] - movhps xmm1, qword ptr [eax + edi] - lea eax, [eax + ebx * 4] - movq xmm2, qword ptr [esi] // row1 4 pairs - movhps xmm2, qword ptr [esi + ebx] - movq xmm3, qword ptr [esi + ebx * 2] - movhps xmm3, qword ptr [esi + edi] - lea esi, [esi + ebx * 4] - pavgb xmm0, xmm2 // average rows - pavgb xmm1, xmm3 - movdqa xmm2, xmm0 // average columns (8 to 4 pixels) - shufps xmm0, xmm1, 0x88 // even pixels - shufps xmm2, xmm1, 0xdd // odd pixels - pavgb xmm0, xmm2 - sub ecx, 4 - movdqa [edx], xmm0 - lea edx, [edx + 16] - jg wloop 
- - pop edi - pop esi - pop ebx - ret - } -} - -// Column scaling unfiltered. SSE2 version. -__declspec(naked) __declspec(align(16)) -void ScaleARGBCols_SSE2(uint8* dst_argb, const uint8* src_argb, - int dst_width, int x, int dx) { - __asm { - push edi - push esi - mov edi, [esp + 8 + 4] // dst_argb - mov esi, [esp + 8 + 8] // src_argb - mov ecx, [esp + 8 + 12] // dst_width - movd xmm2, [esp + 8 + 16] // x - movd xmm3, [esp + 8 + 20] // dx - - pshufd xmm2, xmm2, 0 // x0 x0 x0 x0 - pshufd xmm0, xmm3, 0x11 // dx 0 dx 0 - paddd xmm2, xmm0 - paddd xmm3, xmm3 // 0, 0, 0, dx * 2 - pshufd xmm0, xmm3, 0x05 // dx * 2, dx * 2, 0, 0 - paddd xmm2, xmm0 // x3 x2 x1 x0 - paddd xmm3, xmm3 // 0, 0, 0, dx * 4 - pshufd xmm3, xmm3, 0 // dx * 4, dx * 4, dx * 4, dx * 4 - - pextrw eax, xmm2, 1 // get x0 integer. - pextrw edx, xmm2, 3 // get x1 integer. - - cmp ecx, 0 - jle xloop99 - sub ecx, 4 - jl xloop49 - - // 4 Pixel loop. - align 4 - xloop4: - movd xmm0, [esi + eax * 4] // 1 source x0 pixels - movd xmm1, [esi + edx * 4] // 1 source x1 pixels - pextrw eax, xmm2, 5 // get x2 integer. - pextrw edx, xmm2, 7 // get x3 integer. - paddd xmm2, xmm3 // x += dx - punpckldq xmm0, xmm1 // x0 x1 - - movd xmm1, [esi + eax * 4] // 1 source x2 pixels - movd xmm4, [esi + edx * 4] // 1 source x3 pixels - pextrw eax, xmm2, 1 // get x0 integer. next iteration. - pextrw edx, xmm2, 3 // get x1 integer. next iteration. - punpckldq xmm1, xmm4 // x2 x3 - punpcklqdq xmm0, xmm1 // x0 x1 x2 x3 - sub ecx, 4 // 4 pixels - movdqu [edi], xmm0 - lea edi, [edi + 16] - jge xloop4 - - align 4 - xloop49: - test ecx, 2 - je xloop29 - - // 2 Pixels. - movd xmm0, [esi + eax * 4] // 1 source x0 pixels - movd xmm1, [esi + edx * 4] // 1 source x1 pixels - pextrw eax, xmm2, 5 // get x2 integer. - punpckldq xmm0, xmm1 // x0 x1 - - movq qword ptr [edi], xmm0 - lea edi, [edi + 8] - - xloop29: - test ecx, 1 - je xloop99 - - // 1 Pixels. 
- movd xmm0, [esi + eax * 4] // 1 source x2 pixels - movd dword ptr [edi], xmm0 - align 4 - xloop99: - - pop esi - pop edi - ret - } -} - -// Bilinear row filtering combines 2x1 -> 1x1. SSSE3 version. -// TODO(fbarchard): Port to Neon - -// Shuffle table for arranging 2 pixels into pairs for pmaddubsw -static uvec8 kShuffleColARGB = { - 0u, 4u, 1u, 5u, 2u, 6u, 3u, 7u, // bbggrraa 1st pixel - 8u, 12u, 9u, 13u, 10u, 14u, 11u, 15u // bbggrraa 2nd pixel -}; - -// Shuffle table for duplicating 2 fractions into 8 bytes each -static uvec8 kShuffleFractions = { - 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 4u, 4u, 4u, 4u, 4u, 4u, 4u, 4u, -}; - -__declspec(naked) __declspec(align(16)) -void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb, - int dst_width, int x, int dx) { - __asm { - push esi - push edi - mov edi, [esp + 8 + 4] // dst_argb - mov esi, [esp + 8 + 8] // src_argb - mov ecx, [esp + 8 + 12] // dst_width - movd xmm2, [esp + 8 + 16] // x - movd xmm3, [esp + 8 + 20] // dx - movdqa xmm4, kShuffleColARGB - movdqa xmm5, kShuffleFractions - pcmpeqb xmm6, xmm6 // generate 0x007f for inverting fraction. - psrlw xmm6, 9 - pextrw eax, xmm2, 1 // get x0 integer. preroll - sub ecx, 2 - jl xloop29 - - movdqa xmm0, xmm2 // x1 = x0 + dx - paddd xmm0, xmm3 - punpckldq xmm2, xmm0 // x0 x1 - punpckldq xmm3, xmm3 // dx dx - paddd xmm3, xmm3 // dx * 2, dx * 2 - pextrw edx, xmm2, 3 // get x1 integer. preroll - - // 2 Pixel loop. - align 4 - xloop2: - movdqa xmm1, xmm2 // x0, x1 fractions. - paddd xmm2, xmm3 // x += dx - movq xmm0, qword ptr [esi + eax * 4] // 2 source x0 pixels - psrlw xmm1, 9 // 7 bit fractions. - movhps xmm0, qword ptr [esi + edx * 4] // 2 source x1 pixels - pshufb xmm1, xmm5 // 0000000011111111 - pshufb xmm0, xmm4 // arrange pixels into pairs - pxor xmm1, xmm6 // 0..7f and 7f..0 - pmaddubsw xmm0, xmm1 // argb_argb 16 bit, 2 pixels. - pextrw eax, xmm2, 1 // get x0 integer. next iteration. - pextrw edx, xmm2, 3 // get x1 integer. next iteration. 
- psrlw xmm0, 7 // argb 8.7 fixed point to low 8 bits. - packuswb xmm0, xmm0 // argb_argb 8 bits, 2 pixels. - movq qword ptr [edi], xmm0 - lea edi, [edi + 8] - sub ecx, 2 // 2 pixels - jge xloop2 - - align 4 - xloop29: - - add ecx, 2 - 1 - jl xloop99 - - // 1 pixel remainder - psrlw xmm2, 9 // 7 bit fractions. - movq xmm0, qword ptr [esi + eax * 4] // 2 source x0 pixels - pshufb xmm2, xmm5 // 00000000 - pshufb xmm0, xmm4 // arrange pixels into pairs - pxor xmm2, xmm6 // 0..7f and 7f..0 - pmaddubsw xmm0, xmm2 // argb 16 bit, 1 pixel. - psrlw xmm0, 7 - packuswb xmm0, xmm0 // argb 8 bits, 1 pixel. - movd [edi], xmm0 - - align 4 - xloop99: - - pop edi - pop esi - ret - } -} - -// Reads 4 pixels, duplicates them and writes 8 pixels. -// Alignment requirement: src_argb 16 byte aligned, dst_argb 16 byte aligned. -__declspec(naked) __declspec(align(16)) -void ScaleARGBColsUp2_SSE2(uint8* dst_argb, const uint8* src_argb, - int dst_width, int x, int dx) { - __asm { - mov edx, [esp + 4] // dst_argb - mov eax, [esp + 8] // src_argb - mov ecx, [esp + 12] // dst_width - - align 4 - wloop: - movdqa xmm0, [eax] - lea eax, [eax + 16] - movdqa xmm1, xmm0 - punpckldq xmm0, xmm0 - punpckhdq xmm1, xmm1 - sub ecx, 8 - movdqa [edx], xmm0 - movdqa [edx + 16], xmm1 - lea edx, [edx + 32] - jg wloop - - ret - } -} - -// Divide num by div and return as 16.16 fixed point result. -__declspec(naked) __declspec(align(16)) -int FixedDiv_X86(int num, int div) { - __asm { - mov eax, [esp + 4] // num - cdq // extend num to 64 bits - shld edx, eax, 16 // 32.16 - shl eax, 16 - idiv dword ptr [esp + 8] - ret - } -} - -// Divide num by div and return as 16.16 fixed point result. 
-__declspec(naked) __declspec(align(16)) -int FixedDiv1_X86(int num, int div) { - __asm { - mov eax, [esp + 4] // num - mov ecx, [esp + 8] // denom - cdq // extend num to 64 bits - shld edx, eax, 16 // 32.16 - shl eax, 16 - sub eax, 0x00010001 - sbb edx, 0 - sub ecx, 1 - idiv ecx - ret - } -} - -#endif // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER) - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/video_common.cc b/drivers/theoraplayer/src/YUV/libyuv/src/video_common.cc deleted file mode 100755 index efbedf46e2..0000000000 --- a/drivers/theoraplayer/src/YUV/libyuv/src/video_common.cc +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright 2011 The LibYuv Project Authors. All rights reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#include "libyuv/video_common.h" - -#ifdef __cplusplus -namespace libyuv { -extern "C" { -#endif - -#define ARRAY_SIZE(x) (int)(sizeof(x) / sizeof(x[0])) - -struct FourCCAliasEntry { - uint32 alias; - uint32 canonical; -}; - -static const struct FourCCAliasEntry kFourCCAliases[] = { - {FOURCC_IYUV, FOURCC_I420}, - {FOURCC_YU16, FOURCC_I422}, - {FOURCC_YU24, FOURCC_I444}, - {FOURCC_YUYV, FOURCC_YUY2}, - {FOURCC_YUVS, FOURCC_YUY2}, // kCMPixelFormat_422YpCbCr8_yuvs - {FOURCC_HDYC, FOURCC_UYVY}, - {FOURCC_2VUY, FOURCC_UYVY}, // kCMPixelFormat_422YpCbCr8 - {FOURCC_JPEG, FOURCC_MJPG}, // Note: JPEG has DHT while MJPG does not. 
- {FOURCC_DMB1, FOURCC_MJPG}, - {FOURCC_BA81, FOURCC_BGGR}, - {FOURCC_RGB3, FOURCC_RAW }, - {FOURCC_BGR3, FOURCC_24BG}, - {FOURCC_CM32, FOURCC_BGRA}, // kCMPixelFormat_32ARGB - {FOURCC_CM24, FOURCC_RAW }, // kCMPixelFormat_24RGB - {FOURCC_L555, FOURCC_RGBO}, // kCMPixelFormat_16LE555 - {FOURCC_L565, FOURCC_RGBP}, // kCMPixelFormat_16LE565 - {FOURCC_5551, FOURCC_RGBO}, // kCMPixelFormat_16LE5551 -}; -// TODO(fbarchard): Consider mapping kCMPixelFormat_32BGRA to FOURCC_ARGB. -// {FOURCC_BGRA, FOURCC_ARGB}, // kCMPixelFormat_32BGRA - -LIBYUV_API -uint32 CanonicalFourCC(uint32 fourcc) { - int i; - for (i = 0; i < ARRAY_SIZE(kFourCCAliases); ++i) { - if (kFourCCAliases[i].alias == fourcc) { - return kFourCCAliases[i].canonical; - } - } - // Not an alias, so return it as-is. - return fourcc; -} - -#ifdef __cplusplus -} // extern "C" -} // namespace libyuv -#endif - diff --git a/drivers/theoraplayer/src/YUV/libyuv/src/x86inc.asm b/drivers/theoraplayer/src/YUV/libyuv/src/x86inc.asm deleted file mode 100755 index cb5c32df3a..0000000000 --- a/drivers/theoraplayer/src/YUV/libyuv/src/x86inc.asm +++ /dev/null @@ -1,1136 +0,0 @@ -;***************************************************************************** -;* x86inc.asm: x264asm abstraction layer -;***************************************************************************** -;* Copyright (C) 2005-2012 x264 project -;* -;* Authors: Loren Merritt <lorenm@u.washington.edu> -;* Anton Mitrofanov <BugMaster@narod.ru> -;* Jason Garrett-Glaser <darkshikari@gmail.com> -;* Henrik Gramner <hengar-6@student.ltu.se> -;* -;* Permission to use, copy, modify, and/or distribute this software for any -;* purpose with or without fee is hereby granted, provided that the above -;* copyright notice and this permission notice appear in all copies. -;* -;* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES -;* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF -;* MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR -;* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -;* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN -;* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF -;* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. -;***************************************************************************** - -; This is a header file for the x264ASM assembly language, which uses -; NASM/YASM syntax combined with a large number of macros to provide easy -; abstraction between different calling conventions (x86_32, win64, linux64). -; It also has various other useful features to simplify writing the kind of -; DSP functions that are most often used in x264. - -; Unlike the rest of x264, this file is available under an ISC license, as it -; has significant usefulness outside of x264 and we want it to be available -; to the largest audience possible. Of course, if you modify it for your own -; purposes to add a new feature, we strongly encourage contributing a patch -; as this feature might be useful for others as well. Send patches or ideas -; to x264-devel@videolan.org . - -; Local changes for libyuv: -; remove %define program_name and references in labels -; rename cpus to uppercase - -%define WIN64 0 -%define UNIX64 0 -%if ARCH_X86_64 - %ifidn __OUTPUT_FORMAT__,win32 - %define WIN64 1 - %elifidn __OUTPUT_FORMAT__,win64 - %define WIN64 1 - %else - %define UNIX64 1 - %endif -%endif - -%ifdef PREFIX - %define mangle(x) _ %+ x -%else - %define mangle(x) x -%endif - -; Name of the .rodata section. -; Kludge: Something on OS X fails to align .rodata even given an align attribute, -; so use a different read-only section. 
-%macro SECTION_RODATA 0-1 16 - %ifidn __OUTPUT_FORMAT__,macho64 - SECTION .text align=%1 - %elifidn __OUTPUT_FORMAT__,macho - SECTION .text align=%1 - fakegot: - %elifidn __OUTPUT_FORMAT__,aout - section .text - %else - SECTION .rodata align=%1 - %endif -%endmacro - -; aout does not support align= -%macro SECTION_TEXT 0-1 16 - %ifidn __OUTPUT_FORMAT__,aout - SECTION .text - %else - SECTION .text align=%1 - %endif -%endmacro - -%if WIN64 - %define PIC -%elif ARCH_X86_64 == 0 -; x86_32 doesn't require PIC. -; Some distros prefer shared objects to be PIC, but nothing breaks if -; the code contains a few textrels, so we'll skip that complexity. - %undef PIC -%endif -%ifdef PIC - default rel -%endif - -; Always use long nops (reduces 0x90 spam in disassembly on x86_32) -CPU amdnop - -; Macros to eliminate most code duplication between x86_32 and x86_64: -; Currently this works only for leaf functions which load all their arguments -; into registers at the start, and make no other use of the stack. Luckily that -; covers most of x264's asm. - -; PROLOGUE: -; %1 = number of arguments. loads them from stack if needed. -; %2 = number of registers used. pushes callee-saved regs if needed. -; %3 = number of xmm registers used. pushes callee-saved xmm regs if needed. -; %4 = list of names to define to registers -; PROLOGUE can also be invoked by adding the same options to cglobal - -; e.g. -; cglobal foo, 2,3,0, dst, src, tmp -; declares a function (foo), taking two args (dst and src) and one local variable (tmp) - -; TODO Some functions can use some args directly from the stack. If they're the -; last args then you can just not declare them, but if they're in the middle -; we need more flexible macro. - -; RET: -; Pops anything that was pushed by PROLOGUE, and returns. - -; REP_RET: -; Same, but if it doesn't pop anything it becomes a 2-byte ret, for athlons -; which are slow when a normal ret follows a branch. 
- -; registers: -; rN and rNq are the native-size register holding function argument N -; rNd, rNw, rNb are dword, word, and byte size -; rNh is the high 8 bits of the word size -; rNm is the original location of arg N (a register or on the stack), dword -; rNmp is native size - -%macro DECLARE_REG 2-3 - %define r%1q %2 - %define r%1d %2d - %define r%1w %2w - %define r%1b %2b - %define r%1h %2h - %if %0 == 2 - %define r%1m %2d - %define r%1mp %2 - %elif ARCH_X86_64 ; memory - %define r%1m [rsp + stack_offset + %3] - %define r%1mp qword r %+ %1m - %else - %define r%1m [esp + stack_offset + %3] - %define r%1mp dword r %+ %1m - %endif - %define r%1 %2 -%endmacro - -%macro DECLARE_REG_SIZE 3 - %define r%1q r%1 - %define e%1q r%1 - %define r%1d e%1 - %define e%1d e%1 - %define r%1w %1 - %define e%1w %1 - %define r%1h %3 - %define e%1h %3 - %define r%1b %2 - %define e%1b %2 -%if ARCH_X86_64 == 0 - %define r%1 e%1 -%endif -%endmacro - -DECLARE_REG_SIZE ax, al, ah -DECLARE_REG_SIZE bx, bl, bh -DECLARE_REG_SIZE cx, cl, ch -DECLARE_REG_SIZE dx, dl, dh -DECLARE_REG_SIZE si, sil, null -DECLARE_REG_SIZE di, dil, null -DECLARE_REG_SIZE bp, bpl, null - -; t# defines for when per-arch register allocation is more complex than just function arguments - -%macro DECLARE_REG_TMP 1-* - %assign %%i 0 - %rep %0 - CAT_XDEFINE t, %%i, r%1 - %assign %%i %%i+1 - %rotate 1 - %endrep -%endmacro - -%macro DECLARE_REG_TMP_SIZE 0-* - %rep %0 - %define t%1q t%1 %+ q - %define t%1d t%1 %+ d - %define t%1w t%1 %+ w - %define t%1h t%1 %+ h - %define t%1b t%1 %+ b - %rotate 1 - %endrep -%endmacro - -DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14 - -%if ARCH_X86_64 - %define gprsize 8 -%else - %define gprsize 4 -%endif - -%macro PUSH 1 - push %1 - %assign stack_offset stack_offset+gprsize -%endmacro - -%macro POP 1 - pop %1 - %assign stack_offset stack_offset-gprsize -%endmacro - -%macro PUSH_IF_USED 1-* - %rep %0 - %if %1 < regs_used - PUSH r%1 - %endif - %rotate 1 - %endrep -%endmacro - 
-%macro POP_IF_USED 1-* - %rep %0 - %if %1 < regs_used - pop r%1 - %endif - %rotate 1 - %endrep -%endmacro - -%macro LOAD_IF_USED 1-* - %rep %0 - %if %1 < num_args - mov r%1, r %+ %1 %+ mp - %endif - %rotate 1 - %endrep -%endmacro - -%macro SUB 2 - sub %1, %2 - %ifidn %1, rsp - %assign stack_offset stack_offset+(%2) - %endif -%endmacro - -%macro ADD 2 - add %1, %2 - %ifidn %1, rsp - %assign stack_offset stack_offset-(%2) - %endif -%endmacro - -%macro movifnidn 2 - %ifnidn %1, %2 - mov %1, %2 - %endif -%endmacro - -%macro movsxdifnidn 2 - %ifnidn %1, %2 - movsxd %1, %2 - %endif -%endmacro - -%macro ASSERT 1 - %if (%1) == 0 - %error assert failed - %endif -%endmacro - -%macro DEFINE_ARGS 0-* - %ifdef n_arg_names - %assign %%i 0 - %rep n_arg_names - CAT_UNDEF arg_name %+ %%i, q - CAT_UNDEF arg_name %+ %%i, d - CAT_UNDEF arg_name %+ %%i, w - CAT_UNDEF arg_name %+ %%i, h - CAT_UNDEF arg_name %+ %%i, b - CAT_UNDEF arg_name %+ %%i, m - CAT_UNDEF arg_name %+ %%i, mp - CAT_UNDEF arg_name, %%i - %assign %%i %%i+1 - %endrep - %endif - - %xdefine %%stack_offset stack_offset - %undef stack_offset ; so that the current value of stack_offset doesn't get baked in by xdefine - %assign %%i 0 - %rep %0 - %xdefine %1q r %+ %%i %+ q - %xdefine %1d r %+ %%i %+ d - %xdefine %1w r %+ %%i %+ w - %xdefine %1h r %+ %%i %+ h - %xdefine %1b r %+ %%i %+ b - %xdefine %1m r %+ %%i %+ m - %xdefine %1mp r %+ %%i %+ mp - CAT_XDEFINE arg_name, %%i, %1 - %assign %%i %%i+1 - %rotate 1 - %endrep - %xdefine stack_offset %%stack_offset - %assign n_arg_names %0 -%endmacro - -%if WIN64 ; Windows x64 ;================================================= - -DECLARE_REG 0, rcx -DECLARE_REG 1, rdx -DECLARE_REG 2, R8 -DECLARE_REG 3, R9 -DECLARE_REG 4, R10, 40 -DECLARE_REG 5, R11, 48 -DECLARE_REG 6, rax, 56 -DECLARE_REG 7, rdi, 64 -DECLARE_REG 8, rsi, 72 -DECLARE_REG 9, rbx, 80 -DECLARE_REG 10, rbp, 88 -DECLARE_REG 11, R12, 96 -DECLARE_REG 12, R13, 104 -DECLARE_REG 13, R14, 112 -DECLARE_REG 14, R15, 120 - -%macro 
PROLOGUE 2-4+ 0 ; #args, #regs, #xmm_regs, arg_names... - %assign num_args %1 - %assign regs_used %2 - ASSERT regs_used >= num_args - ASSERT regs_used <= 15 - PUSH_IF_USED 7, 8, 9, 10, 11, 12, 13, 14 - %if mmsize == 8 - %assign xmm_regs_used 0 - %else - WIN64_SPILL_XMM %3 - %endif - LOAD_IF_USED 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14 - DEFINE_ARGS %4 -%endmacro - -%macro WIN64_SPILL_XMM 1 - %assign xmm_regs_used %1 - ASSERT xmm_regs_used <= 16 - %if xmm_regs_used > 6 - SUB rsp, (xmm_regs_used-6)*16+16 - %assign %%i xmm_regs_used - %rep (xmm_regs_used-6) - %assign %%i %%i-1 - movdqa [rsp + (%%i-6)*16+(~stack_offset&8)], xmm %+ %%i - %endrep - %endif -%endmacro - -%macro WIN64_RESTORE_XMM_INTERNAL 1 - %if xmm_regs_used > 6 - %assign %%i xmm_regs_used - %rep (xmm_regs_used-6) - %assign %%i %%i-1 - movdqa xmm %+ %%i, [%1 + (%%i-6)*16+(~stack_offset&8)] - %endrep - add %1, (xmm_regs_used-6)*16+16 - %endif -%endmacro - -%macro WIN64_RESTORE_XMM 1 - WIN64_RESTORE_XMM_INTERNAL %1 - %assign stack_offset stack_offset-(xmm_regs_used-6)*16+16 - %assign xmm_regs_used 0 -%endmacro - -%define has_epilogue regs_used > 7 || xmm_regs_used > 6 || mmsize == 32 - -%macro RET 0 - WIN64_RESTORE_XMM_INTERNAL rsp - POP_IF_USED 14, 13, 12, 11, 10, 9, 8, 7 -%if mmsize == 32 - vzeroupper -%endif - ret -%endmacro - -%elif ARCH_X86_64 ; *nix x64 ;============================================= - -DECLARE_REG 0, rdi -DECLARE_REG 1, rsi -DECLARE_REG 2, rdx -DECLARE_REG 3, rcx -DECLARE_REG 4, R8 -DECLARE_REG 5, R9 -DECLARE_REG 6, rax, 8 -DECLARE_REG 7, R10, 16 -DECLARE_REG 8, R11, 24 -DECLARE_REG 9, rbx, 32 -DECLARE_REG 10, rbp, 40 -DECLARE_REG 11, R12, 48 -DECLARE_REG 12, R13, 56 -DECLARE_REG 13, R14, 64 -DECLARE_REG 14, R15, 72 - -%macro PROLOGUE 2-4+ ; #args, #regs, #xmm_regs, arg_names... 
- %assign num_args %1 - %assign regs_used %2 - ASSERT regs_used >= num_args - ASSERT regs_used <= 15 - PUSH_IF_USED 9, 10, 11, 12, 13, 14 - LOAD_IF_USED 6, 7, 8, 9, 10, 11, 12, 13, 14 - DEFINE_ARGS %4 -%endmacro - -%define has_epilogue regs_used > 9 || mmsize == 32 - -%macro RET 0 - POP_IF_USED 14, 13, 12, 11, 10, 9 -%if mmsize == 32 - vzeroupper -%endif - ret -%endmacro - -%else ; X86_32 ;============================================================== - -DECLARE_REG 0, eax, 4 -DECLARE_REG 1, ecx, 8 -DECLARE_REG 2, edx, 12 -DECLARE_REG 3, ebx, 16 -DECLARE_REG 4, esi, 20 -DECLARE_REG 5, edi, 24 -DECLARE_REG 6, ebp, 28 -%define rsp esp - -%macro DECLARE_ARG 1-* - %rep %0 - %define r%1m [esp + stack_offset + 4*%1 + 4] - %define r%1mp dword r%1m - %rotate 1 - %endrep -%endmacro - -DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14 - -%macro PROLOGUE 2-4+ ; #args, #regs, #xmm_regs, arg_names... - %assign num_args %1 - %assign regs_used %2 - %if regs_used > 7 - %assign regs_used 7 - %endif - ASSERT regs_used >= num_args - PUSH_IF_USED 3, 4, 5, 6 - LOAD_IF_USED 0, 1, 2, 3, 4, 5, 6 - DEFINE_ARGS %4 -%endmacro - -%define has_epilogue regs_used > 3 || mmsize == 32 - -%macro RET 0 - POP_IF_USED 6, 5, 4, 3 -%if mmsize == 32 - vzeroupper -%endif - ret -%endmacro - -%endif ;====================================================================== - -%if WIN64 == 0 -%macro WIN64_SPILL_XMM 1 -%endmacro -%macro WIN64_RESTORE_XMM 1 -%endmacro -%endif - -%macro REP_RET 0 - %if has_epilogue - RET - %else - rep ret - %endif -%endmacro - -%macro TAIL_CALL 2 ; callee, is_nonadjacent - %if has_epilogue - call %1 - RET - %elif %2 - jmp %1 - %endif -%endmacro - -;============================================================================= -; arch-independent part -;============================================================================= - -%assign function_align 16 - -; Begin a function. 
-; Applies any symbol mangling needed for C linkage, and sets up a define such that -; subsequent uses of the function name automatically refer to the mangled version. -; Appends cpuflags to the function name if cpuflags has been specified. -%macro cglobal 1-2+ ; name, [PROLOGUE args] -%if %0 == 1 - cglobal_internal %1 %+ SUFFIX -%else - cglobal_internal %1 %+ SUFFIX, %2 -%endif -%endmacro -%macro cglobal_internal 1-2+ - %ifndef cglobaled_%1 - %xdefine %1 mangle(%1) - %xdefine %1.skip_prologue %1 %+ .skip_prologue - CAT_XDEFINE cglobaled_, %1, 1 - %endif - %xdefine current_function %1 - %ifidn __OUTPUT_FORMAT__,elf - global %1:function hidden - %else - global %1 - %endif - align function_align - %1: - RESET_MM_PERMUTATION ; not really needed, but makes disassembly somewhat nicer - %assign stack_offset 0 - %if %0 > 1 - PROLOGUE %2 - %endif -%endmacro - -%macro cextern 1 - %xdefine %1 mangle(%1) - CAT_XDEFINE cglobaled_, %1, 1 - extern %1 -%endmacro - -; like cextern, but without the prefix -%macro cextern_naked 1 - %xdefine %1 mangle(%1) - CAT_XDEFINE cglobaled_, %1, 1 - extern %1 -%endmacro - -%macro const 2+ - %xdefine %1 mangle(%1) - global %1 - %1: %2 -%endmacro - -; This is needed for ELF, otherwise the GNU linker assumes the stack is -; executable by default. 
-%ifidn __OUTPUT_FORMAT__,elf -SECTION .note.GNU-stack noalloc noexec nowrite progbits -%endif -%ifidn __OUTPUT_FORMAT__,elf32 -section .note.GNU-stack noalloc noexec nowrite progbits -%endif -%ifidn __OUTPUT_FORMAT__,elf64 -section .note.GNU-stack noalloc noexec nowrite progbits -%endif - -; cpuflags - -%assign cpuflags_MMX (1<<0) -%assign cpuflags_MMX2 (1<<1) | cpuflags_MMX -%assign cpuflags_3dnow (1<<2) | cpuflags_MMX -%assign cpuflags_3dnow2 (1<<3) | cpuflags_3dnow -%assign cpuflags_SSE (1<<4) | cpuflags_MMX2 -%assign cpuflags_SSE2 (1<<5) | cpuflags_SSE -%assign cpuflags_SSE2slow (1<<6) | cpuflags_SSE2 -%assign cpuflags_SSE3 (1<<7) | cpuflags_SSE2 -%assign cpuflags_SSSE3 (1<<8) | cpuflags_SSE3 -%assign cpuflags_SSE4 (1<<9) | cpuflags_SSSE3 -%assign cpuflags_SSE42 (1<<10)| cpuflags_SSE4 -%assign cpuflags_AVX (1<<11)| cpuflags_SSE42 -%assign cpuflags_xop (1<<12)| cpuflags_AVX -%assign cpuflags_fma4 (1<<13)| cpuflags_AVX -%assign cpuflags_AVX2 (1<<14)| cpuflags_AVX -%assign cpuflags_fma3 (1<<15)| cpuflags_AVX - -%assign cpuflags_cache32 (1<<16) -%assign cpuflags_cache64 (1<<17) -%assign cpuflags_slowctz (1<<18) -%assign cpuflags_lzcnt (1<<19) -%assign cpuflags_misalign (1<<20) -%assign cpuflags_aligned (1<<21) ; not a cpu feature, but a function variant -%assign cpuflags_atom (1<<22) -%assign cpuflags_bmi1 (1<<23) -%assign cpuflags_bmi2 (1<<24)|cpuflags_bmi1 -%assign cpuflags_tbm (1<<25)|cpuflags_bmi1 - -%define cpuflag(x) ((cpuflags & (cpuflags_ %+ x)) == (cpuflags_ %+ x)) -%define notcpuflag(x) ((cpuflags & (cpuflags_ %+ x)) != (cpuflags_ %+ x)) - -; Takes up to 2 cpuflags from the above list. -; All subsequent functions (up to the next INIT_CPUFLAGS) is built for the specified cpu. -; You shouldn't need to invoke this macro directly, it's a subroutine for INIT_MMX &co. 
-%macro INIT_CPUFLAGS 0-2 - %if %0 >= 1 - %xdefine cpuname %1 - %assign cpuflags cpuflags_%1 - %if %0 >= 2 - %xdefine cpuname %1_%2 - %assign cpuflags cpuflags | cpuflags_%2 - %endif - %xdefine SUFFIX _ %+ cpuname - %if cpuflag(AVX) - %assign AVX_enabled 1 - %endif - %if mmsize == 16 && notcpuflag(SSE2) - %define mova movaps - %define movu movups - %define movnta movntps - %endif - %if cpuflag(aligned) - %define movu mova - %elifidn %1, SSE3 - %define movu lddqu - %endif - %else - %xdefine SUFFIX - %undef cpuname - %undef cpuflags - %endif -%endmacro - -; merge MMX and SSE* - -%macro CAT_XDEFINE 3 - %xdefine %1%2 %3 -%endmacro - -%macro CAT_UNDEF 2 - %undef %1%2 -%endmacro - -%macro INIT_MMX 0-1+ - %assign AVX_enabled 0 - %define RESET_MM_PERMUTATION INIT_MMX %1 - %define mmsize 8 - %define num_mmregs 8 - %define mova movq - %define movu movq - %define movh movd - %define movnta movntq - %assign %%i 0 - %rep 8 - CAT_XDEFINE m, %%i, mm %+ %%i - CAT_XDEFINE nmm, %%i, %%i - %assign %%i %%i+1 - %endrep - %rep 8 - CAT_UNDEF m, %%i - CAT_UNDEF nmm, %%i - %assign %%i %%i+1 - %endrep - INIT_CPUFLAGS %1 -%endmacro - -%macro INIT_XMM 0-1+ - %assign AVX_enabled 0 - %define RESET_MM_PERMUTATION INIT_XMM %1 - %define mmsize 16 - %define num_mmregs 8 - %if ARCH_X86_64 - %define num_mmregs 16 - %endif - %define mova movdqa - %define movu movdqu - %define movh movq - %define movnta movntdq - %assign %%i 0 - %rep num_mmregs - CAT_XDEFINE m, %%i, xmm %+ %%i - CAT_XDEFINE nxmm, %%i, %%i - %assign %%i %%i+1 - %endrep - INIT_CPUFLAGS %1 -%endmacro - -%macro INIT_YMM 0-1+ - %assign AVX_enabled 1 - %define RESET_MM_PERMUTATION INIT_YMM %1 - %define mmsize 32 - %define num_mmregs 8 - %if ARCH_X86_64 - %define num_mmregs 16 - %endif - %define mova vmovaps - %define movu vmovups - %undef movh - %define movnta vmovntps - %assign %%i 0 - %rep num_mmregs - CAT_XDEFINE m, %%i, ymm %+ %%i - CAT_XDEFINE nymm, %%i, %%i - %assign %%i %%i+1 - %endrep - INIT_CPUFLAGS %1 -%endmacro - -INIT_XMM - -; I 
often want to use macros that permute their arguments. e.g. there's no -; efficient way to implement butterfly or transpose or dct without swapping some -; arguments. -; -; I would like to not have to manually keep track of the permutations: -; If I insert a permutation in the middle of a function, it should automatically -; change everything that follows. For more complex macros I may also have multiple -; implementations, e.g. the SSE2 and SSSE3 versions may have different permutations. -; -; Hence these macros. Insert a PERMUTE or some SWAPs at the end of a macro that -; permutes its arguments. It's equivalent to exchanging the contents of the -; registers, except that this way you exchange the register names instead, so it -; doesn't cost any cycles. - -%macro PERMUTE 2-* ; takes a list of pairs to swap -%rep %0/2 - %xdefine tmp%2 m%2 - %xdefine ntmp%2 nm%2 - %rotate 2 -%endrep -%rep %0/2 - %xdefine m%1 tmp%2 - %xdefine nm%1 ntmp%2 - %undef tmp%2 - %undef ntmp%2 - %rotate 2 -%endrep -%endmacro - -%macro SWAP 2-* ; swaps a single chain (sometimes more concise than pairs) -%rep %0-1 -%ifdef m%1 - %xdefine tmp m%1 - %xdefine m%1 m%2 - %xdefine m%2 tmp - CAT_XDEFINE n, m%1, %1 - CAT_XDEFINE n, m%2, %2 -%else - ; If we were called as "SWAP m0,m1" rather than "SWAP 0,1" infer the original numbers here. - ; Be careful using this mode in nested macros though, as in some cases there may be - ; other copies of m# that have already been dereferenced and don't get updated correctly. - %xdefine %%n1 n %+ %1 - %xdefine %%n2 n %+ %2 - %xdefine tmp m %+ %%n1 - CAT_XDEFINE m, %%n1, m %+ %%n2 - CAT_XDEFINE m, %%n2, tmp - CAT_XDEFINE n, m %+ %%n1, %%n1 - CAT_XDEFINE n, m %+ %%n2, %%n2 -%endif - %undef tmp - %rotate 1 -%endrep -%endmacro - -; If SAVE_MM_PERMUTATION is placed at the end of a function, then any later -; calls to that function will automatically load the permutation, so values can -; be returned in mmregs. 
-%macro SAVE_MM_PERMUTATION 0-1 - %if %0 - %xdefine %%f %1_m - %else - %xdefine %%f current_function %+ _m - %endif - %assign %%i 0 - %rep num_mmregs - CAT_XDEFINE %%f, %%i, m %+ %%i - %assign %%i %%i+1 - %endrep -%endmacro - -%macro LOAD_MM_PERMUTATION 1 ; name to load from - %ifdef %1_m0 - %assign %%i 0 - %rep num_mmregs - CAT_XDEFINE m, %%i, %1_m %+ %%i - CAT_XDEFINE n, m %+ %%i, %%i - %assign %%i %%i+1 - %endrep - %endif -%endmacro - -; Append cpuflags to the callee's name iff the appended name is known and the plain name isn't -%macro call 1 - call_internal %1, %1 %+ SUFFIX -%endmacro -%macro call_internal 2 - %xdefine %%i %1 - %ifndef cglobaled_%1 - %ifdef cglobaled_%2 - %xdefine %%i %2 - %endif - %endif - call %%i - LOAD_MM_PERMUTATION %%i -%endmacro - -; Substitutions that reduce instruction size but are functionally equivalent -%macro add 2 - %ifnum %2 - %if %2==128 - sub %1, -128 - %else - add %1, %2 - %endif - %else - add %1, %2 - %endif -%endmacro - -%macro sub 2 - %ifnum %2 - %if %2==128 - add %1, -128 - %else - sub %1, %2 - %endif - %else - sub %1, %2 - %endif -%endmacro - -;============================================================================= -; AVX abstraction layer -;============================================================================= - -%assign i 0 -%rep 16 - %if i < 8 - CAT_XDEFINE sizeofmm, i, 8 - %endif - CAT_XDEFINE sizeofxmm, i, 16 - CAT_XDEFINE sizeofymm, i, 32 -%assign i i+1 -%endrep -%undef i - -%macro CHECK_AVX_INSTR_EMU 3-* - %xdefine %%opcode %1 - %xdefine %%dst %2 - %rep %0-2 - %ifidn %%dst, %3 - %error non-AVX emulation of ``%%opcode'' is not supported - %endif - %rotate 1 - %endrep -%endmacro - -;%1 == instruction -;%2 == 1 if float, 0 if int -;%3 == 1 if 4-operand (xmm, xmm, xmm, imm), 0 if 2- or 3-operand (xmm, xmm, xmm) -;%4 == number of operands given -;%5+: operands -%macro RUN_AVX_INSTR 6-7+ - %ifid %6 - %define %%sizeofreg sizeof%6 - %elifid %5 - %define %%sizeofreg sizeof%5 - %else - %define %%sizeofreg 
mmsize - %endif - %if %%sizeofreg==32 - %if %4>=3 - v%1 %5, %6, %7 - %else - v%1 %5, %6 - %endif - %else - %if %%sizeofreg==8 - %define %%regmov movq - %elif %2 - %define %%regmov movaps - %else - %define %%regmov movdqa - %endif - - %if %4>=3+%3 - %ifnidn %5, %6 - %if AVX_enabled && %%sizeofreg==16 - v%1 %5, %6, %7 - %else - CHECK_AVX_INSTR_EMU {%1 %5, %6, %7}, %5, %7 - %%regmov %5, %6 - %1 %5, %7 - %endif - %else - %1 %5, %7 - %endif - %elif %4>=3 - %1 %5, %6, %7 - %else - %1 %5, %6 - %endif - %endif -%endmacro - -; 3arg AVX ops with a memory arg can only have it in src2, -; whereas SSE emulation of 3arg prefers to have it in src1 (i.e. the mov). -; So, if the op is symmetric and the wrong one is memory, swap them. -%macro RUN_AVX_INSTR1 8 - %assign %%swap 0 - %if AVX_enabled - %ifnid %6 - %assign %%swap 1 - %endif - %elifnidn %5, %6 - %ifnid %7 - %assign %%swap 1 - %endif - %endif - %if %%swap && %3 == 0 && %8 == 1 - RUN_AVX_INSTR %1, %2, %3, %4, %5, %7, %6 - %else - RUN_AVX_INSTR %1, %2, %3, %4, %5, %6, %7 - %endif -%endmacro - -;%1 == instruction -;%2 == 1 if float, 0 if int -;%3 == 1 if 4-operand (xmm, xmm, xmm, imm), 0 if 2- or 3-operand (xmm, xmm, xmm) -;%4 == 1 if symmetric (i.e. 
doesn't matter which src arg is which), 0 if not -%macro AVX_INSTR 4 - %macro %1 2-9 fnord, fnord, fnord, %1, %2, %3, %4 - %ifidn %3, fnord - RUN_AVX_INSTR %6, %7, %8, 2, %1, %2 - %elifidn %4, fnord - RUN_AVX_INSTR1 %6, %7, %8, 3, %1, %2, %3, %9 - %elifidn %5, fnord - RUN_AVX_INSTR %6, %7, %8, 4, %1, %2, %3, %4 - %else - RUN_AVX_INSTR %6, %7, %8, 5, %1, %2, %3, %4, %5 - %endif - %endmacro -%endmacro - -AVX_INSTR addpd, 1, 0, 1 -AVX_INSTR addps, 1, 0, 1 -AVX_INSTR addsd, 1, 0, 1 -AVX_INSTR addss, 1, 0, 1 -AVX_INSTR addsubpd, 1, 0, 0 -AVX_INSTR addsubps, 1, 0, 0 -AVX_INSTR andpd, 1, 0, 1 -AVX_INSTR andps, 1, 0, 1 -AVX_INSTR andnpd, 1, 0, 0 -AVX_INSTR andnps, 1, 0, 0 -AVX_INSTR blendpd, 1, 0, 0 -AVX_INSTR blendps, 1, 0, 0 -AVX_INSTR blendvpd, 1, 0, 0 -AVX_INSTR blendvps, 1, 0, 0 -AVX_INSTR cmppd, 1, 0, 0 -AVX_INSTR cmpps, 1, 0, 0 -AVX_INSTR cmpsd, 1, 0, 0 -AVX_INSTR cmpss, 1, 0, 0 -AVX_INSTR cvtdq2ps, 1, 0, 0 -AVX_INSTR cvtps2dq, 1, 0, 0 -AVX_INSTR divpd, 1, 0, 0 -AVX_INSTR divps, 1, 0, 0 -AVX_INSTR divsd, 1, 0, 0 -AVX_INSTR divss, 1, 0, 0 -AVX_INSTR dppd, 1, 1, 0 -AVX_INSTR dpps, 1, 1, 0 -AVX_INSTR haddpd, 1, 0, 0 -AVX_INSTR haddps, 1, 0, 0 -AVX_INSTR hsubpd, 1, 0, 0 -AVX_INSTR hsubps, 1, 0, 0 -AVX_INSTR maxpd, 1, 0, 1 -AVX_INSTR maxps, 1, 0, 1 -AVX_INSTR maxsd, 1, 0, 1 -AVX_INSTR maxss, 1, 0, 1 -AVX_INSTR minpd, 1, 0, 1 -AVX_INSTR minps, 1, 0, 1 -AVX_INSTR minsd, 1, 0, 1 -AVX_INSTR minss, 1, 0, 1 -AVX_INSTR movhlps, 1, 0, 0 -AVX_INSTR movlhps, 1, 0, 0 -AVX_INSTR movsd, 1, 0, 0 -AVX_INSTR movss, 1, 0, 0 -AVX_INSTR mpsadbw, 0, 1, 0 -AVX_INSTR mulpd, 1, 0, 1 -AVX_INSTR mulps, 1, 0, 1 -AVX_INSTR mulsd, 1, 0, 1 -AVX_INSTR mulss, 1, 0, 1 -AVX_INSTR orpd, 1, 0, 1 -AVX_INSTR orps, 1, 0, 1 -AVX_INSTR pabsb, 0, 0, 0 -AVX_INSTR pabsw, 0, 0, 0 -AVX_INSTR pabsd, 0, 0, 0 -AVX_INSTR packsswb, 0, 0, 0 -AVX_INSTR packssdw, 0, 0, 0 -AVX_INSTR packuswb, 0, 0, 0 -AVX_INSTR packusdw, 0, 0, 0 -AVX_INSTR paddb, 0, 0, 1 -AVX_INSTR paddw, 0, 0, 1 -AVX_INSTR paddd, 0, 0, 1 -AVX_INSTR paddq, 
0, 0, 1 -AVX_INSTR paddsb, 0, 0, 1 -AVX_INSTR paddsw, 0, 0, 1 -AVX_INSTR paddusb, 0, 0, 1 -AVX_INSTR paddusw, 0, 0, 1 -AVX_INSTR palignr, 0, 1, 0 -AVX_INSTR pand, 0, 0, 1 -AVX_INSTR pandn, 0, 0, 0 -AVX_INSTR pavgb, 0, 0, 1 -AVX_INSTR pavgw, 0, 0, 1 -AVX_INSTR pblendvb, 0, 0, 0 -AVX_INSTR pblendw, 0, 1, 0 -AVX_INSTR pcmpestri, 0, 0, 0 -AVX_INSTR pcmpestrm, 0, 0, 0 -AVX_INSTR pcmpistri, 0, 0, 0 -AVX_INSTR pcmpistrm, 0, 0, 0 -AVX_INSTR pcmpeqb, 0, 0, 1 -AVX_INSTR pcmpeqw, 0, 0, 1 -AVX_INSTR pcmpeqd, 0, 0, 1 -AVX_INSTR pcmpeqq, 0, 0, 1 -AVX_INSTR pcmpgtb, 0, 0, 0 -AVX_INSTR pcmpgtw, 0, 0, 0 -AVX_INSTR pcmpgtd, 0, 0, 0 -AVX_INSTR pcmpgtq, 0, 0, 0 -AVX_INSTR phaddw, 0, 0, 0 -AVX_INSTR phaddd, 0, 0, 0 -AVX_INSTR phaddsw, 0, 0, 0 -AVX_INSTR phsubw, 0, 0, 0 -AVX_INSTR phsubd, 0, 0, 0 -AVX_INSTR phsubsw, 0, 0, 0 -AVX_INSTR pmaddwd, 0, 0, 1 -AVX_INSTR pmaddubsw, 0, 0, 0 -AVX_INSTR pmaxsb, 0, 0, 1 -AVX_INSTR pmaxsw, 0, 0, 1 -AVX_INSTR pmaxsd, 0, 0, 1 -AVX_INSTR pmaxub, 0, 0, 1 -AVX_INSTR pmaxuw, 0, 0, 1 -AVX_INSTR pmaxud, 0, 0, 1 -AVX_INSTR pminsb, 0, 0, 1 -AVX_INSTR pminsw, 0, 0, 1 -AVX_INSTR pminsd, 0, 0, 1 -AVX_INSTR pminub, 0, 0, 1 -AVX_INSTR pminuw, 0, 0, 1 -AVX_INSTR pminud, 0, 0, 1 -AVX_INSTR pmovmskb, 0, 0, 0 -AVX_INSTR pmulhuw, 0, 0, 1 -AVX_INSTR pmulhrsw, 0, 0, 1 -AVX_INSTR pmulhw, 0, 0, 1 -AVX_INSTR pmullw, 0, 0, 1 -AVX_INSTR pmulld, 0, 0, 1 -AVX_INSTR pmuludq, 0, 0, 1 -AVX_INSTR pmuldq, 0, 0, 1 -AVX_INSTR por, 0, 0, 1 -AVX_INSTR psadbw, 0, 0, 1 -AVX_INSTR pshufb, 0, 0, 0 -AVX_INSTR pshufd, 0, 1, 0 -AVX_INSTR pshufhw, 0, 1, 0 -AVX_INSTR pshuflw, 0, 1, 0 -AVX_INSTR psignb, 0, 0, 0 -AVX_INSTR psignw, 0, 0, 0 -AVX_INSTR psignd, 0, 0, 0 -AVX_INSTR psllw, 0, 0, 0 -AVX_INSTR pslld, 0, 0, 0 -AVX_INSTR psllq, 0, 0, 0 -AVX_INSTR pslldq, 0, 0, 0 -AVX_INSTR psraw, 0, 0, 0 -AVX_INSTR psrad, 0, 0, 0 -AVX_INSTR psrlw, 0, 0, 0 -AVX_INSTR psrld, 0, 0, 0 -AVX_INSTR psrlq, 0, 0, 0 -AVX_INSTR psrldq, 0, 0, 0 -AVX_INSTR psubb, 0, 0, 0 -AVX_INSTR psubw, 0, 0, 0 -AVX_INSTR psubd, 0, 0, 0 
-AVX_INSTR psubq, 0, 0, 0 -AVX_INSTR psubsb, 0, 0, 0 -AVX_INSTR psubsw, 0, 0, 0 -AVX_INSTR psubusb, 0, 0, 0 -AVX_INSTR psubusw, 0, 0, 0 -AVX_INSTR ptest, 0, 0, 0 -AVX_INSTR punpckhbw, 0, 0, 0 -AVX_INSTR punpckhwd, 0, 0, 0 -AVX_INSTR punpckhdq, 0, 0, 0 -AVX_INSTR punpckhqdq, 0, 0, 0 -AVX_INSTR punpcklbw, 0, 0, 0 -AVX_INSTR punpcklwd, 0, 0, 0 -AVX_INSTR punpckldq, 0, 0, 0 -AVX_INSTR punpcklqdq, 0, 0, 0 -AVX_INSTR pxor, 0, 0, 1 -AVX_INSTR shufps, 1, 1, 0 -AVX_INSTR subpd, 1, 0, 0 -AVX_INSTR subps, 1, 0, 0 -AVX_INSTR subsd, 1, 0, 0 -AVX_INSTR subss, 1, 0, 0 -AVX_INSTR unpckhpd, 1, 0, 0 -AVX_INSTR unpckhps, 1, 0, 0 -AVX_INSTR unpcklpd, 1, 0, 0 -AVX_INSTR unpcklps, 1, 0, 0 -AVX_INSTR xorpd, 1, 0, 1 -AVX_INSTR xorps, 1, 0, 1 - -; 3DNow instructions, for sharing code between AVX, SSE and 3DN -AVX_INSTR pfadd, 1, 0, 1 -AVX_INSTR pfsub, 1, 0, 0 -AVX_INSTR pfmul, 1, 0, 1 - -; base-4 constants for shuffles -%assign i 0 -%rep 256 - %assign j ((i>>6)&3)*1000 + ((i>>4)&3)*100 + ((i>>2)&3)*10 + (i&3) - %if j < 10 - CAT_XDEFINE q000, j, i - %elif j < 100 - CAT_XDEFINE q00, j, i - %elif j < 1000 - CAT_XDEFINE q0, j, i - %else - CAT_XDEFINE q, j, i - %endif -%assign i i+1 -%endrep -%undef i -%undef j - -%macro FMA_INSTR 3 - %macro %1 4-7 %1, %2, %3 - %if cpuflag(xop) - v%5 %1, %2, %3, %4 - %else - %6 %1, %2, %3 - %7 %1, %4 - %endif - %endmacro -%endmacro - -FMA_INSTR pmacsdd, pmulld, paddd -FMA_INSTR pmacsww, pmullw, paddw -FMA_INSTR pmadcswd, pmaddwd, paddd - -; tzcnt is equivalent to "rep bsf" and is backwards-compatible with bsf. -; This lets us use tzcnt without bumping the yasm version requirement yet. 
-%define tzcnt rep bsf diff --git a/drivers/theoraplayer/src/YUV/libyuv/yuv_libyuv.c b/drivers/theoraplayer/src/YUV/libyuv/yuv_libyuv.c deleted file mode 100755 index 3712a3b6d6..0000000000 --- a/drivers/theoraplayer/src/YUV/libyuv/yuv_libyuv.c +++ /dev/null @@ -1,72 +0,0 @@ -/************************************************************************************ -This source file is part of the Theora Video Playback Library -For latest info, see http://libtheoraplayer.googlecode.com -************************************************************************************* -Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com) -This program is free software; you can redistribute it and/or modify it under -the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause -*************************************************************************************/ -#ifdef _YUV_LIBYUV -#include <libyuv.h> -#include "yuv_util.h" -#include "yuv_libyuv.h" - -void decodeRGB(struct TheoraPixelTransform* t) -{ - I420ToRAW(t->y, t->yStride, t->u, t->uStride, t->v, t->vStride, t->out, t->w * 3, t->w, t->h); -} - -void decodeRGBA(struct TheoraPixelTransform* t) -{ - I420ToABGR(t->y, t->yStride, t->u, t->uStride, t->v, t->vStride, t->out, t->w * 4, t->w, t->h); - _decodeAlpha(incOut(t, 3), t->w * 4); -} - -void decodeRGBX(struct TheoraPixelTransform* t) -{ - I420ToABGR(t->y, t->yStride, t->u, t->uStride, t->v, t->vStride, t->out, t->w * 4, t->w, t->h); -} - -void decodeARGB(struct TheoraPixelTransform* t) -{ - I420ToBGRA(t->y, t->yStride, t->u, t->uStride, t->v, t->vStride, t->out, t->w * 4, t->w, t->h); - _decodeAlpha(t, t->w * 4); -} - -void decodeXRGB(struct TheoraPixelTransform* t) -{ - I420ToBGRA(t->y, t->yStride, t->u, t->uStride, t->v, t->vStride, t->out, t->w * 4, t->w, t->h); -} - -void decodeBGR(struct TheoraPixelTransform* t) -{ - I420ToRGB24(t->y, t->yStride, t->u, t->uStride, t->v, t->vStride, t->out, t->w * 3, t->w, t->h); -} - -void decodeBGRA(struct 
TheoraPixelTransform* t) -{ - I420ToARGB(t->y, t->yStride, t->u, t->uStride, t->v, t->vStride, t->out, t->w * 4, t->w, t->h); - _decodeAlpha(incOut(t, 3), t->w * 4); -} - -void decodeBGRX(struct TheoraPixelTransform* t) -{ - I420ToARGB(t->y, t->yStride, t->u, t->uStride, t->v, t->vStride, t->out, t->w * 4, t->w, t->h); -} - -void decodeABGR(struct TheoraPixelTransform* t) -{ - I420ToRGBA(t->y, t->yStride, t->u, t->uStride, t->v, t->vStride, t->out, t->w * 4, t->w, t->h); - _decodeAlpha(t, t->w * 4); -} - -void decodeXBGR(struct TheoraPixelTransform* t) -{ - I420ToRGBA(t->y, t->yStride, t->u, t->uStride, t->v, t->vStride, t->out, t->w * 4, t->w, t->h); -} - -void initYUVConversionModule() -{ - -} -#endif diff --git a/drivers/theoraplayer/src/YUV/libyuv/yuv_libyuv.h b/drivers/theoraplayer/src/YUV/libyuv/yuv_libyuv.h deleted file mode 100755 index f621af0c5f..0000000000 --- a/drivers/theoraplayer/src/YUV/libyuv/yuv_libyuv.h +++ /dev/null @@ -1,14 +0,0 @@ -/************************************************************************************ -This source file is part of the Theora Video Playback Library -For latest info, see http://libtheoraplayer.googlecode.com -************************************************************************************* -Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com) -This program is free software; you can redistribute it and/or modify it under -the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause -*************************************************************************************/ -#ifndef _YUV_LIBYUV_h -#define _YUV_LIBYUV_h - -#include "TheoraPixelTransform.h" - -#endif diff --git a/drivers/theoraplayer/src/YUV/yuv_util.c b/drivers/theoraplayer/src/YUV/yuv_util.c deleted file mode 100644 index f5bf3e5f9e..0000000000 --- a/drivers/theoraplayer/src/YUV/yuv_util.c +++ /dev/null @@ -1,39 +0,0 @@ -/************************************************************************************ -This source file is part of 
the Theora Video Playback Library -For latest info, see http://libtheoraplayer.googlecode.com -************************************************************************************* -Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com) -This program is free software; you can redistribute it and/or modify it under -the terms of the BSD license: http://opensource.org/licenses/BSD-3-Clause -*************************************************************************************/ -#include "yuv_util.h" - -struct TheoraPixelTransform* incOut(struct TheoraPixelTransform* t, int n) -{ - // used for XRGB, XBGR and similar - t->out += n; - return t; -} - -void _decodeAlpha(struct TheoraPixelTransform* t, int stride) -{ - int width = t->w; - unsigned char *ySrc, *yLineEnd, *out; - int luma; - unsigned int y; - for (y = 0; y < t->h; y++) - { - ySrc = t->y + y * t->yStride + width; - out = t->out + y * stride; - - for (yLineEnd = ySrc + width; ySrc != yLineEnd; ++ySrc, out += 4) - { - luma = (*ySrc); - // because in YCbCr specification, luma values are in the range of [16, 235] - // account for 'footroom' and 'headroom' ranges while using luma values as alpha channel - if (luma <= 16) *out = 0; - else if (luma >= 235) *out = 255; - else *out = (unsigned char) (((luma - 16) * 255) / 219); - } - } -} diff --git a/drivers/theoraplayer/src/YUV/yuv_util.h b/drivers/theoraplayer/src/YUV/yuv_util.h deleted file mode 100644 index 1f9d76634a..0000000000 --- a/drivers/theoraplayer/src/YUV/yuv_util.h +++ /dev/null @@ -1,17 +0,0 @@ -/************************************************************************************ -This source file is part of the Theora Video Playback Library -For latest info, see http://libtheoraplayer.googlecode.com -************************************************************************************* -Copyright (c) 2008-2014 Kresimir Spes (kspes@cateia.com) -This program is free software; you can redistribute it and/or modify it under -the terms of the BSD license: 
http://opensource.org/licenses/BSD-3-Clause -*************************************************************************************/ -#ifndef _YUV_UTIL_h -#define _YUV_UTIL_h - -#include "TheoraPixelTransform.h" - -struct TheoraPixelTransform* incOut(struct TheoraPixelTransform* t, int n); -void _decodeAlpha(struct TheoraPixelTransform* t, int stride); - -#endif diff --git a/drivers/theoraplayer/theoraplayer.xcodeproj/project.pbxproj b/drivers/theoraplayer/theoraplayer.xcodeproj/project.pbxproj deleted file mode 100644 index 23f875fe0c..0000000000 --- a/drivers/theoraplayer/theoraplayer.xcodeproj/project.pbxproj +++ /dev/null @@ -1,2606 +0,0 @@ -// !$*UTF8*$! -{ - archiveVersion = 1; - classes = { - }; - objectVersion = 46; - objects = { - -/* Begin PBXBuildFile section */ - D139462D17C0ED450091F4A4 /* yuv_libyuv.c in Sources */ = {isa = PBXBuildFile; fileRef = D139462B17C0ED450091F4A4 /* yuv_libyuv.c */; }; - D139462E17C0ED450091F4A4 /* yuv_libyuv.c in Sources */ = {isa = PBXBuildFile; fileRef = D139462B17C0ED450091F4A4 /* yuv_libyuv.c */; }; - D139462F17C0ED450091F4A4 /* yuv_libyuv.c in Sources */ = {isa = PBXBuildFile; fileRef = D139462B17C0ED450091F4A4 /* yuv_libyuv.c */; }; - D139463017C0ED450091F4A4 /* yuv_libyuv.c in Sources */ = {isa = PBXBuildFile; fileRef = D139462B17C0ED450091F4A4 /* yuv_libyuv.c */; }; - D139463117C0ED450091F4A4 /* yuv_libyuv.c in Sources */ = {isa = PBXBuildFile; fileRef = D139462B17C0ED450091F4A4 /* yuv_libyuv.c */; }; - D139463217C0ED450091F4A4 /* yuv_libyuv.c in Sources */ = {isa = PBXBuildFile; fileRef = D139462B17C0ED450091F4A4 /* yuv_libyuv.c */; }; - D139463317C0ED450091F4A4 /* yuv_libyuv.c in Sources */ = {isa = PBXBuildFile; fileRef = D139462B17C0ED450091F4A4 /* yuv_libyuv.c */; }; - D139463417C0ED450091F4A4 /* yuv_libyuv.c in Sources */ = {isa = PBXBuildFile; fileRef = D139462B17C0ED450091F4A4 /* yuv_libyuv.c */; }; - D139463617C0ED450091F4A4 /* yuv_libyuv.h in Headers */ = {isa = PBXBuildFile; fileRef = 
D139462C17C0ED450091F4A4 /* yuv_libyuv.h */; }; - D139463717C0ED450091F4A4 /* yuv_libyuv.h in Headers */ = {isa = PBXBuildFile; fileRef = D139462C17C0ED450091F4A4 /* yuv_libyuv.h */; }; - D139463817C0ED450091F4A4 /* yuv_libyuv.h in Headers */ = {isa = PBXBuildFile; fileRef = D139462C17C0ED450091F4A4 /* yuv_libyuv.h */; }; - D139463917C0ED450091F4A4 /* yuv_libyuv.h in Headers */ = {isa = PBXBuildFile; fileRef = D139462C17C0ED450091F4A4 /* yuv_libyuv.h */; }; - D139463A17C0ED450091F4A4 /* yuv_libyuv.h in Headers */ = {isa = PBXBuildFile; fileRef = D139462C17C0ED450091F4A4 /* yuv_libyuv.h */; }; - D139463B17C0ED450091F4A4 /* yuv_libyuv.h in Headers */ = {isa = PBXBuildFile; fileRef = D139462C17C0ED450091F4A4 /* yuv_libyuv.h */; }; - D139463C17C0ED450091F4A4 /* yuv_libyuv.h in Headers */ = {isa = PBXBuildFile; fileRef = D139462C17C0ED450091F4A4 /* yuv_libyuv.h */; }; - D139463D17C0ED450091F4A4 /* yuv_libyuv.h in Headers */ = {isa = PBXBuildFile; fileRef = D139462C17C0ED450091F4A4 /* yuv_libyuv.h */; }; - D139463E17C0ED450091F4A4 /* yuv_libyuv.h in Headers */ = {isa = PBXBuildFile; fileRef = D139462C17C0ED450091F4A4 /* yuv_libyuv.h */; }; - D13946C617C110670091F4A4 /* yuv_libyuv.c in Sources */ = {isa = PBXBuildFile; fileRef = D139462B17C0ED450091F4A4 /* yuv_libyuv.c */; }; - D13946CC17C119B40091F4A4 /* yuv_util.c in Sources */ = {isa = PBXBuildFile; fileRef = D13946CA17C119B30091F4A4 /* yuv_util.c */; }; - D13946CD17C119B40091F4A4 /* yuv_util.c in Sources */ = {isa = PBXBuildFile; fileRef = D13946CA17C119B30091F4A4 /* yuv_util.c */; }; - D13946CE17C119B40091F4A4 /* yuv_util.c in Sources */ = {isa = PBXBuildFile; fileRef = D13946CA17C119B30091F4A4 /* yuv_util.c */; }; - D13946CF17C119B40091F4A4 /* yuv_util.c in Sources */ = {isa = PBXBuildFile; fileRef = D13946CA17C119B30091F4A4 /* yuv_util.c */; }; - D13946D017C119B40091F4A4 /* yuv_util.c in Sources */ = {isa = PBXBuildFile; fileRef = D13946CA17C119B30091F4A4 /* yuv_util.c */; }; - D13946D117C119B40091F4A4 /* 
yuv_util.c in Sources */ = {isa = PBXBuildFile; fileRef = D13946CA17C119B30091F4A4 /* yuv_util.c */; }; - D13946D217C119B40091F4A4 /* yuv_util.c in Sources */ = {isa = PBXBuildFile; fileRef = D13946CA17C119B30091F4A4 /* yuv_util.c */; }; - D13946D317C119B40091F4A4 /* yuv_util.c in Sources */ = {isa = PBXBuildFile; fileRef = D13946CA17C119B30091F4A4 /* yuv_util.c */; }; - D13946D417C119B40091F4A4 /* yuv_util.c in Sources */ = {isa = PBXBuildFile; fileRef = D13946CA17C119B30091F4A4 /* yuv_util.c */; }; - D13946D517C119B40091F4A4 /* yuv_util.h in Headers */ = {isa = PBXBuildFile; fileRef = D13946CB17C119B30091F4A4 /* yuv_util.h */; }; - D13946D617C119B40091F4A4 /* yuv_util.h in Headers */ = {isa = PBXBuildFile; fileRef = D13946CB17C119B30091F4A4 /* yuv_util.h */; }; - D13946D717C119B40091F4A4 /* yuv_util.h in Headers */ = {isa = PBXBuildFile; fileRef = D13946CB17C119B30091F4A4 /* yuv_util.h */; }; - D13946D817C119B40091F4A4 /* yuv_util.h in Headers */ = {isa = PBXBuildFile; fileRef = D13946CB17C119B30091F4A4 /* yuv_util.h */; }; - D13946D917C119B40091F4A4 /* yuv_util.h in Headers */ = {isa = PBXBuildFile; fileRef = D13946CB17C119B30091F4A4 /* yuv_util.h */; }; - D13946DA17C119B40091F4A4 /* yuv_util.h in Headers */ = {isa = PBXBuildFile; fileRef = D13946CB17C119B30091F4A4 /* yuv_util.h */; }; - D13946DB17C119B40091F4A4 /* yuv_util.h in Headers */ = {isa = PBXBuildFile; fileRef = D13946CB17C119B30091F4A4 /* yuv_util.h */; }; - D13946DC17C119B40091F4A4 /* yuv_util.h in Headers */ = {isa = PBXBuildFile; fileRef = D13946CB17C119B30091F4A4 /* yuv_util.h */; }; - D13946DD17C119B40091F4A4 /* yuv_util.h in Headers */ = {isa = PBXBuildFile; fileRef = D13946CB17C119B30091F4A4 /* yuv_util.h */; }; - D159BCB017C227F30030FAB6 /* convert_from.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05617C157CD00CA0FD2 /* convert_from.cc */; }; - D159BCB117C227F40030FAB6 /* convert_from.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05617C157CD00CA0FD2 /* convert_from.cc */; }; 
- D159BCB217C227F40030FAB6 /* convert_from.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05617C157CD00CA0FD2 /* convert_from.cc */; }; - D159BCB317C227F40030FAB6 /* convert_from.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05617C157CD00CA0FD2 /* convert_from.cc */; }; - D159BCB417C227F50030FAB6 /* convert_from.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05617C157CD00CA0FD2 /* convert_from.cc */; }; - D159BCB517C227F50030FAB6 /* convert_from.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05617C157CD00CA0FD2 /* convert_from.cc */; }; - D159BCB617C227F60030FAB6 /* convert_from.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05617C157CD00CA0FD2 /* convert_from.cc */; }; - D159BCB717C227F60030FAB6 /* convert_from.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05617C157CD00CA0FD2 /* convert_from.cc */; }; - D159BCB817C227F70030FAB6 /* convert_from.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05617C157CD00CA0FD2 /* convert_from.cc */; }; - D159BCB917C228310030FAB6 /* rotate_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06017C157CD00CA0FD2 /* rotate_argb.cc */; }; - D159BCBA17C228320030FAB6 /* rotate_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06017C157CD00CA0FD2 /* rotate_argb.cc */; }; - D159BCBB17C228320030FAB6 /* rotate_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06017C157CD00CA0FD2 /* rotate_argb.cc */; }; - D159BCBC17C228330030FAB6 /* rotate_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06017C157CD00CA0FD2 /* rotate_argb.cc */; }; - D159BCBD17C228330030FAB6 /* rotate_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06017C157CD00CA0FD2 /* rotate_argb.cc */; }; - D159BCBE17C228340030FAB6 /* rotate_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06017C157CD00CA0FD2 /* rotate_argb.cc */; }; - D159BCBF17C228340030FAB6 /* rotate_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06017C157CD00CA0FD2 /* rotate_argb.cc */; 
}; - D159BCC017C228340030FAB6 /* rotate_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06017C157CD00CA0FD2 /* rotate_argb.cc */; }; - D159BCC117C228350030FAB6 /* rotate_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06017C157CD00CA0FD2 /* rotate_argb.cc */; }; - D159BCC217C2286D0030FAB6 /* scale.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06F17C157CD00CA0FD2 /* scale.cc */; }; - D159BCC317C2286D0030FAB6 /* scale.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06F17C157CD00CA0FD2 /* scale.cc */; }; - D159BCC417C2286D0030FAB6 /* scale.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06F17C157CD00CA0FD2 /* scale.cc */; }; - D159BCC517C2286E0030FAB6 /* scale.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06F17C157CD00CA0FD2 /* scale.cc */; }; - D159BCC617C2286E0030FAB6 /* scale.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06F17C157CD00CA0FD2 /* scale.cc */; }; - D159BCC717C2286F0030FAB6 /* scale.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06F17C157CD00CA0FD2 /* scale.cc */; }; - D159BCC817C2286F0030FAB6 /* scale.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06F17C157CD00CA0FD2 /* scale.cc */; }; - D159BCC917C2286F0030FAB6 /* scale.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06F17C157CD00CA0FD2 /* scale.cc */; }; - D159BCCA17C228700030FAB6 /* scale.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06F17C157CD00CA0FD2 /* scale.cc */; }; - D15D361017C386A600F40439 /* row_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06817C157CD00CA0FD2 /* row_posix.cc */; }; - D15D361117C386A600F40439 /* row_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06817C157CD00CA0FD2 /* row_posix.cc */; }; - D15D361217C386A700F40439 /* row_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06817C157CD00CA0FD2 /* row_posix.cc */; }; - D15D361317C386B100F40439 /* compare_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05117C157CD00CA0FD2 /* 
compare_posix.cc */; }; - D15D361517C386B300F40439 /* compare_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05117C157CD00CA0FD2 /* compare_posix.cc */; }; - D15D361617C386B400F40439 /* compare_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05117C157CD00CA0FD2 /* compare_posix.cc */; }; - D16775AB155C501D0050EC64 /* TheoraAsync.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D167759E155C501D0050EC64 /* TheoraAsync.cpp */; }; - D16775AC155C501D0050EC64 /* TheoraAsync.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D167759E155C501D0050EC64 /* TheoraAsync.cpp */; }; - D16775AD155C501D0050EC64 /* TheoraAudioInterface.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D167759F155C501D0050EC64 /* TheoraAudioInterface.cpp */; }; - D16775AE155C501D0050EC64 /* TheoraAudioInterface.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D167759F155C501D0050EC64 /* TheoraAudioInterface.cpp */; }; - D16775AF155C501D0050EC64 /* TheoraDataSource.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A0155C501D0050EC64 /* TheoraDataSource.cpp */; }; - D16775B0155C501D0050EC64 /* TheoraDataSource.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A0155C501D0050EC64 /* TheoraDataSource.cpp */; }; - D16775B1155C501D0050EC64 /* TheoraException.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A1155C501D0050EC64 /* TheoraException.cpp */; }; - D16775B2155C501D0050EC64 /* TheoraException.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A1155C501D0050EC64 /* TheoraException.cpp */; }; - D16775B3155C501D0050EC64 /* TheoraFrameQueue.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A2155C501D0050EC64 /* TheoraFrameQueue.cpp */; }; - D16775B4155C501D0050EC64 /* TheoraFrameQueue.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A2155C501D0050EC64 /* TheoraFrameQueue.cpp */; }; - D16775B5155C501D0050EC64 /* TheoraTimer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A3155C501D0050EC64 /* TheoraTimer.cpp */; }; - 
D16775B6155C501D0050EC64 /* TheoraTimer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A3155C501D0050EC64 /* TheoraTimer.cpp */; }; - D16775B7155C501D0050EC64 /* TheoraUtil.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A4155C501D0050EC64 /* TheoraUtil.cpp */; }; - D16775B8155C501D0050EC64 /* TheoraUtil.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A4155C501D0050EC64 /* TheoraUtil.cpp */; }; - D16775B9155C501D0050EC64 /* TheoraVideoClip.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A5155C501D0050EC64 /* TheoraVideoClip.cpp */; }; - D16775BA155C501D0050EC64 /* TheoraVideoClip.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A5155C501D0050EC64 /* TheoraVideoClip.cpp */; }; - D16775BB155C501D0050EC64 /* TheoraVideoFrame.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A6155C501D0050EC64 /* TheoraVideoFrame.cpp */; }; - D16775BC155C501D0050EC64 /* TheoraVideoFrame.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A6155C501D0050EC64 /* TheoraVideoFrame.cpp */; }; - D16775BD155C501D0050EC64 /* TheoraVideoManager.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A7155C501D0050EC64 /* TheoraVideoManager.cpp */; }; - D16775BE155C501D0050EC64 /* TheoraVideoManager.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A7155C501D0050EC64 /* TheoraVideoManager.cpp */; }; - D16775BF155C501D0050EC64 /* TheoraWorkerThread.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A8155C501D0050EC64 /* TheoraWorkerThread.cpp */; }; - D16775C0155C501D0050EC64 /* TheoraWorkerThread.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A8155C501D0050EC64 /* TheoraWorkerThread.cpp */; }; - D16775CE155C50280050EC64 /* TheoraAsync.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C1155C50280050EC64 /* TheoraAsync.h */; settings = {ATTRIBUTES = (Public, ); }; }; - D16775CF155C50280050EC64 /* TheoraAsync.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C1155C50280050EC64 /* TheoraAsync.h */; }; - 
D16775D0155C50280050EC64 /* TheoraAudioInterface.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C2155C50280050EC64 /* TheoraAudioInterface.h */; settings = {ATTRIBUTES = (Public, ); }; }; - D16775D1155C50280050EC64 /* TheoraAudioInterface.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C2155C50280050EC64 /* TheoraAudioInterface.h */; }; - D16775D2155C50280050EC64 /* TheoraDataSource.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C3155C50280050EC64 /* TheoraDataSource.h */; settings = {ATTRIBUTES = (Public, ); }; }; - D16775D3155C50280050EC64 /* TheoraDataSource.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C3155C50280050EC64 /* TheoraDataSource.h */; }; - D16775D4155C50280050EC64 /* TheoraException.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C4155C50280050EC64 /* TheoraException.h */; settings = {ATTRIBUTES = (Public, ); }; }; - D16775D5155C50280050EC64 /* TheoraException.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C4155C50280050EC64 /* TheoraException.h */; }; - D16775D6155C50280050EC64 /* TheoraExport.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C5155C50280050EC64 /* TheoraExport.h */; settings = {ATTRIBUTES = (Public, ); }; }; - D16775D7155C50280050EC64 /* TheoraExport.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C5155C50280050EC64 /* TheoraExport.h */; }; - D16775D8155C50280050EC64 /* TheoraFrameQueue.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C6155C50280050EC64 /* TheoraFrameQueue.h */; settings = {ATTRIBUTES = (Public, ); }; }; - D16775D9155C50280050EC64 /* TheoraFrameQueue.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C6155C50280050EC64 /* TheoraFrameQueue.h */; }; - D16775DA155C50280050EC64 /* TheoraPlayer.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C7155C50280050EC64 /* TheoraPlayer.h */; settings = {ATTRIBUTES = (Public, ); }; }; - D16775DB155C50280050EC64 /* TheoraPlayer.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C7155C50280050EC64 /* 
TheoraPlayer.h */; }; - D16775DC155C50280050EC64 /* TheoraTimer.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C8155C50280050EC64 /* TheoraTimer.h */; settings = {ATTRIBUTES = (Public, ); }; }; - D16775DD155C50280050EC64 /* TheoraTimer.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C8155C50280050EC64 /* TheoraTimer.h */; }; - D16775DE155C50280050EC64 /* TheoraUtil.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C9155C50280050EC64 /* TheoraUtil.h */; settings = {ATTRIBUTES = (Public, ); }; }; - D16775DF155C50280050EC64 /* TheoraUtil.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C9155C50280050EC64 /* TheoraUtil.h */; }; - D16775E0155C50280050EC64 /* TheoraVideoClip.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CA155C50280050EC64 /* TheoraVideoClip.h */; settings = {ATTRIBUTES = (Public, ); }; }; - D16775E1155C50280050EC64 /* TheoraVideoClip.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CA155C50280050EC64 /* TheoraVideoClip.h */; }; - D16775E2155C50280050EC64 /* TheoraVideoFrame.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CB155C50280050EC64 /* TheoraVideoFrame.h */; settings = {ATTRIBUTES = (Public, ); }; }; - D16775E3155C50280050EC64 /* TheoraVideoFrame.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CB155C50280050EC64 /* TheoraVideoFrame.h */; }; - D16775E4155C50280050EC64 /* TheoraVideoManager.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CC155C50280050EC64 /* TheoraVideoManager.h */; settings = {ATTRIBUTES = (Public, ); }; }; - D16775E5155C50280050EC64 /* TheoraVideoManager.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CC155C50280050EC64 /* TheoraVideoManager.h */; }; - D16775E6155C50280050EC64 /* TheoraWorkerThread.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CD155C50280050EC64 /* TheoraWorkerThread.h */; settings = {ATTRIBUTES = (Public, ); }; }; - D16775E7155C50280050EC64 /* TheoraWorkerThread.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CD155C50280050EC64 
/* TheoraWorkerThread.h */; }; - D198F952177A31FC002942E3 /* TheoraAsync.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D167759E155C501D0050EC64 /* TheoraAsync.cpp */; }; - D198F953177A31FC002942E3 /* TheoraAudioInterface.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D167759F155C501D0050EC64 /* TheoraAudioInterface.cpp */; }; - D198F954177A31FC002942E3 /* TheoraDataSource.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A0155C501D0050EC64 /* TheoraDataSource.cpp */; }; - D198F955177A31FC002942E3 /* TheoraException.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A1155C501D0050EC64 /* TheoraException.cpp */; }; - D198F956177A31FC002942E3 /* TheoraFrameQueue.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A2155C501D0050EC64 /* TheoraFrameQueue.cpp */; }; - D198F957177A31FC002942E3 /* TheoraTimer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A3155C501D0050EC64 /* TheoraTimer.cpp */; }; - D198F958177A31FC002942E3 /* TheoraUtil.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A4155C501D0050EC64 /* TheoraUtil.cpp */; }; - D198F959177A31FC002942E3 /* TheoraVideoClip.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A5155C501D0050EC64 /* TheoraVideoClip.cpp */; }; - D198F95A177A31FC002942E3 /* TheoraVideoFrame.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A6155C501D0050EC64 /* TheoraVideoFrame.cpp */; }; - D198F95B177A31FC002942E3 /* TheoraVideoManager.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A7155C501D0050EC64 /* TheoraVideoManager.cpp */; }; - D198F95C177A31FC002942E3 /* TheoraWorkerThread.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A8155C501D0050EC64 /* TheoraWorkerThread.cpp */; }; - D198F95D177A31FC002942E3 /* TheoraVideoClip_Theora.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D1CDFF941696D0F000609AB0 /* TheoraVideoClip_Theora.cpp */; }; - D198F95F177A31FC002942E3 /* yuv420_grey_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E2718A16B46F640046C00C /* 
yuv420_grey_c.c */; }; - D198F960177A31FC002942E3 /* yuv420_yuv_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E2718C16B46F640046C00C /* yuv420_yuv_c.c */; }; - D198F961177A31FC002942E3 /* yuv420_rgb_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E271AB16B470210046C00C /* yuv420_rgb_c.c */; }; - D198F962177A31FC002942E3 /* TheoraAudioPacketQueue.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D1D465D916C2D070007A45AA /* TheoraAudioPacketQueue.cpp */; }; - D198F965177A31FC002942E3 /* TheoraAsync.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C1155C50280050EC64 /* TheoraAsync.h */; }; - D198F966177A31FC002942E3 /* TheoraAudioInterface.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C2155C50280050EC64 /* TheoraAudioInterface.h */; }; - D198F967177A31FC002942E3 /* TheoraDataSource.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C3155C50280050EC64 /* TheoraDataSource.h */; }; - D198F968177A31FC002942E3 /* TheoraException.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C4155C50280050EC64 /* TheoraException.h */; }; - D198F969177A31FC002942E3 /* TheoraExport.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C5155C50280050EC64 /* TheoraExport.h */; }; - D198F96A177A31FC002942E3 /* TheoraFrameQueue.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C6155C50280050EC64 /* TheoraFrameQueue.h */; }; - D198F96B177A31FC002942E3 /* TheoraPlayer.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C7155C50280050EC64 /* TheoraPlayer.h */; }; - D198F96C177A31FC002942E3 /* TheoraTimer.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C8155C50280050EC64 /* TheoraTimer.h */; }; - D198F96D177A31FC002942E3 /* TheoraUtil.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C9155C50280050EC64 /* TheoraUtil.h */; }; - D198F96E177A31FC002942E3 /* TheoraVideoClip.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CA155C50280050EC64 /* TheoraVideoClip.h */; }; - D198F96F177A31FC002942E3 /* TheoraVideoFrame.h in Headers */ = {isa 
= PBXBuildFile; fileRef = D16775CB155C50280050EC64 /* TheoraVideoFrame.h */; }; - D198F970177A31FC002942E3 /* TheoraVideoManager.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CC155C50280050EC64 /* TheoraVideoManager.h */; }; - D198F971177A31FC002942E3 /* TheoraWorkerThread.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CD155C50280050EC64 /* TheoraWorkerThread.h */; }; - D198F972177A31FC002942E3 /* TheoraVideoClip_Theora.h in Headers */ = {isa = PBXBuildFile; fileRef = D1CDFF951696D0F000609AB0 /* TheoraVideoClip_Theora.h */; }; - D198F974177A31FC002942E3 /* TheoraPixelTransform.h in Headers */ = {isa = PBXBuildFile; fileRef = D1E271B216B471E80046C00C /* TheoraPixelTransform.h */; }; - D198F97E177A31FE002942E3 /* TheoraAsync.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D167759E155C501D0050EC64 /* TheoraAsync.cpp */; }; - D198F97F177A31FE002942E3 /* TheoraAudioInterface.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D167759F155C501D0050EC64 /* TheoraAudioInterface.cpp */; }; - D198F980177A31FE002942E3 /* TheoraDataSource.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A0155C501D0050EC64 /* TheoraDataSource.cpp */; }; - D198F981177A31FE002942E3 /* TheoraException.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A1155C501D0050EC64 /* TheoraException.cpp */; }; - D198F982177A31FE002942E3 /* TheoraFrameQueue.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A2155C501D0050EC64 /* TheoraFrameQueue.cpp */; }; - D198F983177A31FE002942E3 /* TheoraTimer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A3155C501D0050EC64 /* TheoraTimer.cpp */; }; - D198F984177A31FE002942E3 /* TheoraUtil.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A4155C501D0050EC64 /* TheoraUtil.cpp */; }; - D198F985177A31FE002942E3 /* TheoraVideoClip.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A5155C501D0050EC64 /* TheoraVideoClip.cpp */; }; - D198F986177A31FE002942E3 /* TheoraVideoFrame.cpp in Sources */ = {isa = PBXBuildFile; 
fileRef = D16775A6155C501D0050EC64 /* TheoraVideoFrame.cpp */; }; - D198F987177A31FE002942E3 /* TheoraVideoManager.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A7155C501D0050EC64 /* TheoraVideoManager.cpp */; }; - D198F988177A31FE002942E3 /* TheoraWorkerThread.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A8155C501D0050EC64 /* TheoraWorkerThread.cpp */; }; - D198F989177A31FE002942E3 /* TheoraVideoClip_AVFoundation.mm in Sources */ = {isa = PBXBuildFile; fileRef = D1CDFF9C1696D0FA00609AB0 /* TheoraVideoClip_AVFoundation.mm */; }; - D198F98B177A31FE002942E3 /* yuv420_grey_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E2718A16B46F640046C00C /* yuv420_grey_c.c */; }; - D198F98C177A31FE002942E3 /* yuv420_yuv_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E2718C16B46F640046C00C /* yuv420_yuv_c.c */; }; - D198F98D177A31FE002942E3 /* yuv420_rgb_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E271AB16B470210046C00C /* yuv420_rgb_c.c */; }; - D198F98E177A31FE002942E3 /* TheoraAudioPacketQueue.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D1D465D916C2D070007A45AA /* TheoraAudioPacketQueue.cpp */; }; - D198F991177A31FE002942E3 /* TheoraAsync.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C1155C50280050EC64 /* TheoraAsync.h */; }; - D198F992177A31FE002942E3 /* TheoraAudioInterface.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C2155C50280050EC64 /* TheoraAudioInterface.h */; }; - D198F993177A31FE002942E3 /* TheoraDataSource.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C3155C50280050EC64 /* TheoraDataSource.h */; }; - D198F994177A31FE002942E3 /* TheoraException.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C4155C50280050EC64 /* TheoraException.h */; }; - D198F995177A31FE002942E3 /* TheoraExport.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C5155C50280050EC64 /* TheoraExport.h */; }; - D198F996177A31FE002942E3 /* TheoraFrameQueue.h in Headers */ = {isa = PBXBuildFile; fileRef = 
D16775C6155C50280050EC64 /* TheoraFrameQueue.h */; }; - D198F997177A31FE002942E3 /* TheoraPlayer.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C7155C50280050EC64 /* TheoraPlayer.h */; }; - D198F998177A31FE002942E3 /* TheoraTimer.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C8155C50280050EC64 /* TheoraTimer.h */; }; - D198F999177A31FE002942E3 /* TheoraUtil.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C9155C50280050EC64 /* TheoraUtil.h */; }; - D198F99A177A31FE002942E3 /* TheoraVideoClip.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CA155C50280050EC64 /* TheoraVideoClip.h */; }; - D198F99B177A31FE002942E3 /* TheoraVideoFrame.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CB155C50280050EC64 /* TheoraVideoFrame.h */; }; - D198F99C177A31FE002942E3 /* TheoraVideoManager.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CC155C50280050EC64 /* TheoraVideoManager.h */; }; - D198F99D177A31FE002942E3 /* TheoraWorkerThread.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CD155C50280050EC64 /* TheoraWorkerThread.h */; }; - D198F99E177A31FE002942E3 /* TheoraVideoClip_Theora.h in Headers */ = {isa = PBXBuildFile; fileRef = D1CDFF951696D0F000609AB0 /* TheoraVideoClip_Theora.h */; }; - D198F9A0177A31FE002942E3 /* TheoraPixelTransform.h in Headers */ = {isa = PBXBuildFile; fileRef = D1E271B216B471E80046C00C /* TheoraPixelTransform.h */; }; - D198F9AA177A3200002942E3 /* TheoraAsync.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D167759E155C501D0050EC64 /* TheoraAsync.cpp */; }; - D198F9AB177A3200002942E3 /* TheoraAudioInterface.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D167759F155C501D0050EC64 /* TheoraAudioInterface.cpp */; }; - D198F9AC177A3200002942E3 /* TheoraDataSource.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A0155C501D0050EC64 /* TheoraDataSource.cpp */; }; - D198F9AD177A3200002942E3 /* TheoraException.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A1155C501D0050EC64 /* 
TheoraException.cpp */; }; - D198F9AE177A3200002942E3 /* TheoraFrameQueue.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A2155C501D0050EC64 /* TheoraFrameQueue.cpp */; }; - D198F9AF177A3200002942E3 /* TheoraTimer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A3155C501D0050EC64 /* TheoraTimer.cpp */; }; - D198F9B0177A3200002942E3 /* TheoraUtil.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A4155C501D0050EC64 /* TheoraUtil.cpp */; }; - D198F9B1177A3200002942E3 /* TheoraVideoClip.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A5155C501D0050EC64 /* TheoraVideoClip.cpp */; }; - D198F9B2177A3200002942E3 /* TheoraVideoFrame.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A6155C501D0050EC64 /* TheoraVideoFrame.cpp */; }; - D198F9B3177A3200002942E3 /* TheoraVideoManager.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A7155C501D0050EC64 /* TheoraVideoManager.cpp */; }; - D198F9B4177A3200002942E3 /* TheoraWorkerThread.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A8155C501D0050EC64 /* TheoraWorkerThread.cpp */; }; - D198F9B5177A3200002942E3 /* TheoraVideoClip_Theora.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D1CDFF941696D0F000609AB0 /* TheoraVideoClip_Theora.cpp */; }; - D198F9B6177A3200002942E3 /* TheoraVideoClip_AVFoundation.mm in Sources */ = {isa = PBXBuildFile; fileRef = D1CDFF9C1696D0FA00609AB0 /* TheoraVideoClip_AVFoundation.mm */; }; - D198F9B8177A3200002942E3 /* yuv420_grey_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E2718A16B46F640046C00C /* yuv420_grey_c.c */; }; - D198F9B9177A3200002942E3 /* yuv420_yuv_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E2718C16B46F640046C00C /* yuv420_yuv_c.c */; }; - D198F9BA177A3200002942E3 /* yuv420_rgb_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E271AB16B470210046C00C /* yuv420_rgb_c.c */; }; - D198F9BB177A3200002942E3 /* TheoraAudioPacketQueue.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D1D465D916C2D070007A45AA /* 
TheoraAudioPacketQueue.cpp */; }; - D198F9BE177A3200002942E3 /* TheoraAsync.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C1155C50280050EC64 /* TheoraAsync.h */; }; - D198F9BF177A3200002942E3 /* TheoraAudioInterface.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C2155C50280050EC64 /* TheoraAudioInterface.h */; }; - D198F9C0177A3200002942E3 /* TheoraDataSource.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C3155C50280050EC64 /* TheoraDataSource.h */; }; - D198F9C1177A3200002942E3 /* TheoraException.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C4155C50280050EC64 /* TheoraException.h */; }; - D198F9C2177A3200002942E3 /* TheoraExport.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C5155C50280050EC64 /* TheoraExport.h */; }; - D198F9C3177A3200002942E3 /* TheoraFrameQueue.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C6155C50280050EC64 /* TheoraFrameQueue.h */; }; - D198F9C4177A3200002942E3 /* TheoraPlayer.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C7155C50280050EC64 /* TheoraPlayer.h */; }; - D198F9C5177A3200002942E3 /* TheoraTimer.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C8155C50280050EC64 /* TheoraTimer.h */; }; - D198F9C6177A3200002942E3 /* TheoraUtil.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C9155C50280050EC64 /* TheoraUtil.h */; }; - D198F9C7177A3200002942E3 /* TheoraVideoClip.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CA155C50280050EC64 /* TheoraVideoClip.h */; }; - D198F9C8177A3200002942E3 /* TheoraVideoFrame.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CB155C50280050EC64 /* TheoraVideoFrame.h */; }; - D198F9C9177A3200002942E3 /* TheoraVideoManager.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CC155C50280050EC64 /* TheoraVideoManager.h */; }; - D198F9CA177A3200002942E3 /* TheoraWorkerThread.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CD155C50280050EC64 /* TheoraWorkerThread.h */; }; - D198F9CB177A3200002942E3 /* 
TheoraVideoClip_Theora.h in Headers */ = {isa = PBXBuildFile; fileRef = D1CDFF951696D0F000609AB0 /* TheoraVideoClip_Theora.h */; }; - D198F9CD177A3200002942E3 /* TheoraPixelTransform.h in Headers */ = {isa = PBXBuildFile; fileRef = D1E271B216B471E80046C00C /* TheoraPixelTransform.h */; }; - D1BCE05A18F3F7FE00C83470 /* scale_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1BCE05818F3F7FE00C83470 /* scale_common.cc */; }; - D1BCE05B18F3F7FE00C83470 /* scale_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1BCE05818F3F7FE00C83470 /* scale_common.cc */; }; - D1BCE05C18F3F7FE00C83470 /* scale_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1BCE05818F3F7FE00C83470 /* scale_common.cc */; }; - D1BCE05D18F3F7FE00C83470 /* scale_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1BCE05818F3F7FE00C83470 /* scale_common.cc */; }; - D1BCE05E18F3F7FE00C83470 /* scale_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1BCE05818F3F7FE00C83470 /* scale_common.cc */; }; - D1BCE05F18F3F7FE00C83470 /* scale_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1BCE05818F3F7FE00C83470 /* scale_common.cc */; }; - D1BCE06018F3F7FE00C83470 /* scale_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1BCE05818F3F7FE00C83470 /* scale_common.cc */; }; - D1BCE06118F3F7FE00C83470 /* scale_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1BCE05818F3F7FE00C83470 /* scale_common.cc */; }; - D1BCE06218F3F7FE00C83470 /* scale_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1BCE05818F3F7FE00C83470 /* scale_common.cc */; }; - D1BCE06318F3F7FE00C83470 /* scale_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1BCE05918F3F7FE00C83470 /* scale_posix.cc */; }; - D1BCE06418F3F7FE00C83470 /* scale_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1BCE05918F3F7FE00C83470 /* scale_posix.cc */; }; - D1BCE06518F3F7FE00C83470 /* scale_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1BCE05918F3F7FE00C83470 /* 
scale_posix.cc */; }; - D1BCE06618F3F7FE00C83470 /* scale_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1BCE05918F3F7FE00C83470 /* scale_posix.cc */; }; - D1BCE06718F3F7FE00C83470 /* scale_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1BCE05918F3F7FE00C83470 /* scale_posix.cc */; }; - D1BCE06818F3F7FE00C83470 /* scale_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1BCE05918F3F7FE00C83470 /* scale_posix.cc */; }; - D1BCE06918F3F7FE00C83470 /* scale_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1BCE05918F3F7FE00C83470 /* scale_posix.cc */; }; - D1BCE06A18F3F7FE00C83470 /* scale_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1BCE05918F3F7FE00C83470 /* scale_posix.cc */; }; - D1BCE06B18F3F7FE00C83470 /* scale_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1BCE05918F3F7FE00C83470 /* scale_posix.cc */; }; - D1C3D07217C157CD00CA0FD2 /* compare_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D04F17C157CD00CA0FD2 /* compare_common.cc */; }; - D1C3D07317C157CD00CA0FD2 /* compare_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D04F17C157CD00CA0FD2 /* compare_common.cc */; }; - D1C3D07417C157CD00CA0FD2 /* compare_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D04F17C157CD00CA0FD2 /* compare_common.cc */; }; - D1C3D07517C157CD00CA0FD2 /* compare_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D04F17C157CD00CA0FD2 /* compare_common.cc */; }; - D1C3D07617C157CD00CA0FD2 /* compare_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D04F17C157CD00CA0FD2 /* compare_common.cc */; }; - D1C3D07717C157CD00CA0FD2 /* compare_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D04F17C157CD00CA0FD2 /* compare_common.cc */; }; - D1C3D07817C157CD00CA0FD2 /* compare_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D04F17C157CD00CA0FD2 /* compare_common.cc */; }; - D1C3D07917C157CD00CA0FD2 /* compare_common.cc in Sources */ = {isa = PBXBuildFile; 
fileRef = D1C3D04F17C157CD00CA0FD2 /* compare_common.cc */; }; - D1C3D07A17C157CD00CA0FD2 /* compare_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D04F17C157CD00CA0FD2 /* compare_common.cc */; }; - D1C3D08117C157CD00CA0FD2 /* compare_neon.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05017C157CD00CA0FD2 /* compare_neon.cc */; }; - D1C3D08217C157CD00CA0FD2 /* compare_neon.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05017C157CD00CA0FD2 /* compare_neon.cc */; }; - D1C3D08317C157CD00CA0FD2 /* compare_neon.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05017C157CD00CA0FD2 /* compare_neon.cc */; }; - D1C3D08417C157CD00CA0FD2 /* compare_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05117C157CD00CA0FD2 /* compare_posix.cc */; }; - D1C3D08517C157CD00CA0FD2 /* compare_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05117C157CD00CA0FD2 /* compare_posix.cc */; }; - D1C3D08617C157CD00CA0FD2 /* compare_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05117C157CD00CA0FD2 /* compare_posix.cc */; }; - D1C3D08717C157CD00CA0FD2 /* compare_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05117C157CD00CA0FD2 /* compare_posix.cc */; }; - D1C3D08817C157CD00CA0FD2 /* compare_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05117C157CD00CA0FD2 /* compare_posix.cc */; }; - D1C3D08917C157CD00CA0FD2 /* compare_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05117C157CD00CA0FD2 /* compare_posix.cc */; }; - D1C3D09617C157CD00CA0FD2 /* compare.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05317C157CD00CA0FD2 /* compare.cc */; }; - D1C3D09717C157CD00CA0FD2 /* compare.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05317C157CD00CA0FD2 /* compare.cc */; }; - D1C3D09817C157CD00CA0FD2 /* compare.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05317C157CD00CA0FD2 /* compare.cc */; }; - D1C3D09917C157CD00CA0FD2 /* compare.cc in Sources */ = {isa = PBXBuildFile; 
fileRef = D1C3D05317C157CD00CA0FD2 /* compare.cc */; }; - D1C3D09A17C157CD00CA0FD2 /* compare.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05317C157CD00CA0FD2 /* compare.cc */; }; - D1C3D09B17C157CD00CA0FD2 /* compare.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05317C157CD00CA0FD2 /* compare.cc */; }; - D1C3D09C17C157CD00CA0FD2 /* compare.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05317C157CD00CA0FD2 /* compare.cc */; }; - D1C3D09D17C157CD00CA0FD2 /* compare.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05317C157CD00CA0FD2 /* compare.cc */; }; - D1C3D09E17C157CD00CA0FD2 /* compare.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05317C157CD00CA0FD2 /* compare.cc */; }; - D1C3D09F17C157CD00CA0FD2 /* convert_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05417C157CD00CA0FD2 /* convert_argb.cc */; }; - D1C3D0A017C157CD00CA0FD2 /* convert_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05417C157CD00CA0FD2 /* convert_argb.cc */; }; - D1C3D0A117C157CD00CA0FD2 /* convert_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05417C157CD00CA0FD2 /* convert_argb.cc */; }; - D1C3D0A217C157CD00CA0FD2 /* convert_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05417C157CD00CA0FD2 /* convert_argb.cc */; }; - D1C3D0A317C157CD00CA0FD2 /* convert_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05417C157CD00CA0FD2 /* convert_argb.cc */; }; - D1C3D0A417C157CD00CA0FD2 /* convert_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05417C157CD00CA0FD2 /* convert_argb.cc */; }; - D1C3D0A517C157CD00CA0FD2 /* convert_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05417C157CD00CA0FD2 /* convert_argb.cc */; }; - D1C3D0A617C157CD00CA0FD2 /* convert_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05417C157CD00CA0FD2 /* convert_argb.cc */; }; - D1C3D0A717C157CD00CA0FD2 /* convert_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05417C157CD00CA0FD2 /* 
convert_argb.cc */; }; - D1C3D0C317C157CD00CA0FD2 /* convert_to_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05817C157CD00CA0FD2 /* convert_to_argb.cc */; }; - D1C3D0C417C157CD00CA0FD2 /* convert_to_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05817C157CD00CA0FD2 /* convert_to_argb.cc */; }; - D1C3D0C517C157CD00CA0FD2 /* convert_to_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05817C157CD00CA0FD2 /* convert_to_argb.cc */; }; - D1C3D0C617C157CD00CA0FD2 /* convert_to_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05817C157CD00CA0FD2 /* convert_to_argb.cc */; }; - D1C3D0C717C157CD00CA0FD2 /* convert_to_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05817C157CD00CA0FD2 /* convert_to_argb.cc */; }; - D1C3D0C817C157CD00CA0FD2 /* convert_to_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05817C157CD00CA0FD2 /* convert_to_argb.cc */; }; - D1C3D0C917C157CD00CA0FD2 /* convert_to_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05817C157CD00CA0FD2 /* convert_to_argb.cc */; }; - D1C3D0CA17C157CD00CA0FD2 /* convert_to_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05817C157CD00CA0FD2 /* convert_to_argb.cc */; }; - D1C3D0CB17C157CD00CA0FD2 /* convert_to_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05817C157CD00CA0FD2 /* convert_to_argb.cc */; }; - D1C3D0CC17C157CD00CA0FD2 /* convert_to_i420.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05917C157CD00CA0FD2 /* convert_to_i420.cc */; }; - D1C3D0CD17C157CD00CA0FD2 /* convert_to_i420.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05917C157CD00CA0FD2 /* convert_to_i420.cc */; }; - D1C3D0CE17C157CD00CA0FD2 /* convert_to_i420.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05917C157CD00CA0FD2 /* convert_to_i420.cc */; }; - D1C3D0CF17C157CD00CA0FD2 /* convert_to_i420.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05917C157CD00CA0FD2 /* convert_to_i420.cc */; }; - D1C3D0D017C157CD00CA0FD2 
/* convert_to_i420.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05917C157CD00CA0FD2 /* convert_to_i420.cc */; }; - D1C3D0D117C157CD00CA0FD2 /* convert_to_i420.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05917C157CD00CA0FD2 /* convert_to_i420.cc */; }; - D1C3D0D217C157CD00CA0FD2 /* convert_to_i420.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05917C157CD00CA0FD2 /* convert_to_i420.cc */; }; - D1C3D0D317C157CD00CA0FD2 /* convert_to_i420.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05917C157CD00CA0FD2 /* convert_to_i420.cc */; }; - D1C3D0D417C157CD00CA0FD2 /* convert_to_i420.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05917C157CD00CA0FD2 /* convert_to_i420.cc */; }; - D1C3D0D517C157CD00CA0FD2 /* convert.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05A17C157CD00CA0FD2 /* convert.cc */; }; - D1C3D0D617C157CD00CA0FD2 /* convert.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05A17C157CD00CA0FD2 /* convert.cc */; }; - D1C3D0D717C157CD00CA0FD2 /* convert.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05A17C157CD00CA0FD2 /* convert.cc */; }; - D1C3D0D817C157CD00CA0FD2 /* convert.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05A17C157CD00CA0FD2 /* convert.cc */; }; - D1C3D0D917C157CD00CA0FD2 /* convert.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05A17C157CD00CA0FD2 /* convert.cc */; }; - D1C3D0DA17C157CD00CA0FD2 /* convert.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05A17C157CD00CA0FD2 /* convert.cc */; }; - D1C3D0DB17C157CD00CA0FD2 /* convert.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05A17C157CD00CA0FD2 /* convert.cc */; }; - D1C3D0DC17C157CD00CA0FD2 /* convert.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05A17C157CD00CA0FD2 /* convert.cc */; }; - D1C3D0DD17C157CD00CA0FD2 /* convert.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05A17C157CD00CA0FD2 /* convert.cc */; }; - D1C3D0DE17C157CD00CA0FD2 /* cpu_id.cc in Sources */ = {isa = 
PBXBuildFile; fileRef = D1C3D05B17C157CD00CA0FD2 /* cpu_id.cc */; }; - D1C3D0DF17C157CD00CA0FD2 /* cpu_id.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05B17C157CD00CA0FD2 /* cpu_id.cc */; }; - D1C3D0E017C157CD00CA0FD2 /* cpu_id.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05B17C157CD00CA0FD2 /* cpu_id.cc */; }; - D1C3D0E117C157CD00CA0FD2 /* cpu_id.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05B17C157CD00CA0FD2 /* cpu_id.cc */; }; - D1C3D0E217C157CD00CA0FD2 /* cpu_id.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05B17C157CD00CA0FD2 /* cpu_id.cc */; }; - D1C3D0E317C157CD00CA0FD2 /* cpu_id.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05B17C157CD00CA0FD2 /* cpu_id.cc */; }; - D1C3D0E417C157CD00CA0FD2 /* cpu_id.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05B17C157CD00CA0FD2 /* cpu_id.cc */; }; - D1C3D0E517C157CD00CA0FD2 /* cpu_id.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05B17C157CD00CA0FD2 /* cpu_id.cc */; }; - D1C3D0E617C157CD00CA0FD2 /* cpu_id.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05B17C157CD00CA0FD2 /* cpu_id.cc */; }; - D1C3D0E717C157CD00CA0FD2 /* format_conversion.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05C17C157CD00CA0FD2 /* format_conversion.cc */; }; - D1C3D0E817C157CD00CA0FD2 /* format_conversion.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05C17C157CD00CA0FD2 /* format_conversion.cc */; }; - D1C3D0E917C157CD00CA0FD2 /* format_conversion.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05C17C157CD00CA0FD2 /* format_conversion.cc */; }; - D1C3D0EA17C157CD00CA0FD2 /* format_conversion.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05C17C157CD00CA0FD2 /* format_conversion.cc */; }; - D1C3D0EB17C157CD00CA0FD2 /* format_conversion.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05C17C157CD00CA0FD2 /* format_conversion.cc */; }; - D1C3D0EC17C157CD00CA0FD2 /* format_conversion.cc in Sources */ = {isa = PBXBuildFile; fileRef = 
D1C3D05C17C157CD00CA0FD2 /* format_conversion.cc */; }; - D1C3D0ED17C157CD00CA0FD2 /* format_conversion.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05C17C157CD00CA0FD2 /* format_conversion.cc */; }; - D1C3D0EE17C157CD00CA0FD2 /* format_conversion.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05C17C157CD00CA0FD2 /* format_conversion.cc */; }; - D1C3D0EF17C157CD00CA0FD2 /* format_conversion.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05C17C157CD00CA0FD2 /* format_conversion.cc */; }; - D1C3D10217C157CD00CA0FD2 /* planar_functions.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05F17C157CD00CA0FD2 /* planar_functions.cc */; }; - D1C3D10317C157CD00CA0FD2 /* planar_functions.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05F17C157CD00CA0FD2 /* planar_functions.cc */; }; - D1C3D10417C157CD00CA0FD2 /* planar_functions.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05F17C157CD00CA0FD2 /* planar_functions.cc */; }; - D1C3D10517C157CD00CA0FD2 /* planar_functions.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05F17C157CD00CA0FD2 /* planar_functions.cc */; }; - D1C3D10617C157CD00CA0FD2 /* planar_functions.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05F17C157CD00CA0FD2 /* planar_functions.cc */; }; - D1C3D10717C157CD00CA0FD2 /* planar_functions.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05F17C157CD00CA0FD2 /* planar_functions.cc */; }; - D1C3D10817C157CD00CA0FD2 /* planar_functions.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05F17C157CD00CA0FD2 /* planar_functions.cc */; }; - D1C3D10917C157CD00CA0FD2 /* planar_functions.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05F17C157CD00CA0FD2 /* planar_functions.cc */; }; - D1C3D10A17C157CD00CA0FD2 /* planar_functions.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D05F17C157CD00CA0FD2 /* planar_functions.cc */; }; - D1C3D12317C157CD00CA0FD2 /* rotate_neon.cc in Sources */ = {isa = PBXBuildFile; fileRef = 
D1C3D06217C157CD00CA0FD2 /* rotate_neon.cc */; }; - D1C3D12417C157CD00CA0FD2 /* rotate_neon.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06217C157CD00CA0FD2 /* rotate_neon.cc */; }; - D1C3D12517C157CD00CA0FD2 /* rotate_neon.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06217C157CD00CA0FD2 /* rotate_neon.cc */; }; - D1C3D12617C157CD00CA0FD2 /* rotate.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06317C157CD00CA0FD2 /* rotate.cc */; }; - D1C3D12717C157CD00CA0FD2 /* rotate.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06317C157CD00CA0FD2 /* rotate.cc */; }; - D1C3D12817C157CD00CA0FD2 /* rotate.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06317C157CD00CA0FD2 /* rotate.cc */; }; - D1C3D12917C157CD00CA0FD2 /* rotate.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06317C157CD00CA0FD2 /* rotate.cc */; }; - D1C3D12A17C157CD00CA0FD2 /* rotate.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06317C157CD00CA0FD2 /* rotate.cc */; }; - D1C3D12B17C157CD00CA0FD2 /* rotate.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06317C157CD00CA0FD2 /* rotate.cc */; }; - D1C3D12C17C157CD00CA0FD2 /* rotate.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06317C157CD00CA0FD2 /* rotate.cc */; }; - D1C3D12D17C157CD00CA0FD2 /* rotate.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06317C157CD00CA0FD2 /* rotate.cc */; }; - D1C3D12E17C157CD00CA0FD2 /* rotate.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06317C157CD00CA0FD2 /* rotate.cc */; }; - D1C3D12F17C157CD00CA0FD2 /* row_any.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06417C157CD00CA0FD2 /* row_any.cc */; }; - D1C3D13017C157CD00CA0FD2 /* row_any.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06417C157CD00CA0FD2 /* row_any.cc */; }; - D1C3D13117C157CD00CA0FD2 /* row_any.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06417C157CD00CA0FD2 /* row_any.cc */; }; - D1C3D13217C157CD00CA0FD2 /* row_any.cc in Sources */ = {isa = 
PBXBuildFile; fileRef = D1C3D06417C157CD00CA0FD2 /* row_any.cc */; }; - D1C3D13317C157CD00CA0FD2 /* row_any.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06417C157CD00CA0FD2 /* row_any.cc */; }; - D1C3D13417C157CD00CA0FD2 /* row_any.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06417C157CD00CA0FD2 /* row_any.cc */; }; - D1C3D13517C157CD00CA0FD2 /* row_any.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06417C157CD00CA0FD2 /* row_any.cc */; }; - D1C3D13617C157CD00CA0FD2 /* row_any.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06417C157CD00CA0FD2 /* row_any.cc */; }; - D1C3D13717C157CD00CA0FD2 /* row_any.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06417C157CD00CA0FD2 /* row_any.cc */; }; - D1C3D13817C157CD00CA0FD2 /* row_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06517C157CD00CA0FD2 /* row_common.cc */; }; - D1C3D13917C157CD00CA0FD2 /* row_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06517C157CD00CA0FD2 /* row_common.cc */; }; - D1C3D13A17C157CD00CA0FD2 /* row_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06517C157CD00CA0FD2 /* row_common.cc */; }; - D1C3D13B17C157CD00CA0FD2 /* row_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06517C157CD00CA0FD2 /* row_common.cc */; }; - D1C3D13C17C157CD00CA0FD2 /* row_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06517C157CD00CA0FD2 /* row_common.cc */; }; - D1C3D13D17C157CD00CA0FD2 /* row_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06517C157CD00CA0FD2 /* row_common.cc */; }; - D1C3D13E17C157CD00CA0FD2 /* row_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06517C157CD00CA0FD2 /* row_common.cc */; }; - D1C3D13F17C157CD00CA0FD2 /* row_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06517C157CD00CA0FD2 /* row_common.cc */; }; - D1C3D14017C157CD00CA0FD2 /* row_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06517C157CD00CA0FD2 /* row_common.cc */; }; 
- D1C3D15017C157CD00CA0FD2 /* row_neon.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06717C157CD00CA0FD2 /* row_neon.cc */; }; - D1C3D15117C157CD00CA0FD2 /* row_neon.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06717C157CD00CA0FD2 /* row_neon.cc */; }; - D1C3D15217C157CD00CA0FD2 /* row_neon.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06717C157CD00CA0FD2 /* row_neon.cc */; }; - D1C3D15317C157CD00CA0FD2 /* row_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06817C157CD00CA0FD2 /* row_posix.cc */; }; - D1C3D15417C157CD00CA0FD2 /* row_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06817C157CD00CA0FD2 /* row_posix.cc */; }; - D1C3D15517C157CD00CA0FD2 /* row_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06817C157CD00CA0FD2 /* row_posix.cc */; }; - D1C3D15617C157CD00CA0FD2 /* row_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06817C157CD00CA0FD2 /* row_posix.cc */; }; - D1C3D15717C157CD00CA0FD2 /* row_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06817C157CD00CA0FD2 /* row_posix.cc */; }; - D1C3D15817C157CD00CA0FD2 /* row_posix.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06817C157CD00CA0FD2 /* row_posix.cc */; }; - D1C3D17417C157CD00CA0FD2 /* scale_argb_neon.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06B17C157CD00CA0FD2 /* scale_argb_neon.cc */; }; - D1C3D17517C157CD00CA0FD2 /* scale_argb_neon.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06B17C157CD00CA0FD2 /* scale_argb_neon.cc */; }; - D1C3D17617C157CD00CA0FD2 /* scale_argb_neon.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06B17C157CD00CA0FD2 /* scale_argb_neon.cc */; }; - D1C3D17717C157CD00CA0FD2 /* scale_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06C17C157CD00CA0FD2 /* scale_argb.cc */; }; - D1C3D17817C157CD00CA0FD2 /* scale_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06C17C157CD00CA0FD2 /* scale_argb.cc */; }; - D1C3D17917C157CD00CA0FD2 /* 
scale_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06C17C157CD00CA0FD2 /* scale_argb.cc */; }; - D1C3D17A17C157CD00CA0FD2 /* scale_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06C17C157CD00CA0FD2 /* scale_argb.cc */; }; - D1C3D17B17C157CD00CA0FD2 /* scale_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06C17C157CD00CA0FD2 /* scale_argb.cc */; }; - D1C3D17C17C157CD00CA0FD2 /* scale_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06C17C157CD00CA0FD2 /* scale_argb.cc */; }; - D1C3D17D17C157CD00CA0FD2 /* scale_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06C17C157CD00CA0FD2 /* scale_argb.cc */; }; - D1C3D17E17C157CD00CA0FD2 /* scale_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06C17C157CD00CA0FD2 /* scale_argb.cc */; }; - D1C3D17F17C157CD00CA0FD2 /* scale_argb.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06C17C157CD00CA0FD2 /* scale_argb.cc */; }; - D1C3D18F17C157CD00CA0FD2 /* scale_neon.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06E17C157CD00CA0FD2 /* scale_neon.cc */; }; - D1C3D19017C157CD00CA0FD2 /* scale_neon.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06E17C157CD00CA0FD2 /* scale_neon.cc */; }; - D1C3D19117C157CD00CA0FD2 /* scale_neon.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D06E17C157CD00CA0FD2 /* scale_neon.cc */; }; - D1C3D19B17C157CD00CA0FD2 /* video_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D07017C157CD00CA0FD2 /* video_common.cc */; }; - D1C3D19C17C157CD00CA0FD2 /* video_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D07017C157CD00CA0FD2 /* video_common.cc */; }; - D1C3D19D17C157CD00CA0FD2 /* video_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D07017C157CD00CA0FD2 /* video_common.cc */; }; - D1C3D19E17C157CD00CA0FD2 /* video_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D07017C157CD00CA0FD2 /* video_common.cc */; }; - D1C3D19F17C157CD00CA0FD2 /* video_common.cc in 
Sources */ = {isa = PBXBuildFile; fileRef = D1C3D07017C157CD00CA0FD2 /* video_common.cc */; }; - D1C3D1A017C157CD00CA0FD2 /* video_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D07017C157CD00CA0FD2 /* video_common.cc */; }; - D1C3D1A117C157CD00CA0FD2 /* video_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D07017C157CD00CA0FD2 /* video_common.cc */; }; - D1C3D1A217C157CD00CA0FD2 /* video_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D07017C157CD00CA0FD2 /* video_common.cc */; }; - D1C3D1A317C157CD00CA0FD2 /* video_common.cc in Sources */ = {isa = PBXBuildFile; fileRef = D1C3D07017C157CD00CA0FD2 /* video_common.cc */; }; - D1CD00001696FC0B00609AB0 /* Theora.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = D1CDFFFB1696FC0100609AB0 /* Theora.framework */; }; - D1CD00011696FC0B00609AB0 /* Vorbis.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = D1CDFFF91696FBF700609AB0 /* Vorbis.framework */; }; - D1CD00021696FC0B00609AB0 /* Ogg.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = D1CDFFF71696FBF400609AB0 /* Ogg.framework */; }; - D1CD00041696FF9400609AB0 /* CoreMedia.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = D1CD00031696FF9400609AB0 /* CoreMedia.framework */; }; - D1CD00051696FF9600609AB0 /* CoreMedia.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = D1CD00031696FF9400609AB0 /* CoreMedia.framework */; }; - D1CDFF241696C77A00609AB0 /* TheoraAsync.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D167759E155C501D0050EC64 /* TheoraAsync.cpp */; }; - D1CDFF251696C77A00609AB0 /* TheoraAudioInterface.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D167759F155C501D0050EC64 /* TheoraAudioInterface.cpp */; }; - D1CDFF261696C77A00609AB0 /* TheoraDataSource.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A0155C501D0050EC64 /* TheoraDataSource.cpp */; }; - D1CDFF271696C77A00609AB0 /* TheoraException.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 
D16775A1155C501D0050EC64 /* TheoraException.cpp */; }; - D1CDFF281696C77A00609AB0 /* TheoraFrameQueue.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A2155C501D0050EC64 /* TheoraFrameQueue.cpp */; }; - D1CDFF291696C77A00609AB0 /* TheoraTimer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A3155C501D0050EC64 /* TheoraTimer.cpp */; }; - D1CDFF2A1696C77A00609AB0 /* TheoraUtil.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A4155C501D0050EC64 /* TheoraUtil.cpp */; }; - D1CDFF2B1696C77A00609AB0 /* TheoraVideoClip.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A5155C501D0050EC64 /* TheoraVideoClip.cpp */; }; - D1CDFF2C1696C77A00609AB0 /* TheoraVideoFrame.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A6155C501D0050EC64 /* TheoraVideoFrame.cpp */; }; - D1CDFF2D1696C77A00609AB0 /* TheoraVideoManager.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A7155C501D0050EC64 /* TheoraVideoManager.cpp */; }; - D1CDFF2E1696C77A00609AB0 /* TheoraWorkerThread.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A8155C501D0050EC64 /* TheoraWorkerThread.cpp */; }; - D1CDFF341696C77A00609AB0 /* TheoraAsync.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C1155C50280050EC64 /* TheoraAsync.h */; settings = {ATTRIBUTES = (Public, ); }; }; - D1CDFF351696C77A00609AB0 /* TheoraAudioInterface.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C2155C50280050EC64 /* TheoraAudioInterface.h */; settings = {ATTRIBUTES = (Public, ); }; }; - D1CDFF361696C77A00609AB0 /* TheoraDataSource.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C3155C50280050EC64 /* TheoraDataSource.h */; settings = {ATTRIBUTES = (Public, ); }; }; - D1CDFF371696C77A00609AB0 /* TheoraException.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C4155C50280050EC64 /* TheoraException.h */; settings = {ATTRIBUTES = (Public, ); }; }; - D1CDFF381696C77A00609AB0 /* TheoraExport.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C5155C50280050EC64 /* 
TheoraExport.h */; settings = {ATTRIBUTES = (Public, ); }; }; - D1CDFF391696C77A00609AB0 /* TheoraFrameQueue.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C6155C50280050EC64 /* TheoraFrameQueue.h */; settings = {ATTRIBUTES = (Public, ); }; }; - D1CDFF3A1696C77A00609AB0 /* TheoraPlayer.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C7155C50280050EC64 /* TheoraPlayer.h */; settings = {ATTRIBUTES = (Public, ); }; }; - D1CDFF3B1696C77A00609AB0 /* TheoraTimer.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C8155C50280050EC64 /* TheoraTimer.h */; settings = {ATTRIBUTES = (Public, ); }; }; - D1CDFF3C1696C77A00609AB0 /* TheoraUtil.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C9155C50280050EC64 /* TheoraUtil.h */; settings = {ATTRIBUTES = (Public, ); }; }; - D1CDFF3D1696C77A00609AB0 /* TheoraVideoClip.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CA155C50280050EC64 /* TheoraVideoClip.h */; settings = {ATTRIBUTES = (Public, ); }; }; - D1CDFF3E1696C77A00609AB0 /* TheoraVideoFrame.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CB155C50280050EC64 /* TheoraVideoFrame.h */; settings = {ATTRIBUTES = (Public, ); }; }; - D1CDFF3F1696C77A00609AB0 /* TheoraVideoManager.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CC155C50280050EC64 /* TheoraVideoManager.h */; settings = {ATTRIBUTES = (Public, ); }; }; - D1CDFF401696C77A00609AB0 /* TheoraWorkerThread.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CD155C50280050EC64 /* TheoraWorkerThread.h */; settings = {ATTRIBUTES = (Public, ); }; }; - D1CDFF4C1696C79700609AB0 /* TheoraAsync.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D167759E155C501D0050EC64 /* TheoraAsync.cpp */; }; - D1CDFF4D1696C79700609AB0 /* TheoraAudioInterface.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D167759F155C501D0050EC64 /* TheoraAudioInterface.cpp */; }; - D1CDFF4E1696C79700609AB0 /* TheoraDataSource.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A0155C501D0050EC64 /* 
TheoraDataSource.cpp */; }; - D1CDFF4F1696C79700609AB0 /* TheoraException.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A1155C501D0050EC64 /* TheoraException.cpp */; }; - D1CDFF501696C79700609AB0 /* TheoraFrameQueue.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A2155C501D0050EC64 /* TheoraFrameQueue.cpp */; }; - D1CDFF511696C79700609AB0 /* TheoraTimer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A3155C501D0050EC64 /* TheoraTimer.cpp */; }; - D1CDFF521696C79700609AB0 /* TheoraUtil.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A4155C501D0050EC64 /* TheoraUtil.cpp */; }; - D1CDFF531696C79700609AB0 /* TheoraVideoClip.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A5155C501D0050EC64 /* TheoraVideoClip.cpp */; }; - D1CDFF541696C79700609AB0 /* TheoraVideoFrame.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A6155C501D0050EC64 /* TheoraVideoFrame.cpp */; }; - D1CDFF551696C79700609AB0 /* TheoraVideoManager.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A7155C501D0050EC64 /* TheoraVideoManager.cpp */; }; - D1CDFF561696C79700609AB0 /* TheoraWorkerThread.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A8155C501D0050EC64 /* TheoraWorkerThread.cpp */; }; - D1CDFF5C1696C79700609AB0 /* TheoraAsync.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C1155C50280050EC64 /* TheoraAsync.h */; settings = {ATTRIBUTES = (Public, ); }; }; - D1CDFF5D1696C79700609AB0 /* TheoraAudioInterface.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C2155C50280050EC64 /* TheoraAudioInterface.h */; settings = {ATTRIBUTES = (Public, ); }; }; - D1CDFF5E1696C79700609AB0 /* TheoraDataSource.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C3155C50280050EC64 /* TheoraDataSource.h */; settings = {ATTRIBUTES = (Public, ); }; }; - D1CDFF5F1696C79700609AB0 /* TheoraException.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C4155C50280050EC64 /* TheoraException.h */; settings = {ATTRIBUTES = (Public, ); }; }; - 
D1CDFF601696C79700609AB0 /* TheoraExport.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C5155C50280050EC64 /* TheoraExport.h */; settings = {ATTRIBUTES = (Public, ); }; }; - D1CDFF611696C79700609AB0 /* TheoraFrameQueue.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C6155C50280050EC64 /* TheoraFrameQueue.h */; settings = {ATTRIBUTES = (Public, ); }; }; - D1CDFF621696C79700609AB0 /* TheoraPlayer.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C7155C50280050EC64 /* TheoraPlayer.h */; settings = {ATTRIBUTES = (Public, ); }; }; - D1CDFF631696C79700609AB0 /* TheoraTimer.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C8155C50280050EC64 /* TheoraTimer.h */; settings = {ATTRIBUTES = (Public, ); }; }; - D1CDFF641696C79700609AB0 /* TheoraUtil.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C9155C50280050EC64 /* TheoraUtil.h */; settings = {ATTRIBUTES = (Public, ); }; }; - D1CDFF651696C79700609AB0 /* TheoraVideoClip.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CA155C50280050EC64 /* TheoraVideoClip.h */; settings = {ATTRIBUTES = (Public, ); }; }; - D1CDFF661696C79700609AB0 /* TheoraVideoFrame.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CB155C50280050EC64 /* TheoraVideoFrame.h */; settings = {ATTRIBUTES = (Public, ); }; }; - D1CDFF671696C79700609AB0 /* TheoraVideoManager.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CC155C50280050EC64 /* TheoraVideoManager.h */; settings = {ATTRIBUTES = (Public, ); }; }; - D1CDFF681696C79700609AB0 /* TheoraWorkerThread.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CD155C50280050EC64 /* TheoraWorkerThread.h */; settings = {ATTRIBUTES = (Public, ); }; }; - D1CDFF961696D0F000609AB0 /* TheoraVideoClip_Theora.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D1CDFF941696D0F000609AB0 /* TheoraVideoClip_Theora.cpp */; }; - D1CDFF971696D0F000609AB0 /* TheoraVideoClip_Theora.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D1CDFF941696D0F000609AB0 /* 
TheoraVideoClip_Theora.cpp */; }; - D1CDFF981696D0F000609AB0 /* TheoraVideoClip_Theora.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D1CDFF941696D0F000609AB0 /* TheoraVideoClip_Theora.cpp */; }; - D1CDFF991696D0F000609AB0 /* TheoraVideoClip_Theora.h in Headers */ = {isa = PBXBuildFile; fileRef = D1CDFF951696D0F000609AB0 /* TheoraVideoClip_Theora.h */; }; - D1CDFF9A1696D0F000609AB0 /* TheoraVideoClip_Theora.h in Headers */ = {isa = PBXBuildFile; fileRef = D1CDFF951696D0F000609AB0 /* TheoraVideoClip_Theora.h */; }; - D1CDFF9B1696D0F000609AB0 /* TheoraVideoClip_Theora.h in Headers */ = {isa = PBXBuildFile; fileRef = D1CDFF951696D0F000609AB0 /* TheoraVideoClip_Theora.h */; }; - D1CDFF9E1696D0FA00609AB0 /* TheoraVideoClip_AVFoundation.mm in Sources */ = {isa = PBXBuildFile; fileRef = D1CDFF9C1696D0FA00609AB0 /* TheoraVideoClip_AVFoundation.mm */; }; - D1CDFF9F1696D0FA00609AB0 /* TheoraVideoClip_AVFoundation.h in Headers */ = {isa = PBXBuildFile; fileRef = D1CDFF9D1696D0FA00609AB0 /* TheoraVideoClip_AVFoundation.h */; }; - D1CDFFA21696E1CA00609AB0 /* TheoraAsync.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D167759E155C501D0050EC64 /* TheoraAsync.cpp */; }; - D1CDFFA31696E1CA00609AB0 /* TheoraAudioInterface.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D167759F155C501D0050EC64 /* TheoraAudioInterface.cpp */; }; - D1CDFFA41696E1CA00609AB0 /* TheoraDataSource.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A0155C501D0050EC64 /* TheoraDataSource.cpp */; }; - D1CDFFA51696E1CA00609AB0 /* TheoraException.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A1155C501D0050EC64 /* TheoraException.cpp */; }; - D1CDFFA61696E1CA00609AB0 /* TheoraFrameQueue.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A2155C501D0050EC64 /* TheoraFrameQueue.cpp */; }; - D1CDFFA71696E1CA00609AB0 /* TheoraTimer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A3155C501D0050EC64 /* TheoraTimer.cpp */; }; - D1CDFFA81696E1CA00609AB0 /* TheoraUtil.cpp in 
Sources */ = {isa = PBXBuildFile; fileRef = D16775A4155C501D0050EC64 /* TheoraUtil.cpp */; }; - D1CDFFA91696E1CA00609AB0 /* TheoraVideoClip.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A5155C501D0050EC64 /* TheoraVideoClip.cpp */; }; - D1CDFFAA1696E1CA00609AB0 /* TheoraVideoFrame.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A6155C501D0050EC64 /* TheoraVideoFrame.cpp */; }; - D1CDFFAB1696E1CA00609AB0 /* TheoraVideoManager.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A7155C501D0050EC64 /* TheoraVideoManager.cpp */; }; - D1CDFFAC1696E1CA00609AB0 /* TheoraWorkerThread.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A8155C501D0050EC64 /* TheoraWorkerThread.cpp */; }; - D1CDFFB01696E1CA00609AB0 /* TheoraAsync.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C1155C50280050EC64 /* TheoraAsync.h */; }; - D1CDFFB11696E1CA00609AB0 /* TheoraAudioInterface.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C2155C50280050EC64 /* TheoraAudioInterface.h */; }; - D1CDFFB21696E1CA00609AB0 /* TheoraDataSource.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C3155C50280050EC64 /* TheoraDataSource.h */; }; - D1CDFFB31696E1CA00609AB0 /* TheoraException.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C4155C50280050EC64 /* TheoraException.h */; }; - D1CDFFB41696E1CA00609AB0 /* TheoraExport.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C5155C50280050EC64 /* TheoraExport.h */; }; - D1CDFFB51696E1CA00609AB0 /* TheoraFrameQueue.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C6155C50280050EC64 /* TheoraFrameQueue.h */; }; - D1CDFFB61696E1CA00609AB0 /* TheoraPlayer.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C7155C50280050EC64 /* TheoraPlayer.h */; }; - D1CDFFB71696E1CA00609AB0 /* TheoraTimer.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C8155C50280050EC64 /* TheoraTimer.h */; }; - D1CDFFB81696E1CA00609AB0 /* TheoraUtil.h in Headers */ = {isa = PBXBuildFile; fileRef = 
D16775C9155C50280050EC64 /* TheoraUtil.h */; }; - D1CDFFB91696E1CA00609AB0 /* TheoraVideoClip.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CA155C50280050EC64 /* TheoraVideoClip.h */; }; - D1CDFFBA1696E1CA00609AB0 /* TheoraVideoFrame.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CB155C50280050EC64 /* TheoraVideoFrame.h */; }; - D1CDFFBB1696E1CA00609AB0 /* TheoraVideoManager.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CC155C50280050EC64 /* TheoraVideoManager.h */; }; - D1CDFFBC1696E1CA00609AB0 /* TheoraWorkerThread.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CD155C50280050EC64 /* TheoraWorkerThread.h */; }; - D1CDFFBD1696E1CA00609AB0 /* TheoraVideoClip_Theora.h in Headers */ = {isa = PBXBuildFile; fileRef = D1CDFF951696D0F000609AB0 /* TheoraVideoClip_Theora.h */; }; - D1CDFFC71696E1D700609AB0 /* TheoraAsync.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D167759E155C501D0050EC64 /* TheoraAsync.cpp */; }; - D1CDFFC81696E1D700609AB0 /* TheoraAudioInterface.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D167759F155C501D0050EC64 /* TheoraAudioInterface.cpp */; }; - D1CDFFC91696E1D700609AB0 /* TheoraDataSource.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A0155C501D0050EC64 /* TheoraDataSource.cpp */; }; - D1CDFFCA1696E1D700609AB0 /* TheoraException.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A1155C501D0050EC64 /* TheoraException.cpp */; }; - D1CDFFCB1696E1D700609AB0 /* TheoraFrameQueue.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A2155C501D0050EC64 /* TheoraFrameQueue.cpp */; }; - D1CDFFCC1696E1D700609AB0 /* TheoraTimer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A3155C501D0050EC64 /* TheoraTimer.cpp */; }; - D1CDFFCD1696E1D700609AB0 /* TheoraUtil.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A4155C501D0050EC64 /* TheoraUtil.cpp */; }; - D1CDFFCE1696E1D700609AB0 /* TheoraVideoClip.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A5155C501D0050EC64 /* 
TheoraVideoClip.cpp */; }; - D1CDFFCF1696E1D700609AB0 /* TheoraVideoFrame.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A6155C501D0050EC64 /* TheoraVideoFrame.cpp */; }; - D1CDFFD01696E1D700609AB0 /* TheoraVideoManager.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A7155C501D0050EC64 /* TheoraVideoManager.cpp */; }; - D1CDFFD11696E1D700609AB0 /* TheoraWorkerThread.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D16775A8155C501D0050EC64 /* TheoraWorkerThread.cpp */; }; - D1CDFFD21696E1D700609AB0 /* TheoraVideoClip_Theora.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D1CDFF941696D0F000609AB0 /* TheoraVideoClip_Theora.cpp */; }; - D1CDFFD51696E1D700609AB0 /* TheoraAsync.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C1155C50280050EC64 /* TheoraAsync.h */; }; - D1CDFFD61696E1D700609AB0 /* TheoraAudioInterface.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C2155C50280050EC64 /* TheoraAudioInterface.h */; }; - D1CDFFD71696E1D700609AB0 /* TheoraDataSource.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C3155C50280050EC64 /* TheoraDataSource.h */; }; - D1CDFFD81696E1D700609AB0 /* TheoraException.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C4155C50280050EC64 /* TheoraException.h */; }; - D1CDFFD91696E1D700609AB0 /* TheoraExport.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C5155C50280050EC64 /* TheoraExport.h */; }; - D1CDFFDA1696E1D700609AB0 /* TheoraFrameQueue.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C6155C50280050EC64 /* TheoraFrameQueue.h */; }; - D1CDFFDB1696E1D700609AB0 /* TheoraPlayer.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C7155C50280050EC64 /* TheoraPlayer.h */; }; - D1CDFFDC1696E1D700609AB0 /* TheoraTimer.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C8155C50280050EC64 /* TheoraTimer.h */; }; - D1CDFFDD1696E1D700609AB0 /* TheoraUtil.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775C9155C50280050EC64 /* TheoraUtil.h */; }; - D1CDFFDE1696E1D700609AB0 
/* TheoraVideoClip.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CA155C50280050EC64 /* TheoraVideoClip.h */; }; - D1CDFFDF1696E1D700609AB0 /* TheoraVideoFrame.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CB155C50280050EC64 /* TheoraVideoFrame.h */; }; - D1CDFFE01696E1D700609AB0 /* TheoraVideoManager.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CC155C50280050EC64 /* TheoraVideoManager.h */; }; - D1CDFFE11696E1D700609AB0 /* TheoraWorkerThread.h in Headers */ = {isa = PBXBuildFile; fileRef = D16775CD155C50280050EC64 /* TheoraWorkerThread.h */; }; - D1CDFFE21696E1D700609AB0 /* TheoraVideoClip_Theora.h in Headers */ = {isa = PBXBuildFile; fileRef = D1CDFF951696D0F000609AB0 /* TheoraVideoClip_Theora.h */; }; - D1CDFFEA1696E24B00609AB0 /* TheoraVideoClip_AVFoundation.mm in Sources */ = {isa = PBXBuildFile; fileRef = D1CDFF9C1696D0FA00609AB0 /* TheoraVideoClip_AVFoundation.mm */; }; - D1CDFFEB1696E24C00609AB0 /* TheoraVideoClip_AVFoundation.mm in Sources */ = {isa = PBXBuildFile; fileRef = D1CDFF9C1696D0FA00609AB0 /* TheoraVideoClip_AVFoundation.mm */; }; - D1CDFFEC1696E24F00609AB0 /* TheoraVideoClip_AVFoundation.mm in Sources */ = {isa = PBXBuildFile; fileRef = D1CDFF9C1696D0FA00609AB0 /* TheoraVideoClip_AVFoundation.mm */; }; - D1CDFFEE1696FB7200609AB0 /* AVFoundation.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = D1CDFFED1696FB7200609AB0 /* AVFoundation.framework */; }; - D1CDFFF11696FB8900609AB0 /* CoreVideo.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = D1CDFFF01696FB8900609AB0 /* CoreVideo.framework */; }; - D1CDFFF31696FBA800609AB0 /* Foundation.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = D1CDFFF21696FBA800609AB0 /* Foundation.framework */; }; - D1CDFFF41696FBB200609AB0 /* CoreVideo.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = D1CDFFF01696FB8900609AB0 /* CoreVideo.framework */; }; - D1CDFFF51696FBB200609AB0 /* AVFoundation.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 
D1CDFFED1696FB7200609AB0 /* AVFoundation.framework */; }; - D1CDFFF61696FBB200609AB0 /* Foundation.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = D1CDFFF21696FBA800609AB0 /* Foundation.framework */; }; - D1CDFFFD1696FC0800609AB0 /* Theora.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = D1CDFFFB1696FC0100609AB0 /* Theora.framework */; }; - D1CDFFFE1696FC0800609AB0 /* Vorbis.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = D1CDFFF91696FBF700609AB0 /* Vorbis.framework */; }; - D1CDFFFF1696FC0800609AB0 /* Ogg.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = D1CDFFF71696FBF400609AB0 /* Ogg.framework */; }; - D1D465D616C2D063007A45AA /* TheoraAudioPacketQueue.h in Headers */ = {isa = PBXBuildFile; fileRef = D1D465D516C2D063007A45AA /* TheoraAudioPacketQueue.h */; settings = {ATTRIBUTES = (Public, ); }; }; - D1D465D716C2D063007A45AA /* TheoraAudioPacketQueue.h in Headers */ = {isa = PBXBuildFile; fileRef = D1D465D516C2D063007A45AA /* TheoraAudioPacketQueue.h */; settings = {ATTRIBUTES = (Public, ); }; }; - D1D465D816C2D063007A45AA /* TheoraAudioPacketQueue.h in Headers */ = {isa = PBXBuildFile; fileRef = D1D465D516C2D063007A45AA /* TheoraAudioPacketQueue.h */; settings = {ATTRIBUTES = (Public, ); }; }; - D1D465DA16C2D070007A45AA /* TheoraAudioPacketQueue.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D1D465D916C2D070007A45AA /* TheoraAudioPacketQueue.cpp */; }; - D1D465DB16C2D070007A45AA /* TheoraAudioPacketQueue.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D1D465D916C2D070007A45AA /* TheoraAudioPacketQueue.cpp */; }; - D1D465DC16C2D070007A45AA /* TheoraAudioPacketQueue.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D1D465D916C2D070007A45AA /* TheoraAudioPacketQueue.cpp */; }; - D1D465DD16C2D070007A45AA /* TheoraAudioPacketQueue.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D1D465D916C2D070007A45AA /* TheoraAudioPacketQueue.cpp */; }; - D1D465DE16C2D070007A45AA /* TheoraAudioPacketQueue.cpp in Sources */ = 
{isa = PBXBuildFile; fileRef = D1D465D916C2D070007A45AA /* TheoraAudioPacketQueue.cpp */; }; - D1D465DF16C2D070007A45AA /* TheoraAudioPacketQueue.cpp in Sources */ = {isa = PBXBuildFile; fileRef = D1D465D916C2D070007A45AA /* TheoraAudioPacketQueue.cpp */; }; - D1E2719916B46F640046C00C /* yuv420_grey_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E2718A16B46F640046C00C /* yuv420_grey_c.c */; }; - D1E2719A16B46F640046C00C /* yuv420_grey_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E2718A16B46F640046C00C /* yuv420_grey_c.c */; }; - D1E2719B16B46F640046C00C /* yuv420_grey_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E2718A16B46F640046C00C /* yuv420_grey_c.c */; }; - D1E2719C16B46F640046C00C /* yuv420_grey_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E2718A16B46F640046C00C /* yuv420_grey_c.c */; }; - D1E2719D16B46F640046C00C /* yuv420_grey_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E2718A16B46F640046C00C /* yuv420_grey_c.c */; }; - D1E2719E16B46F640046C00C /* yuv420_grey_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E2718A16B46F640046C00C /* yuv420_grey_c.c */; }; - D1E271A516B46F640046C00C /* yuv420_yuv_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E2718C16B46F640046C00C /* yuv420_yuv_c.c */; }; - D1E271A616B46F640046C00C /* yuv420_yuv_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E2718C16B46F640046C00C /* yuv420_yuv_c.c */; }; - D1E271A716B46F640046C00C /* yuv420_yuv_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E2718C16B46F640046C00C /* yuv420_yuv_c.c */; }; - D1E271A816B46F640046C00C /* yuv420_yuv_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E2718C16B46F640046C00C /* yuv420_yuv_c.c */; }; - D1E271A916B46F640046C00C /* yuv420_yuv_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E2718C16B46F640046C00C /* yuv420_yuv_c.c */; }; - D1E271AA16B46F640046C00C /* yuv420_yuv_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E2718C16B46F640046C00C /* yuv420_yuv_c.c */; }; - 
D1E271AC16B470210046C00C /* yuv420_rgb_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E271AB16B470210046C00C /* yuv420_rgb_c.c */; }; - D1E271AD16B470210046C00C /* yuv420_rgb_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E271AB16B470210046C00C /* yuv420_rgb_c.c */; }; - D1E271AE16B470210046C00C /* yuv420_rgb_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E271AB16B470210046C00C /* yuv420_rgb_c.c */; }; - D1E271AF16B470210046C00C /* yuv420_rgb_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E271AB16B470210046C00C /* yuv420_rgb_c.c */; }; - D1E271B016B470210046C00C /* yuv420_rgb_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E271AB16B470210046C00C /* yuv420_rgb_c.c */; }; - D1E271B116B470210046C00C /* yuv420_rgb_c.c in Sources */ = {isa = PBXBuildFile; fileRef = D1E271AB16B470210046C00C /* yuv420_rgb_c.c */; }; - D1E271B316B471E80046C00C /* TheoraPixelTransform.h in Headers */ = {isa = PBXBuildFile; fileRef = D1E271B216B471E80046C00C /* TheoraPixelTransform.h */; settings = {ATTRIBUTES = (Public, ); }; }; - D1E271B416B471E80046C00C /* TheoraPixelTransform.h in Headers */ = {isa = PBXBuildFile; fileRef = D1E271B216B471E80046C00C /* TheoraPixelTransform.h */; settings = {ATTRIBUTES = (Public, ); }; }; - D1E271B516B471E80046C00C /* TheoraPixelTransform.h in Headers */ = {isa = PBXBuildFile; fileRef = D1E271B216B471E80046C00C /* TheoraPixelTransform.h */; settings = {ATTRIBUTES = (Public, ); }; }; - D1E271B616B471E80046C00C /* TheoraPixelTransform.h in Headers */ = {isa = PBXBuildFile; fileRef = D1E271B216B471E80046C00C /* TheoraPixelTransform.h */; }; - D1E271B716B471E80046C00C /* TheoraPixelTransform.h in Headers */ = {isa = PBXBuildFile; fileRef = D1E271B216B471E80046C00C /* TheoraPixelTransform.h */; }; - D1E271B816B471E80046C00C /* TheoraPixelTransform.h in Headers */ = {isa = PBXBuildFile; fileRef = D1E271B216B471E80046C00C /* TheoraPixelTransform.h */; }; - D1F09EB1169AFEFB00DEEC63 /* TheoraVideoClip_AVFoundation.h in Headers */ = {isa 
= PBXBuildFile; fileRef = D1CDFF9D1696D0FA00609AB0 /* TheoraVideoClip_AVFoundation.h */; }; -/* End PBXBuildFile section */ - -/* Begin PBXFileReference section */ - D12CA55517734B4200412E5B /* TheoraVideoClip_FFmpeg.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = TheoraVideoClip_FFmpeg.cpp; path = src/FFmpeg/TheoraVideoClip_FFmpeg.cpp; sourceTree = "<group>"; }; - D12CA55617734B4200412E5B /* TheoraVideoClip_FFmpeg.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = TheoraVideoClip_FFmpeg.h; path = src/FFmpeg/TheoraVideoClip_FFmpeg.h; sourceTree = "<group>"; }; - D1358BC218D7777200A36FDC /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; }; - D1358BC318D7777800A36FDC /* iOS.xcconfig */ = {isa = PBXFileReference; lastKnownFileType = text.xcconfig; name = iOS.xcconfig; path = xcconfig/iOS.xcconfig; sourceTree = "<group>"; }; - D1358BC418D7777800A36FDC /* Mac.xcconfig */ = {isa = PBXFileReference; lastKnownFileType = text.xcconfig; name = Mac.xcconfig; path = xcconfig/Mac.xcconfig; sourceTree = "<group>"; }; - D139462B17C0ED450091F4A4 /* yuv_libyuv.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = yuv_libyuv.c; path = src/YUV/libyuv/yuv_libyuv.c; sourceTree = "<group>"; }; - D139462C17C0ED450091F4A4 /* yuv_libyuv.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = yuv_libyuv.h; path = src/YUV/libyuv/yuv_libyuv.h; sourceTree = "<group>"; }; - D13946CA17C119B30091F4A4 /* yuv_util.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = yuv_util.c; path = src/YUV/yuv_util.c; sourceTree = "<group>"; }; - D13946CB17C119B30091F4A4 /* yuv_util.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = yuv_util.h; path = src/YUV/yuv_util.h; sourceTree = "<group>"; }; - D1473F2A150CA69B00B20490 /* 
theoraplayer.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = theoraplayer.framework; sourceTree = BUILT_PRODUCTS_DIR; }; - D159BCAB17C227940030FAB6 /* compare_win.cc */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = compare_win.cc; path = src/YUV/libyuv/src/compare_win.cc; sourceTree = "<group>"; }; - D159BCAC17C227940030FAB6 /* row_win.cc */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = row_win.cc; path = src/YUV/libyuv/src/row_win.cc; sourceTree = "<group>"; }; - D159BCAD17C227940030FAB6 /* row_x86.asm */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.asm.asm; name = row_x86.asm; path = src/YUV/libyuv/src/row_x86.asm; sourceTree = "<group>"; }; - D159BCAE17C227940030FAB6 /* x86inc.asm */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.asm.asm; name = x86inc.asm; path = src/YUV/libyuv/src/x86inc.asm; sourceTree = "<group>"; }; - D167759E155C501D0050EC64 /* TheoraAsync.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = TheoraAsync.cpp; path = src/TheoraAsync.cpp; sourceTree = "<group>"; }; - D167759F155C501D0050EC64 /* TheoraAudioInterface.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = TheoraAudioInterface.cpp; path = src/TheoraAudioInterface.cpp; sourceTree = "<group>"; }; - D16775A0155C501D0050EC64 /* TheoraDataSource.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = TheoraDataSource.cpp; path = src/TheoraDataSource.cpp; sourceTree = "<group>"; }; - D16775A1155C501D0050EC64 /* TheoraException.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = TheoraException.cpp; path = src/TheoraException.cpp; sourceTree = "<group>"; }; - D16775A2155C501D0050EC64 /* TheoraFrameQueue.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = 
sourcecode.cpp.cpp; name = TheoraFrameQueue.cpp; path = src/TheoraFrameQueue.cpp; sourceTree = "<group>"; }; - D16775A3155C501D0050EC64 /* TheoraTimer.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = TheoraTimer.cpp; path = src/TheoraTimer.cpp; sourceTree = "<group>"; }; - D16775A4155C501D0050EC64 /* TheoraUtil.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = TheoraUtil.cpp; path = src/TheoraUtil.cpp; sourceTree = "<group>"; }; - D16775A5155C501D0050EC64 /* TheoraVideoClip.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = TheoraVideoClip.cpp; path = src/TheoraVideoClip.cpp; sourceTree = "<group>"; }; - D16775A6155C501D0050EC64 /* TheoraVideoFrame.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = TheoraVideoFrame.cpp; path = src/TheoraVideoFrame.cpp; sourceTree = "<group>"; }; - D16775A7155C501D0050EC64 /* TheoraVideoManager.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = TheoraVideoManager.cpp; path = src/TheoraVideoManager.cpp; sourceTree = "<group>"; }; - D16775A8155C501D0050EC64 /* TheoraWorkerThread.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = TheoraWorkerThread.cpp; path = src/TheoraWorkerThread.cpp; sourceTree = "<group>"; }; - D16775C1155C50280050EC64 /* TheoraAsync.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TheoraAsync.h; path = include/theoraplayer/TheoraAsync.h; sourceTree = "<group>"; }; - D16775C2155C50280050EC64 /* TheoraAudioInterface.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TheoraAudioInterface.h; path = include/theoraplayer/TheoraAudioInterface.h; sourceTree = "<group>"; }; - D16775C3155C50280050EC64 /* TheoraDataSource.h */ = {isa = PBXFileReference; 
fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TheoraDataSource.h; path = include/theoraplayer/TheoraDataSource.h; sourceTree = "<group>"; }; - D16775C4155C50280050EC64 /* TheoraException.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TheoraException.h; path = include/theoraplayer/TheoraException.h; sourceTree = "<group>"; }; - D16775C5155C50280050EC64 /* TheoraExport.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TheoraExport.h; path = include/theoraplayer/TheoraExport.h; sourceTree = "<group>"; }; - D16775C6155C50280050EC64 /* TheoraFrameQueue.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TheoraFrameQueue.h; path = include/theoraplayer/TheoraFrameQueue.h; sourceTree = "<group>"; }; - D16775C7155C50280050EC64 /* TheoraPlayer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TheoraPlayer.h; path = include/theoraplayer/TheoraPlayer.h; sourceTree = "<group>"; }; - D16775C8155C50280050EC64 /* TheoraTimer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TheoraTimer.h; path = include/theoraplayer/TheoraTimer.h; sourceTree = "<group>"; }; - D16775C9155C50280050EC64 /* TheoraUtil.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TheoraUtil.h; path = include/theoraplayer/TheoraUtil.h; sourceTree = "<group>"; }; - D16775CA155C50280050EC64 /* TheoraVideoClip.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TheoraVideoClip.h; path = include/theoraplayer/TheoraVideoClip.h; sourceTree = "<group>"; }; - D16775CB155C50280050EC64 /* TheoraVideoFrame.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TheoraVideoFrame.h; path = include/theoraplayer/TheoraVideoFrame.h; sourceTree = "<group>"; }; - D16775CC155C50280050EC64 
/* TheoraVideoManager.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TheoraVideoManager.h; path = include/theoraplayer/TheoraVideoManager.h; sourceTree = "<group>"; }; - D16775CD155C50280050EC64 /* TheoraWorkerThread.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TheoraWorkerThread.h; path = include/theoraplayer/TheoraWorkerThread.h; sourceTree = "<group>"; }; - D198F97B177A31FC002942E3 /* libtheoraplayer.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libtheoraplayer.a; sourceTree = BUILT_PRODUCTS_DIR; }; - D198F9A7177A31FE002942E3 /* libtheoraplayer_avfoundation.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libtheoraplayer_avfoundation.a; sourceTree = BUILT_PRODUCTS_DIR; }; - D198F9D4177A3200002942E3 /* libtheoraplayer_theora_avfoundation.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libtheoraplayer_theora_avfoundation.a; sourceTree = BUILT_PRODUCTS_DIR; }; - D1BB6FAE150E9E7100EF9400 /* libtheoraplayer.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libtheoraplayer.a; sourceTree = BUILT_PRODUCTS_DIR; }; - D1BCE05718F3F7D800C83470 /* scale_row.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = scale_row.h; path = src/YUV/libyuv/include/libyuv/scale_row.h; sourceTree = "<group>"; }; - D1BCE05818F3F7FE00C83470 /* scale_common.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = scale_common.cc; path = src/YUV/libyuv/src/scale_common.cc; sourceTree = "<group>"; }; - D1BCE05918F3F7FE00C83470 /* scale_posix.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = scale_posix.cc; path = src/YUV/libyuv/src/scale_posix.cc; sourceTree = "<group>"; }; - D1BCE06C18F3F80800C83470 /* scale_win.cc */ = {isa = 
PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = scale_win.cc; path = src/YUV/libyuv/src/scale_win.cc; sourceTree = "<group>"; }; - D1C3D04F17C157CD00CA0FD2 /* compare_common.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = compare_common.cc; path = src/YUV/libyuv/src/compare_common.cc; sourceTree = "<group>"; }; - D1C3D05017C157CD00CA0FD2 /* compare_neon.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = compare_neon.cc; path = src/YUV/libyuv/src/compare_neon.cc; sourceTree = "<group>"; }; - D1C3D05117C157CD00CA0FD2 /* compare_posix.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = compare_posix.cc; path = src/YUV/libyuv/src/compare_posix.cc; sourceTree = "<group>"; }; - D1C3D05317C157CD00CA0FD2 /* compare.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = compare.cc; path = src/YUV/libyuv/src/compare.cc; sourceTree = "<group>"; }; - D1C3D05417C157CD00CA0FD2 /* convert_argb.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = convert_argb.cc; path = src/YUV/libyuv/src/convert_argb.cc; sourceTree = "<group>"; }; - D1C3D05517C157CD00CA0FD2 /* convert_from_argb.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = convert_from_argb.cc; path = src/YUV/libyuv/src/convert_from_argb.cc; sourceTree = "<group>"; }; - D1C3D05617C157CD00CA0FD2 /* convert_from.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = convert_from.cc; path = src/YUV/libyuv/src/convert_from.cc; sourceTree = "<group>"; }; - D1C3D05717C157CD00CA0FD2 /* convert_jpeg.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = convert_jpeg.cc; path = src/YUV/libyuv/src/convert_jpeg.cc; sourceTree = "<group>"; }; - 
D1C3D05817C157CD00CA0FD2 /* convert_to_argb.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = convert_to_argb.cc; path = src/YUV/libyuv/src/convert_to_argb.cc; sourceTree = "<group>"; }; - D1C3D05917C157CD00CA0FD2 /* convert_to_i420.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = convert_to_i420.cc; path = src/YUV/libyuv/src/convert_to_i420.cc; sourceTree = "<group>"; }; - D1C3D05A17C157CD00CA0FD2 /* convert.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = convert.cc; path = src/YUV/libyuv/src/convert.cc; sourceTree = "<group>"; }; - D1C3D05B17C157CD00CA0FD2 /* cpu_id.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = cpu_id.cc; path = src/YUV/libyuv/src/cpu_id.cc; sourceTree = "<group>"; }; - D1C3D05C17C157CD00CA0FD2 /* format_conversion.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = format_conversion.cc; path = src/YUV/libyuv/src/format_conversion.cc; sourceTree = "<group>"; }; - D1C3D05D17C157CD00CA0FD2 /* mjpeg_decoder.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = mjpeg_decoder.cc; path = src/YUV/libyuv/src/mjpeg_decoder.cc; sourceTree = "<group>"; }; - D1C3D05E17C157CD00CA0FD2 /* mjpeg_validate.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = mjpeg_validate.cc; path = src/YUV/libyuv/src/mjpeg_validate.cc; sourceTree = "<group>"; }; - D1C3D05F17C157CD00CA0FD2 /* planar_functions.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = planar_functions.cc; path = src/YUV/libyuv/src/planar_functions.cc; sourceTree = "<group>"; }; - D1C3D06017C157CD00CA0FD2 /* rotate_argb.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = rotate_argb.cc; 
path = src/YUV/libyuv/src/rotate_argb.cc; sourceTree = "<group>"; }; - D1C3D06117C157CD00CA0FD2 /* rotate_mips.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = rotate_mips.cc; path = src/YUV/libyuv/src/rotate_mips.cc; sourceTree = "<group>"; }; - D1C3D06217C157CD00CA0FD2 /* rotate_neon.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = rotate_neon.cc; path = src/YUV/libyuv/src/rotate_neon.cc; sourceTree = "<group>"; }; - D1C3D06317C157CD00CA0FD2 /* rotate.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = rotate.cc; path = src/YUV/libyuv/src/rotate.cc; sourceTree = "<group>"; }; - D1C3D06417C157CD00CA0FD2 /* row_any.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = row_any.cc; path = src/YUV/libyuv/src/row_any.cc; sourceTree = "<group>"; }; - D1C3D06517C157CD00CA0FD2 /* row_common.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = row_common.cc; path = src/YUV/libyuv/src/row_common.cc; sourceTree = "<group>"; }; - D1C3D06617C157CD00CA0FD2 /* row_mips.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = row_mips.cc; path = src/YUV/libyuv/src/row_mips.cc; sourceTree = "<group>"; }; - D1C3D06717C157CD00CA0FD2 /* row_neon.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = row_neon.cc; path = src/YUV/libyuv/src/row_neon.cc; sourceTree = "<group>"; }; - D1C3D06817C157CD00CA0FD2 /* row_posix.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = row_posix.cc; path = src/YUV/libyuv/src/row_posix.cc; sourceTree = "<group>"; }; - D1C3D06B17C157CD00CA0FD2 /* scale_argb_neon.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = scale_argb_neon.cc; path = 
src/YUV/libyuv/src/scale_argb_neon.cc; sourceTree = "<group>"; }; - D1C3D06C17C157CD00CA0FD2 /* scale_argb.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = scale_argb.cc; path = src/YUV/libyuv/src/scale_argb.cc; sourceTree = "<group>"; }; - D1C3D06D17C157CD00CA0FD2 /* scale_mips.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = scale_mips.cc; path = src/YUV/libyuv/src/scale_mips.cc; sourceTree = "<group>"; }; - D1C3D06E17C157CD00CA0FD2 /* scale_neon.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = scale_neon.cc; path = src/YUV/libyuv/src/scale_neon.cc; sourceTree = "<group>"; }; - D1C3D06F17C157CD00CA0FD2 /* scale.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = scale.cc; path = src/YUV/libyuv/src/scale.cc; sourceTree = "<group>"; }; - D1C3D07017C157CD00CA0FD2 /* video_common.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = video_common.cc; path = src/YUV/libyuv/src/video_common.cc; sourceTree = "<group>"; }; - D1C3D1CE17C15BB400CA0FD2 /* libyuv.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = libyuv.h; path = src/YUV/libyuv/include/libyuv.h; sourceTree = "<group>"; }; - D1C3D1CF17C15BC100CA0FD2 /* basic_types.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = basic_types.h; path = src/YUV/libyuv/include/libyuv/basic_types.h; sourceTree = "<group>"; }; - D1C3D1D017C15BC100CA0FD2 /* compare.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = compare.h; path = src/YUV/libyuv/include/libyuv/compare.h; sourceTree = "<group>"; }; - D1C3D1D117C15BC100CA0FD2 /* convert_argb.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = convert_argb.h; path = src/YUV/libyuv/include/libyuv/convert_argb.h; sourceTree = "<group>"; }; - 
D1C3D1D217C15BC100CA0FD2 /* convert_from_argb.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = convert_from_argb.h; path = src/YUV/libyuv/include/libyuv/convert_from_argb.h; sourceTree = "<group>"; }; - D1C3D1D317C15BC100CA0FD2 /* convert_from.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = convert_from.h; path = src/YUV/libyuv/include/libyuv/convert_from.h; sourceTree = "<group>"; }; - D1C3D1D417C15BC100CA0FD2 /* convert.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = convert.h; path = src/YUV/libyuv/include/libyuv/convert.h; sourceTree = "<group>"; }; - D1C3D1D517C15BC100CA0FD2 /* cpu_id.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = cpu_id.h; path = src/YUV/libyuv/include/libyuv/cpu_id.h; sourceTree = "<group>"; }; - D1C3D1D617C15BC100CA0FD2 /* format_conversion.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = format_conversion.h; path = src/YUV/libyuv/include/libyuv/format_conversion.h; sourceTree = "<group>"; }; - D1C3D1D717C15BC100CA0FD2 /* mjpeg_decoder.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = mjpeg_decoder.h; path = src/YUV/libyuv/include/libyuv/mjpeg_decoder.h; sourceTree = "<group>"; }; - D1C3D1D817C15BC100CA0FD2 /* planar_functions.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = planar_functions.h; path = src/YUV/libyuv/include/libyuv/planar_functions.h; sourceTree = "<group>"; }; - D1C3D1D917C15BC100CA0FD2 /* rotate_argb.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = rotate_argb.h; path = src/YUV/libyuv/include/libyuv/rotate_argb.h; sourceTree = "<group>"; }; - D1C3D1DA17C15BC100CA0FD2 /* rotate.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = rotate.h; path = src/YUV/libyuv/include/libyuv/rotate.h; sourceTree = "<group>"; }; - D1C3D1DB17C15BC100CA0FD2 /* row.h */ = {isa = PBXFileReference; lastKnownFileType 
= sourcecode.c.h; name = row.h; path = src/YUV/libyuv/include/libyuv/row.h; sourceTree = "<group>"; }; - D1C3D1DC17C15BC100CA0FD2 /* scale_argb.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = scale_argb.h; path = src/YUV/libyuv/include/libyuv/scale_argb.h; sourceTree = "<group>"; }; - D1C3D1DD17C15BC100CA0FD2 /* scale.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = scale.h; path = src/YUV/libyuv/include/libyuv/scale.h; sourceTree = "<group>"; }; - D1C3D1DE17C15BC100CA0FD2 /* version.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = version.h; path = src/YUV/libyuv/include/libyuv/version.h; sourceTree = "<group>"; }; - D1C3D1DF17C15BC100CA0FD2 /* video_common.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = video_common.h; path = src/YUV/libyuv/include/libyuv/video_common.h; sourceTree = "<group>"; }; - D1CD00031696FF9400609AB0 /* CoreMedia.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = CoreMedia.framework; path = System/Library/Frameworks/CoreMedia.framework; sourceTree = SDKROOT; }; - D1CDFF481696C77A00609AB0 /* theoraplayer.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = theoraplayer.framework; sourceTree = BUILT_PRODUCTS_DIR; }; - D1CDFF701696C79700609AB0 /* theoraplayer.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = theoraplayer.framework; sourceTree = BUILT_PRODUCTS_DIR; }; - D1CDFF941696D0F000609AB0 /* TheoraVideoClip_Theora.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = TheoraVideoClip_Theora.cpp; path = src/Theora/TheoraVideoClip_Theora.cpp; sourceTree = "<group>"; }; - D1CDFF951696D0F000609AB0 /* TheoraVideoClip_Theora.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TheoraVideoClip_Theora.h; path = 
src/Theora/TheoraVideoClip_Theora.h; sourceTree = "<group>"; }; - D1CDFF9C1696D0FA00609AB0 /* TheoraVideoClip_AVFoundation.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; name = TheoraVideoClip_AVFoundation.mm; path = src/AVFoundation/TheoraVideoClip_AVFoundation.mm; sourceTree = "<group>"; }; - D1CDFF9D1696D0FA00609AB0 /* TheoraVideoClip_AVFoundation.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TheoraVideoClip_AVFoundation.h; path = src/AVFoundation/TheoraVideoClip_AVFoundation.h; sourceTree = "<group>"; }; - D1CDFFC41696E1CA00609AB0 /* libtheoraplayer.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libtheoraplayer.a; sourceTree = BUILT_PRODUCTS_DIR; }; - D1CDFFE91696E1D700609AB0 /* libtheoraplayer.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libtheoraplayer.a; sourceTree = BUILT_PRODUCTS_DIR; }; - D1CDFFED1696FB7200609AB0 /* AVFoundation.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = AVFoundation.framework; path = System/Library/Frameworks/AVFoundation.framework; sourceTree = SDKROOT; }; - D1CDFFF01696FB8900609AB0 /* CoreVideo.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = CoreVideo.framework; path = System/Library/Frameworks/CoreVideo.framework; sourceTree = SDKROOT; }; - D1CDFFF21696FBA800609AB0 /* Foundation.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Foundation.framework; path = System/Library/Frameworks/Foundation.framework; sourceTree = SDKROOT; }; - D1CDFFF71696FBF400609AB0 /* Ogg.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Ogg.framework; path = ../__build__/products/Debug/Ogg.framework; sourceTree = "<group>"; }; - D1CDFFF91696FBF700609AB0 /* Vorbis.framework */ = {isa = PBXFileReference; lastKnownFileType = 
wrapper.framework; name = Vorbis.framework; path = ../__build__/products/Debug/Vorbis.framework; sourceTree = "<group>"; }; - D1CDFFFB1696FC0100609AB0 /* Theora.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Theora.framework; path = ../__build__/products/Debug/Theora.framework; sourceTree = "<group>"; }; - D1D465D516C2D063007A45AA /* TheoraAudioPacketQueue.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TheoraAudioPacketQueue.h; path = include/theoraplayer/TheoraAudioPacketQueue.h; sourceTree = "<group>"; }; - D1D465D916C2D070007A45AA /* TheoraAudioPacketQueue.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = TheoraAudioPacketQueue.cpp; path = src/TheoraAudioPacketQueue.cpp; sourceTree = "<group>"; }; - D1E2718A16B46F640046C00C /* yuv420_grey_c.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = yuv420_grey_c.c; path = src/YUV/C/yuv420_grey_c.c; sourceTree = "<group>"; }; - D1E2718C16B46F640046C00C /* yuv420_yuv_c.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = yuv420_yuv_c.c; path = src/YUV/C/yuv420_yuv_c.c; sourceTree = "<group>"; }; - D1E271AB16B470210046C00C /* yuv420_rgb_c.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = yuv420_rgb_c.c; path = src/YUV/C/yuv420_rgb_c.c; sourceTree = "<group>"; }; - D1E271B216B471E80046C00C /* TheoraPixelTransform.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TheoraPixelTransform.h; path = include/theoraplayer/TheoraPixelTransform.h; sourceTree = "<group>"; }; - D1F4DA1D18FECACE007C1968 /* cpu-features.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; name = "cpu-features.c"; path = "src/YUV/android/cpu-features.c"; sourceTree = "<group>"; }; - D1F4DA1E18FECACE007C1968 /* cpu-features.h */ = {isa = 
PBXFileReference; lastKnownFileType = sourcecode.c.h; name = "cpu-features.h"; path = "src/YUV/android/cpu-features.h"; sourceTree = "<group>"; }; -/* End PBXFileReference section */ - -/* Begin PBXFrameworksBuildPhase section */ - D1473F26150CA69B00B20490 /* Frameworks */ = { - isa = PBXFrameworksBuildPhase; - buildActionMask = 2147483647; - files = ( - D1CD00001696FC0B00609AB0 /* Theora.framework in Frameworks */, - D1CD00011696FC0B00609AB0 /* Vorbis.framework in Frameworks */, - D1CD00021696FC0B00609AB0 /* Ogg.framework in Frameworks */, - ); - runOnlyForDeploymentPostprocessing = 0; - }; - D198F963177A31FC002942E3 /* Frameworks */ = { - isa = PBXFrameworksBuildPhase; - buildActionMask = 2147483647; - files = ( - ); - runOnlyForDeploymentPostprocessing = 0; - }; - D198F98F177A31FE002942E3 /* Frameworks */ = { - isa = PBXFrameworksBuildPhase; - buildActionMask = 2147483647; - files = ( - ); - runOnlyForDeploymentPostprocessing = 0; - }; - D198F9BC177A3200002942E3 /* Frameworks */ = { - isa = PBXFrameworksBuildPhase; - buildActionMask = 2147483647; - files = ( - ); - runOnlyForDeploymentPostprocessing = 0; - }; - D1BB6FAB150E9E7100EF9400 /* Frameworks */ = { - isa = PBXFrameworksBuildPhase; - buildActionMask = 2147483647; - files = ( - ); - runOnlyForDeploymentPostprocessing = 0; - }; - D1CDFF2F1696C77A00609AB0 /* Frameworks */ = { - isa = PBXFrameworksBuildPhase; - buildActionMask = 2147483647; - files = ( - D1CDFFF41696FBB200609AB0 /* CoreVideo.framework in Frameworks */, - D1CDFFF51696FBB200609AB0 /* AVFoundation.framework in Frameworks */, - D1CDFFF61696FBB200609AB0 /* Foundation.framework in Frameworks */, - D1CD00051696FF9600609AB0 /* CoreMedia.framework in Frameworks */, - ); - runOnlyForDeploymentPostprocessing = 0; - }; - D1CDFF571696C79700609AB0 /* Frameworks */ = { - isa = PBXFrameworksBuildPhase; - buildActionMask = 2147483647; - files = ( - D1CD00041696FF9400609AB0 /* CoreMedia.framework in Frameworks */, - D1CDFFF31696FBA800609AB0 /* 
Foundation.framework in Frameworks */, - D1CDFFF11696FB8900609AB0 /* CoreVideo.framework in Frameworks */, - D1CDFFEE1696FB7200609AB0 /* AVFoundation.framework in Frameworks */, - D1CDFFFD1696FC0800609AB0 /* Theora.framework in Frameworks */, - D1CDFFFE1696FC0800609AB0 /* Vorbis.framework in Frameworks */, - D1CDFFFF1696FC0800609AB0 /* Ogg.framework in Frameworks */, - ); - runOnlyForDeploymentPostprocessing = 0; - }; - D1CDFFAE1696E1CA00609AB0 /* Frameworks */ = { - isa = PBXFrameworksBuildPhase; - buildActionMask = 2147483647; - files = ( - ); - runOnlyForDeploymentPostprocessing = 0; - }; - D1CDFFD31696E1D700609AB0 /* Frameworks */ = { - isa = PBXFrameworksBuildPhase; - buildActionMask = 2147483647; - files = ( - ); - runOnlyForDeploymentPostprocessing = 0; - }; -/* End PBXFrameworksBuildPhase section */ - -/* Begin PBXGroup section */ - D12CA55417734B2400412E5B /* FFmpeg */ = { - isa = PBXGroup; - children = ( - D12CA55517734B4200412E5B /* TheoraVideoClip_FFmpeg.cpp */, - D12CA55617734B4200412E5B /* TheoraVideoClip_FFmpeg.h */, - ); - name = FFmpeg; - sourceTree = "<group>"; - }; - D1358BC118D7776700A36FDC /* config */ = { - isa = PBXGroup; - children = ( - D1358BC318D7777800A36FDC /* iOS.xcconfig */, - D1358BC418D7777800A36FDC /* Mac.xcconfig */, - D1358BC218D7777200A36FDC /* Info.plist */, - ); - name = config; - sourceTree = "<group>"; - }; - D139462A17C0ED2F0091F4A4 /* libyuv */ = { - isa = PBXGroup; - children = ( - D1C3D04E17C157AC00CA0FD2 /* include */, - D1C3D04D17C157A800CA0FD2 /* src */, - D139462B17C0ED450091F4A4 /* yuv_libyuv.c */, - D139462C17C0ED450091F4A4 /* yuv_libyuv.h */, - ); - name = libyuv; - sourceTree = "<group>"; - }; - D1473F1E150CA69B00B20490 = { - isa = PBXGroup; - children = ( - D1358BC118D7776700A36FDC /* config */, - D147401F150CAE9600B20490 /* include */, - D1473F42150CA6C000B20490 /* src */, - D1473F2C150CA69B00B20490 /* Frameworks */, - D1473F2B150CA69B00B20490 /* Products */, - ); - sourceTree = "<group>"; - }; - 
D1473F2B150CA69B00B20490 /* Products */ = { - isa = PBXGroup; - children = ( - D1473F2A150CA69B00B20490 /* theoraplayer.framework */, - D1BB6FAE150E9E7100EF9400 /* libtheoraplayer.a */, - D1CDFF481696C77A00609AB0 /* theoraplayer.framework */, - D1CDFF701696C79700609AB0 /* theoraplayer.framework */, - D1CDFFC41696E1CA00609AB0 /* libtheoraplayer.a */, - D1CDFFE91696E1D700609AB0 /* libtheoraplayer.a */, - D198F97B177A31FC002942E3 /* libtheoraplayer.a */, - D198F9A7177A31FE002942E3 /* libtheoraplayer_avfoundation.a */, - D198F9D4177A3200002942E3 /* libtheoraplayer_theora_avfoundation.a */, - ); - name = Products; - sourceTree = "<group>"; - }; - D1473F2C150CA69B00B20490 /* Frameworks */ = { - isa = PBXGroup; - children = ( - D1473F82150CA7F300B20490 /* mac */, - ); - name = Frameworks; - sourceTree = "<group>"; - }; - D1473F42150CA6C000B20490 /* src */ = { - isa = PBXGroup; - children = ( - D1E2718516B46F370046C00C /* YUV */, - D1CDFF921696CEFA00609AB0 /* Theora */, - D1CDFF931696CF0000609AB0 /* AVFoundation */, - D12CA55417734B2400412E5B /* FFmpeg */, - D16775A5155C501D0050EC64 /* TheoraVideoClip.cpp */, - D167759E155C501D0050EC64 /* TheoraAsync.cpp */, - D1D465D916C2D070007A45AA /* TheoraAudioPacketQueue.cpp */, - D167759F155C501D0050EC64 /* TheoraAudioInterface.cpp */, - D16775A0155C501D0050EC64 /* TheoraDataSource.cpp */, - D16775A1155C501D0050EC64 /* TheoraException.cpp */, - D16775A2155C501D0050EC64 /* TheoraFrameQueue.cpp */, - D16775A3155C501D0050EC64 /* TheoraTimer.cpp */, - D16775A4155C501D0050EC64 /* TheoraUtil.cpp */, - D16775A6155C501D0050EC64 /* TheoraVideoFrame.cpp */, - D16775A7155C501D0050EC64 /* TheoraVideoManager.cpp */, - D16775A8155C501D0050EC64 /* TheoraWorkerThread.cpp */, - ); - name = src; - sourceTree = "<group>"; - }; - D1473F82150CA7F300B20490 /* mac */ = { - isa = PBXGroup; - children = ( - D1CD00031696FF9400609AB0 /* CoreMedia.framework */, - D1CDFFFB1696FC0100609AB0 /* Theora.framework */, - D1CDFFF91696FBF700609AB0 /* Vorbis.framework 
*/, - D1CDFFF71696FBF400609AB0 /* Ogg.framework */, - D1CDFFF01696FB8900609AB0 /* CoreVideo.framework */, - D1CDFFED1696FB7200609AB0 /* AVFoundation.framework */, - D1CDFFF21696FBA800609AB0 /* Foundation.framework */, - ); - name = mac; - sourceTree = "<group>"; - }; - D147401F150CAE9600B20490 /* include */ = { - isa = PBXGroup; - children = ( - D16775C1155C50280050EC64 /* TheoraAsync.h */, - D16775C2155C50280050EC64 /* TheoraAudioInterface.h */, - D16775C3155C50280050EC64 /* TheoraDataSource.h */, - D16775C4155C50280050EC64 /* TheoraException.h */, - D16775C5155C50280050EC64 /* TheoraExport.h */, - D1E271B216B471E80046C00C /* TheoraPixelTransform.h */, - D16775CB155C50280050EC64 /* TheoraVideoFrame.h */, - D16775C6155C50280050EC64 /* TheoraFrameQueue.h */, - D16775C7155C50280050EC64 /* TheoraPlayer.h */, - D16775C8155C50280050EC64 /* TheoraTimer.h */, - D16775C9155C50280050EC64 /* TheoraUtil.h */, - D16775CA155C50280050EC64 /* TheoraVideoClip.h */, - D16775CC155C50280050EC64 /* TheoraVideoManager.h */, - D1D465D516C2D063007A45AA /* TheoraAudioPacketQueue.h */, - D16775CD155C50280050EC64 /* TheoraWorkerThread.h */, - ); - name = include; - sourceTree = "<group>"; - }; - D1C3D04D17C157A800CA0FD2 /* src */ = { - isa = PBXGroup; - children = ( - D1C3D04F17C157CD00CA0FD2 /* compare_common.cc */, - D1C3D05017C157CD00CA0FD2 /* compare_neon.cc */, - D1C3D05117C157CD00CA0FD2 /* compare_posix.cc */, - D159BCAB17C227940030FAB6 /* compare_win.cc */, - D1C3D05317C157CD00CA0FD2 /* compare.cc */, - D1C3D05417C157CD00CA0FD2 /* convert_argb.cc */, - D1C3D05517C157CD00CA0FD2 /* convert_from_argb.cc */, - D1C3D05617C157CD00CA0FD2 /* convert_from.cc */, - D1C3D05717C157CD00CA0FD2 /* convert_jpeg.cc */, - D1C3D05817C157CD00CA0FD2 /* convert_to_argb.cc */, - D1C3D05917C157CD00CA0FD2 /* convert_to_i420.cc */, - D1C3D05A17C157CD00CA0FD2 /* convert.cc */, - D1C3D05B17C157CD00CA0FD2 /* cpu_id.cc */, - D1C3D05C17C157CD00CA0FD2 /* format_conversion.cc */, - D1C3D05D17C157CD00CA0FD2 /* 
mjpeg_decoder.cc */, - D1C3D05E17C157CD00CA0FD2 /* mjpeg_validate.cc */, - D1C3D05F17C157CD00CA0FD2 /* planar_functions.cc */, - D1C3D06017C157CD00CA0FD2 /* rotate_argb.cc */, - D1C3D06117C157CD00CA0FD2 /* rotate_mips.cc */, - D1C3D06217C157CD00CA0FD2 /* rotate_neon.cc */, - D1C3D06317C157CD00CA0FD2 /* rotate.cc */, - D1C3D06417C157CD00CA0FD2 /* row_any.cc */, - D1C3D06517C157CD00CA0FD2 /* row_common.cc */, - D1C3D06617C157CD00CA0FD2 /* row_mips.cc */, - D1C3D06717C157CD00CA0FD2 /* row_neon.cc */, - D1C3D06817C157CD00CA0FD2 /* row_posix.cc */, - D159BCAC17C227940030FAB6 /* row_win.cc */, - D1C3D06B17C157CD00CA0FD2 /* scale_argb_neon.cc */, - D1C3D06C17C157CD00CA0FD2 /* scale_argb.cc */, - D1C3D06D17C157CD00CA0FD2 /* scale_mips.cc */, - D1C3D06E17C157CD00CA0FD2 /* scale_neon.cc */, - D1BCE06C18F3F80800C83470 /* scale_win.cc */, - D1C3D06F17C157CD00CA0FD2 /* scale.cc */, - D1BCE05818F3F7FE00C83470 /* scale_common.cc */, - D1BCE05918F3F7FE00C83470 /* scale_posix.cc */, - D1C3D07017C157CD00CA0FD2 /* video_common.cc */, - D159BCAD17C227940030FAB6 /* row_x86.asm */, - D159BCAE17C227940030FAB6 /* x86inc.asm */, - ); - name = src; - sourceTree = "<group>"; - }; - D1C3D04E17C157AC00CA0FD2 /* include */ = { - isa = PBXGroup; - children = ( - D1C3D1CD17C15BA900CA0FD2 /* libyuv */, - D1C3D1CE17C15BB400CA0FD2 /* libyuv.h */, - ); - name = include; - sourceTree = "<group>"; - }; - D1C3D1CD17C15BA900CA0FD2 /* libyuv */ = { - isa = PBXGroup; - children = ( - D1C3D1CF17C15BC100CA0FD2 /* basic_types.h */, - D1C3D1D017C15BC100CA0FD2 /* compare.h */, - D1C3D1D117C15BC100CA0FD2 /* convert_argb.h */, - D1C3D1D217C15BC100CA0FD2 /* convert_from_argb.h */, - D1C3D1D317C15BC100CA0FD2 /* convert_from.h */, - D1C3D1D417C15BC100CA0FD2 /* convert.h */, - D1C3D1D517C15BC100CA0FD2 /* cpu_id.h */, - D1C3D1D617C15BC100CA0FD2 /* format_conversion.h */, - D1C3D1D717C15BC100CA0FD2 /* mjpeg_decoder.h */, - D1C3D1D817C15BC100CA0FD2 /* planar_functions.h */, - D1C3D1D917C15BC100CA0FD2 /* rotate_argb.h 
*/, - D1C3D1DA17C15BC100CA0FD2 /* rotate.h */, - D1C3D1DB17C15BC100CA0FD2 /* row.h */, - D1C3D1DC17C15BC100CA0FD2 /* scale_argb.h */, - D1BCE05718F3F7D800C83470 /* scale_row.h */, - D1C3D1DD17C15BC100CA0FD2 /* scale.h */, - D1C3D1DE17C15BC100CA0FD2 /* version.h */, - D1C3D1DF17C15BC100CA0FD2 /* video_common.h */, - ); - name = libyuv; - sourceTree = "<group>"; - }; - D1CDFF921696CEFA00609AB0 /* Theora */ = { - isa = PBXGroup; - children = ( - D1CDFF951696D0F000609AB0 /* TheoraVideoClip_Theora.h */, - D1CDFF941696D0F000609AB0 /* TheoraVideoClip_Theora.cpp */, - ); - name = Theora; - sourceTree = "<group>"; - }; - D1CDFF931696CF0000609AB0 /* AVFoundation */ = { - isa = PBXGroup; - children = ( - D1CDFF9D1696D0FA00609AB0 /* TheoraVideoClip_AVFoundation.h */, - D1CDFF9C1696D0FA00609AB0 /* TheoraVideoClip_AVFoundation.mm */, - ); - name = AVFoundation; - sourceTree = "<group>"; - }; - D1E2718516B46F370046C00C /* YUV */ = { - isa = PBXGroup; - children = ( - D1F4DA1C18FECABC007C1968 /* android */, - D139462A17C0ED2F0091F4A4 /* libyuv */, - D1E2718716B46F4F0046C00C /* C */, - D13946CA17C119B30091F4A4 /* yuv_util.c */, - D13946CB17C119B30091F4A4 /* yuv_util.h */, - ); - name = YUV; - sourceTree = "<group>"; - }; - D1E2718716B46F4F0046C00C /* C */ = { - isa = PBXGroup; - children = ( - D1E271AB16B470210046C00C /* yuv420_rgb_c.c */, - D1E2718C16B46F640046C00C /* yuv420_yuv_c.c */, - D1E2718A16B46F640046C00C /* yuv420_grey_c.c */, - ); - name = C; - sourceTree = "<group>"; - }; - D1F4DA1C18FECABC007C1968 /* android */ = { - isa = PBXGroup; - children = ( - D1F4DA1D18FECACE007C1968 /* cpu-features.c */, - D1F4DA1E18FECACE007C1968 /* cpu-features.h */, - ); - name = android; - sourceTree = "<group>"; - }; -/* End PBXGroup section */ - -/* Begin PBXHeadersBuildPhase section */ - D1473F27150CA69B00B20490 /* Headers */ = { - isa = PBXHeadersBuildPhase; - buildActionMask = 2147483647; - files = ( - D16775CE155C50280050EC64 /* TheoraAsync.h in Headers */, - D16775D0155C50280050EC64 
/* TheoraAudioInterface.h in Headers */, - D16775D2155C50280050EC64 /* TheoraDataSource.h in Headers */, - D16775D4155C50280050EC64 /* TheoraException.h in Headers */, - D16775D6155C50280050EC64 /* TheoraExport.h in Headers */, - D16775D8155C50280050EC64 /* TheoraFrameQueue.h in Headers */, - D16775DA155C50280050EC64 /* TheoraPlayer.h in Headers */, - D16775DC155C50280050EC64 /* TheoraTimer.h in Headers */, - D16775DE155C50280050EC64 /* TheoraUtil.h in Headers */, - D16775E0155C50280050EC64 /* TheoraVideoClip.h in Headers */, - D16775E2155C50280050EC64 /* TheoraVideoFrame.h in Headers */, - D16775E4155C50280050EC64 /* TheoraVideoManager.h in Headers */, - D16775E6155C50280050EC64 /* TheoraWorkerThread.h in Headers */, - D1D465D616C2D063007A45AA /* TheoraAudioPacketQueue.h in Headers */, - D1E271B316B471E80046C00C /* TheoraPixelTransform.h in Headers */, - D1CDFF991696D0F000609AB0 /* TheoraVideoClip_Theora.h in Headers */, - D139463617C0ED450091F4A4 /* yuv_libyuv.h in Headers */, - D13946D517C119B40091F4A4 /* yuv_util.h in Headers */, - ); - runOnlyForDeploymentPostprocessing = 0; - }; - D198F964177A31FC002942E3 /* Headers */ = { - isa = PBXHeadersBuildPhase; - buildActionMask = 2147483647; - files = ( - D198F965177A31FC002942E3 /* TheoraAsync.h in Headers */, - D198F966177A31FC002942E3 /* TheoraAudioInterface.h in Headers */, - D198F967177A31FC002942E3 /* TheoraDataSource.h in Headers */, - D198F968177A31FC002942E3 /* TheoraException.h in Headers */, - D198F969177A31FC002942E3 /* TheoraExport.h in Headers */, - D198F96A177A31FC002942E3 /* TheoraFrameQueue.h in Headers */, - D198F96B177A31FC002942E3 /* TheoraPlayer.h in Headers */, - D198F96C177A31FC002942E3 /* TheoraTimer.h in Headers */, - D198F96D177A31FC002942E3 /* TheoraUtil.h in Headers */, - D198F96E177A31FC002942E3 /* TheoraVideoClip.h in Headers */, - D198F96F177A31FC002942E3 /* TheoraVideoFrame.h in Headers */, - D198F970177A31FC002942E3 /* TheoraVideoManager.h in Headers */, - D198F971177A31FC002942E3 /* 
TheoraWorkerThread.h in Headers */, - D198F972177A31FC002942E3 /* TheoraVideoClip_Theora.h in Headers */, - D198F974177A31FC002942E3 /* TheoraPixelTransform.h in Headers */, - D139463917C0ED450091F4A4 /* yuv_libyuv.h in Headers */, - D13946D817C119B40091F4A4 /* yuv_util.h in Headers */, - ); - runOnlyForDeploymentPostprocessing = 0; - }; - D198F990177A31FE002942E3 /* Headers */ = { - isa = PBXHeadersBuildPhase; - buildActionMask = 2147483647; - files = ( - D198F991177A31FE002942E3 /* TheoraAsync.h in Headers */, - D198F992177A31FE002942E3 /* TheoraAudioInterface.h in Headers */, - D198F993177A31FE002942E3 /* TheoraDataSource.h in Headers */, - D198F994177A31FE002942E3 /* TheoraException.h in Headers */, - D198F995177A31FE002942E3 /* TheoraExport.h in Headers */, - D198F996177A31FE002942E3 /* TheoraFrameQueue.h in Headers */, - D198F997177A31FE002942E3 /* TheoraPlayer.h in Headers */, - D198F998177A31FE002942E3 /* TheoraTimer.h in Headers */, - D198F999177A31FE002942E3 /* TheoraUtil.h in Headers */, - D198F99A177A31FE002942E3 /* TheoraVideoClip.h in Headers */, - D198F99B177A31FE002942E3 /* TheoraVideoFrame.h in Headers */, - D198F99C177A31FE002942E3 /* TheoraVideoManager.h in Headers */, - D198F99D177A31FE002942E3 /* TheoraWorkerThread.h in Headers */, - D198F99E177A31FE002942E3 /* TheoraVideoClip_Theora.h in Headers */, - D198F9A0177A31FE002942E3 /* TheoraPixelTransform.h in Headers */, - D139463A17C0ED450091F4A4 /* yuv_libyuv.h in Headers */, - D13946D917C119B40091F4A4 /* yuv_util.h in Headers */, - ); - runOnlyForDeploymentPostprocessing = 0; - }; - D198F9BD177A3200002942E3 /* Headers */ = { - isa = PBXHeadersBuildPhase; - buildActionMask = 2147483647; - files = ( - D198F9BE177A3200002942E3 /* TheoraAsync.h in Headers */, - D198F9BF177A3200002942E3 /* TheoraAudioInterface.h in Headers */, - D198F9C0177A3200002942E3 /* TheoraDataSource.h in Headers */, - D198F9C1177A3200002942E3 /* TheoraException.h in Headers */, - D198F9C2177A3200002942E3 /* TheoraExport.h in 
Headers */, - D198F9C3177A3200002942E3 /* TheoraFrameQueue.h in Headers */, - D198F9C4177A3200002942E3 /* TheoraPlayer.h in Headers */, - D198F9C5177A3200002942E3 /* TheoraTimer.h in Headers */, - D198F9C6177A3200002942E3 /* TheoraUtil.h in Headers */, - D198F9C7177A3200002942E3 /* TheoraVideoClip.h in Headers */, - D198F9C8177A3200002942E3 /* TheoraVideoFrame.h in Headers */, - D198F9C9177A3200002942E3 /* TheoraVideoManager.h in Headers */, - D198F9CA177A3200002942E3 /* TheoraWorkerThread.h in Headers */, - D198F9CB177A3200002942E3 /* TheoraVideoClip_Theora.h in Headers */, - D198F9CD177A3200002942E3 /* TheoraPixelTransform.h in Headers */, - D139463B17C0ED450091F4A4 /* yuv_libyuv.h in Headers */, - D13946DA17C119B40091F4A4 /* yuv_util.h in Headers */, - ); - runOnlyForDeploymentPostprocessing = 0; - }; - D1BB6FAC150E9E7100EF9400 /* Headers */ = { - isa = PBXHeadersBuildPhase; - buildActionMask = 2147483647; - files = ( - D16775CF155C50280050EC64 /* TheoraAsync.h in Headers */, - D16775D1155C50280050EC64 /* TheoraAudioInterface.h in Headers */, - D16775D3155C50280050EC64 /* TheoraDataSource.h in Headers */, - D16775D5155C50280050EC64 /* TheoraException.h in Headers */, - D16775D7155C50280050EC64 /* TheoraExport.h in Headers */, - D16775D9155C50280050EC64 /* TheoraFrameQueue.h in Headers */, - D16775DB155C50280050EC64 /* TheoraPlayer.h in Headers */, - D16775DD155C50280050EC64 /* TheoraTimer.h in Headers */, - D16775DF155C50280050EC64 /* TheoraUtil.h in Headers */, - D16775E1155C50280050EC64 /* TheoraVideoClip.h in Headers */, - D16775E3155C50280050EC64 /* TheoraVideoFrame.h in Headers */, - D16775E5155C50280050EC64 /* TheoraVideoManager.h in Headers */, - D16775E7155C50280050EC64 /* TheoraWorkerThread.h in Headers */, - D1CDFF9B1696D0F000609AB0 /* TheoraVideoClip_Theora.h in Headers */, - D1E271B616B471E80046C00C /* TheoraPixelTransform.h in Headers */, - D139463C17C0ED450091F4A4 /* yuv_libyuv.h in Headers */, - D13946DB17C119B40091F4A4 /* yuv_util.h in Headers 
*/, - ); - runOnlyForDeploymentPostprocessing = 0; - }; - D1CDFF331696C77A00609AB0 /* Headers */ = { - isa = PBXHeadersBuildPhase; - buildActionMask = 2147483647; - files = ( - D1CDFF341696C77A00609AB0 /* TheoraAsync.h in Headers */, - D1CDFF351696C77A00609AB0 /* TheoraAudioInterface.h in Headers */, - D1CDFF361696C77A00609AB0 /* TheoraDataSource.h in Headers */, - D1CDFF371696C77A00609AB0 /* TheoraException.h in Headers */, - D1CDFF381696C77A00609AB0 /* TheoraExport.h in Headers */, - D1CDFF391696C77A00609AB0 /* TheoraFrameQueue.h in Headers */, - D1CDFF3A1696C77A00609AB0 /* TheoraPlayer.h in Headers */, - D1CDFF3B1696C77A00609AB0 /* TheoraTimer.h in Headers */, - D1CDFF3C1696C77A00609AB0 /* TheoraUtil.h in Headers */, - D1CDFF3D1696C77A00609AB0 /* TheoraVideoClip.h in Headers */, - D1CDFF3E1696C77A00609AB0 /* TheoraVideoFrame.h in Headers */, - D1CDFF3F1696C77A00609AB0 /* TheoraVideoManager.h in Headers */, - D1CDFF401696C77A00609AB0 /* TheoraWorkerThread.h in Headers */, - D1E271B416B471E80046C00C /* TheoraPixelTransform.h in Headers */, - D1D465D716C2D063007A45AA /* TheoraAudioPacketQueue.h in Headers */, - D1CDFF9F1696D0FA00609AB0 /* TheoraVideoClip_AVFoundation.h in Headers */, - D139463717C0ED450091F4A4 /* yuv_libyuv.h in Headers */, - D13946D617C119B40091F4A4 /* yuv_util.h in Headers */, - ); - runOnlyForDeploymentPostprocessing = 0; - }; - D1CDFF5B1696C79700609AB0 /* Headers */ = { - isa = PBXHeadersBuildPhase; - buildActionMask = 2147483647; - files = ( - D1CDFF5C1696C79700609AB0 /* TheoraAsync.h in Headers */, - D1CDFF5D1696C79700609AB0 /* TheoraAudioInterface.h in Headers */, - D1CDFF5E1696C79700609AB0 /* TheoraDataSource.h in Headers */, - D1CDFF5F1696C79700609AB0 /* TheoraException.h in Headers */, - D1CDFF601696C79700609AB0 /* TheoraExport.h in Headers */, - D1CDFF611696C79700609AB0 /* TheoraFrameQueue.h in Headers */, - D1CDFF621696C79700609AB0 /* TheoraPlayer.h in Headers */, - D1CDFF631696C79700609AB0 /* TheoraTimer.h in Headers */, - 
D1CDFF641696C79700609AB0 /* TheoraUtil.h in Headers */, - D1CDFF651696C79700609AB0 /* TheoraVideoClip.h in Headers */, - D1CDFF661696C79700609AB0 /* TheoraVideoFrame.h in Headers */, - D1CDFF671696C79700609AB0 /* TheoraVideoManager.h in Headers */, - D1CDFF681696C79700609AB0 /* TheoraWorkerThread.h in Headers */, - D1E271B516B471E80046C00C /* TheoraPixelTransform.h in Headers */, - D1D465D816C2D063007A45AA /* TheoraAudioPacketQueue.h in Headers */, - D1CDFF9A1696D0F000609AB0 /* TheoraVideoClip_Theora.h in Headers */, - D1F09EB1169AFEFB00DEEC63 /* TheoraVideoClip_AVFoundation.h in Headers */, - D139463817C0ED450091F4A4 /* yuv_libyuv.h in Headers */, - D13946D717C119B40091F4A4 /* yuv_util.h in Headers */, - ); - runOnlyForDeploymentPostprocessing = 0; - }; - D1CDFFAF1696E1CA00609AB0 /* Headers */ = { - isa = PBXHeadersBuildPhase; - buildActionMask = 2147483647; - files = ( - D1CDFFB01696E1CA00609AB0 /* TheoraAsync.h in Headers */, - D1CDFFB11696E1CA00609AB0 /* TheoraAudioInterface.h in Headers */, - D1CDFFB21696E1CA00609AB0 /* TheoraDataSource.h in Headers */, - D1CDFFB31696E1CA00609AB0 /* TheoraException.h in Headers */, - D1CDFFB41696E1CA00609AB0 /* TheoraExport.h in Headers */, - D1CDFFB51696E1CA00609AB0 /* TheoraFrameQueue.h in Headers */, - D1CDFFB61696E1CA00609AB0 /* TheoraPlayer.h in Headers */, - D1CDFFB71696E1CA00609AB0 /* TheoraTimer.h in Headers */, - D1CDFFB81696E1CA00609AB0 /* TheoraUtil.h in Headers */, - D1CDFFB91696E1CA00609AB0 /* TheoraVideoClip.h in Headers */, - D1CDFFBA1696E1CA00609AB0 /* TheoraVideoFrame.h in Headers */, - D1CDFFBB1696E1CA00609AB0 /* TheoraVideoManager.h in Headers */, - D1CDFFBC1696E1CA00609AB0 /* TheoraWorkerThread.h in Headers */, - D1CDFFBD1696E1CA00609AB0 /* TheoraVideoClip_Theora.h in Headers */, - D1E271B716B471E80046C00C /* TheoraPixelTransform.h in Headers */, - D139463D17C0ED450091F4A4 /* yuv_libyuv.h in Headers */, - D13946DC17C119B40091F4A4 /* yuv_util.h in Headers */, - ); - runOnlyForDeploymentPostprocessing = 0; - 
}; - D1CDFFD41696E1D700609AB0 /* Headers */ = { - isa = PBXHeadersBuildPhase; - buildActionMask = 2147483647; - files = ( - D1CDFFD51696E1D700609AB0 /* TheoraAsync.h in Headers */, - D1CDFFD61696E1D700609AB0 /* TheoraAudioInterface.h in Headers */, - D1CDFFD71696E1D700609AB0 /* TheoraDataSource.h in Headers */, - D1CDFFD81696E1D700609AB0 /* TheoraException.h in Headers */, - D1CDFFD91696E1D700609AB0 /* TheoraExport.h in Headers */, - D1CDFFDA1696E1D700609AB0 /* TheoraFrameQueue.h in Headers */, - D1CDFFDB1696E1D700609AB0 /* TheoraPlayer.h in Headers */, - D1CDFFDC1696E1D700609AB0 /* TheoraTimer.h in Headers */, - D1CDFFDD1696E1D700609AB0 /* TheoraUtil.h in Headers */, - D1CDFFDE1696E1D700609AB0 /* TheoraVideoClip.h in Headers */, - D1CDFFDF1696E1D700609AB0 /* TheoraVideoFrame.h in Headers */, - D1CDFFE01696E1D700609AB0 /* TheoraVideoManager.h in Headers */, - D1CDFFE11696E1D700609AB0 /* TheoraWorkerThread.h in Headers */, - D1CDFFE21696E1D700609AB0 /* TheoraVideoClip_Theora.h in Headers */, - D1E271B816B471E80046C00C /* TheoraPixelTransform.h in Headers */, - D139463E17C0ED450091F4A4 /* yuv_libyuv.h in Headers */, - D13946DD17C119B40091F4A4 /* yuv_util.h in Headers */, - ); - runOnlyForDeploymentPostprocessing = 0; - }; -/* End PBXHeadersBuildPhase section */ - -/* Begin PBXNativeTarget section */ - D1473F29150CA69B00B20490 /* theoraplayer (Theora) */ = { - isa = PBXNativeTarget; - buildConfigurationList = D1473F3F150CA69B00B20490 /* Build configuration list for PBXNativeTarget "theoraplayer (Theora)" */; - buildPhases = ( - D1473F25150CA69B00B20490 /* Sources */, - D1473F26150CA69B00B20490 /* Frameworks */, - D1473F27150CA69B00B20490 /* Headers */, - ); - buildRules = ( - ); - dependencies = ( - ); - name = "theoraplayer (Theora)"; - productName = theoraplayer; - productReference = D1473F2A150CA69B00B20490 /* theoraplayer.framework */; - productType = "com.apple.product-type.framework"; - }; - D198F950177A31FC002942E3 /* theoraplayer (Mac Theora) */ = { - isa = 
PBXNativeTarget; - buildConfigurationList = D198F975177A31FC002942E3 /* Build configuration list for PBXNativeTarget "theoraplayer (Mac Theora)" */; - buildPhases = ( - D198F951177A31FC002942E3 /* Sources */, - D198F963177A31FC002942E3 /* Frameworks */, - D198F964177A31FC002942E3 /* Headers */, - ); - buildRules = ( - ); - dependencies = ( - ); - name = "theoraplayer (Mac Theora)"; - productName = libtheoraplayer; - productReference = D198F97B177A31FC002942E3 /* libtheoraplayer.a */; - productType = "com.apple.product-type.library.static"; - }; - D198F97C177A31FE002942E3 /* theoraplayer (Mac AVFoundation) */ = { - isa = PBXNativeTarget; - buildConfigurationList = D198F9A1177A31FE002942E3 /* Build configuration list for PBXNativeTarget "theoraplayer (Mac AVFoundation)" */; - buildPhases = ( - D198F97D177A31FE002942E3 /* Sources */, - D198F98F177A31FE002942E3 /* Frameworks */, - D198F990177A31FE002942E3 /* Headers */, - ); - buildRules = ( - ); - dependencies = ( - ); - name = "theoraplayer (Mac AVFoundation)"; - productName = libtheoraplayer; - productReference = D198F9A7177A31FE002942E3 /* libtheoraplayer_avfoundation.a */; - productType = "com.apple.product-type.library.static"; - }; - D198F9A8177A3200002942E3 /* theoraplayer (Mac Theora AVFoundation) */ = { - isa = PBXNativeTarget; - buildConfigurationList = D198F9CE177A3200002942E3 /* Build configuration list for PBXNativeTarget "theoraplayer (Mac Theora AVFoundation)" */; - buildPhases = ( - D198F9A9177A3200002942E3 /* Sources */, - D198F9BC177A3200002942E3 /* Frameworks */, - D198F9BD177A3200002942E3 /* Headers */, - ); - buildRules = ( - ); - dependencies = ( - ); - name = "theoraplayer (Mac Theora AVFoundation)"; - productName = libtheoraplayer; - productReference = D198F9D4177A3200002942E3 /* libtheoraplayer_theora_avfoundation.a */; - productType = "com.apple.product-type.library.static"; - }; - D1BB6FAD150E9E7100EF9400 /* theoraplayer (iOS Theora) */ = { - isa = PBXNativeTarget; - buildConfigurationList = 
D1BB6FBC150E9E7100EF9400 /* Build configuration list for PBXNativeTarget "theoraplayer (iOS Theora)" */; - buildPhases = ( - D1BB6FAA150E9E7100EF9400 /* Sources */, - D1BB6FAB150E9E7100EF9400 /* Frameworks */, - D1BB6FAC150E9E7100EF9400 /* Headers */, - ); - buildRules = ( - ); - dependencies = ( - ); - name = "theoraplayer (iOS Theora)"; - productName = libtheoraplayer; - productReference = D1BB6FAE150E9E7100EF9400 /* libtheoraplayer.a */; - productType = "com.apple.product-type.library.static"; - }; - D1CDFF221696C77A00609AB0 /* theoraplayer (AVFoundation) */ = { - isa = PBXNativeTarget; - buildConfigurationList = D1CDFF421696C77A00609AB0 /* Build configuration list for PBXNativeTarget "theoraplayer (AVFoundation)" */; - buildPhases = ( - D1CDFF231696C77A00609AB0 /* Sources */, - D1CDFF2F1696C77A00609AB0 /* Frameworks */, - D1CDFF331696C77A00609AB0 /* Headers */, - ); - buildRules = ( - ); - dependencies = ( - ); - name = "theoraplayer (AVFoundation)"; - productName = theoraplayer; - productReference = D1CDFF481696C77A00609AB0 /* theoraplayer.framework */; - productType = "com.apple.product-type.framework"; - }; - D1CDFF4A1696C79700609AB0 /* theoraplayer (Theora AVFoundation) */ = { - isa = PBXNativeTarget; - buildConfigurationList = D1CDFF6A1696C79700609AB0 /* Build configuration list for PBXNativeTarget "theoraplayer (Theora AVFoundation)" */; - buildPhases = ( - D1CDFF4B1696C79700609AB0 /* Sources */, - D1CDFF571696C79700609AB0 /* Frameworks */, - D1CDFF5B1696C79700609AB0 /* Headers */, - ); - buildRules = ( - ); - dependencies = ( - ); - name = "theoraplayer (Theora AVFoundation)"; - productName = theoraplayer; - productReference = D1CDFF701696C79700609AB0 /* theoraplayer.framework */; - productType = "com.apple.product-type.framework"; - }; - D1CDFFA01696E1CA00609AB0 /* theoraplayer (iOS AVFoundation) */ = { - isa = PBXNativeTarget; - buildConfigurationList = D1CDFFBE1696E1CA00609AB0 /* Build configuration list for PBXNativeTarget "theoraplayer (iOS 
AVFoundation)" */; - buildPhases = ( - D1CDFFA11696E1CA00609AB0 /* Sources */, - D1CDFFAE1696E1CA00609AB0 /* Frameworks */, - D1CDFFAF1696E1CA00609AB0 /* Headers */, - ); - buildRules = ( - ); - dependencies = ( - ); - name = "theoraplayer (iOS AVFoundation)"; - productName = libtheoraplayer; - productReference = D1CDFFC41696E1CA00609AB0 /* libtheoraplayer.a */; - productType = "com.apple.product-type.library.static"; - }; - D1CDFFC51696E1D700609AB0 /* theoraplayer (iOS Theora AVFoundation) */ = { - isa = PBXNativeTarget; - buildConfigurationList = D1CDFFE31696E1D700609AB0 /* Build configuration list for PBXNativeTarget "theoraplayer (iOS Theora AVFoundation)" */; - buildPhases = ( - D1CDFFC61696E1D700609AB0 /* Sources */, - D1CDFFD31696E1D700609AB0 /* Frameworks */, - D1CDFFD41696E1D700609AB0 /* Headers */, - ); - buildRules = ( - ); - dependencies = ( - ); - name = "theoraplayer (iOS Theora AVFoundation)"; - productName = libtheoraplayer; - productReference = D1CDFFE91696E1D700609AB0 /* libtheoraplayer.a */; - productType = "com.apple.product-type.library.static"; - }; -/* End PBXNativeTarget section */ - -/* Begin PBXProject section */ - D1473F20150CA69B00B20490 /* Project object */ = { - isa = PBXProject; - attributes = { - LastUpgradeCheck = 0510; - }; - buildConfigurationList = D1473F23150CA69B00B20490 /* Build configuration list for PBXProject "theoraplayer" */; - compatibilityVersion = "Xcode 3.2"; - developmentRegion = English; - hasScannedForEncodings = 0; - knownRegions = ( - en, - ); - mainGroup = D1473F1E150CA69B00B20490; - productRefGroup = D1473F2B150CA69B00B20490 /* Products */; - projectDirPath = ""; - projectRoot = ""; - targets = ( - D1473F29150CA69B00B20490 /* theoraplayer (Theora) */, - D1CDFF221696C77A00609AB0 /* theoraplayer (AVFoundation) */, - D1CDFF4A1696C79700609AB0 /* theoraplayer (Theora AVFoundation) */, - D198F950177A31FC002942E3 /* theoraplayer (Mac Theora) */, - D198F97C177A31FE002942E3 /* theoraplayer (Mac AVFoundation) */, - 
D198F9A8177A3200002942E3 /* theoraplayer (Mac Theora AVFoundation) */, - D1BB6FAD150E9E7100EF9400 /* theoraplayer (iOS Theora) */, - D1CDFFA01696E1CA00609AB0 /* theoraplayer (iOS AVFoundation) */, - D1CDFFC51696E1D700609AB0 /* theoraplayer (iOS Theora AVFoundation) */, - ); - }; -/* End PBXProject section */ - -/* Begin PBXSourcesBuildPhase section */ - D1473F25150CA69B00B20490 /* Sources */ = { - isa = PBXSourcesBuildPhase; - buildActionMask = 2147483647; - files = ( - D16775AB155C501D0050EC64 /* TheoraAsync.cpp in Sources */, - D16775AD155C501D0050EC64 /* TheoraAudioInterface.cpp in Sources */, - D16775AF155C501D0050EC64 /* TheoraDataSource.cpp in Sources */, - D16775B1155C501D0050EC64 /* TheoraException.cpp in Sources */, - D16775B3155C501D0050EC64 /* TheoraFrameQueue.cpp in Sources */, - D16775B5155C501D0050EC64 /* TheoraTimer.cpp in Sources */, - D16775B7155C501D0050EC64 /* TheoraUtil.cpp in Sources */, - D16775B9155C501D0050EC64 /* TheoraVideoClip.cpp in Sources */, - D16775BB155C501D0050EC64 /* TheoraVideoFrame.cpp in Sources */, - D16775BD155C501D0050EC64 /* TheoraVideoManager.cpp in Sources */, - D16775BF155C501D0050EC64 /* TheoraWorkerThread.cpp in Sources */, - D1CDFF961696D0F000609AB0 /* TheoraVideoClip_Theora.cpp in Sources */, - D1E2719916B46F640046C00C /* yuv420_grey_c.c in Sources */, - D1E271A516B46F640046C00C /* yuv420_yuv_c.c in Sources */, - D1E271AC16B470210046C00C /* yuv420_rgb_c.c in Sources */, - D1BCE05A18F3F7FE00C83470 /* scale_common.cc in Sources */, - D1D465DA16C2D070007A45AA /* TheoraAudioPacketQueue.cpp in Sources */, - D139462D17C0ED450091F4A4 /* yuv_libyuv.c in Sources */, - D13946CC17C119B40091F4A4 /* yuv_util.c in Sources */, - D1C3D07217C157CD00CA0FD2 /* compare_common.cc in Sources */, - D1C3D08417C157CD00CA0FD2 /* compare_posix.cc in Sources */, - D1BCE06318F3F7FE00C83470 /* scale_posix.cc in Sources */, - D1C3D09617C157CD00CA0FD2 /* compare.cc in Sources */, - D1C3D09F17C157CD00CA0FD2 /* convert_argb.cc in Sources */, - 
D1C3D0C317C157CD00CA0FD2 /* convert_to_argb.cc in Sources */, - D1C3D0CC17C157CD00CA0FD2 /* convert_to_i420.cc in Sources */, - D1C3D0D517C157CD00CA0FD2 /* convert.cc in Sources */, - D1C3D0DE17C157CD00CA0FD2 /* cpu_id.cc in Sources */, - D1C3D0E717C157CD00CA0FD2 /* format_conversion.cc in Sources */, - D1C3D10217C157CD00CA0FD2 /* planar_functions.cc in Sources */, - D1C3D12617C157CD00CA0FD2 /* rotate.cc in Sources */, - D1C3D12F17C157CD00CA0FD2 /* row_any.cc in Sources */, - D1C3D13817C157CD00CA0FD2 /* row_common.cc in Sources */, - D1C3D15317C157CD00CA0FD2 /* row_posix.cc in Sources */, - D1C3D17717C157CD00CA0FD2 /* scale_argb.cc in Sources */, - D1C3D19B17C157CD00CA0FD2 /* video_common.cc in Sources */, - D159BCB017C227F30030FAB6 /* convert_from.cc in Sources */, - D159BCB917C228310030FAB6 /* rotate_argb.cc in Sources */, - D159BCC217C2286D0030FAB6 /* scale.cc in Sources */, - ); - runOnlyForDeploymentPostprocessing = 0; - }; - D198F951177A31FC002942E3 /* Sources */ = { - isa = PBXSourcesBuildPhase; - buildActionMask = 2147483647; - files = ( - D198F952177A31FC002942E3 /* TheoraAsync.cpp in Sources */, - D198F953177A31FC002942E3 /* TheoraAudioInterface.cpp in Sources */, - D198F954177A31FC002942E3 /* TheoraDataSource.cpp in Sources */, - D198F955177A31FC002942E3 /* TheoraException.cpp in Sources */, - D198F956177A31FC002942E3 /* TheoraFrameQueue.cpp in Sources */, - D198F957177A31FC002942E3 /* TheoraTimer.cpp in Sources */, - D198F958177A31FC002942E3 /* TheoraUtil.cpp in Sources */, - D198F959177A31FC002942E3 /* TheoraVideoClip.cpp in Sources */, - D198F95A177A31FC002942E3 /* TheoraVideoFrame.cpp in Sources */, - D198F95B177A31FC002942E3 /* TheoraVideoManager.cpp in Sources */, - D198F95C177A31FC002942E3 /* TheoraWorkerThread.cpp in Sources */, - D198F95D177A31FC002942E3 /* TheoraVideoClip_Theora.cpp in Sources */, - D198F95F177A31FC002942E3 /* yuv420_grey_c.c in Sources */, - D198F960177A31FC002942E3 /* yuv420_yuv_c.c in Sources */, - D198F961177A31FC002942E3 
/* yuv420_rgb_c.c in Sources */, - D1BCE05D18F3F7FE00C83470 /* scale_common.cc in Sources */, - D198F962177A31FC002942E3 /* TheoraAudioPacketQueue.cpp in Sources */, - D139463017C0ED450091F4A4 /* yuv_libyuv.c in Sources */, - D13946CF17C119B40091F4A4 /* yuv_util.c in Sources */, - D1C3D07517C157CD00CA0FD2 /* compare_common.cc in Sources */, - D1C3D08717C157CD00CA0FD2 /* compare_posix.cc in Sources */, - D1BCE06618F3F7FE00C83470 /* scale_posix.cc in Sources */, - D1C3D09917C157CD00CA0FD2 /* compare.cc in Sources */, - D1C3D0A217C157CD00CA0FD2 /* convert_argb.cc in Sources */, - D1C3D0C617C157CD00CA0FD2 /* convert_to_argb.cc in Sources */, - D1C3D0CF17C157CD00CA0FD2 /* convert_to_i420.cc in Sources */, - D1C3D0D817C157CD00CA0FD2 /* convert.cc in Sources */, - D1C3D0E117C157CD00CA0FD2 /* cpu_id.cc in Sources */, - D1C3D0EA17C157CD00CA0FD2 /* format_conversion.cc in Sources */, - D1C3D10517C157CD00CA0FD2 /* planar_functions.cc in Sources */, - D1C3D12917C157CD00CA0FD2 /* rotate.cc in Sources */, - D1C3D13217C157CD00CA0FD2 /* row_any.cc in Sources */, - D1C3D13B17C157CD00CA0FD2 /* row_common.cc in Sources */, - D1C3D15617C157CD00CA0FD2 /* row_posix.cc in Sources */, - D1C3D17A17C157CD00CA0FD2 /* scale_argb.cc in Sources */, - D1C3D19E17C157CD00CA0FD2 /* video_common.cc in Sources */, - D159BCB317C227F40030FAB6 /* convert_from.cc in Sources */, - D159BCBC17C228330030FAB6 /* rotate_argb.cc in Sources */, - D159BCC517C2286E0030FAB6 /* scale.cc in Sources */, - ); - runOnlyForDeploymentPostprocessing = 0; - }; - D198F97D177A31FE002942E3 /* Sources */ = { - isa = PBXSourcesBuildPhase; - buildActionMask = 2147483647; - files = ( - D198F97E177A31FE002942E3 /* TheoraAsync.cpp in Sources */, - D198F97F177A31FE002942E3 /* TheoraAudioInterface.cpp in Sources */, - D198F980177A31FE002942E3 /* TheoraDataSource.cpp in Sources */, - D198F981177A31FE002942E3 /* TheoraException.cpp in Sources */, - D198F982177A31FE002942E3 /* TheoraFrameQueue.cpp in Sources */, - 
D198F983177A31FE002942E3 /* TheoraTimer.cpp in Sources */, - D198F984177A31FE002942E3 /* TheoraUtil.cpp in Sources */, - D198F985177A31FE002942E3 /* TheoraVideoClip.cpp in Sources */, - D198F986177A31FE002942E3 /* TheoraVideoFrame.cpp in Sources */, - D198F987177A31FE002942E3 /* TheoraVideoManager.cpp in Sources */, - D198F988177A31FE002942E3 /* TheoraWorkerThread.cpp in Sources */, - D198F989177A31FE002942E3 /* TheoraVideoClip_AVFoundation.mm in Sources */, - D198F98B177A31FE002942E3 /* yuv420_grey_c.c in Sources */, - D198F98C177A31FE002942E3 /* yuv420_yuv_c.c in Sources */, - D198F98D177A31FE002942E3 /* yuv420_rgb_c.c in Sources */, - D1BCE05E18F3F7FE00C83470 /* scale_common.cc in Sources */, - D198F98E177A31FE002942E3 /* TheoraAudioPacketQueue.cpp in Sources */, - D139463117C0ED450091F4A4 /* yuv_libyuv.c in Sources */, - D13946D017C119B40091F4A4 /* yuv_util.c in Sources */, - D1C3D07617C157CD00CA0FD2 /* compare_common.cc in Sources */, - D1C3D08817C157CD00CA0FD2 /* compare_posix.cc in Sources */, - D1BCE06718F3F7FE00C83470 /* scale_posix.cc in Sources */, - D1C3D09A17C157CD00CA0FD2 /* compare.cc in Sources */, - D1C3D0A317C157CD00CA0FD2 /* convert_argb.cc in Sources */, - D1C3D0C717C157CD00CA0FD2 /* convert_to_argb.cc in Sources */, - D1C3D0D017C157CD00CA0FD2 /* convert_to_i420.cc in Sources */, - D1C3D0D917C157CD00CA0FD2 /* convert.cc in Sources */, - D1C3D0E217C157CD00CA0FD2 /* cpu_id.cc in Sources */, - D1C3D0EB17C157CD00CA0FD2 /* format_conversion.cc in Sources */, - D1C3D10617C157CD00CA0FD2 /* planar_functions.cc in Sources */, - D1C3D12A17C157CD00CA0FD2 /* rotate.cc in Sources */, - D1C3D13317C157CD00CA0FD2 /* row_any.cc in Sources */, - D1C3D13C17C157CD00CA0FD2 /* row_common.cc in Sources */, - D1C3D15717C157CD00CA0FD2 /* row_posix.cc in Sources */, - D1C3D17B17C157CD00CA0FD2 /* scale_argb.cc in Sources */, - D1C3D19F17C157CD00CA0FD2 /* video_common.cc in Sources */, - D159BCB417C227F50030FAB6 /* convert_from.cc in Sources */, - D159BCBD17C228330030FAB6 
/* rotate_argb.cc in Sources */, - D159BCC617C2286E0030FAB6 /* scale.cc in Sources */, - ); - runOnlyForDeploymentPostprocessing = 0; - }; - D198F9A9177A3200002942E3 /* Sources */ = { - isa = PBXSourcesBuildPhase; - buildActionMask = 2147483647; - files = ( - D198F9AA177A3200002942E3 /* TheoraAsync.cpp in Sources */, - D198F9AB177A3200002942E3 /* TheoraAudioInterface.cpp in Sources */, - D198F9AC177A3200002942E3 /* TheoraDataSource.cpp in Sources */, - D198F9AD177A3200002942E3 /* TheoraException.cpp in Sources */, - D198F9AE177A3200002942E3 /* TheoraFrameQueue.cpp in Sources */, - D198F9AF177A3200002942E3 /* TheoraTimer.cpp in Sources */, - D198F9B0177A3200002942E3 /* TheoraUtil.cpp in Sources */, - D198F9B1177A3200002942E3 /* TheoraVideoClip.cpp in Sources */, - D198F9B2177A3200002942E3 /* TheoraVideoFrame.cpp in Sources */, - D198F9B3177A3200002942E3 /* TheoraVideoManager.cpp in Sources */, - D198F9B4177A3200002942E3 /* TheoraWorkerThread.cpp in Sources */, - D198F9B5177A3200002942E3 /* TheoraVideoClip_Theora.cpp in Sources */, - D1BCE06818F3F7FE00C83470 /* scale_posix.cc in Sources */, - D198F9B6177A3200002942E3 /* TheoraVideoClip_AVFoundation.mm in Sources */, - D198F9B8177A3200002942E3 /* yuv420_grey_c.c in Sources */, - D198F9B9177A3200002942E3 /* yuv420_yuv_c.c in Sources */, - D198F9BA177A3200002942E3 /* yuv420_rgb_c.c in Sources */, - D198F9BB177A3200002942E3 /* TheoraAudioPacketQueue.cpp in Sources */, - D139463217C0ED450091F4A4 /* yuv_libyuv.c in Sources */, - D13946D117C119B40091F4A4 /* yuv_util.c in Sources */, - D1C3D07717C157CD00CA0FD2 /* compare_common.cc in Sources */, - D1C3D08917C157CD00CA0FD2 /* compare_posix.cc in Sources */, - D1C3D09B17C157CD00CA0FD2 /* compare.cc in Sources */, - D1C3D0A417C157CD00CA0FD2 /* convert_argb.cc in Sources */, - D1C3D0C817C157CD00CA0FD2 /* convert_to_argb.cc in Sources */, - D1C3D0D117C157CD00CA0FD2 /* convert_to_i420.cc in Sources */, - D1C3D0DA17C157CD00CA0FD2 /* convert.cc in Sources */, - 
D1C3D0E317C157CD00CA0FD2 /* cpu_id.cc in Sources */, - D1C3D0EC17C157CD00CA0FD2 /* format_conversion.cc in Sources */, - D1C3D10717C157CD00CA0FD2 /* planar_functions.cc in Sources */, - D1C3D12B17C157CD00CA0FD2 /* rotate.cc in Sources */, - D1C3D13417C157CD00CA0FD2 /* row_any.cc in Sources */, - D1C3D13D17C157CD00CA0FD2 /* row_common.cc in Sources */, - D1C3D15817C157CD00CA0FD2 /* row_posix.cc in Sources */, - D1C3D17C17C157CD00CA0FD2 /* scale_argb.cc in Sources */, - D1C3D1A017C157CD00CA0FD2 /* video_common.cc in Sources */, - D159BCB517C227F50030FAB6 /* convert_from.cc in Sources */, - D159BCBE17C228340030FAB6 /* rotate_argb.cc in Sources */, - D1BCE05F18F3F7FE00C83470 /* scale_common.cc in Sources */, - D159BCC717C2286F0030FAB6 /* scale.cc in Sources */, - ); - runOnlyForDeploymentPostprocessing = 0; - }; - D1BB6FAA150E9E7100EF9400 /* Sources */ = { - isa = PBXSourcesBuildPhase; - buildActionMask = 2147483647; - files = ( - D16775AC155C501D0050EC64 /* TheoraAsync.cpp in Sources */, - D1BCE06018F3F7FE00C83470 /* scale_common.cc in Sources */, - D16775AE155C501D0050EC64 /* TheoraAudioInterface.cpp in Sources */, - D16775B0155C501D0050EC64 /* TheoraDataSource.cpp in Sources */, - D16775B2155C501D0050EC64 /* TheoraException.cpp in Sources */, - D16775B4155C501D0050EC64 /* TheoraFrameQueue.cpp in Sources */, - D16775B6155C501D0050EC64 /* TheoraTimer.cpp in Sources */, - D16775B8155C501D0050EC64 /* TheoraUtil.cpp in Sources */, - D16775BA155C501D0050EC64 /* TheoraVideoClip.cpp in Sources */, - D16775BC155C501D0050EC64 /* TheoraVideoFrame.cpp in Sources */, - D16775BE155C501D0050EC64 /* TheoraVideoManager.cpp in Sources */, - D16775C0155C501D0050EC64 /* TheoraWorkerThread.cpp in Sources */, - D1BCE06918F3F7FE00C83470 /* scale_posix.cc in Sources */, - D1CDFF981696D0F000609AB0 /* TheoraVideoClip_Theora.cpp in Sources */, - D1E2719C16B46F640046C00C /* yuv420_grey_c.c in Sources */, - D1E271A816B46F640046C00C /* yuv420_yuv_c.c in Sources */, - D1E271AF16B470210046C00C /* 
yuv420_rgb_c.c in Sources */, - D139463317C0ED450091F4A4 /* yuv_libyuv.c in Sources */, - D1D465DD16C2D070007A45AA /* TheoraAudioPacketQueue.cpp in Sources */, - D13946D217C119B40091F4A4 /* yuv_util.c in Sources */, - D1C3D07817C157CD00CA0FD2 /* compare_common.cc in Sources */, - D1C3D08117C157CD00CA0FD2 /* compare_neon.cc in Sources */, - D1C3D09C17C157CD00CA0FD2 /* compare.cc in Sources */, - D1C3D0A517C157CD00CA0FD2 /* convert_argb.cc in Sources */, - D1C3D0C917C157CD00CA0FD2 /* convert_to_argb.cc in Sources */, - D1C3D0D217C157CD00CA0FD2 /* convert_to_i420.cc in Sources */, - D1C3D0DB17C157CD00CA0FD2 /* convert.cc in Sources */, - D1C3D0E417C157CD00CA0FD2 /* cpu_id.cc in Sources */, - D1C3D0ED17C157CD00CA0FD2 /* format_conversion.cc in Sources */, - D1C3D10817C157CD00CA0FD2 /* planar_functions.cc in Sources */, - D1C3D12317C157CD00CA0FD2 /* rotate_neon.cc in Sources */, - D1C3D12C17C157CD00CA0FD2 /* rotate.cc in Sources */, - D1C3D13517C157CD00CA0FD2 /* row_any.cc in Sources */, - D1C3D13E17C157CD00CA0FD2 /* row_common.cc in Sources */, - D1C3D15017C157CD00CA0FD2 /* row_neon.cc in Sources */, - D1C3D17417C157CD00CA0FD2 /* scale_argb_neon.cc in Sources */, - D1C3D17D17C157CD00CA0FD2 /* scale_argb.cc in Sources */, - D1C3D18F17C157CD00CA0FD2 /* scale_neon.cc in Sources */, - D1C3D1A117C157CD00CA0FD2 /* video_common.cc in Sources */, - D159BCB617C227F60030FAB6 /* convert_from.cc in Sources */, - D159BCBF17C228340030FAB6 /* rotate_argb.cc in Sources */, - D159BCC817C2286F0030FAB6 /* scale.cc in Sources */, - D15D361017C386A600F40439 /* row_posix.cc in Sources */, - D15D361317C386B100F40439 /* compare_posix.cc in Sources */, - ); - runOnlyForDeploymentPostprocessing = 0; - }; - D1CDFF231696C77A00609AB0 /* Sources */ = { - isa = PBXSourcesBuildPhase; - buildActionMask = 2147483647; - files = ( - D1CDFF241696C77A00609AB0 /* TheoraAsync.cpp in Sources */, - D1CDFF251696C77A00609AB0 /* TheoraAudioInterface.cpp in Sources */, - D1CDFF261696C77A00609AB0 /* 
TheoraDataSource.cpp in Sources */, - D1CDFF271696C77A00609AB0 /* TheoraException.cpp in Sources */, - D1CDFF281696C77A00609AB0 /* TheoraFrameQueue.cpp in Sources */, - D1CDFF291696C77A00609AB0 /* TheoraTimer.cpp in Sources */, - D1CDFF2A1696C77A00609AB0 /* TheoraUtil.cpp in Sources */, - D1CDFF2B1696C77A00609AB0 /* TheoraVideoClip.cpp in Sources */, - D1CDFF2C1696C77A00609AB0 /* TheoraVideoFrame.cpp in Sources */, - D1CDFF2D1696C77A00609AB0 /* TheoraVideoManager.cpp in Sources */, - D1CDFF2E1696C77A00609AB0 /* TheoraWorkerThread.cpp in Sources */, - D1CDFF9E1696D0FA00609AB0 /* TheoraVideoClip_AVFoundation.mm in Sources */, - D1E2719A16B46F640046C00C /* yuv420_grey_c.c in Sources */, - D1E271A616B46F640046C00C /* yuv420_yuv_c.c in Sources */, - D1E271AD16B470210046C00C /* yuv420_rgb_c.c in Sources */, - D1BCE05B18F3F7FE00C83470 /* scale_common.cc in Sources */, - D1D465DB16C2D070007A45AA /* TheoraAudioPacketQueue.cpp in Sources */, - D139462E17C0ED450091F4A4 /* yuv_libyuv.c in Sources */, - D13946CD17C119B40091F4A4 /* yuv_util.c in Sources */, - D1C3D07317C157CD00CA0FD2 /* compare_common.cc in Sources */, - D1C3D08517C157CD00CA0FD2 /* compare_posix.cc in Sources */, - D1BCE06418F3F7FE00C83470 /* scale_posix.cc in Sources */, - D1C3D09717C157CD00CA0FD2 /* compare.cc in Sources */, - D1C3D0A017C157CD00CA0FD2 /* convert_argb.cc in Sources */, - D1C3D0C417C157CD00CA0FD2 /* convert_to_argb.cc in Sources */, - D1C3D0CD17C157CD00CA0FD2 /* convert_to_i420.cc in Sources */, - D1C3D0D617C157CD00CA0FD2 /* convert.cc in Sources */, - D1C3D0DF17C157CD00CA0FD2 /* cpu_id.cc in Sources */, - D1C3D0E817C157CD00CA0FD2 /* format_conversion.cc in Sources */, - D1C3D10317C157CD00CA0FD2 /* planar_functions.cc in Sources */, - D1C3D12717C157CD00CA0FD2 /* rotate.cc in Sources */, - D1C3D13017C157CD00CA0FD2 /* row_any.cc in Sources */, - D1C3D13917C157CD00CA0FD2 /* row_common.cc in Sources */, - D1C3D15417C157CD00CA0FD2 /* row_posix.cc in Sources */, - D1C3D17817C157CD00CA0FD2 /* 
scale_argb.cc in Sources */, - D1C3D19C17C157CD00CA0FD2 /* video_common.cc in Sources */, - D159BCB117C227F40030FAB6 /* convert_from.cc in Sources */, - D159BCBA17C228320030FAB6 /* rotate_argb.cc in Sources */, - D159BCC317C2286D0030FAB6 /* scale.cc in Sources */, - ); - runOnlyForDeploymentPostprocessing = 0; - }; - D1CDFF4B1696C79700609AB0 /* Sources */ = { - isa = PBXSourcesBuildPhase; - buildActionMask = 2147483647; - files = ( - D1CDFF4C1696C79700609AB0 /* TheoraAsync.cpp in Sources */, - D1CDFF4D1696C79700609AB0 /* TheoraAudioInterface.cpp in Sources */, - D1CDFF4E1696C79700609AB0 /* TheoraDataSource.cpp in Sources */, - D1CDFF4F1696C79700609AB0 /* TheoraException.cpp in Sources */, - D1CDFF501696C79700609AB0 /* TheoraFrameQueue.cpp in Sources */, - D1CDFF511696C79700609AB0 /* TheoraTimer.cpp in Sources */, - D1CDFF521696C79700609AB0 /* TheoraUtil.cpp in Sources */, - D1CDFF531696C79700609AB0 /* TheoraVideoClip.cpp in Sources */, - D1CDFF541696C79700609AB0 /* TheoraVideoFrame.cpp in Sources */, - D1CDFF551696C79700609AB0 /* TheoraVideoManager.cpp in Sources */, - D1CDFF561696C79700609AB0 /* TheoraWorkerThread.cpp in Sources */, - D1CDFF971696D0F000609AB0 /* TheoraVideoClip_Theora.cpp in Sources */, - D1BCE06518F3F7FE00C83470 /* scale_posix.cc in Sources */, - D1CDFFEC1696E24F00609AB0 /* TheoraVideoClip_AVFoundation.mm in Sources */, - D1E2719B16B46F640046C00C /* yuv420_grey_c.c in Sources */, - D1E271A716B46F640046C00C /* yuv420_yuv_c.c in Sources */, - D1E271AE16B470210046C00C /* yuv420_rgb_c.c in Sources */, - D1D465DC16C2D070007A45AA /* TheoraAudioPacketQueue.cpp in Sources */, - D139462F17C0ED450091F4A4 /* yuv_libyuv.c in Sources */, - D13946CE17C119B40091F4A4 /* yuv_util.c in Sources */, - D1C3D07417C157CD00CA0FD2 /* compare_common.cc in Sources */, - D1C3D08617C157CD00CA0FD2 /* compare_posix.cc in Sources */, - D1C3D09817C157CD00CA0FD2 /* compare.cc in Sources */, - D1C3D0A117C157CD00CA0FD2 /* convert_argb.cc in Sources */, - D1C3D0C517C157CD00CA0FD2 /* 
convert_to_argb.cc in Sources */, - D1C3D0CE17C157CD00CA0FD2 /* convert_to_i420.cc in Sources */, - D1C3D0D717C157CD00CA0FD2 /* convert.cc in Sources */, - D1C3D0E017C157CD00CA0FD2 /* cpu_id.cc in Sources */, - D1C3D0E917C157CD00CA0FD2 /* format_conversion.cc in Sources */, - D1C3D10417C157CD00CA0FD2 /* planar_functions.cc in Sources */, - D1C3D12817C157CD00CA0FD2 /* rotate.cc in Sources */, - D1C3D13117C157CD00CA0FD2 /* row_any.cc in Sources */, - D1C3D13A17C157CD00CA0FD2 /* row_common.cc in Sources */, - D1C3D15517C157CD00CA0FD2 /* row_posix.cc in Sources */, - D1C3D17917C157CD00CA0FD2 /* scale_argb.cc in Sources */, - D1C3D19D17C157CD00CA0FD2 /* video_common.cc in Sources */, - D159BCB217C227F40030FAB6 /* convert_from.cc in Sources */, - D159BCBB17C228320030FAB6 /* rotate_argb.cc in Sources */, - D1BCE05C18F3F7FE00C83470 /* scale_common.cc in Sources */, - D159BCC417C2286D0030FAB6 /* scale.cc in Sources */, - ); - runOnlyForDeploymentPostprocessing = 0; - }; - D1CDFFA11696E1CA00609AB0 /* Sources */ = { - isa = PBXSourcesBuildPhase; - buildActionMask = 2147483647; - files = ( - D1CDFFA21696E1CA00609AB0 /* TheoraAsync.cpp in Sources */, - D1BCE06118F3F7FE00C83470 /* scale_common.cc in Sources */, - D1CDFFA31696E1CA00609AB0 /* TheoraAudioInterface.cpp in Sources */, - D1CDFFA41696E1CA00609AB0 /* TheoraDataSource.cpp in Sources */, - D1CDFFA51696E1CA00609AB0 /* TheoraException.cpp in Sources */, - D1CDFFA61696E1CA00609AB0 /* TheoraFrameQueue.cpp in Sources */, - D1CDFFA71696E1CA00609AB0 /* TheoraTimer.cpp in Sources */, - D1CDFFA81696E1CA00609AB0 /* TheoraUtil.cpp in Sources */, - D1CDFFA91696E1CA00609AB0 /* TheoraVideoClip.cpp in Sources */, - D1CDFFAA1696E1CA00609AB0 /* TheoraVideoFrame.cpp in Sources */, - D1CDFFAB1696E1CA00609AB0 /* TheoraVideoManager.cpp in Sources */, - D1CDFFAC1696E1CA00609AB0 /* TheoraWorkerThread.cpp in Sources */, - D1BCE06A18F3F7FE00C83470 /* scale_posix.cc in Sources */, - D1CDFFEA1696E24B00609AB0 /* TheoraVideoClip_AVFoundation.mm in 
Sources */, - D1E2719D16B46F640046C00C /* yuv420_grey_c.c in Sources */, - D1E271A916B46F640046C00C /* yuv420_yuv_c.c in Sources */, - D1E271B016B470210046C00C /* yuv420_rgb_c.c in Sources */, - D139463417C0ED450091F4A4 /* yuv_libyuv.c in Sources */, - D1D465DE16C2D070007A45AA /* TheoraAudioPacketQueue.cpp in Sources */, - D13946D317C119B40091F4A4 /* yuv_util.c in Sources */, - D1C3D07917C157CD00CA0FD2 /* compare_common.cc in Sources */, - D1C3D08217C157CD00CA0FD2 /* compare_neon.cc in Sources */, - D1C3D09D17C157CD00CA0FD2 /* compare.cc in Sources */, - D1C3D0A617C157CD00CA0FD2 /* convert_argb.cc in Sources */, - D1C3D0CA17C157CD00CA0FD2 /* convert_to_argb.cc in Sources */, - D1C3D0D317C157CD00CA0FD2 /* convert_to_i420.cc in Sources */, - D1C3D0DC17C157CD00CA0FD2 /* convert.cc in Sources */, - D1C3D0E517C157CD00CA0FD2 /* cpu_id.cc in Sources */, - D1C3D0EE17C157CD00CA0FD2 /* format_conversion.cc in Sources */, - D1C3D10917C157CD00CA0FD2 /* planar_functions.cc in Sources */, - D1C3D12417C157CD00CA0FD2 /* rotate_neon.cc in Sources */, - D1C3D12D17C157CD00CA0FD2 /* rotate.cc in Sources */, - D1C3D13617C157CD00CA0FD2 /* row_any.cc in Sources */, - D1C3D13F17C157CD00CA0FD2 /* row_common.cc in Sources */, - D1C3D15117C157CD00CA0FD2 /* row_neon.cc in Sources */, - D1C3D17517C157CD00CA0FD2 /* scale_argb_neon.cc in Sources */, - D1C3D17E17C157CD00CA0FD2 /* scale_argb.cc in Sources */, - D1C3D19017C157CD00CA0FD2 /* scale_neon.cc in Sources */, - D1C3D1A217C157CD00CA0FD2 /* video_common.cc in Sources */, - D159BCB717C227F60030FAB6 /* convert_from.cc in Sources */, - D159BCC017C228340030FAB6 /* rotate_argb.cc in Sources */, - D159BCC917C2286F0030FAB6 /* scale.cc in Sources */, - D15D361117C386A600F40439 /* row_posix.cc in Sources */, - D15D361617C386B400F40439 /* compare_posix.cc in Sources */, - ); - runOnlyForDeploymentPostprocessing = 0; - }; - D1CDFFC61696E1D700609AB0 /* Sources */ = { - isa = PBXSourcesBuildPhase; - buildActionMask = 2147483647; - files = ( - 
D1CDFFC71696E1D700609AB0 /* TheoraAsync.cpp in Sources */, - D1CDFFC81696E1D700609AB0 /* TheoraAudioInterface.cpp in Sources */, - D1CDFFC91696E1D700609AB0 /* TheoraDataSource.cpp in Sources */, - D1CDFFCA1696E1D700609AB0 /* TheoraException.cpp in Sources */, - D1CDFFCB1696E1D700609AB0 /* TheoraFrameQueue.cpp in Sources */, - D1CDFFCC1696E1D700609AB0 /* TheoraTimer.cpp in Sources */, - D1CDFFCD1696E1D700609AB0 /* TheoraUtil.cpp in Sources */, - D1CDFFCE1696E1D700609AB0 /* TheoraVideoClip.cpp in Sources */, - D1CDFFCF1696E1D700609AB0 /* TheoraVideoFrame.cpp in Sources */, - D1CDFFD01696E1D700609AB0 /* TheoraVideoManager.cpp in Sources */, - D1CDFFD11696E1D700609AB0 /* TheoraWorkerThread.cpp in Sources */, - D1CDFFD21696E1D700609AB0 /* TheoraVideoClip_Theora.cpp in Sources */, - D1CDFFEB1696E24C00609AB0 /* TheoraVideoClip_AVFoundation.mm in Sources */, - D1E2719E16B46F640046C00C /* yuv420_grey_c.c in Sources */, - D1E271AA16B46F640046C00C /* yuv420_yuv_c.c in Sources */, - D1E271B116B470210046C00C /* yuv420_rgb_c.c in Sources */, - D13946C617C110670091F4A4 /* yuv_libyuv.c in Sources */, - D1D465DF16C2D070007A45AA /* TheoraAudioPacketQueue.cpp in Sources */, - D13946D417C119B40091F4A4 /* yuv_util.c in Sources */, - D1C3D07A17C157CD00CA0FD2 /* compare_common.cc in Sources */, - D1C3D08317C157CD00CA0FD2 /* compare_neon.cc in Sources */, - D1C3D09E17C157CD00CA0FD2 /* compare.cc in Sources */, - D1C3D0A717C157CD00CA0FD2 /* convert_argb.cc in Sources */, - D1C3D0CB17C157CD00CA0FD2 /* convert_to_argb.cc in Sources */, - D1C3D0D417C157CD00CA0FD2 /* convert_to_i420.cc in Sources */, - D1C3D0DD17C157CD00CA0FD2 /* convert.cc in Sources */, - D1C3D0E617C157CD00CA0FD2 /* cpu_id.cc in Sources */, - D1C3D0EF17C157CD00CA0FD2 /* format_conversion.cc in Sources */, - D1C3D10A17C157CD00CA0FD2 /* planar_functions.cc in Sources */, - D1C3D12517C157CD00CA0FD2 /* rotate_neon.cc in Sources */, - D1C3D12E17C157CD00CA0FD2 /* rotate.cc in Sources */, - D1C3D13717C157CD00CA0FD2 /* row_any.cc in 
Sources */, - D1C3D14017C157CD00CA0FD2 /* row_common.cc in Sources */, - D1C3D15217C157CD00CA0FD2 /* row_neon.cc in Sources */, - D1C3D17617C157CD00CA0FD2 /* scale_argb_neon.cc in Sources */, - D1C3D17F17C157CD00CA0FD2 /* scale_argb.cc in Sources */, - D1C3D19117C157CD00CA0FD2 /* scale_neon.cc in Sources */, - D1C3D1A317C157CD00CA0FD2 /* video_common.cc in Sources */, - D159BCB817C227F70030FAB6 /* convert_from.cc in Sources */, - D1BCE06218F3F7FE00C83470 /* scale_common.cc in Sources */, - D159BCC117C228350030FAB6 /* rotate_argb.cc in Sources */, - D159BCCA17C228700030FAB6 /* scale.cc in Sources */, - D1BCE06B18F3F7FE00C83470 /* scale_posix.cc in Sources */, - D15D361217C386A700F40439 /* row_posix.cc in Sources */, - D15D361517C386B300F40439 /* compare_posix.cc in Sources */, - ); - runOnlyForDeploymentPostprocessing = 0; - }; -/* End PBXSourcesBuildPhase section */ - -/* Begin XCBuildConfiguration section */ - D1473F43150CA6CE00B20490 /* Debug */ = { - isa = XCBuildConfiguration; - buildSettings = { - ALWAYS_SEARCH_USER_PATHS = NO; - CLANG_WARN_BOOL_CONVERSION = YES; - CLANG_WARN_CONSTANT_CONVERSION = YES; - CLANG_WARN_EMPTY_BODY = YES; - CLANG_WARN_ENUM_CONVERSION = YES; - CLANG_WARN_INT_CONVERSION = YES; - CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; - COPY_PHASE_STRIP = NO; - GCC_DYNAMIC_NO_PIC = NO; - GCC_ENABLE_OBJC_EXCEPTIONS = YES; - GCC_GENERATE_DEBUGGING_SYMBOLS = YES; - GCC_INLINES_ARE_PRIVATE_EXTERN = YES; - GCC_OPTIMIZATION_LEVEL = 0; - GCC_PREPROCESSOR_DEFINITIONS = ( - "DEBUG=1", - _DEBUG, - "$(inherited)", - ); - "GCC_PREPROCESSOR_DEFINITIONS[sdk=iphoneos*]" = ( - "$(LIBYUV_PREPROCESSOR_IOS)", - "$(inherited)", - ); - "GCC_PREPROCESSOR_DEFINITIONS[sdk=iphonesimulator*]" = "$(inherited)"; - "GCC_PREPROCESSOR_DEFINITIONS[sdk=macosx*]" = ( - "$(LIBYUV_PREPROCESSOR_MAC)", - "$(inherited)", - ); - GCC_SYMBOLS_PRIVATE_EXTERN = YES; - GCC_WARN_64_TO_32_BIT_CONVERSION = YES; - GCC_WARN_ABOUT_RETURN_TYPE = YES; - GCC_WARN_UNDECLARED_SELECTOR = YES; - 
GCC_WARN_UNINITIALIZED_AUTOS = YES; - GCC_WARN_UNUSED_FUNCTION = YES; - GCC_WARN_UNUSED_VARIABLE = YES; - HEADER_SEARCH_PATHS = ( - "$(SRCROOT)/../ogg/include", - "$(SRCROOT)/../vorbis/include", - "$(SRCROOT)/../xal/lib/ogg/include", - "$(SRCROOT)/../xal/lib/vorbis/include", - "$(SRCROOT)/../theora/include", - "$(SRCROOT)/src/YUV/libyuv/include", - ); - LIBYUV_PREPROCESSOR_IOS = "LIBYUV_NEON __ARM_NEON__"; - LIBYUV_PREPROCESSOR_MAC = __SSSE3__; - ONLY_ACTIVE_ARCH = YES; - SKIP_INSTALL = YES; - }; - name = Debug; - }; - D1473F44150CA6CE00B20490 /* Debug */ = { - isa = XCBuildConfiguration; - baseConfigurationReference = D1358BC418D7777800A36FDC /* Mac.xcconfig */; - buildSettings = { - GCC_PREPROCESSOR_DEFINITIONS = ( - __THEORA, - _YUV_LIBYUV, - "$(inherited)", - ); - GCC_WARN_UNINITIALIZED_AUTOS = NO; - GCC_WARN_UNUSED_VARIABLE = NO; - INFOPLIST_FILE = Info.plist; - LD_DYLIB_INSTALL_NAME = "@executable_path/../Frameworks/$(EXECUTABLE_PATH)"; - PRODUCT_NAME = theoraplayer; - WRAPPER_EXTENSION = framework; - }; - name = Debug; - }; - D1473F45150CA6D600B20490 /* App Store */ = { - isa = XCBuildConfiguration; - buildSettings = { - ALWAYS_SEARCH_USER_PATHS = NO; - CLANG_WARN_BOOL_CONVERSION = YES; - CLANG_WARN_CONSTANT_CONVERSION = YES; - CLANG_WARN_EMPTY_BODY = YES; - CLANG_WARN_ENUM_CONVERSION = YES; - CLANG_WARN_INT_CONVERSION = YES; - CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; - COPY_PHASE_STRIP = YES; - GCC_ENABLE_OBJC_EXCEPTIONS = YES; - GCC_GENERATE_DEBUGGING_SYMBOLS = YES; - GCC_INLINES_ARE_PRIVATE_EXTERN = YES; - GCC_OPTIMIZATION_LEVEL = 3; - "GCC_PREPROCESSOR_DEFINITIONS[sdk=iphoneos*]" = ( - "$(LIBYUV_PREPROCESSOR_IOS)", - "$(inherited)", - ); - "GCC_PREPROCESSOR_DEFINITIONS[sdk=iphonesimulator*]" = "$(inherited)"; - "GCC_PREPROCESSOR_DEFINITIONS[sdk=macosx*]" = ( - "$(LIBYUV_PREPROCESSOR_MAC)", - "$(inherited)", - ); - GCC_SYMBOLS_PRIVATE_EXTERN = YES; - GCC_WARN_64_TO_32_BIT_CONVERSION = YES; - GCC_WARN_ABOUT_RETURN_TYPE = YES; - 
GCC_WARN_UNDECLARED_SELECTOR = YES; - GCC_WARN_UNINITIALIZED_AUTOS = YES; - GCC_WARN_UNUSED_FUNCTION = YES; - GCC_WARN_UNUSED_VARIABLE = YES; - HEADER_SEARCH_PATHS = ( - "$(SRCROOT)/../ogg/include", - "$(SRCROOT)/../vorbis/include", - "$(SRCROOT)/../xal/lib/ogg/include", - "$(SRCROOT)/../xal/lib/vorbis/include", - "$(SRCROOT)/../theora/include", - "$(SRCROOT)/src/YUV/libyuv/include", - ); - LIBYUV_PREPROCESSOR_IOS = "LIBYUV_NEON __ARM_NEON__"; - LIBYUV_PREPROCESSOR_MAC = __SSSE3__; - SKIP_INSTALL = YES; - }; - name = "App Store"; - }; - D1473F46150CA6D600B20490 /* App Store */ = { - isa = XCBuildConfiguration; - baseConfigurationReference = D1358BC418D7777800A36FDC /* Mac.xcconfig */; - buildSettings = { - GCC_PREPROCESSOR_DEFINITIONS = ( - __THEORA, - _YUV_LIBYUV, - "$(inherited)", - ); - GCC_WARN_UNINITIALIZED_AUTOS = NO; - GCC_WARN_UNUSED_VARIABLE = NO; - INFOPLIST_FILE = Info.plist; - LD_DYLIB_INSTALL_NAME = "@executable_path/../Frameworks/$(EXECUTABLE_PATH)"; - PRODUCT_NAME = theoraplayer; - WRAPPER_EXTENSION = framework; - }; - name = "App Store"; - }; - D1473F47150CA6E200B20490 /* Release */ = { - isa = XCBuildConfiguration; - buildSettings = { - ALWAYS_SEARCH_USER_PATHS = NO; - CLANG_WARN_BOOL_CONVERSION = YES; - CLANG_WARN_CONSTANT_CONVERSION = YES; - CLANG_WARN_EMPTY_BODY = YES; - CLANG_WARN_ENUM_CONVERSION = YES; - CLANG_WARN_INT_CONVERSION = YES; - CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; - COPY_PHASE_STRIP = YES; - GCC_ENABLE_OBJC_EXCEPTIONS = YES; - GCC_GENERATE_DEBUGGING_SYMBOLS = YES; - GCC_INLINES_ARE_PRIVATE_EXTERN = YES; - GCC_OPTIMIZATION_LEVEL = 3; - "GCC_PREPROCESSOR_DEFINITIONS[sdk=iphoneos*]" = ( - "$(LIBYUV_PREPROCESSOR_IOS)", - "$(inherited)", - ); - "GCC_PREPROCESSOR_DEFINITIONS[sdk=iphonesimulator*]" = "$(inherited)"; - "GCC_PREPROCESSOR_DEFINITIONS[sdk=macosx*]" = ( - "$(LIBYUV_PREPROCESSOR_MAC)", - "$(inherited)", - ); - GCC_SYMBOLS_PRIVATE_EXTERN = YES; - GCC_WARN_64_TO_32_BIT_CONVERSION = YES; - GCC_WARN_ABOUT_RETURN_TYPE = YES; - 
GCC_WARN_UNDECLARED_SELECTOR = YES; - GCC_WARN_UNINITIALIZED_AUTOS = YES; - GCC_WARN_UNUSED_FUNCTION = YES; - GCC_WARN_UNUSED_VARIABLE = YES; - HEADER_SEARCH_PATHS = ( - "$(SRCROOT)/../ogg/include", - "$(SRCROOT)/../vorbis/include", - "$(SRCROOT)/../xal/lib/ogg/include", - "$(SRCROOT)/../xal/lib/vorbis/include", - "$(SRCROOT)/../theora/include", - "$(SRCROOT)/src/YUV/libyuv/include", - ); - LIBYUV_PREPROCESSOR_IOS = "LIBYUV_NEON __ARM_NEON__"; - LIBYUV_PREPROCESSOR_MAC = __SSSE3__; - SKIP_INSTALL = YES; - }; - name = Release; - }; - D1473F48150CA6E200B20490 /* Release */ = { - isa = XCBuildConfiguration; - baseConfigurationReference = D1358BC418D7777800A36FDC /* Mac.xcconfig */; - buildSettings = { - GCC_PREPROCESSOR_DEFINITIONS = ( - __THEORA, - _YUV_LIBYUV, - "$(inherited)", - ); - GCC_WARN_UNINITIALIZED_AUTOS = NO; - GCC_WARN_UNUSED_VARIABLE = NO; - INFOPLIST_FILE = Info.plist; - LD_DYLIB_INSTALL_NAME = "@executable_path/../Frameworks/$(EXECUTABLE_PATH)"; - PRODUCT_NAME = theoraplayer; - WRAPPER_EXTENSION = framework; - }; - name = Release; - }; - D198F977177A31FC002942E3 /* Debug */ = { - isa = XCBuildConfiguration; - baseConfigurationReference = D1358BC418D7777800A36FDC /* Mac.xcconfig */; - buildSettings = { - GCC_PREPROCESSOR_DEFINITIONS = ( - _MAC, - __THEORA, - _YUV_LIBYUV, - "$(inherited)", - ); - GCC_WARN_UNINITIALIZED_AUTOS = NO; - GCC_WARN_UNUSED_VALUE = NO; - GCC_WARN_UNUSED_VARIABLE = NO; - PRODUCT_NAME = theoraplayer; - SKIP_INSTALL = YES; - }; - name = Debug; - }; - D198F979177A31FC002942E3 /* Release */ = { - isa = XCBuildConfiguration; - baseConfigurationReference = D1358BC418D7777800A36FDC /* Mac.xcconfig */; - buildSettings = { - GCC_PREPROCESSOR_DEFINITIONS = ( - _MAC, - __THEORA, - _YUV_LIBYUV, - "$(inherited)", - ); - GCC_WARN_UNINITIALIZED_AUTOS = NO; - GCC_WARN_UNUSED_VALUE = NO; - GCC_WARN_UNUSED_VARIABLE = NO; - PRODUCT_NAME = theoraplayer; - SKIP_INSTALL = YES; - }; - name = Release; - }; - D198F97A177A31FC002942E3 /* App Store */ = { - 
isa = XCBuildConfiguration; - baseConfigurationReference = D1358BC418D7777800A36FDC /* Mac.xcconfig */; - buildSettings = { - GCC_PREPROCESSOR_DEFINITIONS = ( - _MAC, - __THEORA, - _YUV_LIBYUV, - "$(inherited)", - ); - GCC_WARN_UNINITIALIZED_AUTOS = NO; - GCC_WARN_UNUSED_VALUE = NO; - GCC_WARN_UNUSED_VARIABLE = NO; - PRODUCT_NAME = theoraplayer; - SKIP_INSTALL = YES; - }; - name = "App Store"; - }; - D198F9A3177A31FE002942E3 /* Debug */ = { - isa = XCBuildConfiguration; - baseConfigurationReference = D1358BC418D7777800A36FDC /* Mac.xcconfig */; - buildSettings = { - GCC_PREPROCESSOR_DEFINITIONS = ( - _MAC, - __AVFOUNDATION, - _YUV_LIBYUV, - "$(inherited)", - ); - GCC_WARN_UNINITIALIZED_AUTOS = NO; - GCC_WARN_UNUSED_VALUE = NO; - GCC_WARN_UNUSED_VARIABLE = NO; - PRODUCT_NAME = theoraplayer_avfoundation; - SKIP_INSTALL = YES; - }; - name = Debug; - }; - D198F9A5177A31FE002942E3 /* Release */ = { - isa = XCBuildConfiguration; - baseConfigurationReference = D1358BC418D7777800A36FDC /* Mac.xcconfig */; - buildSettings = { - GCC_PREPROCESSOR_DEFINITIONS = ( - _MAC, - __AVFOUNDATION, - _YUV_LIBYUV, - "$(inherited)", - ); - GCC_WARN_UNINITIALIZED_AUTOS = NO; - GCC_WARN_UNUSED_VALUE = NO; - GCC_WARN_UNUSED_VARIABLE = NO; - PRODUCT_NAME = theoraplayer_avfoundation; - SKIP_INSTALL = YES; - }; - name = Release; - }; - D198F9A6177A31FE002942E3 /* App Store */ = { - isa = XCBuildConfiguration; - baseConfigurationReference = D1358BC418D7777800A36FDC /* Mac.xcconfig */; - buildSettings = { - GCC_PREPROCESSOR_DEFINITIONS = ( - _MAC, - __AVFOUNDATION, - _YUV_LIBYUV, - "$(inherited)", - ); - GCC_WARN_UNINITIALIZED_AUTOS = NO; - GCC_WARN_UNUSED_VALUE = NO; - GCC_WARN_UNUSED_VARIABLE = NO; - PRODUCT_NAME = theoraplayer_avfoundation; - SKIP_INSTALL = YES; - }; - name = "App Store"; - }; - D198F9D0177A3200002942E3 /* Debug */ = { - isa = XCBuildConfiguration; - baseConfigurationReference = D1358BC418D7777800A36FDC /* Mac.xcconfig */; - buildSettings = { - GCC_PREPROCESSOR_DEFINITIONS = ( 
- _MAC, - __THEORA, - __AVFOUNDATION, - YUV_LIBYUV, - "$(inherited)", - ); - GCC_WARN_UNINITIALIZED_AUTOS = NO; - GCC_WARN_UNUSED_VALUE = NO; - GCC_WARN_UNUSED_VARIABLE = NO; - PRODUCT_NAME = theoraplayer_theora_avfoundation; - SKIP_INSTALL = YES; - }; - name = Debug; - }; - D198F9D2177A3200002942E3 /* Release */ = { - isa = XCBuildConfiguration; - baseConfigurationReference = D1358BC418D7777800A36FDC /* Mac.xcconfig */; - buildSettings = { - GCC_PREPROCESSOR_DEFINITIONS = ( - _MAC, - __THEORA, - __AVFOUNDATION, - YUV_LIBYUV, - "$(inherited)", - ); - GCC_WARN_UNINITIALIZED_AUTOS = NO; - GCC_WARN_UNUSED_VALUE = NO; - GCC_WARN_UNUSED_VARIABLE = NO; - PRODUCT_NAME = theoraplayer_theora_avfoundation; - SKIP_INSTALL = YES; - }; - name = Release; - }; - D198F9D3177A3200002942E3 /* App Store */ = { - isa = XCBuildConfiguration; - baseConfigurationReference = D1358BC418D7777800A36FDC /* Mac.xcconfig */; - buildSettings = { - GCC_PREPROCESSOR_DEFINITIONS = ( - _MAC, - __THEORA, - __AVFOUNDATION, - YUV_LIBYUV, - "$(inherited)", - ); - GCC_WARN_UNINITIALIZED_AUTOS = NO; - GCC_WARN_UNUSED_VALUE = NO; - GCC_WARN_UNUSED_VARIABLE = NO; - PRODUCT_NAME = theoraplayer_theora_avfoundation; - SKIP_INSTALL = YES; - }; - name = "App Store"; - }; - D1BB6FB8150E9E7100EF9400 /* Debug */ = { - isa = XCBuildConfiguration; - baseConfigurationReference = D1358BC318D7777800A36FDC /* iOS.xcconfig */; - buildSettings = { - GCC_PREPROCESSOR_DEFINITIONS = ( - _IOS, - __THEORA, - _YUV_LIBYUV, - "$(inherited)", - ); - "GCC_PREPROCESSOR_DEFINITIONS[arch=arm64]" = LIBYUV_DISABLE_NEON; - GCC_WARN_UNINITIALIZED_AUTOS = NO; - GCC_WARN_UNUSED_VALUE = NO; - GCC_WARN_UNUSED_VARIABLE = NO; - PRODUCT_NAME = theoraplayer; - SKIP_INSTALL = YES; - }; - name = Debug; - }; - D1BB6FBA150E9E7100EF9400 /* Release */ = { - isa = XCBuildConfiguration; - baseConfigurationReference = D1358BC318D7777800A36FDC /* iOS.xcconfig */; - buildSettings = { - GCC_PREPROCESSOR_DEFINITIONS = ( - _IOS, - __THEORA, - _YUV_LIBYUV, - 
"$(inherited)", - ); - "GCC_PREPROCESSOR_DEFINITIONS[arch=arm64]" = LIBYUV_DISABLE_NEON; - GCC_WARN_UNINITIALIZED_AUTOS = NO; - GCC_WARN_UNUSED_VALUE = NO; - GCC_WARN_UNUSED_VARIABLE = NO; - PRODUCT_NAME = theoraplayer; - SKIP_INSTALL = YES; - }; - name = Release; - }; - D1BB6FBB150E9E7100EF9400 /* App Store */ = { - isa = XCBuildConfiguration; - baseConfigurationReference = D1358BC318D7777800A36FDC /* iOS.xcconfig */; - buildSettings = { - GCC_PREPROCESSOR_DEFINITIONS = ( - _IOS, - __THEORA, - _YUV_LIBYUV, - "$(inherited)", - ); - "GCC_PREPROCESSOR_DEFINITIONS[arch=arm64]" = LIBYUV_DISABLE_NEON; - GCC_WARN_UNINITIALIZED_AUTOS = NO; - GCC_WARN_UNUSED_VALUE = NO; - GCC_WARN_UNUSED_VARIABLE = NO; - PRODUCT_NAME = theoraplayer; - SKIP_INSTALL = YES; - }; - name = "App Store"; - }; - D1CDFF441696C77A00609AB0 /* Debug */ = { - isa = XCBuildConfiguration; - baseConfigurationReference = D1358BC418D7777800A36FDC /* Mac.xcconfig */; - buildSettings = { - GCC_PREPROCESSOR_DEFINITIONS = ( - __AVFOUNDATION, - _YUV_LIBYUV, - "$(inherited)", - ); - GCC_WARN_UNINITIALIZED_AUTOS = NO; - GCC_WARN_UNUSED_VARIABLE = NO; - INFOPLIST_FILE = Info.plist; - LD_DYLIB_INSTALL_NAME = "@executable_path/../Frameworks/$(EXECUTABLE_PATH)"; - PRODUCT_NAME = theoraplayer; - WRAPPER_EXTENSION = framework; - }; - name = Debug; - }; - D1CDFF461696C77A00609AB0 /* Release */ = { - isa = XCBuildConfiguration; - baseConfigurationReference = D1358BC418D7777800A36FDC /* Mac.xcconfig */; - buildSettings = { - GCC_PREPROCESSOR_DEFINITIONS = ( - __AVFOUNDATION, - _YUV_LIBYUV, - "$(inherited)", - ); - GCC_WARN_UNINITIALIZED_AUTOS = NO; - GCC_WARN_UNUSED_VARIABLE = NO; - INFOPLIST_FILE = Info.plist; - LD_DYLIB_INSTALL_NAME = "@executable_path/../Frameworks/$(EXECUTABLE_PATH)"; - PRODUCT_NAME = theoraplayer; - WRAPPER_EXTENSION = framework; - }; - name = Release; - }; - D1CDFF471696C77A00609AB0 /* App Store */ = { - isa = XCBuildConfiguration; - baseConfigurationReference = D1358BC418D7777800A36FDC /* 
Mac.xcconfig */; - buildSettings = { - GCC_PREPROCESSOR_DEFINITIONS = ( - __AVFOUNDATION, - _YUV_LIBYUV, - "$(inherited)", - ); - GCC_WARN_UNINITIALIZED_AUTOS = NO; - GCC_WARN_UNUSED_VARIABLE = NO; - INFOPLIST_FILE = Info.plist; - LD_DYLIB_INSTALL_NAME = "@executable_path/../Frameworks/$(EXECUTABLE_PATH)"; - PRODUCT_NAME = theoraplayer; - WRAPPER_EXTENSION = framework; - }; - name = "App Store"; - }; - D1CDFF6C1696C79700609AB0 /* Debug */ = { - isa = XCBuildConfiguration; - baseConfigurationReference = D1358BC418D7777800A36FDC /* Mac.xcconfig */; - buildSettings = { - GCC_PREPROCESSOR_DEFINITIONS = ( - __THEORA, - __AVFOUNDATION, - _YUV_LIBYUV, - "$(inherited)", - ); - GCC_WARN_UNINITIALIZED_AUTOS = NO; - GCC_WARN_UNUSED_VARIABLE = NO; - INFOPLIST_FILE = Info.plist; - LD_DYLIB_INSTALL_NAME = "@executable_path/../Frameworks/$(EXECUTABLE_PATH)"; - PRODUCT_NAME = theoraplayer; - WRAPPER_EXTENSION = framework; - }; - name = Debug; - }; - D1CDFF6E1696C79700609AB0 /* Release */ = { - isa = XCBuildConfiguration; - baseConfigurationReference = D1358BC418D7777800A36FDC /* Mac.xcconfig */; - buildSettings = { - GCC_PREPROCESSOR_DEFINITIONS = ( - __THEORA, - __AVFOUNDATION, - _YUV_LIBYUV, - "$(inherited)", - ); - GCC_WARN_UNINITIALIZED_AUTOS = NO; - GCC_WARN_UNUSED_VARIABLE = NO; - INFOPLIST_FILE = Info.plist; - LD_DYLIB_INSTALL_NAME = "@executable_path/../Frameworks/$(EXECUTABLE_PATH)"; - PRODUCT_NAME = theoraplayer; - WRAPPER_EXTENSION = framework; - }; - name = Release; - }; - D1CDFF6F1696C79700609AB0 /* App Store */ = { - isa = XCBuildConfiguration; - baseConfigurationReference = D1358BC418D7777800A36FDC /* Mac.xcconfig */; - buildSettings = { - GCC_PREPROCESSOR_DEFINITIONS = ( - __THEORA, - __AVFOUNDATION, - _YUV_LIBYUV, - "$(inherited)", - ); - GCC_WARN_UNINITIALIZED_AUTOS = NO; - GCC_WARN_UNUSED_VARIABLE = NO; - INFOPLIST_FILE = Info.plist; - LD_DYLIB_INSTALL_NAME = "@executable_path/../Frameworks/$(EXECUTABLE_PATH)"; - PRODUCT_NAME = theoraplayer; - WRAPPER_EXTENSION 
= framework; - }; - name = "App Store"; - }; - D1CDFFC01696E1CA00609AB0 /* Debug */ = { - isa = XCBuildConfiguration; - baseConfigurationReference = D1358BC318D7777800A36FDC /* iOS.xcconfig */; - buildSettings = { - GCC_PREPROCESSOR_DEFINITIONS = ( - _IOS, - __AVFOUNDATION, - _YUV_LIBYUV, - "$(inherited)", - ); - "GCC_PREPROCESSOR_DEFINITIONS[arch=arm64]" = LIBYUV_DISABLE_NEON; - GCC_WARN_UNINITIALIZED_AUTOS = NO; - GCC_WARN_UNUSED_VALUE = NO; - GCC_WARN_UNUSED_VARIABLE = NO; - PRODUCT_NAME = theoraplayer; - SKIP_INSTALL = YES; - }; - name = Debug; - }; - D1CDFFC21696E1CA00609AB0 /* Release */ = { - isa = XCBuildConfiguration; - baseConfigurationReference = D1358BC318D7777800A36FDC /* iOS.xcconfig */; - buildSettings = { - GCC_PREPROCESSOR_DEFINITIONS = ( - _IOS, - __AVFOUNDATION, - _YUV_LIBYUV, - "$(inherited)", - ); - "GCC_PREPROCESSOR_DEFINITIONS[arch=arm64]" = LIBYUV_DISABLE_NEON; - GCC_WARN_UNINITIALIZED_AUTOS = NO; - GCC_WARN_UNUSED_VALUE = NO; - GCC_WARN_UNUSED_VARIABLE = NO; - PRODUCT_NAME = theoraplayer; - SKIP_INSTALL = YES; - }; - name = Release; - }; - D1CDFFC31696E1CA00609AB0 /* App Store */ = { - isa = XCBuildConfiguration; - baseConfigurationReference = D1358BC318D7777800A36FDC /* iOS.xcconfig */; - buildSettings = { - GCC_PREPROCESSOR_DEFINITIONS = ( - _IOS, - __AVFOUNDATION, - _YUV_LIBYUV, - "$(inherited)", - ); - "GCC_PREPROCESSOR_DEFINITIONS[arch=arm64]" = LIBYUV_DISABLE_NEON; - GCC_WARN_UNINITIALIZED_AUTOS = NO; - GCC_WARN_UNUSED_VALUE = NO; - GCC_WARN_UNUSED_VARIABLE = NO; - PRODUCT_NAME = theoraplayer; - SKIP_INSTALL = YES; - }; - name = "App Store"; - }; - D1CDFFE51696E1D700609AB0 /* Debug */ = { - isa = XCBuildConfiguration; - baseConfigurationReference = D1358BC318D7777800A36FDC /* iOS.xcconfig */; - buildSettings = { - GCC_PREPROCESSOR_DEFINITIONS = ( - _IOS, - __THEORA, - __AVFOUNDATION, - _YUV_LIBYUV, - "$(inherited)", - ); - "GCC_PREPROCESSOR_DEFINITIONS[arch=arm64]" = LIBYUV_DISABLE_NEON; - GCC_WARN_UNINITIALIZED_AUTOS = NO; - 
GCC_WARN_UNUSED_VALUE = NO; - GCC_WARN_UNUSED_VARIABLE = NO; - PRODUCT_NAME = theoraplayer; - SKIP_INSTALL = YES; - }; - name = Debug; - }; - D1CDFFE71696E1D700609AB0 /* Release */ = { - isa = XCBuildConfiguration; - baseConfigurationReference = D1358BC318D7777800A36FDC /* iOS.xcconfig */; - buildSettings = { - GCC_PREPROCESSOR_DEFINITIONS = ( - _IOS, - __THEORA, - __AVFOUNDATION, - _YUV_LIBYUV, - "$(inherited)", - ); - "GCC_PREPROCESSOR_DEFINITIONS[arch=arm64]" = LIBYUV_DISABLE_NEON; - GCC_WARN_UNINITIALIZED_AUTOS = NO; - GCC_WARN_UNUSED_VALUE = NO; - GCC_WARN_UNUSED_VARIABLE = NO; - PRODUCT_NAME = theoraplayer; - SKIP_INSTALL = YES; - }; - name = Release; - }; - D1CDFFE81696E1D700609AB0 /* App Store */ = { - isa = XCBuildConfiguration; - baseConfigurationReference = D1358BC318D7777800A36FDC /* iOS.xcconfig */; - buildSettings = { - GCC_PREPROCESSOR_DEFINITIONS = ( - _IOS, - __THEORA, - __AVFOUNDATION, - _YUV_LIBYUV, - "$(inherited)", - ); - "GCC_PREPROCESSOR_DEFINITIONS[arch=arm64]" = LIBYUV_DISABLE_NEON; - GCC_WARN_UNINITIALIZED_AUTOS = NO; - GCC_WARN_UNUSED_VALUE = NO; - GCC_WARN_UNUSED_VARIABLE = NO; - PRODUCT_NAME = theoraplayer; - SKIP_INSTALL = YES; - }; - name = "App Store"; - }; -/* End XCBuildConfiguration section */ - -/* Begin XCConfigurationList section */ - D1473F23150CA69B00B20490 /* Build configuration list for PBXProject "theoraplayer" */ = { - isa = XCConfigurationList; - buildConfigurations = ( - D1473F43150CA6CE00B20490 /* Debug */, - D1473F47150CA6E200B20490 /* Release */, - D1473F45150CA6D600B20490 /* App Store */, - ); - defaultConfigurationIsVisible = 0; - defaultConfigurationName = Debug; - }; - D1473F3F150CA69B00B20490 /* Build configuration list for PBXNativeTarget "theoraplayer (Theora)" */ = { - isa = XCConfigurationList; - buildConfigurations = ( - D1473F44150CA6CE00B20490 /* Debug */, - D1473F48150CA6E200B20490 /* Release */, - D1473F46150CA6D600B20490 /* App Store */, - ); - defaultConfigurationIsVisible = 0; - 
defaultConfigurationName = Debug; - }; - D198F975177A31FC002942E3 /* Build configuration list for PBXNativeTarget "theoraplayer (Mac Theora)" */ = { - isa = XCConfigurationList; - buildConfigurations = ( - D198F977177A31FC002942E3 /* Debug */, - D198F979177A31FC002942E3 /* Release */, - D198F97A177A31FC002942E3 /* App Store */, - ); - defaultConfigurationIsVisible = 0; - defaultConfigurationName = Debug; - }; - D198F9A1177A31FE002942E3 /* Build configuration list for PBXNativeTarget "theoraplayer (Mac AVFoundation)" */ = { - isa = XCConfigurationList; - buildConfigurations = ( - D198F9A3177A31FE002942E3 /* Debug */, - D198F9A5177A31FE002942E3 /* Release */, - D198F9A6177A31FE002942E3 /* App Store */, - ); - defaultConfigurationIsVisible = 0; - defaultConfigurationName = Debug; - }; - D198F9CE177A3200002942E3 /* Build configuration list for PBXNativeTarget "theoraplayer (Mac Theora AVFoundation)" */ = { - isa = XCConfigurationList; - buildConfigurations = ( - D198F9D0177A3200002942E3 /* Debug */, - D198F9D2177A3200002942E3 /* Release */, - D198F9D3177A3200002942E3 /* App Store */, - ); - defaultConfigurationIsVisible = 0; - defaultConfigurationName = Debug; - }; - D1BB6FBC150E9E7100EF9400 /* Build configuration list for PBXNativeTarget "theoraplayer (iOS Theora)" */ = { - isa = XCConfigurationList; - buildConfigurations = ( - D1BB6FB8150E9E7100EF9400 /* Debug */, - D1BB6FBA150E9E7100EF9400 /* Release */, - D1BB6FBB150E9E7100EF9400 /* App Store */, - ); - defaultConfigurationIsVisible = 0; - defaultConfigurationName = Debug; - }; - D1CDFF421696C77A00609AB0 /* Build configuration list for PBXNativeTarget "theoraplayer (AVFoundation)" */ = { - isa = XCConfigurationList; - buildConfigurations = ( - D1CDFF441696C77A00609AB0 /* Debug */, - D1CDFF461696C77A00609AB0 /* Release */, - D1CDFF471696C77A00609AB0 /* App Store */, - ); - defaultConfigurationIsVisible = 0; - defaultConfigurationName = Debug; - }; - D1CDFF6A1696C79700609AB0 /* Build configuration list for 
PBXNativeTarget "theoraplayer (Theora AVFoundation)" */ = { - isa = XCConfigurationList; - buildConfigurations = ( - D1CDFF6C1696C79700609AB0 /* Debug */, - D1CDFF6E1696C79700609AB0 /* Release */, - D1CDFF6F1696C79700609AB0 /* App Store */, - ); - defaultConfigurationIsVisible = 0; - defaultConfigurationName = Debug; - }; - D1CDFFBE1696E1CA00609AB0 /* Build configuration list for PBXNativeTarget "theoraplayer (iOS AVFoundation)" */ = { - isa = XCConfigurationList; - buildConfigurations = ( - D1CDFFC01696E1CA00609AB0 /* Debug */, - D1CDFFC21696E1CA00609AB0 /* Release */, - D1CDFFC31696E1CA00609AB0 /* App Store */, - ); - defaultConfigurationIsVisible = 0; - defaultConfigurationName = Debug; - }; - D1CDFFE31696E1D700609AB0 /* Build configuration list for PBXNativeTarget "theoraplayer (iOS Theora AVFoundation)" */ = { - isa = XCConfigurationList; - buildConfigurations = ( - D1CDFFE51696E1D700609AB0 /* Debug */, - D1CDFFE71696E1D700609AB0 /* Release */, - D1CDFFE81696E1D700609AB0 /* App Store */, - ); - defaultConfigurationIsVisible = 0; - defaultConfigurationName = Debug; - }; -/* End XCConfigurationList section */ - }; - rootObject = D1473F20150CA69B00B20490 /* Project object */; -} diff --git a/drivers/theoraplayer/video_stream_theoraplayer.cpp b/drivers/theoraplayer/video_stream_theoraplayer.cpp deleted file mode 100644 index 876cac3425..0000000000 --- a/drivers/theoraplayer/video_stream_theoraplayer.cpp +++ /dev/null @@ -1,556 +0,0 @@ -/*************************************************************************/ -/* video_stream.cpp */ -/*************************************************************************/ -/* This file is part of: */ -/* GODOT ENGINE */ -/* http://www.godotengine.org */ -/*************************************************************************/ -/* Copyright (c) 2007-2015 Juan Linietsky, Ariel Manzur. 
*/ -/* */ -/* Permission is hereby granted, free of charge, to any person obtaining */ -/* a copy of this software and associated documentation files (the */ -/* "Software"), to deal in the Software without restriction, including */ -/* without limitation the rights to use, copy, modify, merge, publish, */ -/* distribute, sublicense, and/or sell copies of the Software, and to */ -/* permit persons to whom the Software is furnished to do so, subject to */ -/* the following conditions: */ -/* */ -/* The above copyright notice and this permission notice shall be */ -/* included in all copies or substantial portions of the Software. */ -/* */ -/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ -/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ -/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ -/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ -/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ -/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ -/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/*************************************************************************/ -#include "video_stream_theoraplayer.h" - -#include "core/os/file_access.h" - -#include "include/theoraplayer/TheoraPlayer.h" -#include "include/theoraplayer/TheoraTimer.h" -#include "include/theoraplayer/TheoraAudioInterface.h" -#include "include/theoraplayer/TheoraDataSource.h" -#include "include/theoraplayer/TheoraException.h" - -#include "core/ring_buffer.h" -#include "core/os/thread_safe.h" - -#include "core/globals.h" - -static TheoraVideoManager* mgr = NULL; - -class TPDataFA : public TheoraDataSource { - - FileAccess* fa; - String data_name; - -public: - - int read(void* output,int nBytes) { - - if (!fa) - return -1; - - return fa->get_buffer((uint8_t*)output, nBytes); - }; - - //! 
returns a string representation of the DataSource, eg 'File: source.ogg' - virtual std::string repr() { - return data_name.utf8().get_data(); - }; - - //! position the source pointer to byte_index from the start of the source - virtual void seek(unsigned long byte_index) { - - if (!fa) - return; - - fa->seek(byte_index); - }; - - - //! return the size of the stream in bytes - virtual unsigned long size() { - - if (!fa) - return 0; - - return fa->get_len(); - }; - - //! return the current position of the source pointer - virtual unsigned long tell() { - - if (!fa) - return 0; - - return fa->get_pos(); - }; - - TPDataFA(const String& p_path) { - - fa = FileAccess::open(p_path, FileAccess::READ); - data_name = "File: " + p_path; - }; - - TPDataFA(FileAccess* p_fa, const String& p_path) { - - fa = p_fa; - data_name = "File: " + p_path; - }; - - ~TPDataFA() { - - if (fa) - memdelete(fa); - }; -}; - -class AudioStreamInput : public AudioStreamResampled { - - _THREAD_SAFE_CLASS_; - - int channels; - int freq; - - RID stream_rid; - mutable RingBuffer<float> rb; - int rb_power; - int total_wrote; - bool playing; - bool paused; - -public: - - virtual void play() { - - _THREAD_SAFE_METHOD_ - _setup(channels, freq, 256); - stream_rid=AudioServer::get_singleton()->audio_stream_create(get_audio_stream()); - AudioServer::get_singleton()->stream_set_active(stream_rid,true); - AudioServer::get_singleton()->stream_set_volume_scale(stream_rid,1); - playing = true; - paused = false; - }; - virtual void stop() { - - _THREAD_SAFE_METHOD_ - - AudioServer::get_singleton()->stream_set_active(stream_rid,false); - //_clear_stream(); - playing=false; - _clear(); - }; - - virtual bool is_playing() const { return true; }; - - virtual void set_paused(bool p_paused) { paused = p_paused; }; - virtual bool is_paused(bool p_paused) const { return paused; }; - - virtual void set_loop(bool p_enable) {}; - virtual bool has_loop() const { return false; }; - - virtual float get_length() const { return 0; 
}; - - virtual String get_stream_name() const { return "Theora Audio Stream"; }; - - virtual int get_loop_count() const { return 1; }; - - virtual float get_pos() const { return 0; }; - virtual void seek_pos(float p_time) {}; - - virtual UpdateMode get_update_mode() const { return UPDATE_THREAD; }; - - virtual bool _can_mix() const { return true; }; - - void input(float* p_data, int p_samples) { - - - _THREAD_SAFE_METHOD_; - //printf("input %i samples from %p\n", p_samples, p_data); - if (rb.space_left() < p_samples) { - rb_power += 1; - rb.resize(rb_power); - } - rb.write(p_data, p_samples); - - update(); //update too here for less latency - }; - - void update() { - - _THREAD_SAFE_METHOD_; - int todo = get_todo(); - int16_t* buffer = get_write_buffer(); - int frames = rb.data_left()/channels; - const int to_write = MIN(todo, frames); - - for (int i=0; i<to_write*channels; i++) { - - int v = rb.read() * 32767; - int16_t sample = CLAMP(v,-32768,32767); - buffer[i] = sample; - }; - write(to_write); - total_wrote += to_write; - }; - - int get_pending() const { - return rb.data_left(); - }; - - int get_total_wrote() { - - return total_wrote - (get_total() - get_todo()); - }; - - AudioStreamInput(int p_channels, int p_freq) { - - playing = false; - paused = true; - channels = p_channels; - freq = p_freq; - total_wrote = 0; - rb_power = 22; - rb.resize(rb_power); - }; - - ~AudioStreamInput() { - - stop(); - }; -}; - -class TPAudioGodot : public TheoraAudioInterface, TheoraTimer { - - Ref<AudioStreamInput> stream; - int sample_count; - int channels; - int freq; - -public: - - void insertData(float* data, int nSamples) { - - stream->input(data, nSamples); - }; - - TPAudioGodot(TheoraVideoClip* owner, int nChannels, int p_freq) - : TheoraAudioInterface(owner, nChannels, p_freq), TheoraTimer() { - - printf("***************** audio interface constructor freq %i\n", p_freq); - channels = nChannels; - freq = p_freq; - stream = 
Ref<AudioStreamInput>(memnew(AudioStreamInput(nChannels, p_freq))); - stream->play(); - sample_count = 0; - owner->setTimer(this); - }; - - void stop() { - - stream->stop(); - }; - - void update(float time_increase) - { - float prev_time = mTime; - //mTime = (float)(stream->get_total_wrote()) / freq; - //mTime = MAX(0,mTime-AudioServer::get_singleton()->get_output_delay()); - //mTime = (float)sample_count / channels / freq; - mTime += time_increase; - if (mTime - prev_time > .02) printf("time increase %f secs\n", mTime - prev_time); - //float duration=mClip->getDuration(); - //if (mTime > duration) mTime=duration; - //printf("time at timer is %f, %f, samples %i\n", mTime, time_increase, sample_count); - } -}; - -class TPAudioGodotFactory : public TheoraAudioInterfaceFactory { - -public: - TheoraAudioInterface* createInstance(TheoraVideoClip* owner, int nChannels, int freq) { - - printf("************** creating audio output\n"); - TheoraAudioInterface* ta = new TPAudioGodot(owner, nChannels, freq); - return ta; - }; -}; - -static TPAudioGodotFactory* audio_factory = NULL; - -void VideoStreamTheoraplayer::stop() { - - playing = false; - if (clip) { - clip->stop(); - clip->seek(0); - }; - started = true; -}; - -void VideoStreamTheoraplayer::play() { - if (clip) - playing = true; -}; - -bool VideoStreamTheoraplayer::is_playing() const { - - return playing; -}; - -void VideoStreamTheoraplayer::set_paused(bool p_paused) { - - paused = p_paused; - if (paused) { - clip->pause(); - } else { - if (clip && playing && !started) - clip->play(); - } -}; - -bool VideoStreamTheoraplayer::is_paused(bool p_paused) const { - - return !playing; -}; - -void VideoStreamTheoraplayer::set_loop(bool p_enable) { - - loop = p_enable; -}; - -bool VideoStreamTheoraplayer::has_loop() const { - - return loop; -}; - -float VideoStreamTheoraplayer::get_length() const { - - if (!clip) - return 0; - - return clip->getDuration(); -}; - - -float VideoStreamTheoraplayer::get_pos() const { - - if 
(!clip) - return 0; - - return clip->getTimer()->getTime(); -}; - -void VideoStreamTheoraplayer::seek_pos(float p_time) { - - if (!clip) - return; - - clip->seek(p_time); -}; - -int VideoStreamTheoraplayer::get_pending_frame_count() const { - - if (!clip) - return 0; - - TheoraVideoFrame* f = clip->getNextFrame(); - return f ? 1 : 0; -}; - - -void VideoStreamTheoraplayer::pop_frame(Ref<ImageTexture> p_tex) { - - if (!clip) - return; - - TheoraVideoFrame* f = clip->getNextFrame(); - if (!f) { - return; - }; - -#ifdef GLES2_ENABLED -// RasterizerGLES2* r = RasterizerGLES2::get_singleton(); -// r->_texture_set_data(p_tex, f->mBpp == 3 ? Image::Format_RGB : Image::Format_RGBA, f->mBpp, w, h, f->getBuffer()); - -#endif - - float w=clip->getWidth(),h=clip->getHeight(); - int imgsize = w * h * f->mBpp; - - int size = f->getStride() * f->getHeight() * f->mBpp; - data.resize(imgsize); - { - DVector<uint8_t>::Write wr = data.write(); - uint8_t* ptr = wr.ptr(); - copymem(ptr, f->getBuffer(), imgsize); - } - /* - for (int i=0; i<h; i++) { - int dstofs = i * w * f->mBpp; - int srcofs = i * f->getStride() * f->mBpp; - copymem(ptr + dstofs, f->getBuffer() + dstofs, w * f->mBpp); - }; - */ - Image frame = Image(); - frame.create(w, h, 0, f->mBpp == 3 ? 
Image::FORMAT_RGB : Image::FORMAT_RGBA, data); - - clip->popFrame(); - - if (p_tex->get_width() == 0) { - p_tex->create(frame.get_width(),frame.get_height(),frame.get_format(),Texture::FLAG_VIDEO_SURFACE|Texture::FLAG_FILTER); - p_tex->set_data(frame); - } else { - - p_tex->set_data(frame); - }; -}; - -/* -Image VideoStreamTheoraplayer::pop_frame() { - - Image ret = frame; - frame = Image(); - return ret; -}; -*/ - -Image VideoStreamTheoraplayer::peek_frame() const { - - return Image(); -}; - -void VideoStreamTheoraplayer::update(float p_time) { - - if (!mgr) - return; - - if (!clip) - return; - - if (!playing || paused) - return; - - //printf("video update!\n"); - if (started) { - if (clip->getNumReadyFrames() < 2) { - printf("frames not ready, returning!\n"); - return; - }; - started = false; - //printf("playing clip!\n"); - clip->play(); - } else if (clip->isDone()) { - playing = false; - }; - - mgr->update(p_time); -}; - - -void VideoStreamTheoraplayer::set_audio_track(int p_idx) { - audio_track=p_idx; - if (clip) - clip->set_audio_track(audio_track); -} - -void VideoStreamTheoraplayer::set_file(const String& p_file) { - - FileAccess* f = FileAccess::open(p_file, FileAccess::READ); - if (!f || !f->is_open()) - return; - - if (!audio_factory) { - audio_factory = memnew(TPAudioGodotFactory); - }; - - if (mgr == NULL) { - mgr = memnew(TheoraVideoManager); - mgr->setAudioInterfaceFactory(audio_factory); - }; - - int track = GLOBAL_DEF("theora/audio_track", 0); // hack - - if (p_file.find(".mp4") != -1) { - - std::string file = p_file.replace("res://", "").utf8().get_data(); - clip = mgr->createVideoClip(file, TH_RGBX, 2, false, track); - //clip->set_audio_track(audio_track); - memdelete(f); - - } else { - - TheoraDataSource* ds = memnew(TPDataFA(f, p_file)); - - try { - clip = mgr->createVideoClip(ds); - clip->set_audio_track(audio_track); - } catch (_TheoraGenericException e) { - printf("exception ocurred! 
%s\n", e.repr().c_str()); - clip = NULL; - }; - }; - - clip->pause(); - started = true; -}; - -VideoStreamTheoraplayer::~VideoStreamTheoraplayer() { - - stop(); - //if (mgr) { // this should be a singleton or static or something - // memdelete(mgr); - //}; - //mgr = NULL; - if (clip) { - mgr->destroyVideoClip(clip); - clip = NULL; - }; -}; - -VideoStreamTheoraplayer::VideoStreamTheoraplayer() { - - //mgr = NULL; - clip = NULL; - started = false; - playing = false; - paused = false; - loop = false; - audio_track=0; -}; - - -RES ResourceFormatLoaderVideoStreamTheoraplayer::load(const String &p_path, const String& p_original_path, Error *r_error) { - if (r_error) - *r_error=OK; - - VideoStreamTheoraplayer *stream = memnew(VideoStreamTheoraplayer); - stream->set_file(p_path); - return Ref<VideoStreamTheoraplayer>(stream); -} - -void ResourceFormatLoaderVideoStreamTheoraplayer::get_recognized_extensions(List<String> *p_extensions) const { - - p_extensions->push_back("ogm"); - p_extensions->push_back("ogv"); - p_extensions->push_back("mp4"); -} -bool ResourceFormatLoaderVideoStreamTheoraplayer::handles_type(const String& p_type) const { - return p_type=="VideoStream" || p_type == "VideoStreamTheoraplayer"; -} - -String ResourceFormatLoaderVideoStreamTheoraplayer::get_resource_type(const String &p_path) const { - - String exl=p_path.extension().to_lower(); - if (exl=="ogm" || exl=="ogv" || exl=="mp4") - return "VideoStream"; - return ""; -} - - - diff --git a/drivers/theoraplayer/video_stream_theoraplayer.h b/drivers/theoraplayer/video_stream_theoraplayer.h deleted file mode 100644 index 69cae7c4a2..0000000000 --- a/drivers/theoraplayer/video_stream_theoraplayer.h +++ /dev/null @@ -1,66 +0,0 @@ -#ifndef VIDEO_STREAM_THEORAPLAYER_H -#define VIDEO_STREAM_THEORAPLAYER_H - -#include "scene/resources/video_stream.h" -#include "io/resource_loader.h" -#include "scene/resources/texture.h" - -class TheoraVideoManager; -class TheoraVideoClip; - -class VideoStreamTheoraplayer : 
public VideoStream { - - OBJ_TYPE(VideoStreamTheoraplayer,VideoStream); - - mutable DVector<uint8_t> data; - TheoraVideoClip* clip; - bool started; - bool playing; - bool loop; - bool paused; - - int audio_track; - -public: - - virtual void stop(); - virtual void play(); - - virtual bool is_playing() const; - - virtual void set_paused(bool p_paused); - virtual bool is_paused(bool p_paused) const; - - virtual void set_loop(bool p_enable); - virtual bool has_loop() const; - - virtual float get_pos() const; - virtual void seek_pos(float p_time); - - virtual float get_length() const; - - virtual int get_pending_frame_count() const; - virtual void pop_frame(Ref<ImageTexture> p_tex); - virtual Image peek_frame() const; - - void update(float p_time); - - void set_file(const String& p_file); - void set_audio_track(int p_idx); - - ~VideoStreamTheoraplayer(); - VideoStreamTheoraplayer(); -}; - -class ResourceFormatLoaderVideoStreamTheoraplayer : public ResourceFormatLoader { -public: - virtual RES load(const String &p_path,const String& p_original_path="",Error *r_error=NULL); - virtual void get_recognized_extensions(List<String> *p_extensions) const; - virtual bool handles_type(const String& p_type) const; - virtual String get_resource_type(const String &p_path) const; - -}; - - -#endif - diff --git a/drivers/unix/file_access_unix.cpp b/drivers/unix/file_access_unix.cpp index 76042089ff..8e70ecc932 100644 --- a/drivers/unix/file_access_unix.cpp +++ b/drivers/unix/file_access_unix.cpp @@ -63,7 +63,7 @@ Error FileAccessUnix::_open(const String& p_path, int p_mode_flags) { fclose(f); f=NULL; - String path=fix_path(p_path); + path=fix_path(p_path); //printf("opening %ls, %i\n", path.c_str(), Memory::get_static_mem_usage()); ERR_FAIL_COND_V(f,ERR_ALREADY_IN_USE); @@ -114,6 +114,9 @@ void FileAccessUnix::close() { return; fclose(f); f = NULL; + if (close_notification_func) { + close_notification_func(path,flags); + } if (save_path!="") { //unlink(save_path.utf8().get_data()); @@ 
-240,6 +243,7 @@ FileAccess * FileAccessUnix::create_libc() { return memnew( FileAccessUnix ); } +CloseNotificationFunc FileAccessUnix::close_notification_func=NULL; FileAccessUnix::FileAccessUnix() { diff --git a/drivers/unix/file_access_unix.h b/drivers/unix/file_access_unix.h index 5b0f0e7cb7..6c41a51ec5 100644 --- a/drivers/unix/file_access_unix.h +++ b/drivers/unix/file_access_unix.h @@ -38,6 +38,10 @@ /** @author Juan Linietsky <reduzio@gmail.com> */ + + +typedef void (*CloseNotificationFunc)(const String& p_file,int p_flags); + class FileAccessUnix : public FileAccess { FILE *f; @@ -45,10 +49,13 @@ class FileAccessUnix : public FileAccess { void check_errors() const; mutable Error last_error; String save_path; + String path; - static FileAccess* create_libc(); + static FileAccess* create_libc(); public: + static CloseNotificationFunc close_notification_func; + virtual Error _open(const String& p_path, int p_mode_flags); ///< open a file virtual void close(); ///< close a file virtual bool is_open() const; ///< true when file is open diff --git a/drivers/unix/os_unix.cpp b/drivers/unix/os_unix.cpp index 314e13cee4..96f90e6be1 100644 --- a/drivers/unix/os_unix.cpp +++ b/drivers/unix/os_unix.cpp @@ -226,8 +226,9 @@ uint64_t OS_Unix::get_unix_time() const { uint64_t OS_Unix::get_system_time_msec() const { struct timeval tv_now; gettimeofday(&tv_now, NULL); - localtime(&tv_now.tv_usec); - uint64_t msec = tv_now.tv_usec/1000; + //localtime(&tv_now.tv_usec); + //localtime((const long *)&tv_now.tv_usec); + uint64_t msec = uint64_t(tv_now.tv_sec)*1000+tv_now.tv_usec/1000; return msec; } diff --git a/drivers/vorbis/audio_stream_ogg_vorbis.cpp b/drivers/vorbis/audio_stream_ogg_vorbis.cpp index ee9ba8da4d..ca055c8b62 100644 --- a/drivers/vorbis/audio_stream_ogg_vorbis.cpp +++ b/drivers/vorbis/audio_stream_ogg_vorbis.cpp @@ -30,7 +30,7 @@ -size_t AudioStreamOGGVorbis::_ov_read_func(void *p_dst,size_t p_data, size_t p_count, void *_f) { +size_t 
AudioStreamPlaybackOGGVorbis::_ov_read_func(void *p_dst,size_t p_data, size_t p_count, void *_f) { //printf("read to %p, %i bytes, %i nmemb, %p\n",p_dst,p_data,p_count,_f); FileAccess *fa=(FileAccess*)_f; @@ -46,7 +46,7 @@ size_t AudioStreamOGGVorbis::_ov_read_func(void *p_dst,size_t p_data, size_t p_c return read; } -int AudioStreamOGGVorbis::_ov_seek_func(void *_f,ogg_int64_t offs, int whence) { +int AudioStreamPlaybackOGGVorbis::_ov_seek_func(void *_f,ogg_int64_t offs, int whence) { //printf("seek to %p, offs %i, whence %i\n",_f,(int)offs,whence); @@ -76,7 +76,7 @@ int AudioStreamOGGVorbis::_ov_seek_func(void *_f,ogg_int64_t offs, int whence) { #endif } -int AudioStreamOGGVorbis::_ov_close_func(void *_f) { +int AudioStreamPlaybackOGGVorbis::_ov_close_func(void *_f) { // printf("close %p\n",_f); if (!_f) @@ -86,7 +86,7 @@ int AudioStreamOGGVorbis::_ov_close_func(void *_f) { fa->close(); return 0; } -long AudioStreamOGGVorbis::_ov_tell_func(void *_f) { +long AudioStreamPlaybackOGGVorbis::_ov_tell_func(void *_f) { //printf("close %p\n",_f); @@ -95,38 +95,32 @@ long AudioStreamOGGVorbis::_ov_tell_func(void *_f) { } -bool AudioStreamOGGVorbis::_can_mix() const { - return /*playing &&*/ !paused; -} - - -void AudioStreamOGGVorbis::update() { +int AudioStreamPlaybackOGGVorbis::mix(int16_t* p_bufer,int p_frames) { - _THREAD_SAFE_METHOD_ - - if (!playing && !setting_up) - return; + if (!playing) + return 0; + int total=p_frames; while (true) { - int todo = get_todo(); + int todo = p_frames; - if (todo==0 || todo<MIN_MIX) + if (todo==0 || todo<MIN_MIX) { break; + } //printf("to mix %i - mix me %i bytes\n",to_mix,to_mix*stream_channels*sizeof(int16_t)); #ifdef BIG_ENDIAN_ENABLED - long ret=ov_read(&vf,(char*)get_write_buffer(),todo*stream_channels*sizeof(int16_t), 1, 2, 1, ¤t_section); + long ret=ov_read(&vf,(char*)p_bufer,todo*stream_channels*sizeof(int16_t), 1, 2, 1, ¤t_section); #else - long ret=ov_read(&vf,(char*)get_write_buffer(),todo*stream_channels*sizeof(int16_t), 
0, 2, 1, ¤t_section); + long ret=ov_read(&vf,(char*)p_bufer,todo*stream_channels*sizeof(int16_t), 0, 2, 1, ¤t_section); #endif + if (ret<0) { playing = false; - setting_up=false; - ERR_EXPLAIN("Error reading OGG Vorbis File: "+file); ERR_BREAK(ret<0); } else if (ret==0) { // end of song, reload? @@ -138,9 +132,8 @@ void AudioStreamOGGVorbis::update() { if (!has_loop()) { playing=false; - setting_up=false; repeats=1; - return; + break; } f=FileAccess::open(file,FileAccess::READ); @@ -148,11 +141,22 @@ void AudioStreamOGGVorbis::update() { int errv = ov_open_callbacks(f,&vf,NULL,0,_ov_callbacks); if (errv!=0) { playing=false; - setting_up=false; - return; // :( + break;; // :( } - frames_mixed=0; + if (loop_restart_time) { + bool ok = ov_time_seek(&vf,loop_restart_time)==0; + if (!ok) { + playing=false; + //ERR_EXPLAIN("loop restart time rejected"); + ERR_PRINT("loop restart time rejected") + } + + frames_mixed=stream_srate*loop_restart_time; + } else { + + frames_mixed=0; + } repeats++; continue; @@ -162,16 +166,19 @@ void AudioStreamOGGVorbis::update() { ret/=sizeof(int16_t); frames_mixed+=ret; - write(ret); + + p_bufer+=ret*stream_channels; + p_frames-=ret; + } -} + return total-p_frames; +} -void AudioStreamOGGVorbis::play() { - _THREAD_SAFE_METHOD_ +void AudioStreamPlaybackOGGVorbis::play(float p_from) { if (playing) stop(); @@ -179,56 +186,46 @@ void AudioStreamOGGVorbis::play() { if (_load_stream()!=OK) return; + frames_mixed=0; - playing=false; - setting_up=true; - update(); - if (!setting_up) - return; - setting_up=false; playing=true; + if (p_from>0) { + seek_pos(p_from); + } } -void AudioStreamOGGVorbis::_close_file() { +void AudioStreamPlaybackOGGVorbis::_close_file() { if (f) { + memdelete(f); f=NULL; } } -void AudioStreamOGGVorbis::stop() { - - _THREAD_SAFE_METHOD_ +bool AudioStreamPlaybackOGGVorbis::is_playing() const { + return playing; +} +void AudioStreamPlaybackOGGVorbis::stop() { _clear_stream(); playing=false; - _clear(); -} - 
-AudioStreamOGGVorbis::UpdateMode AudioStreamOGGVorbis::get_update_mode() const { - - return UPDATE_THREAD; + //_clear(); } -bool AudioStreamOGGVorbis::is_playing() const { +float AudioStreamPlaybackOGGVorbis::get_pos() const { - return playing || (get_total() - get_todo() -1 > 0); -} - -float AudioStreamOGGVorbis::get_pos() const { - - int32_t frames = int32_t(frames_mixed) - (int32_t(get_total()) - get_todo()); + int32_t frames = int32_t(frames_mixed); if (frames < 0) frames=0; return double(frames) / stream_srate; } -void AudioStreamOGGVorbis::seek_pos(float p_time) { +void AudioStreamPlaybackOGGVorbis::seek_pos(float p_time) { + - _THREAD_SAFE_METHOD_ if (!playing) return; @@ -237,85 +234,107 @@ void AudioStreamOGGVorbis::seek_pos(float p_time) { frames_mixed=stream_srate*p_time; } -String AudioStreamOGGVorbis::get_stream_name() const { +String AudioStreamPlaybackOGGVorbis::get_stream_name() const { return ""; } -void AudioStreamOGGVorbis::set_loop(bool p_enable) { +void AudioStreamPlaybackOGGVorbis::set_loop(bool p_enable) { loops=p_enable; } -bool AudioStreamOGGVorbis::has_loop() const { +bool AudioStreamPlaybackOGGVorbis::has_loop() const { return loops; } -int AudioStreamOGGVorbis::get_loop_count() const { +int AudioStreamPlaybackOGGVorbis::get_loop_count() const { return repeats; } -void AudioStreamOGGVorbis::set_file(const String& p_file) { +Error AudioStreamPlaybackOGGVorbis::set_file(const String& p_file) { file=p_file; -} - -Error AudioStreamOGGVorbis::_load_stream() { - - _clear_stream(); - if (file=="") - return ERR_INVALID_DATA; - + stream_valid=false; Error err; f=FileAccess::open(file,FileAccess::READ,&err); - if (err) { ERR_FAIL_COND_V( err, err ); } int errv = ov_open_callbacks(f,&vf,NULL,0,_ov_callbacks); + switch(errv) { - + case OV_EREAD: { // - A read from media returned an error. + memdelete(f); f=NULL; + ERR_FAIL_V( ERR_FILE_CANT_READ ); + } break; + case OV_EVERSION: // - Vorbis version mismatch. 
+ case OV_ENOTVORBIS: { // - Bitstream is not Vorbis data. + memdelete(f); f=NULL; + ERR_FAIL_V( ERR_FILE_UNRECOGNIZED ); + } break; + case OV_EBADHEADER: { // - Invalid Vorbis bitstream header. + memdelete(f); f=NULL; + ERR_FAIL_V( ERR_FILE_CORRUPT ); + } break; + case OV_EFAULT: { // - Internal logic fault; indicates a bug or heap/stack corruption. + memdelete(f); f=NULL; + ERR_FAIL_V( ERR_BUG ); + } break; + } const vorbis_info *vinfo=ov_info(&vf,-1); stream_channels=vinfo->channels; stream_srate=vinfo->rate; - Error serr = _setup(stream_channels,stream_srate); + ogg_int64_t len = ov_time_total(&vf,-1); + length=len/1000.0; + ov_clear(&vf); + memdelete(f); + f=NULL; + stream_valid=true; + + + return OK; +} + +Error AudioStreamPlaybackOGGVorbis::_load_stream() { + + ERR_FAIL_COND_V(!stream_valid,ERR_UNCONFIGURED); - if (serr) { - _close_file(); - ERR_FAIL_V( ERR_INVALID_DATA ); + _clear_stream(); + if (file=="") + return ERR_INVALID_DATA; + + Error err; + f=FileAccess::open(file,FileAccess::READ,&err); + if (err) { + ERR_FAIL_COND_V( err, err ); } + int errv = ov_open_callbacks(f,&vf,NULL,0,_ov_callbacks); switch(errv) { case OV_EREAD: { // - A read from media returned an error. - _close_file(); + memdelete(f); f=NULL; ERR_FAIL_V( ERR_FILE_CANT_READ ); } break; case OV_EVERSION: // - Vorbis version mismatch. case OV_ENOTVORBIS: { // - Bitstream is not Vorbis data. - _close_file(); + memdelete(f); f=NULL; ERR_FAIL_V( ERR_FILE_UNRECOGNIZED ); } break; case OV_EBADHEADER: { // - Invalid Vorbis bitstream header. - _close_file(); + memdelete(f); f=NULL; ERR_FAIL_V( ERR_FILE_CORRUPT ); } break; case OV_EFAULT: { // - Internal logic fault; indicates a bug or heap/stack corruption. 
- - _close_file(); + memdelete(f); f=NULL; ERR_FAIL_V( ERR_BUG ); } break; } - - - ogg_int64_t len = ov_time_total(&vf,-1); - - length=len/1000.0; - repeats=0; stream_loaded=true; @@ -324,16 +343,16 @@ Error AudioStreamOGGVorbis::_load_stream() { } -float AudioStreamOGGVorbis::get_length() const { +float AudioStreamPlaybackOGGVorbis::get_length() const { if (!stream_loaded) { - if (const_cast<AudioStreamOGGVorbis*>(this)->_load_stream()!=OK) + if (const_cast<AudioStreamPlaybackOGGVorbis*>(this)->_load_stream()!=OK) return 0; } return length; } -void AudioStreamOGGVorbis::_clear_stream() { +void AudioStreamPlaybackOGGVorbis::_clear_stream() { if (!stream_loaded) return; @@ -346,18 +365,18 @@ void AudioStreamOGGVorbis::_clear_stream() { playing=false; } -void AudioStreamOGGVorbis::set_paused(bool p_paused) { +void AudioStreamPlaybackOGGVorbis::set_paused(bool p_paused) { paused=p_paused; } -bool AudioStreamOGGVorbis::is_paused(bool p_paused) const { +bool AudioStreamPlaybackOGGVorbis::is_paused(bool p_paused) const { return paused; } -AudioStreamOGGVorbis::AudioStreamOGGVorbis() { +AudioStreamPlaybackOGGVorbis::AudioStreamPlaybackOGGVorbis() { loops=false; playing=false; @@ -367,17 +386,18 @@ AudioStreamOGGVorbis::AudioStreamOGGVorbis() { _ov_callbacks.tell_func=_ov_tell_func; f = NULL; stream_loaded=false; - repeats=0; - setting_up=false; + stream_valid=false; + repeats=0; paused=true; stream_channels=0; stream_srate=0; current_section=0; length=0; + loop_restart_time=0; } -AudioStreamOGGVorbis::~AudioStreamOGGVorbis() { +AudioStreamPlaybackOGGVorbis::~AudioStreamPlaybackOGGVorbis() { _clear_stream(); diff --git a/drivers/vorbis/audio_stream_ogg_vorbis.h b/drivers/vorbis/audio_stream_ogg_vorbis.h index 5e3649d980..827d8b0be3 100644 --- a/drivers/vorbis/audio_stream_ogg_vorbis.h +++ b/drivers/vorbis/audio_stream_ogg_vorbis.h @@ -29,17 +29,16 @@ #ifndef AUDIO_STREAM_OGG_VORBIS_H #define AUDIO_STREAM_OGG_VORBIS_H -#include "scene/resources/audio_stream_resampled.h" 
+#include "scene/resources/audio_stream.h" #include "vorbis/vorbisfile.h" #include "os/file_access.h" #include "io/resource_loader.h" #include "os/thread_safe.h" -class AudioStreamOGGVorbis : public AudioStreamResampled { - OBJ_TYPE(AudioStreamOGGVorbis,AudioStreamResampled); - _THREAD_SAFE_CLASS_ +class AudioStreamPlaybackOGGVorbis : public AudioStreamPlayback { + OBJ_TYPE(AudioStreamPlaybackOGGVorbis,AudioStreamPlayback); enum { MIN_MIX=1024 @@ -54,9 +53,6 @@ class AudioStreamOGGVorbis : public AudioStreamResampled { static int _ov_close_func(void *_f); static long _ov_tell_func(void *_f); - - virtual bool _can_mix() const; - String file; int64_t frames_mixed; @@ -67,7 +63,7 @@ class AudioStreamOGGVorbis : public AudioStreamResampled { int stream_srate; int current_section; - volatile bool setting_up; + bool paused; bool loops; int repeats; @@ -76,17 +72,21 @@ class AudioStreamOGGVorbis : public AudioStreamResampled { void _clear_stream(); void _close_file(); + bool stream_valid; + float loop_restart_time; -public: +public: - void set_file(const String& p_file); + Error set_file(const String& p_file); - virtual void play(); + virtual void play(float p_from=0); virtual void stop(); virtual bool is_playing() const; + virtual void set_loop_restart_time(float p_time) { loop_restart_time=0; } + virtual void set_paused(bool p_paused); virtual bool is_paused(bool p_paused) const; @@ -102,11 +102,32 @@ public: virtual float get_pos() const; virtual void seek_pos(float p_time); - virtual UpdateMode get_update_mode() const; - virtual void update(); + virtual int get_channels() const { return stream_channels; } + virtual int get_mix_rate() const { return stream_srate; } + + virtual int get_minimum_buffer_size() const { return 0; } + virtual int mix(int16_t* p_bufer,int p_frames); + + AudioStreamPlaybackOGGVorbis(); + ~AudioStreamPlaybackOGGVorbis(); +}; + + +class AudioStreamOGGVorbis : public AudioStream { + + OBJ_TYPE(AudioStreamOGGVorbis,AudioStream); + + String file; 
+public: + + Ref<AudioStreamPlayback> instance_playback() { + Ref<AudioStreamPlaybackOGGVorbis> pb = memnew( AudioStreamPlaybackOGGVorbis ); + pb->set_file(file); + return pb; + } + + void set_file(const String& p_file) { file=p_file; } - AudioStreamOGGVorbis(); - ~AudioStreamOGGVorbis(); }; class ResourceFormatLoaderAudioStreamOGGVorbis : public ResourceFormatLoader { diff --git a/drivers/webp/dsp/dsp.h b/drivers/webp/dsp/dsp.h index afe30413c6..fd686a8532 100644 --- a/drivers/webp/dsp/dsp.h +++ b/drivers/webp/dsp/dsp.h @@ -29,7 +29,7 @@ extern "C" { #define WEBP_USE_SSE2 #endif -#if defined(__ANDROID__) && defined(__ARM_ARCH_7A__) +#if defined(__ANDROID__) && defined(__ARM_ARCH_7A__) && defined(__ARM_NEON__) #define WEBP_ANDROID_NEON // Android targets that might support NEON #endif |