From d4029aa51a0f0bce5dc73885af74b592e3aa33b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Verschelde?= Date: Fri, 28 Apr 2017 19:28:21 +0200 Subject: Move other lone thirdparty files to thirdparty/misc Also move Box2D ConvexDecomposition contrib code to thirdparty/b2d_convexdecomp. --- thirdparty/README.md | 37 +- thirdparty/b2d_convexdecomp/b2Glue.h | 173 + thirdparty/b2d_convexdecomp/b2Polygon.cpp | 1586 ++++++++ thirdparty/b2d_convexdecomp/b2Polygon.h | 133 + thirdparty/b2d_convexdecomp/b2Triangle.cpp | 82 + thirdparty/b2d_convexdecomp/b2Triangle.h | 41 + thirdparty/misc/curl_hostcheck.c | 217 ++ thirdparty/misc/curl_hostcheck.h | 39 + thirdparty/misc/mikktspace.c | 1890 ++++++++++ thirdparty/misc/mikktspace.h | 145 + thirdparty/misc/stb_truetype.h | 3267 +++++++++++++++++ thirdparty/misc/stb_vorbis.c | 5399 ++++++++++++++++++++++++++++ thirdparty/misc/yuv2rgb.h | 1123 ++++++ thirdparty/stb_vorbis/stb_vorbis.c | 5399 ---------------------------- 14 files changed, 14131 insertions(+), 5400 deletions(-) create mode 100644 thirdparty/b2d_convexdecomp/b2Glue.h create mode 100644 thirdparty/b2d_convexdecomp/b2Polygon.cpp create mode 100644 thirdparty/b2d_convexdecomp/b2Polygon.h create mode 100644 thirdparty/b2d_convexdecomp/b2Triangle.cpp create mode 100644 thirdparty/b2d_convexdecomp/b2Triangle.h create mode 100644 thirdparty/misc/curl_hostcheck.c create mode 100644 thirdparty/misc/curl_hostcheck.h create mode 100644 thirdparty/misc/mikktspace.c create mode 100644 thirdparty/misc/mikktspace.h create mode 100644 thirdparty/misc/stb_truetype.h create mode 100644 thirdparty/misc/stb_vorbis.c create mode 100644 thirdparty/misc/yuv2rgb.h delete mode 100644 thirdparty/stb_vorbis/stb_vorbis.c (limited to 'thirdparty') diff --git a/thirdparty/README.md b/thirdparty/README.md index ea731282ef..1c4bad4836 100644 --- a/thirdparty/README.md +++ b/thirdparty/README.md @@ -1,6 +1,13 @@ # Third party libraries +## b2d_convexdecomp + +- Upstream: https://github.com/erincatto/Box2D (Contributions/Utilities/ConvexDecomposition) +- Version: TBD +- License: zlib + + ## certs - Upstream: ? @@ -158,7 +165,9 @@ TODO: Properly sync with version 1.2.4 and document changes. ## misc -Collection of single-file libraries used in Godot. +Collection of single-file libraries used in Godot components. + +### core - `aes256.{cpp,h}` * Upstream: http://www.literatecode.com/aes256 @@ -198,6 +207,32 @@ Collection of single-file libraries used in Godot. * Version: TBD, class was renamed * License: MIT +### modules + +- `curl_hostcheck.{c,h}` + * Upstream: https://curl.haxx.se/ + * Version: ? (2013) + * License: MIT +- `yuv2rgb.h` + * Upstream: http://wss.co.uk/pinknoise/yuv2rgb/ (to check) + * Version: ? + * License: BSD + +### scene + +- `mikktspace.{c,h}` + * Upstream: https://wiki.blender.org/index.php/Dev:Shading/Tangent_Space_Normal_Maps + * Version: 1.0 + * License: zlib +- `stb_truetype.h` + * Upstream: https://github.com/nothings/stb + * Version: 1.11 + * License: Public Domain (Unlicense) or MIT +- `stb_vorbis.c` + * Upstream: https://github.com/nothings/stb + * Version: 1.09 + * License: Public Domain (Unlicense) or MIT + ## openssl diff --git a/thirdparty/b2d_convexdecomp/b2Glue.h b/thirdparty/b2d_convexdecomp/b2Glue.h new file mode 100644 index 0000000000..425486356e --- /dev/null +++ b/thirdparty/b2d_convexdecomp/b2Glue.h @@ -0,0 +1,173 @@ +/* +* Copyright (c) 2006-2009 Erin Catto http://www.gphysics.com +* +* This software is provided 'as-is', without any express or implied +* warranty. In no event will the authors be held liable for any damages +* arising from the use of this software. +* Permission is granted to anyone to use this software for any purpose, +* including commercial applications, and to alter it and redistribute it +* freely, subject to the following restrictions: +* 1. The origin of this software must not be misrepresented; you must not +* claim that you wrote the original software. If you use this software +* in a product, an acknowledgment in the product documentation would be +* appreciated but is not required. +* 2. Altered source versions must be plainly marked as such, and must not be +* misrepresented as being the original software. +* 3. This notice may not be removed or altered from any source distribution. +*/ + +#ifndef B2GLUE_H +#define B2GLUE_H + +#include "math_2d.h" +#include + +namespace b2ConvexDecomp { + +typedef real_t float32; +typedef int32_t int32; + +static inline float32 b2Sqrt(float32 val) { return Math::sqrt(val); } +#define b2_maxFloat FLT_MAX +#define b2_epsilon CMP_EPSILON +#define b2_pi 3.14159265359f +#define b2_maxPolygonVertices 16 +#define b2Max MAX +#define b2Min MIN +#define b2Clamp CLAMP +#define b2Abs ABS +/// A small length used as a collision and constraint tolerance. Usually it is +/// chosen to be numerically significant, but visually insignificant. +#define b2_linearSlop 0.005f + +/// A small angle used as a collision and constraint tolerance. Usually it is +/// chosen to be numerically significant, but visually insignificant. +#define b2_angularSlop (2.0f / 180.0f * b2_pi) + +/// A 2D column vector. +struct b2Vec2 +{ + /// Default constructor does nothing (for performance). + b2Vec2() {} + + /// Construct using coordinates. + b2Vec2(float32 x, float32 y) : x(x), y(y) {} + + /// Set this vector to all zeros. + void SetZero() { x = 0.0f; y = 0.0f; } + + /// Set this vector to some specified coordinates. + void Set(float32 x_, float32 y_) { x = x_; y = y_; } + + /// Negate this vector. + b2Vec2 operator -() const { b2Vec2 v; v.Set(-x, -y); return v; } + + /// Read from and indexed element. + float32 operator () (int32 i) const + { + return (&x)[i]; + } + + /// Write to an indexed element. + float32& operator () (int32 i) + { + return (&x)[i]; + } + + /// Add a vector to this vector. + void operator += (const b2Vec2& v) + { + x += v.x; y += v.y; + } + + /// Subtract a vector from this vector. + void operator -= (const b2Vec2& v) + { + x -= v.x; y -= v.y; + } + + /// Multiply this vector by a scalar. + void operator *= (float32 a) + { + x *= a; y *= a; + } + + /// Get the length of this vector (the norm). + float32 Length() const + { + return b2Sqrt(x * x + y * y); + } + + /// Get the length squared. For performance, use this instead of + /// b2Vec2::Length (if possible). + float32 LengthSquared() const + { + return x * x + y * y; + } + + bool operator==(const b2Vec2& p_v) const { + return x==p_v.x && y==p_v.y; + } + b2Vec2 operator+(const b2Vec2& p_v) const { + return b2Vec2(x+p_v.x,y+p_v.y); + } + b2Vec2 operator-(const b2Vec2& p_v) const { + return b2Vec2(x-p_v.x,y-p_v.y); + } + + b2Vec2 operator*(float32 f) const { + return b2Vec2(f*x,f*y); + } + + /// Convert this vector into a unit vector. Returns the length. + float32 Normalize() + { + float32 length = Length(); + if (length < b2_epsilon) + { + return 0.0f; + } + float32 invLength = 1.0f / length; + x *= invLength; + y *= invLength; + + return length; + } + + /* + /// Does this vector contain finite coordinates? + bool IsValid() const + { + return b2IsValid(x) && b2IsValid(y); + } + */ + + float32 x, y; +}; + +inline b2Vec2 operator*(float32 f,const b2Vec2& p_v) { + return b2Vec2(f*p_v.x,f*p_v.y); +} + +/// Perform the dot product on two vectors. +inline float32 b2Dot(const b2Vec2& a, const b2Vec2& b) +{ + return a.x * b.x + a.y * b.y; +} + +/// Perform the cross product on two vectors. In 2D this produces a scalar. +inline float32 b2Cross(const b2Vec2& a, const b2Vec2& b) +{ + return a.x * b.y - a.y * b.x; +} + +/// Perform the cross product on a vector and a scalar. In 2D this produces +/// a vector. +inline b2Vec2 b2Cross(const b2Vec2& a, float32 s) +{ + return b2Vec2(s * a.y, -s * a.x); +} + +} + +#endif diff --git a/thirdparty/b2d_convexdecomp/b2Polygon.cpp b/thirdparty/b2d_convexdecomp/b2Polygon.cpp new file mode 100644 index 0000000000..b6ead62c63 --- /dev/null +++ b/thirdparty/b2d_convexdecomp/b2Polygon.cpp @@ -0,0 +1,1586 @@ +/* + * Copyright (c) 2007 Eric Jordan + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +// This utility works with Box2d version 2.0 (or higher), and not with 1.4.3 + +#include "b2Triangle.h" +#include "b2Polygon.h" + +#include +#include +#include +#define b2Assert assert + +namespace b2ConvexDecomp { + + +//If you're using 1.4.3, b2_toiSlop won't exist, so set this equal to 0 +static const float32 toiSlop = 0.0f; + +/* + * Check if the lines a0->a1 and b0->b1 cross. + * If they do, intersectionPoint will be filled + * with the point of crossing. + * + * Grazing lines should not return true. + */ +bool intersect(const b2Vec2& a0, const b2Vec2& a1, + const b2Vec2& b0, const b2Vec2& b1, + b2Vec2& intersectionPoint) { + + if (a0 == b0 || a0 == b1 || a1 == b0 || a1 == b1) return false; + float x1 = a0.x; float y1 = a0.y; + float x2 = a1.x; float y2 = a1.y; + float x3 = b0.x; float y3 = b0.y; + float x4 = b1.x; float y4 = b1.y; + + //AABB early exit + if (b2Max(x1,x2) < b2Min(x3,x4) || b2Max(x3,x4) < b2Min(x1,x2) ) return false; + if (b2Max(y1,y2) < b2Min(y3,y4) || b2Max(y3,y4) < b2Min(y1,y2) ) return false; + + float ua = ((x4 - x3) * (y1 - y3) - (y4 - y3) * (x1 - x3)); + float ub = ((x2 - x1) * (y1 - y3) - (y2 - y1) * (x1 - x3)); + float denom = (y4 - y3) * (x2 - x1) - (x4 - x3) * (y2 - y1); + if (b2Abs(denom) < CMP_EPSILON) { + //Lines are too close to parallel to call + return false; + } + ua /= denom; + ub /= denom; + + if ((0 < ua) && (ua < 1) && (0 < ub) && (ub < 1)) { + //if (intersectionPoint){ + intersectionPoint.x = (x1 + ua * (x2 - x1)); + intersectionPoint.y = (y1 + ua * (y2 - y1)); + //} + //printf("%f, %f -> %f, %f crosses %f, %f -> %f, %f\n",x1,y1,x2,y2,x3,y3,x4,y4); + return true; + } + + return false; +} + +/* + * True if line from a0->a1 intersects b0->b1 + */ +bool intersect(const b2Vec2& a0, const b2Vec2& a1, + const b2Vec2& b0, const b2Vec2& b1) { + b2Vec2 myVec(0.0f,0.0f); + return intersect(a0, a1, b0, b1, myVec); +} + +b2Polygon::b2Polygon(float32* _x, float32* _y, int32 nVert) { + nVertices = nVert; + x = new float32[nVertices]; + y = new float32[nVertices]; + for (int32 i = 0; i < nVertices; ++i) { + x[i] = _x[i]; + y[i] = _y[i]; + } + areaIsSet = false; +} + +b2Polygon::b2Polygon(b2Vec2* v, int32 nVert) { + nVertices = nVert; + x = new float32[nVertices]; + y = new float32[nVertices]; + for (int32 i = 0; i < nVertices; ++i) { + x[i] = v[i].x; + y[i] = v[i].y; + + } + areaIsSet = false; +} + +b2Polygon::b2Polygon() { + x = NULL; + y = NULL; + nVertices = 0; + areaIsSet = false; +} + +b2Polygon::~b2Polygon() { + //printf("About to delete poly with %d vertices\n",nVertices); + delete[] x; + delete[] y; +} + +float32 b2Polygon::GetArea() { + // TODO: fix up the areaIsSet caching so that it can be used + //if (areaIsSet) return area; + area = 0.0f; + + //First do wraparound + area += x[nVertices-1]*y[0]-x[0]*y[nVertices-1]; + for (int i=0; i 0.0f); +} + +void b2Polygon::MergeParallelEdges(float32 tolerance) { + if (nVertices <= 3) return; //Can't do anything useful here to a triangle + bool* mergeMe = new bool[nVertices]; + int32 newNVertices = nVertices; + for (int32 i = 0; i < nVertices; ++i) { + int32 lower = (i == 0) ? (nVertices - 1) : (i - 1); + int32 middle = i; + int32 upper = (i == nVertices - 1) ? (0) : (i + 1); + float32 dx0 = x[middle] - x[lower]; + float32 dy0 = y[middle] - y[lower]; + float32 dx1 = x[upper] - x[middle]; + float32 dy1 = y[upper] - y[middle]; + float32 norm0 = sqrtf(dx0*dx0+dy0*dy0); + float32 norm1 = sqrtf(dx1*dx1+dy1*dy1); + if ( !(norm0 > 0.0f && norm1 > 0.0f) && newNVertices > 3 ) { + //Merge identical points + mergeMe[i] = true; + --newNVertices; + } + dx0 /= norm0; dy0 /= norm0; + dx1 /= norm1; dy1 /= norm1; + float32 cross = dx0 * dy1 - dx1 * dy0; + float32 dot = dx0 * dx1 + dy0 * dy1; + if (fabs(cross) < tolerance && dot > 0 && newNVertices > 3) { + mergeMe[i] = true; + --newNVertices; + } else { + mergeMe[i] = false; + } + } + if(newNVertices == nVertices || newNVertices == 0) { + delete[] mergeMe; + return; + } + float32* newx = new float32[newNVertices]; + float32* newy = new float32[newNVertices]; + int32 currIndex = 0; + for (int32 i=0; i < nVertices; ++i) { + if (mergeMe[i] || newNVertices == 0 || currIndex == newNVertices) continue; + b2Assert(currIndex < newNVertices); + newx[currIndex] = x[i]; + newy[currIndex] = y[i]; + ++currIndex; + } + delete[] x; + delete[] y; + delete[] mergeMe; + x = newx; + y = newy; + nVertices = newNVertices; + //printf("%d \n", newNVertices); +} + + /* + * Allocates and returns pointer to vector vertex array. + * Length of array is nVertices. + */ +b2Vec2* b2Polygon::GetVertexVecs() { + b2Vec2* out = new b2Vec2[nVertices]; + for (int32 i = 0; i < nVertices; ++i) { + out[i].Set(x[i], y[i]); + } + return out; +} + +b2Polygon::b2Polygon(b2Triangle& t) { + nVertices = 3; + x = new float[nVertices]; + y = new float[nVertices]; + for (int32 i = 0; i < nVertices; ++i) { + x[i] = t.x[i]; + y[i] = t.y[i]; + } +} + +void b2Polygon::Set(const b2Polygon& p) { + if (nVertices != p.nVertices){ + nVertices = p.nVertices; + delete[] x; + delete[] y; + x = new float32[nVertices]; + y = new float32[nVertices]; + } + + for (int32 i = 0; i < nVertices; ++i) { + x[i] = p.x[i]; + y[i] = p.y[i]; + } + areaIsSet = false; +} + + /* + * Assuming the polygon is simple, checks if it is convex. + */ +bool b2Polygon::IsConvex() { + bool isPositive = false; + for (int32 i = 0; i < nVertices; ++i) { + int32 lower = (i == 0) ? (nVertices - 1) : (i - 1); + int32 middle = i; + int32 upper = (i == nVertices - 1) ? (0) : (i + 1); + float32 dx0 = x[middle] - x[lower]; + float32 dy0 = y[middle] - y[lower]; + float32 dx1 = x[upper] - x[middle]; + float32 dy1 = y[upper] - y[middle]; + float32 cross = dx0 * dy1 - dx1 * dy0; + // Cross product should have same sign + // for each vertex if poly is convex. + bool newIsP = (cross >= 0) ? true : false; + if (i == 0) { + isPositive = newIsP; + } + else if (isPositive != newIsP) { + return false; + } + } + return true; +} + +/* + * Pulled from b2Shape.cpp, assertions removed + */ +static b2Vec2 PolyCentroid(const b2Vec2* vs, int32 count) +{ + b2Vec2 c; c.Set(0.0f, 0.0f); + float32 area = 0.0f; + + const float32 inv3 = 1.0f / 3.0f; + b2Vec2 pRef(0.0f, 0.0f); + for (int32 i = 0; i < count; ++i) + { + // Triangle vertices. + b2Vec2 p1 = pRef; + b2Vec2 p2 = vs[i]; + b2Vec2 p3 = i + 1 < count ? vs[i+1] : vs[0]; + + b2Vec2 e1 = p2 - p1; + b2Vec2 e2 = p3 - p1; + + float32 D = b2Cross(e1, e2); + + float32 triangleArea = 0.5f * D; + area += triangleArea; + + // Area weighted centroid + c += (p1 + p2 + p3) * triangleArea * inv3; + } + + // Centroid + c *= 1.0f / area; + return c; +} + + +/* + * Checks if polygon is valid for use in Box2d engine. + * Last ditch effort to ensure no invalid polygons are + * added to world geometry. + * + * Performs a full check, for simplicity, convexity, + * orientation, minimum angle, and volume. This won't + * be very efficient, and a lot of it is redundant when + * other tools in this section are used. + */ +bool b2Polygon::IsUsable(bool printErrors){ + int32 error = -1; + bool noError = true; + if (nVertices < 3 || nVertices > b2_maxPolygonVertices) {noError = false; error = 0;} + if (!IsConvex()) {noError = false; error = 1;} + if (!IsSimple()) {noError = false; error = 2;} + if (GetArea() < CMP_EPSILON) {noError = false; error = 3;} + + //Compute normals + b2Vec2* normals = new b2Vec2[nVertices]; + b2Vec2* vertices = new b2Vec2[nVertices]; + for (int32 i = 0; i < nVertices; ++i){ + vertices[i].Set(x[i],y[i]); + int32 i1 = i; + int32 i2 = i + 1 < nVertices ? i + 1 : 0; + b2Vec2 edge(x[i2]-x[i1],y[i2]-y[i1]); + normals[i] = b2Cross(edge, 1.0f); + normals[i].Normalize(); + } + + //Required side checks + for (int32 i=0; i= -b2_linearSlop){ + noError = false; + error = 5; + } + } + + + b2Vec2 centroid = PolyCentroid(vertices,nVertices); + b2Vec2 n1 = normals[iminus]; + b2Vec2 n2 = normals[i]; + b2Vec2 v = vertices[i] - centroid; + + b2Vec2 d; + d.x = b2Dot(n1, v) - toiSlop; + d.y = b2Dot(n2, v) - toiSlop; + + // Shifting the edge inward by b2_toiSlop should + // not cause the plane to pass the centroid. + if ((d.x < 0.0f)||(d.y < 0.0f)){ + noError = false; + error = 6; + } + + } + delete[] vertices; + delete[] normals; + + if (!noError && printErrors){ + printf("Found invalid polygon, "); + switch(error){ + case 0: + printf("must have between 3 and %d vertices.\n",b2_maxPolygonVertices); + break; + case 1: + printf("must be convex.\n"); + break; + case 2: + printf("must be simple (cannot intersect itself).\n"); + break; + case 3: + printf("area is too small.\n"); + break; + case 4: + printf("sides are too close to parallel.\n"); + break; + case 5: + printf("polygon is too thin.\n"); + break; + case 6: + printf("core shape generation would move edge past centroid (too thin).\n"); + break; + default: + printf("don't know why.\n"); + } + } + return noError; +} + + +bool b2Polygon::IsUsable(){ + return IsUsable(B2_POLYGON_REPORT_ERRORS); +} + +//Check for edge crossings +bool b2Polygon::IsSimple() { + for (int32 i=0; i nVertices-1)?0:i+1; + b2Vec2 a1(x[i],y[i]); + b2Vec2 a2(x[iplus],y[iplus]); + for (int32 j=i+1; j nVertices-1)?0:j+1; + b2Vec2 b1(x[j],y[j]); + b2Vec2 b2(x[jplus],y[jplus]); + if (intersect(a1,a2,b1,b2)){ + return false; + } + } + } + return true; +} + + /* + * Tries to add a triangle to the polygon. Returns null if it can't connect + * properly, otherwise returns a pointer to the new Polygon. Assumes bitwise + * equality of joined vertex positions. + * + * Remember to delete the pointer afterwards. + * Todo: Make this return a b2Polygon instead + * of a pointer to a heap-allocated one. + * + * For internal use. + */ +b2Polygon* b2Polygon::Add(b2Triangle& t) { + // First, find vertices that connect + int32 firstP = -1; + int32 firstT = -1; + int32 secondP = -1; + int32 secondT = -1; + for (int32 i = 0; i < nVertices; i++) { + if (t.x[0] == x[i] && t.y[0] == y[i]) { + if (firstP == -1) { + firstP = i; + firstT = 0; + } + else { + secondP = i; + secondT = 0; + } + } + else if (t.x[1] == x[i] && t.y[1] == y[i]) { + if (firstP == -1) { + firstP = i; + firstT = 1; + } + else { + secondP = i; + secondT = 1; + } + } + else if (t.x[2] == x[i] && t.y[2] == y[i]) { + if (firstP == -1) { + firstP = i; + firstT = 2; + } + else { + secondP = i; + secondT = 2; + } + } + else { + } + } + // Fix ordering if first should be last vertex of poly + if (firstP == 0 && secondP == nVertices - 1) { + firstP = nVertices - 1; + secondP = 0; + } + + // Didn't find it + if (secondP == -1) { + return NULL; + } + + // Find tip index on triangle + int32 tipT = 0; + if (tipT == firstT || tipT == secondT) + tipT = 1; + if (tipT == firstT || tipT == secondT) + tipT = 2; + + float32* newx = new float[nVertices + 1]; + float32* newy = new float[nVertices + 1]; + int32 currOut = 0; + for (int32 i = 0; i < nVertices; i++) { + newx[currOut] = x[i]; + newy[currOut] = y[i]; + if (i == firstP) { + ++currOut; + newx[currOut] = t.x[tipT]; + newy[currOut] = t.y[tipT]; + } + ++currOut; + } + b2Polygon* result = new b2Polygon(newx, newy, nVertices+1); + delete[] newx; + delete[] newy; + return result; +} + + /** + * Adds this polygon to a PolyDef. + */ +#if 0 +void b2Polygon::AddTo(b2FixtureDef& pd) { + if (nVertices < 3) return; + + b2Assert(nVertices <= b2_maxPolygonVertices); + + b2Vec2* vecs = GetVertexVecs(); + b2Vec2* vecsToAdd = new b2Vec2[nVertices]; + + int32 offset = 0; + + b2PolygonShape *polyShape = new b2PolygonShape; + int32 ind; + + for (int32 i = 0; i < nVertices; ++i) { + + //Omit identical neighbors (including wraparound) + ind = i - offset; + if (vecs[i].x==vecs[remainder(i+1,nVertices)].x && + vecs[i].y==vecs[remainder(i+1,nVertices)].y){ + offset++; + continue; + } + + vecsToAdd[ind] = vecs[i]; + + } + + polyShape->Set((const b2Vec2*)vecsToAdd, ind+1); + pd.shape = polyShape; + + delete[] vecs; + delete[] vecsToAdd; +} +#endif + /** + * Finds and fixes "pinch points," points where two polygon + * vertices are at the same point. + * + * If a pinch point is found, pin is broken up into poutA and poutB + * and true is returned; otherwise, returns false. + * + * Mostly for internal use. + */ +bool ResolvePinchPoint(const b2Polygon& pin, b2Polygon& poutA, b2Polygon& poutB){ + if (pin.nVertices < 3) return false; + float32 tol = .001f; + bool hasPinchPoint = false; + int32 pinchIndexA = -1; + int32 pinchIndexB = -1; + for (int i=0; i 3) { + // Find an ear + int32 earIndex = -1; + //float32 earVolume = -1.0f; + float32 earMaxMinCross = -10.0f; + for (int32 i = 0; i < vNum; ++i) { + if (IsEar(i, xrem, yrem, vNum)) { + int32 lower = remainder(i-1,vNum); + int32 upper = remainder(i+1,vNum); + b2Vec2 d1(xrem[upper]-xrem[i],yrem[upper]-yrem[i]); + b2Vec2 d2(xrem[i]-xrem[lower],yrem[i]-yrem[lower]); + b2Vec2 d3(xrem[lower]-xrem[upper],yrem[lower]-yrem[upper]); + + d1.Normalize(); + d2.Normalize(); + d3.Normalize(); + float32 cross12 = b2Abs( b2Cross(d1,d2) ); + float32 cross23 = b2Abs( b2Cross(d2,d3) ); + float32 cross31 = b2Abs( b2Cross(d3,d1) ); + //Find the maximum minimum angle + float32 minCross = b2Min(cross12, b2Min(cross23,cross31)); + if (minCross > earMaxMinCross){ + earIndex = i; + earMaxMinCross = minCross; + } + + /*//This bit chooses the ear with greatest volume first + float32 testVol = b2Abs( d1.x*d2.y-d2.x*d1.y ); + if (testVol > earVolume){ + earIndex = i; + earVolume = testVol; + }*/ + } + } + + // If we still haven't found an ear, we're screwed. + // Note: sometimes this is happening because the + // remaining points are collinear. Really these + // should just be thrown out without halting triangulation. + if (earIndex == -1){ + if (B2_POLYGON_REPORT_ERRORS){ + b2Polygon dump(xrem,yrem,vNum); + printf("Couldn't find an ear, dumping remaining poly:\n"); + dump.printFormatted(); + printf("Please submit this dump to ewjordan at Box2d forums\n"); + } + for (int32 i = 0; i < bufferSize; i++) { + results[i].Set(buffer[i]); + } + + delete[] buffer; + + if (bufferSize > 0) return bufferSize; + else return -1; + } + + // Clip off the ear: + // - remove the ear tip from the list + + --vNum; + float32* newx = new float32[vNum]; + float32* newy = new float32[vNum]; + int32 currDest = 0; + for (int32 i = 0; i < vNum; ++i) { + if (currDest == earIndex) ++currDest; + newx[i] = xrem[currDest]; + newy[i] = yrem[currDest]; + ++currDest; + } + + // - add the clipped triangle to the triangle list + int32 under = (earIndex == 0) ? (vNum) : (earIndex - 1); + int32 over = (earIndex == vNum) ? 0 : (earIndex + 1); + b2Triangle toAdd = b2Triangle(xrem[earIndex], yrem[earIndex], xrem[over], yrem[over], xrem[under], yrem[under]); + buffer[bufferSize].Set(toAdd); + ++bufferSize; + + // - replace the old list with the new one + delete[] xrem; + delete[] yrem; + xrem = newx; + yrem = newy; + } + + b2Triangle toAdd = b2Triangle(xrem[1], yrem[1], xrem[2], yrem[2], + xrem[0], yrem[0]); + buffer[bufferSize].Set(toAdd); + ++bufferSize; + + delete[] xrem; + delete[] yrem; + + b2Assert(bufferSize == xremLength-2); + + for (int32 i = 0; i < bufferSize; i++) { + results[i].Set(buffer[i]); + } + + delete[] buffer; + + return bufferSize; +} + + /** + * Turns a list of triangles into a list of convex polygons. Very simple + * method - start with a seed triangle, keep adding triangles to it until + * you can't add any more without making the polygon non-convex. + * + * Returns an integer telling how many polygons were created. Will fill + * polys array up to polysLength entries, which may be smaller or larger + * than the return value. + * + * Takes O(N*P) where P is the number of resultant polygons, N is triangle + * count. + * + * The final polygon list will not necessarily be minimal, though in + * practice it works fairly well. + */ +int32 PolygonizeTriangles(b2Triangle* triangulated, int32 triangulatedLength, b2Polygon* polys, int32 polysLength) { + int32 polyIndex = 0; + + if (triangulatedLength <= 0) { + return 0; + } + else { + int* covered = new int[triangulatedLength]; + for (int32 i = 0; i < triangulatedLength; ++i) { + covered[i] = 0; + //Check here for degenerate triangles + if ( ( (triangulated[i].x[0] == triangulated[i].x[1]) && (triangulated[i].y[0] == triangulated[i].y[1]) ) + || ( (triangulated[i].x[1] == triangulated[i].x[2]) && (triangulated[i].y[1] == triangulated[i].y[2]) ) + || ( (triangulated[i].x[0] == triangulated[i].x[2]) && (triangulated[i].y[0] == triangulated[i].y[2]) ) ) { + covered[i] = 1; + } + } + + bool notDone = true; + while (notDone) { + int32 currTri = -1; + for (int32 i = 0; i < triangulatedLength; ++i) { + if (covered[i]) + continue; + currTri = i; + break; + } + if (currTri == -1) { + notDone = false; + } + else { + b2Polygon poly(triangulated[currTri]); + covered[currTri] = 1; + int32 index = 0; + for (int32 i = 0; i < 2*triangulatedLength; ++i,++index) { + while (index >= triangulatedLength) index -= triangulatedLength; + if (covered[index]) { + continue; + } + b2Polygon* newP = poly.Add(triangulated[index]); + if (!newP) { + continue; + } + if (newP->nVertices > b2Polygon::maxVerticesPerPolygon) { + delete newP; + newP = NULL; + continue; + } + if (newP->IsConvex()) { //Or should it be IsUsable? Maybe re-write IsConvex to apply the angle threshold from Box2d + poly.Set(*newP); + delete newP; + newP = NULL; + covered[index] = 1; + } else { + delete newP; + newP = NULL; + } + } + if (polyIndex < polysLength){ + poly.MergeParallelEdges(b2_angularSlop); + //If identical points are present, a triangle gets + //borked by the MergeParallelEdges function, hence + //the vertex number check + if (poly.nVertices >= 3) polys[polyIndex].Set(poly); + //else printf("Skipping corrupt poly\n"); + } + if (poly.nVertices >= 3) polyIndex++; //Must be outside (polyIndex < polysLength) test + } + //printf("MEMCHECK: %d\n",_CrtCheckMemory()); + } + delete[] covered; + } + return polyIndex; +} + + /** + * Checks if vertex i is the tip of an ear in polygon defined by xv[] and + * yv[]. + * + * Assumes clockwise orientation of polygon...ick + */ +bool IsEar(int32 i, float32* xv, float32* yv, int32 xvLength) { + float32 dx0, dy0, dx1, dy1; + dx0 = dy0 = dx1 = dy1 = 0; + if (i >= xvLength || i < 0 || xvLength < 3) { + return false; + } + int32 upper = i + 1; + int32 lower = i - 1; + if (i == 0) { + dx0 = xv[0] - xv[xvLength - 1]; + dy0 = yv[0] - yv[xvLength - 1]; + dx1 = xv[1] - xv[0]; + dy1 = yv[1] - yv[0]; + lower = xvLength - 1; + } + else if (i == xvLength - 1) { + dx0 = xv[i] - xv[i - 1]; + dy0 = yv[i] - yv[i - 1]; + dx1 = xv[0] - xv[i]; + dy1 = yv[0] - yv[i]; + upper = 0; + } + else { + dx0 = xv[i] - xv[i - 1]; + dy0 = yv[i] - yv[i - 1]; + dx1 = xv[i + 1] - xv[i]; + dy1 = yv[i + 1] - yv[i]; + } + float32 cross = dx0 * dy1 - dx1 * dy0; + if (cross > 0) + return false; + b2Triangle myTri(xv[i], yv[i], xv[upper], yv[upper], + xv[lower], yv[lower]); + for (int32 j = 0; j < xvLength; ++j) { + if (j == i || j == lower || j == upper) + continue; + if (myTri.IsInside(xv[j], yv[j])) + return false; + } + return true; +} + +void ReversePolygon(b2Polygon& p){ + ReversePolygon(p.x,p.y,p.nVertices); +} + +void ReversePolygon(float* x, float* y, int n) { + if (n == 1) + return; + int32 low = 0; + int32 high = n - 1; + while (low < high) { + float32 buffer = x[low]; + x[low] = x[high]; + x[high] = buffer; + buffer = y[low]; + y[low] = y[high]; + y[high] = buffer; + ++low; + --high; + } +} + + /** + * Decomposes a non-convex polygon into a number of convex polygons, up + * to maxPolys (remaining pieces are thrown out, but the total number + * is returned, so the return value can be greater than maxPolys). + * + * Each resulting polygon will have no more than maxVerticesPerPolygon + * vertices (set to b2MaxPolyVertices by default, though you can change + * this). + * + * Returns -1 if operation fails (usually due to self-intersection of + * polygon). + */ +int32 DecomposeConvex(b2Polygon* p, b2Polygon* results, int32 maxPolys) { + if (p->nVertices < 3) return 0; + + b2Triangle* triangulated = new b2Triangle[p->nVertices - 2]; + int32 nTri; + if (p->IsCCW()) { + //printf("It is ccw \n"); + b2Polygon tempP; + tempP.Set(*p); + ReversePolygon(tempP.x, tempP.y, tempP.nVertices); + nTri = TriangulatePolygon(tempP.x, tempP.y, tempP.nVertices, triangulated); + //ReversePolygon(p->x, p->y, p->nVertices); //reset orientation + } else { + //printf("It is not ccw \n"); + nTri = TriangulatePolygon(p->x, p->y, p->nVertices, triangulated); + } + if (nTri < 1) { + //Still no luck? Oh well... + delete[] triangulated; + return -1; + } + int32 nPolys = PolygonizeTriangles(triangulated, nTri, results, maxPolys); + delete[] triangulated; + return nPolys; +} + + /** + * Decomposes a polygon into convex polygons and adds all pieces to a b2BodyDef + * using a prototype b2PolyDef. All fields of the prototype are used for every + * shape except the vertices (friction, restitution, density, etc). + * + * If you want finer control, you'll have to add everything by hand. + * + * This is the simplest method to add a complicated polygon to a body. + * + * Until Box2D's b2BodyDef behavior changes, this method returns a pointer to + * a heap-allocated array of b2PolyDefs, which must be deleted by the user + * after the b2BodyDef is added to the world. + */ +#if 0 +void DecomposeConvexAndAddTo(b2Polygon* p, b2Body* bd, b2FixtureDef* prototype) { + + if (p->nVertices < 3) return; + b2Polygon* decomposed = new b2Polygon[p->nVertices - 2]; //maximum number of polys + int32 nPolys = DecomposeConvex(p, decomposed, p->nVertices - 2); + //printf("npolys: %d",nPolys); + b2FixtureDef* pdarray = new b2FixtureDef[2*p->nVertices];//extra space in case of splits + int32 extra = 0; + for (int32 i = 0; i < nPolys; ++i) { + b2FixtureDef* toAdd = &pdarray[i+extra]; + *toAdd = *prototype; + //Hmm, shouldn't have to do all this... + /* + toAdd->type = prototype->type; + toAdd->friction = prototype->friction; + toAdd->restitution = prototype->restitution; + toAdd->density = prototype->density; + toAdd->userData = prototype->userData; + toAdd->categoryBits = prototype->categoryBits; + toAdd->maskBits = prototype->maskBits; + toAdd->groupIndex = prototype->groupIndex;//*/ + //decomposed[i].print(); + b2Polygon curr = decomposed[i]; + //TODO ewjordan: move this triangle handling to a better place so that + //it happens even if this convenience function is not called. + if (curr.nVertices == 3){ + //Check here for near-parallel edges, since we can't + //handle this in merge routine + for (int j=0; j<3; ++j){ + int32 lower = (j == 0) ? (curr.nVertices - 1) : (j - 1); + int32 middle = j; + int32 upper = (j == curr.nVertices - 1) ? (0) : (j + 1); + float32 dx0 = curr.x[middle] - curr.x[lower]; float32 dy0 = curr.y[middle] - curr.y[lower]; + float32 dx1 = curr.x[upper] - curr.x[middle]; float32 dy1 = curr.y[upper] - curr.y[middle]; + float32 norm0 = sqrtf(dx0*dx0+dy0*dy0); float32 norm1 = sqrtf(dx1*dx1+dy1*dy1); + if ( !(norm0 > 0.0f && norm1 > 0.0f) ) { + //Identical points, don't do anything! + goto Skip; + } + dx0 /= norm0; dy0 /= norm0; + dx1 /= norm1; dy1 /= norm1; + float32 cross = dx0 * dy1 - dx1 * dy0; + float32 dot = dx0*dx1 + dy0*dy1; + if (fabs(cross) < b2_angularSlop && dot > 0) { + //Angle too close, split the triangle across from this point. + //This is guaranteed to result in two triangles that satify + //the tolerance (one of the angles is 90 degrees) + float32 dx2 = curr.x[lower] - curr.x[upper]; float32 dy2 = curr.y[lower] - curr.y[upper]; + float32 norm2 = sqrtf(dx2*dx2+dy2*dy2); + if (norm2 == 0.0f) { + goto Skip; + } + dx2 /= norm2; dy2 /= norm2; + float32 thisArea = curr.GetArea(); + float32 thisHeight = 2.0f * thisArea / norm2; + float32 buffer2 = dx2; + dx2 = dy2; dy2 = -buffer2; + //Make two new polygons + //printf("dx2: %f, dy2: %f, thisHeight: %f, middle: %d\n",dx2,dy2,thisHeight,middle); + float32 newX1[3] = { curr.x[middle]+dx2*thisHeight, curr.x[lower], curr.x[middle] }; + float32 newY1[3] = { curr.y[middle]+dy2*thisHeight, curr.y[lower], curr.y[middle] }; + float32 newX2[3] = { newX1[0], curr.x[middle], curr.x[upper] }; + float32 newY2[3] = { newY1[0], curr.y[middle], curr.y[upper] }; + b2Polygon p1(newX1,newY1,3); + b2Polygon p2(newX2,newY2,3); + if (p1.IsUsable()){ + p1.AddTo(*toAdd); + + + bd->CreateFixture(toAdd); + ++extra; + } else if (B2_POLYGON_REPORT_ERRORS){ + printf("Didn't add unusable polygon. Dumping vertices:\n"); + p1.print(); + } + if (p2.IsUsable()){ + p2.AddTo(pdarray[i+extra]); + + bd->CreateFixture(toAdd); + } else if (B2_POLYGON_REPORT_ERRORS){ + printf("Didn't add unusable polygon. Dumping vertices:\n"); + p2.print(); + } + goto Skip; + } + } + + } + if (decomposed[i].IsUsable()){ + decomposed[i].AddTo(*toAdd); + + bd->CreateFixture((const b2FixtureDef*)toAdd); + } else if (B2_POLYGON_REPORT_ERRORS){ + printf("Didn't add unusable polygon. Dumping vertices:\n"); + decomposed[i].print(); + } +Skip: + ; + } + delete[] pdarray; + delete[] decomposed; + return;// pdarray; //needs to be deleted after body is created +} + +#endif + /** + * Find the convex hull of a point cloud using "Gift-wrap" algorithm - start + * with an extremal point, and walk around the outside edge by testing + * angles. + * + * Runs in O(N*S) time where S is number of sides of resulting polygon. + * Worst case: point cloud is all vertices of convex polygon -> O(N^2). + * + * There may be faster algorithms to do this, should you need one - + * this is just the simplest. You can get O(N log N) expected time if you + * try, I think, and O(N) if you restrict inputs to simple polygons. + * + * Returns null if number of vertices passed is less than 3. + * + * Results should be passed through convex decomposition afterwards + * to ensure that each shape has few enough points to be used in Box2d. + * + * FIXME?: May be buggy with colinear points on hull. Couldn't find a test + * case that resulted in wrong behavior. If one turns up, the solution is to + * supplement angle check with an equality resolver that always picks the + * longer edge. I think the current solution is working, though it sometimes + * creates an extra edge along a line. + */ + +b2Polygon ConvexHull(b2Vec2* v, int nVert) { + float32* cloudX = new float32[nVert]; + float32* cloudY = new float32[nVert]; + for (int32 i = 0; i < nVert; ++i) { + cloudX[i] = v[i].x; + cloudY[i] = v[i].y; + } + b2Polygon result = ConvexHull(cloudX, cloudY, nVert); + delete[] cloudX; + delete[] cloudY; + return result; +} + +b2Polygon ConvexHull(float32* cloudX, float32* cloudY, int32 nVert) { + b2Assert(nVert > 2); + int32* edgeList = new int32[nVert]; + int32 numEdges = 0; + + float32 minY = 1e10; + int32 minYIndex = nVert; + for (int32 i = 0; i < nVert; ++i) { + if (cloudY[i] < minY) { + minY = cloudY[i]; + minYIndex = i; + } + } + + int32 startIndex = minYIndex; + int32 winIndex = -1; + float32 dx = -1.0f; + float32 dy = 0.0f; + while (winIndex != minYIndex) { + float32 newdx = 0.0f; + float32 newdy = 0.0f; + float32 maxDot = -2.0f; + for (int32 i = 0; i < nVert; ++i) { + if (i == startIndex) + continue; + newdx = cloudX[i] - cloudX[startIndex]; + newdy = cloudY[i] - cloudY[startIndex]; + float32 nrm = sqrtf(newdx * newdx + newdy * newdy); + nrm = (nrm == 0.0f) ? 1.0f : nrm; + newdx /= nrm; + newdy /= nrm; + + //Cross and dot products act as proxy for angle + //without requiring inverse trig. + //FIXED: don't need cross test + //float32 newCross = newdx * dy - newdy * dx; + float32 newDot = newdx * dx + newdy * dy; + if (newDot > maxDot) {//newCross >= 0.0f && newDot > maxDot) { + maxDot = newDot; + winIndex = i; + } + } + edgeList[numEdges++] = winIndex; + dx = cloudX[winIndex] - cloudX[startIndex]; + dy = cloudY[winIndex] - cloudY[startIndex]; + float32 nrm = sqrtf(dx * dx + dy * dy); + nrm = (nrm == 0.0f) ? 1.0f : nrm; + dx /= nrm; + dy /= nrm; + startIndex = winIndex; + } + + float32* xres = new float32[numEdges]; + float32* yres = new float32[numEdges]; + for (int32 i = 0; i < numEdges; i++) { + xres[i] = cloudX[edgeList[i]]; + yres[i] = cloudY[edgeList[i]]; + //("%f, %f\n",xres[i],yres[i]); + } + + b2Polygon returnVal(xres, yres, numEdges); + + delete[] xres; + delete[] yres; + delete[] edgeList; + returnVal.MergeParallelEdges(b2_angularSlop); + return returnVal; +} + + +/* + * Given sines and cosines, tells if A's angle is less than B's on -Pi, Pi + * (in other words, is A "righter" than B) + */ +bool IsRighter(float32 sinA, float32 cosA, float32 sinB, float32 cosB){ + if (sinA < 0){ + if (sinB > 0 || cosA <= cosB) return true; + else return false; + } else { + if (sinB < 0 || cosA <= cosB) return false; + else return true; + } +} + +//Fix for obnoxious behavior for the % operator for negative numbers... +int32 remainder(int32 x, int32 modulus){ + int32 rem = x % modulus; + while (rem < 0){ + rem += modulus; + } + return rem; +} + +/* +Method: +Start at vertex with minimum y (pick maximum x one if there are two). +We aim our "lastDir" vector at (1.0, 0) +We look at the two rays going off from our start vertex, and follow whichever +has the smallest angle (in -Pi -> Pi) wrt lastDir ("rightest" turn) + +Loop until we hit starting vertex: + +We add our current vertex to the list. +We check the seg from current vertex to next vertex for intersections + - if no intersections, follow to next vertex and continue + - if intersections, pick one with minimum distance + - if more than one, pick one with "rightest" next point (two possibilities for each) + +*/ + +b2Polygon TraceEdge(b2Polygon* p){ + b2PolyNode* nodes = new b2PolyNode[p->nVertices*p->nVertices];//overkill, but sufficient (order of mag. is right) + int32 nNodes = 0; + + //Add base nodes (raw outline) + for (int32 i=0; i < p->nVertices; ++i){ + b2Vec2 pos(p->x[i],p->y[i]); + nodes[i].position = pos; + ++nNodes; + int32 iplus = (i==p->nVertices-1)?0:i+1; + int32 iminus = (i==0)?p->nVertices-1:i-1; + nodes[i].AddConnection(nodes[iplus]); + nodes[i].AddConnection(nodes[iminus]); + } + + //Process intersection nodes - horribly inefficient + bool dirty = true; + int counter = 0; + while (dirty){ + dirty = false; + for (int32 i=0; i < nNodes; ++i){ + for (int32 j=0; j < nodes[i].nConnected; ++j){ + for (int32 k=0; k < nNodes; ++k){ + if (k==i || &nodes[k] == nodes[i].connected[j]) continue; + for (int32 l=0; l < nodes[k].nConnected; ++l){ + + if ( nodes[k].connected[l] == nodes[i].connected[j] || + nodes[k].connected[l] == &nodes[i]) continue; + //Check intersection + b2Vec2 intersectPt; + //if (counter > 100) printf("checking intersection: %d, %d, %d, %d\n",i,j,k,l); + bool crosses = intersect(nodes[i].position,nodes[i].connected[j]->position, + nodes[k].position,nodes[k].connected[l]->position, + intersectPt); + if (crosses){ + /*if (counter > 100) { + printf("Found crossing at %f, %f\n",intersectPt.x, intersectPt.y); + printf("Locations: %f,%f - %f,%f | %f,%f - %f,%f\n", + nodes[i].position.x, nodes[i].position.y, + nodes[i].connected[j]->position.x, nodes[i].connected[j]->position.y, + nodes[k].position.x,nodes[k].position.y, + nodes[k].connected[l]->position.x,nodes[k].connected[l]->position.y); + printf("Memory addresses: %d, %d, %d, %d\n",(int)&nodes[i],(int)nodes[i].connected[j],(int)&nodes[k],(int)nodes[k].connected[l]); + }*/ + dirty = true; + //Destroy and re-hook connections at crossing point + b2PolyNode* connj = nodes[i].connected[j]; + b2PolyNode* connl = nodes[k].connected[l]; + nodes[i].connected[j]->RemoveConnection(nodes[i]); + nodes[i].RemoveConnection(*connj); + nodes[k].connected[l]->RemoveConnection(nodes[k]); + nodes[k].RemoveConnection(*connl); + nodes[nNodes] = b2PolyNode(intersectPt); + nodes[nNodes].AddConnection(nodes[i]); + nodes[i].AddConnection(nodes[nNodes]); + nodes[nNodes].AddConnection(nodes[k]); + nodes[k].AddConnection(nodes[nNodes]); + nodes[nNodes].AddConnection(*connj); + connj->AddConnection(nodes[nNodes]); + nodes[nNodes].AddConnection(*connl); + connl->AddConnection(nodes[nNodes]); + ++nNodes; + goto SkipOut; + } + } + } + } + } + SkipOut: + ++counter; + //if (counter > 100) printf("Counter: %d\n",counter); + } + + /* + // Debugging: check for connection consistency + for (int32 i=0; inConnected == 0) b2Assert(false); + b2PolyNode* connect = nodes[i].connected[j]; + bool found = false; + for (int32 k=0; knConnected; ++k) { + if (connect->connected[k] == &nodes[i]) found = true; + } + b2Assert(found); + } + }*/ + + //Collapse duplicate points + bool foundDupe = true; + int nActive = nNodes; + while (foundDupe){ + foundDupe = false; + for (int32 i=0; i < nNodes; ++i){ + if (nodes[i].nConnected == 0) continue; + for (int32 j=i+1; j < nNodes; ++j){ + if (nodes[j].nConnected == 0) continue; + b2Vec2 diff = nodes[i].position - nodes[j].position; + if (diff.LengthSquared() <= COLLAPSE_DIST_SQR){ + if (nActive <= 3) return b2Polygon(); + //printf("Found dupe, %d left\n",nActive); + --nActive; + foundDupe = true; + b2PolyNode* inode = &nodes[i]; + b2PolyNode* jnode = &nodes[j]; + //Move all of j's connections to i, and orphan j + int32 njConn = jnode->nConnected; + for (int32 k=0; k < njConn; ++k){ + b2PolyNode* knode = jnode->connected[k]; + b2Assert(knode != jnode); + if (knode != inode) { + inode->AddConnection(*knode); + knode->AddConnection(*inode); + } + knode->RemoveConnection(*jnode); + //printf("knode %d on node %d now has %d connections\n",k,j,knode->nConnected); + //printf("Found duplicate point.\n"); + } + /* + printf("Orphaning node at address %d\n",(int)jnode); + for (int32 k=0; kconnected[k]->IsConnectedTo(*jnode)) printf("Problem!!!\n"); + } + for (int32 k=0; k < njConn; ++k){ + jnode->RemoveConnectionByIndex(k); + } + */ + jnode->nConnected = 0; + } + } + } + } + + /* + // Debugging: check for connection consistency + for (int32 i=0; inConnected == 0) { + printf("Problem with node %d connection at address %d\n",i,(int)(nodes[i].connected[j])); + b2Assert(false); + } + b2PolyNode* connect = nodes[i].connected[j]; + bool found = false; + for (int32 k=0; knConnected; ++k) { + if (connect->connected[k] == &nodes[i]) found = true; + } + if (!found) printf("Connection %d (of %d) on node %d (of %d) did not have reciprocal connection.\n",j,nConn,i,nNodes); + b2Assert(found); + } + }//*/ + + //Now walk the edge of the list + + //Find node with minimum y value (max x if equal) + float32 minY = 1e10; + float32 maxX = -1e10; + int32 minYIndex = -1; + for (int32 i = 0; i < nNodes; ++i) { + if (nodes[i].position.y < minY && nodes[i].nConnected > 1) { + minY = nodes[i].position.y; + minYIndex = i; + maxX = nodes[i].position.x; + } else if (nodes[i].position.y == minY && nodes[i].position.x > maxX && nodes[i].nConnected > 1) { + minYIndex = i; + maxX = nodes[i].position.x; + } + } + + b2Vec2 origDir(1.0f,0.0f); + b2Vec2* resultVecs = new b2Vec2[4*nNodes];// nodes may be visited more than once, unfortunately - change to growable array! + int32 nResultVecs = 0; + b2PolyNode* currentNode = &nodes[minYIndex]; + b2PolyNode* startNode = currentNode; + b2Assert(currentNode->nConnected > 0); + b2PolyNode* nextNode = currentNode->GetRightestConnection(origDir); + if (!nextNode) goto CleanUp; // Borked, clean up our mess and return + resultVecs[0] = startNode->position; + ++nResultVecs; + while (nextNode != startNode){ + if (nResultVecs > 4*nNodes){ + /* + printf("%d, %d, %d\n",(int)startNode,(int)currentNode,(int)nextNode); + printf("%f, %f -> %f, %f\n",currentNode->position.x,currentNode->position.y, nextNode->position.x, nextNode->position.y); + p->printFormatted(); + printf("Dumping connection graph: \n"); + for (int32 i=0; iposition.x, i,j,nodes[i].connected[j]->position.y); + } + } + printf("Dumping results thus far: \n"); + for (int32 i=0; iposition; + b2PolyNode* oldNode = currentNode; + currentNode = nextNode; + //printf("Old node connections = %d; address %d\n",oldNode->nConnected, (int)oldNode); + //printf("Current node connections = %d; address %d\n",currentNode->nConnected, (int)currentNode); + nextNode = currentNode->GetRightestConnection(oldNode); + if (!nextNode) goto CleanUp; // There was a problem, so jump out of the loop and use whatever garbage we've generated so far + //printf("nextNode address: %d\n",(int)nextNode); + } + + CleanUp: + + float32* xres = new float32[nResultVecs]; + float32* yres = new float32[nResultVecs]; + for (int32 i=0; iposition){ + isFound = true; + foundIndex = i; + break; + } + } + b2Assert(isFound); + --nConnected; + //printf("nConnected: %d\n",nConnected); + for (int32 i=foundIndex; i < nConnected; ++i){ + connected[i] = connected[i+1]; + } +} +void b2PolyNode::RemoveConnectionByIndex(int32 index){ + --nConnected; + //printf("New nConnected = %d\n",nConnected); + for (int32 i=index; i < nConnected; ++i){ + connected[i] = connected[i+1]; + } +} +bool b2PolyNode::IsConnectedTo(b2PolyNode& me){ + bool isFound = false; + for (int32 i=0; iposition){ + isFound = true; + break; + } + } + return isFound; +} +b2PolyNode* b2PolyNode::GetRightestConnection(b2PolyNode* incoming){ + if (nConnected == 0) b2Assert(false); // This means the connection graph is inconsistent + if (nConnected == 1) { + //b2Assert(false); + // Because of the possibility of collapsing nearby points, + // we may end up with "spider legs" dangling off of a region. + // The correct behavior here is to turn around. + return incoming; + } + b2Vec2 inDir = position - incoming->position; + float32 inLength = inDir.Normalize(); + b2Assert(inLength > CMP_EPSILON); + + b2PolyNode* result = NULL; + for (int32 i=0; iposition - position; + float32 testLengthSqr = testDir.LengthSquared(); + testDir.Normalize(); + /* + if (testLengthSqr < COLLAPSE_DIST_SQR) { + printf("Problem with connection %d\n",i); + printf("This node has %d connections\n",nConnected); + printf("That one has %d\n",connected[i]->nConnected); + if (this == connected[i]) printf("This points at itself.\n"); + }*/ + b2Assert (testLengthSqr >= COLLAPSE_DIST_SQR); + float32 myCos = b2Dot(inDir,testDir); + float32 mySin = b2Cross(inDir,testDir); + if (result){ + b2Vec2 resultDir = result->position - position; + resultDir.Normalize(); + float32 resCos = b2Dot(inDir,resultDir); + float32 resSin = b2Cross(inDir,resultDir); + if (IsRighter(mySin,myCos,resSin,resCos)){ + result = connected[i]; + } + } else{ + result = connected[i]; + } + } + if (B2_POLYGON_REPORT_ERRORS && !result) { + printf("nConnected = %d\n",nConnected); + for (int32 i=0; i +#include +namespace b2ConvexDecomp { + +static bool B2_POLYGON_REPORT_ERRORS = false; + +class b2Polygon; + +int32 remainder(int32 x, int32 modulus); +int32 TriangulatePolygon(float32* xv, float32* yv, int32 vNum, b2Triangle* results); +bool IsEar(int32 i, float32* xv, float32* yv, int32 xvLength); //Not for external use +int32 PolygonizeTriangles(b2Triangle* triangulated, int32 triangulatedLength, b2Polygon* polys, int32 polysLength); +int32 DecomposeConvex(b2Polygon* p, b2Polygon* results, int32 maxPolys); +//void DecomposeConvexAndAddTo(b2Polygon* p, b2Body* bd, b2FixtureDef* prototype); + +void ReversePolygon(float32* x, float32* y, int n); + +b2Polygon TraceEdge(b2Polygon* p); //For use with self-intersecting polygons, finds outline + +class b2Polygon { + +public: + const static int32 maxVerticesPerPolygon = b2_maxPolygonVertices; + + float32* x; //vertex arrays + float32* y; + int32 nVertices; + + float32 area; + bool areaIsSet; + + b2Polygon(float32* _x, float32* _y, int32 nVert); + b2Polygon(b2Vec2* v, int32 nVert); + b2Polygon(); + ~b2Polygon(); + + float32 GetArea(); + + void MergeParallelEdges(float32 tolerance); + b2Vec2* GetVertexVecs(); + b2Polygon(b2Triangle& t); + void Set(const b2Polygon& p); + bool IsConvex(); + bool IsCCW(); + bool IsUsable(bool printError); + bool IsUsable(); + bool IsSimple(); + // void AddTo(b2FixtureDef& pd); + + b2Polygon* Add(b2Triangle& t); + + void print(){ + printFormatted(); + /* + for (int32 i=0; i0); + if (ccw){ + x[0] = x1; x[1] = x2; x[2] = x3; + y[0] = y1; y[1] = y2; y[2] = y3; + } else{ + x[0] = x1; x[1] = x3; x[2] = x2; + y[0] = y1; y[1] = y3; y[2] = y2; + } +} + +b2Triangle::b2Triangle(){ + x = new float32[3]; + y = new float32[3]; +} + +b2Triangle::~b2Triangle(){ + delete[] x; + delete[] y; +} + +void b2Triangle::Set(const b2Triangle& toMe) { + for (int32 i=0; i<3; ++i) { + x[i] = toMe.x[i]; + y[i] = toMe.y[i]; + } +} + +bool b2Triangle::IsInside(float32 _x, float32 _y){ + if (_x < x[0] && _x < x[1] && _x < x[2]) return false; + if (_x > x[0] && _x > x[1] && _x > x[2]) return false; + if (_y < y[0] && _y < y[1] && _y < y[2]) return false; + if (_y > y[0] && _y > y[1] && _y > y[2]) return false; + + float32 vx2 = _x-x[0]; float32 vy2 = _y-y[0]; + float32 vx1 = x[1]-x[0]; float32 vy1 = y[1]-y[0]; + float32 vx0 = x[2]-x[0]; float32 vy0 = y[2]-y[0]; + + float32 dot00 = vx0*vx0+vy0*vy0; + float32 dot01 = vx0*vx1+vy0*vy1; + float32 dot02 = vx0*vx2+vy0*vy2; + float32 dot11 = vx1*vx1+vy1*vy1; + float32 dot12 = vx1*vx2+vy1*vy2; + float32 invDenom = 1.0f / (dot00*dot11 - dot01*dot01); + float32 u = (dot11*dot02 - dot01*dot12)*invDenom; + float32 v = (dot00*dot12 - dot01*dot02)*invDenom; + + return ((u>=0)&&(v>=0)&&(u+v<=1)); +} + + +} diff --git a/thirdparty/b2d_convexdecomp/b2Triangle.h b/thirdparty/b2d_convexdecomp/b2Triangle.h new file mode 100644 index 0000000000..99ab5cba69 --- /dev/null +++ b/thirdparty/b2d_convexdecomp/b2Triangle.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2007 Eric Jordan + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +#ifndef B2_TRIANGLE_H +#define B2_TRIANGLE_H + +#include "b2Glue.h" + +namespace b2ConvexDecomp { + + + +class b2Triangle{ +public: + float* x; + float* y; + b2Triangle(); + b2Triangle(float32 x1, float32 y1, float32 x2, float32 y2, float32 x3, float32 y3); + ~b2Triangle(); + bool IsInside(float32 _x, float32 _y); + void Set(const b2Triangle& toMe); + +}; + +} +#endif diff --git a/thirdparty/misc/curl_hostcheck.c b/thirdparty/misc/curl_hostcheck.c new file mode 100644 index 0000000000..feef232619 --- /dev/null +++ b/thirdparty/misc/curl_hostcheck.c @@ -0,0 +1,217 @@ +/*************************************************************************** + * _ _ ____ _ + * Project ___| | | | _ \| | + * / __| | | | |_) | | + * | (__| |_| | _ <| |___ + * \___|\___/|_| \_\_____| + * + * Copyright (C) 1998 - 2012, Daniel Stenberg, , et al. + * + * This software is licensed as described in the file COPYING, which + * you should have received as part of this distribution. The terms + * are also available at http://curl.haxx.se/docs/copyright.html. + * + * You may opt to use, copy, modify, merge, publish, distribute and/or sell + * copies of the Software, and permit persons to whom the Software is + * furnished to do so, under the terms of the COPYING file. + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY + * KIND, either express or implied. + * + ***************************************************************************/ + +/* This file is an amalgamation of hostcheck.c and most of rawstr.c + from cURL. The contents of the COPYING file mentioned above are: + +COPYRIGHT AND PERMISSION NOTICE + +Copyright (c) 1996 - 2013, Daniel Stenberg, . + +All rights reserved. + +Permission to use, copy, modify, and distribute this software for any purpose +with or without fee is hereby granted, provided that the above copyright +notice and this permission notice appear in all copies. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN +NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +OR OTHER DEALINGS IN THE SOFTWARE. + +Except as contained in this notice, the name of a copyright holder shall not +be used in advertising or otherwise to promote the sale, use or other dealings +in this Software without prior written authorization of the copyright holder. +*/ + +#include "curl_hostcheck.h" +#include + +/* Portable, consistent toupper (remember EBCDIC). Do not use toupper() because + its behavior is altered by the current locale. */ +static char Curl_raw_toupper(char in) +{ + switch (in) { + case 'a': + return 'A'; + case 'b': + return 'B'; + case 'c': + return 'C'; + case 'd': + return 'D'; + case 'e': + return 'E'; + case 'f': + return 'F'; + case 'g': + return 'G'; + case 'h': + return 'H'; + case 'i': + return 'I'; + case 'j': + return 'J'; + case 'k': + return 'K'; + case 'l': + return 'L'; + case 'm': + return 'M'; + case 'n': + return 'N'; + case 'o': + return 'O'; + case 'p': + return 'P'; + case 'q': + return 'Q'; + case 'r': + return 'R'; + case 's': + return 'S'; + case 't': + return 'T'; + case 'u': + return 'U'; + case 'v': + return 'V'; + case 'w': + return 'W'; + case 'x': + return 'X'; + case 'y': + return 'Y'; + case 'z': + return 'Z'; + } + return in; +} + +/* + * Curl_raw_equal() is for doing "raw" case insensitive strings. This is meant + * to be locale independent and only compare strings we know are safe for + * this. See http://daniel.haxx.se/blog/2008/10/15/strcasecmp-in-turkish/ for + * some further explanation to why this function is necessary. + * + * The function is capable of comparing a-z case insensitively even for + * non-ascii. + */ + +static int Curl_raw_equal(const char *first, const char *second) +{ + while(*first && *second) { + if(Curl_raw_toupper(*first) != Curl_raw_toupper(*second)) + /* get out of the loop as soon as they don't match */ + break; + first++; + second++; + } + /* we do the comparison here (possibly again), just to make sure that if the + loop above is skipped because one of the strings reached zero, we must not + return this as a successful match */ + return (Curl_raw_toupper(*first) == Curl_raw_toupper(*second)); +} + +static int Curl_raw_nequal(const char *first, const char *second, size_t max) +{ + while(*first && *second && max) { + if(Curl_raw_toupper(*first) != Curl_raw_toupper(*second)) { + break; + } + max--; + first++; + second++; + } + if(0 == max) + return 1; /* they are equal this far */ + + return Curl_raw_toupper(*first) == Curl_raw_toupper(*second); +} + +/* + * Match a hostname against a wildcard pattern. + * E.g. + * "foo.host.com" matches "*.host.com". + * + * We use the matching rule described in RFC6125, section 6.4.3. + * http://tools.ietf.org/html/rfc6125#section-6.4.3 + */ + +static int hostmatch(const char *hostname, const char *pattern) +{ + const char *pattern_label_end, *pattern_wildcard, *hostname_label_end; + int wildcard_enabled; + size_t prefixlen, suffixlen; + pattern_wildcard = strchr(pattern, '*'); + if(pattern_wildcard == NULL) + return Curl_raw_equal(pattern, hostname) ? + CURL_HOST_MATCH : CURL_HOST_NOMATCH; + + /* We require at least 2 dots in pattern to avoid too wide wildcard + match. */ + wildcard_enabled = 1; + pattern_label_end = strchr(pattern, '.'); + if(pattern_label_end == NULL || strchr(pattern_label_end+1, '.') == NULL || + pattern_wildcard > pattern_label_end || + Curl_raw_nequal(pattern, "xn--", 4)) { + wildcard_enabled = 0; + } + if(!wildcard_enabled) + return Curl_raw_equal(pattern, hostname) ? + CURL_HOST_MATCH : CURL_HOST_NOMATCH; + + hostname_label_end = strchr(hostname, '.'); + if(hostname_label_end == NULL || + !Curl_raw_equal(pattern_label_end, hostname_label_end)) + return CURL_HOST_NOMATCH; + + /* The wildcard must match at least one character, so the left-most + label of the hostname is at least as large as the left-most label + of the pattern. */ + if(hostname_label_end - hostname < pattern_label_end - pattern) + return CURL_HOST_NOMATCH; + + prefixlen = pattern_wildcard - pattern; + suffixlen = pattern_label_end - (pattern_wildcard+1); + return Curl_raw_nequal(pattern, hostname, prefixlen) && + Curl_raw_nequal(pattern_wildcard+1, hostname_label_end - suffixlen, + suffixlen) ? + CURL_HOST_MATCH : CURL_HOST_NOMATCH; +} + +int Tool_Curl_cert_hostcheck(const char *match_pattern, const char *hostname) +{ + if(!match_pattern || !*match_pattern || + !hostname || !*hostname) /* sanity check */ + return 0; + + if(Curl_raw_equal(hostname, match_pattern)) /* trivial case */ + return 1; + + if(hostmatch(hostname,match_pattern) == CURL_HOST_MATCH) + return 1; + return 0; +} diff --git a/thirdparty/misc/curl_hostcheck.h b/thirdparty/misc/curl_hostcheck.h new file mode 100644 index 0000000000..1b7fbe81e3 --- /dev/null +++ b/thirdparty/misc/curl_hostcheck.h @@ -0,0 +1,39 @@ +#ifndef HEADER_TOOL_CURL_HOSTCHECK_H +#define HEADER_TOOL_CURL_HOSTCHECK_H + +#ifdef __cplusplus +extern "C" { +#endif + +/*************************************************************************** + * _ _ ____ _ + * Project ___| | | | _ \| | + * / __| | | | |_) | | + * | (__| |_| | _ <| |___ + * \___|\___/|_| \_\_____| + * + * Copyright (C) 1998 - 2012, Daniel Stenberg, , et al. + * + * This software is licensed as described in the file COPYING, which + * you should have received as part of this distribution. The terms + * are also available at http://curl.haxx.se/docs/copyright.html. + * + * You may opt to use, copy, modify, merge, publish, distribute and/or sell + * copies of the Software, and permit persons to whom the Software is + * furnished to do so, under the terms of the COPYING file. + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY + * KIND, either express or implied. + * + ***************************************************************************/ + +#define CURL_HOST_NOMATCH 0 +#define CURL_HOST_MATCH 1 +int Tool_Curl_cert_hostcheck(const char *match_pattern, const char *hostname); + +#ifdef __cplusplus +} +#endif + +#endif /* HEADER_CURL_HOSTCHECK_H */ + diff --git a/thirdparty/misc/mikktspace.c b/thirdparty/misc/mikktspace.c new file mode 100644 index 0000000000..62aa2da251 --- /dev/null +++ b/thirdparty/misc/mikktspace.c @@ -0,0 +1,1890 @@ +/** \file mikktspace/mikktspace.c + * \ingroup mikktspace + */ +/** + * Copyright (C) 2011 by Morten S. Mikkelsen + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +#include +#include +#include +#include +#include +#include + +#include "mikktspace.h" + +#define TFALSE 0 +#define TTRUE 1 + +#ifndef M_PI +#define M_PI 3.1415926535897932384626433832795 +#endif + +#define INTERNAL_RND_SORT_SEED 39871946 + +// internal structure +typedef struct { + float x, y, z; +} SVec3; + +static tbool veq( const SVec3 v1, const SVec3 v2 ) +{ + return (v1.x == v2.x) && (v1.y == v2.y) && (v1.z == v2.z); +} + +static SVec3 vadd( const SVec3 v1, const SVec3 v2 ) +{ + SVec3 vRes; + + vRes.x = v1.x + v2.x; + vRes.y = v1.y + v2.y; + vRes.z = v1.z + v2.z; + + return vRes; +} + + +static SVec3 vsub( const SVec3 v1, const SVec3 v2 ) +{ + SVec3 vRes; + + vRes.x = v1.x - v2.x; + vRes.y = v1.y - v2.y; + vRes.z = v1.z - v2.z; + + return vRes; +} + +static SVec3 vscale(const float fS, const SVec3 v) +{ + SVec3 vRes; + + vRes.x = fS * v.x; + vRes.y = fS * v.y; + vRes.z = fS * v.z; + + return vRes; +} + +static float LengthSquared( const SVec3 v ) +{ + return v.x*v.x + v.y*v.y + v.z*v.z; +} + +static float Length( const SVec3 v ) +{ + return sqrtf(LengthSquared(v)); +} + +static SVec3 Normalize( const SVec3 v ) +{ + return vscale(1 / Length(v), v); +} + +static float vdot( const SVec3 v1, const SVec3 v2) +{ + return v1.x*v2.x + v1.y*v2.y + v1.z*v2.z; +} + + +static tbool NotZero(const float fX) +{ + // could possibly use FLT_EPSILON instead + return fabsf(fX) > FLT_MIN; +} + +static tbool VNotZero(const SVec3 v) +{ + // might change this to an epsilon based test + return NotZero(v.x) || NotZero(v.y) || NotZero(v.z); +} + + + +typedef struct { + int iNrFaces; + int * pTriMembers; +} SSubGroup; + +typedef struct { + int iNrFaces; + int * pFaceIndices; + int iVertexRepresentitive; + tbool bOrientPreservering; +} SGroup; + +// +#define MARK_DEGENERATE 1 +#define QUAD_ONE_DEGEN_TRI 2 +#define GROUP_WITH_ANY 4 +#define ORIENT_PRESERVING 8 + + + +typedef struct { + int FaceNeighbors[3]; + SGroup * AssignedGroup[3]; + + // normalized first order face derivatives + SVec3 vOs, vOt; + float fMagS, fMagT; // original magnitudes + + // determines if the current and the next triangle are a quad. + int iOrgFaceNumber; + int iFlag, iTSpacesOffs; + unsigned char vert_num[4]; +} STriInfo; + +typedef struct { + SVec3 vOs; + float fMagS; + SVec3 vOt; + float fMagT; + int iCounter; // this is to average back into quads. + tbool bOrient; +} STSpace; + +static int GenerateInitialVerticesIndexList(STriInfo pTriInfos[], int piTriList_out[], const SMikkTSpaceContext * pContext, const int iNrTrianglesIn); +static void GenerateSharedVerticesIndexList(int piTriList_in_and_out[], const SMikkTSpaceContext * pContext, const int iNrTrianglesIn); +static void InitTriInfo(STriInfo pTriInfos[], const int piTriListIn[], const SMikkTSpaceContext * pContext, const int iNrTrianglesIn); +static int Build4RuleGroups(STriInfo pTriInfos[], SGroup pGroups[], int piGroupTrianglesBuffer[], const int piTriListIn[], const int iNrTrianglesIn); +static tbool GenerateTSpaces(STSpace psTspace[], const STriInfo pTriInfos[], const SGroup pGroups[], + const int iNrActiveGroups, const int piTriListIn[], const float fThresCos, + const SMikkTSpaceContext * pContext); + +static int MakeIndex(const int iFace, const int iVert) +{ + assert(iVert>=0 && iVert<4 && iFace>=0); + return (iFace<<2) | (iVert&0x3); +} + +static void IndexToData(int * piFace, int * piVert, const int iIndexIn) +{ + piVert[0] = iIndexIn&0x3; + piFace[0] = iIndexIn>>2; +} + +static STSpace AvgTSpace(const STSpace * pTS0, const STSpace * pTS1) +{ + STSpace ts_res; + + // this if is important. Due to floating point precision + // averaging when ts0==ts1 will cause a slight difference + // which results in tangent space splits later on + if (pTS0->fMagS==pTS1->fMagS && pTS0->fMagT==pTS1->fMagT && + veq(pTS0->vOs,pTS1->vOs) && veq(pTS0->vOt, pTS1->vOt)) + { + ts_res.fMagS = pTS0->fMagS; + ts_res.fMagT = pTS0->fMagT; + ts_res.vOs = pTS0->vOs; + ts_res.vOt = pTS0->vOt; + } + else + { + ts_res.fMagS = 0.5f*(pTS0->fMagS+pTS1->fMagS); + ts_res.fMagT = 0.5f*(pTS0->fMagT+pTS1->fMagT); + ts_res.vOs = vadd(pTS0->vOs,pTS1->vOs); + ts_res.vOt = vadd(pTS0->vOt,pTS1->vOt); + if ( VNotZero(ts_res.vOs) ) ts_res.vOs = Normalize(ts_res.vOs); + if ( VNotZero(ts_res.vOt) ) ts_res.vOt = Normalize(ts_res.vOt); + } + + return ts_res; +} + + + +static SVec3 GetPosition(const SMikkTSpaceContext * pContext, const int index); +static SVec3 GetNormal(const SMikkTSpaceContext * pContext, const int index); +static SVec3 GetTexCoord(const SMikkTSpaceContext * pContext, const int index); + + +// degen triangles +static void DegenPrologue(STriInfo pTriInfos[], int piTriList_out[], const int iNrTrianglesIn, const int iTotTris); +static void DegenEpilogue(STSpace psTspace[], STriInfo pTriInfos[], int piTriListIn[], const SMikkTSpaceContext * pContext, const int iNrTrianglesIn, const int iTotTris); + + +tbool genTangSpaceDefault(const SMikkTSpaceContext * pContext) +{ + return genTangSpace(pContext, 180.0f); +} + +tbool genTangSpace(const SMikkTSpaceContext * pContext, const float fAngularThreshold) +{ + // count nr_triangles + int * piTriListIn = NULL, * piGroupTrianglesBuffer = NULL; + STriInfo * pTriInfos = NULL; + SGroup * pGroups = NULL; + STSpace * psTspace = NULL; + int iNrTrianglesIn = 0, f=0, t=0, i=0; + int iNrTSPaces = 0, iTotTris = 0, iDegenTriangles = 0, iNrMaxGroups = 0; + int iNrActiveGroups = 0, index = 0; + const int iNrFaces = pContext->m_pInterface->m_getNumFaces(pContext); + tbool bRes = TFALSE; + const float fThresCos = (float) cos((fAngularThreshold*(float)M_PI)/180.0f); + + // verify all call-backs have been set + if ( pContext->m_pInterface->m_getNumFaces==NULL || + pContext->m_pInterface->m_getNumVerticesOfFace==NULL || + pContext->m_pInterface->m_getPosition==NULL || + pContext->m_pInterface->m_getNormal==NULL || + pContext->m_pInterface->m_getTexCoord==NULL ) + return TFALSE; + + // count triangles on supported faces + for (f=0; fm_pInterface->m_getNumVerticesOfFace(pContext, f); + if (verts==3) ++iNrTrianglesIn; + else if (verts==4) iNrTrianglesIn += 2; + } + if (iNrTrianglesIn<=0) return TFALSE; + + // allocate memory for an index list + piTriListIn = (int *) malloc(sizeof(int)*3*iNrTrianglesIn); + pTriInfos = (STriInfo *) malloc(sizeof(STriInfo)*iNrTrianglesIn); + if (piTriListIn==NULL || pTriInfos==NULL) + { + if (piTriListIn!=NULL) free(piTriListIn); + if (pTriInfos!=NULL) free(pTriInfos); + return TFALSE; + } + + // make an initial triangle --> face index list + iNrTSPaces = GenerateInitialVerticesIndexList(pTriInfos, piTriListIn, pContext, iNrTrianglesIn); + + // make a welded index list of identical positions and attributes (pos, norm, texc) + //printf("gen welded index list begin\n"); + GenerateSharedVerticesIndexList(piTriListIn, pContext, iNrTrianglesIn); + //printf("gen welded index list end\n"); + + // Mark all degenerate triangles + iTotTris = iNrTrianglesIn; + iDegenTriangles = 0; + for (t=0; tm_pInterface->m_getNumVerticesOfFace(pContext, f); + if (verts!=3 && verts!=4) continue; + + + // I've decided to let degenerate triangles and group-with-anythings + // vary between left/right hand coordinate systems at the vertices. + // All healthy triangles on the other hand are built to always be either or. + + /*// force the coordinate system orientation to be uniform for every face. + // (this is already the case for good triangles but not for + // degenerate ones and those with bGroupWithAnything==true) + bool bOrient = psTspace[index].bOrient; + if (psTspace[index].iCounter == 0) // tspace was not derived from a group + { + // look for a space created in GenerateTSpaces() by iCounter>0 + bool bNotFound = true; + int i=1; + while (i 0) bNotFound=false; + else ++i; + } + if (!bNotFound) bOrient = psTspace[index+i].bOrient; + }*/ + + // set data + for (i=0; ivOs.x, pTSpace->vOs.y, pTSpace->vOs.z}; + float bitang[] = {pTSpace->vOt.x, pTSpace->vOt.y, pTSpace->vOt.z}; + if (pContext->m_pInterface->m_setTSpace!=NULL) + pContext->m_pInterface->m_setTSpace(pContext, tang, bitang, pTSpace->fMagS, pTSpace->fMagT, pTSpace->bOrient, f, i); + if (pContext->m_pInterface->m_setTSpaceBasic!=NULL) + pContext->m_pInterface->m_setTSpaceBasic(pContext, tang, pTSpace->bOrient==TTRUE ? 1.0f : (-1.0f), f, i); + + ++index; + } + } + + free(psTspace); + + + return TTRUE; +} + +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +typedef struct { + float vert[3]; + int index; +} STmpVert; + +static const int g_iCells = 2048; + +#ifdef _MSC_VER + #define NOINLINE __declspec(noinline) +#else + #define NOINLINE __attribute__ ((noinline)) +#endif + +// it is IMPORTANT that this function is called to evaluate the hash since +// inlining could potentially reorder instructions and generate different +// results for the same effective input value fVal. +static NOINLINE int FindGridCell(const float fMin, const float fMax, const float fVal) +{ + const float fIndex = g_iCells * ((fVal-fMin)/(fMax-fMin)); + const int iIndex = (int)fIndex; + return iIndex < g_iCells ? (iIndex >= 0 ? iIndex : 0) : (g_iCells - 1); +} + +static void MergeVertsFast(int piTriList_in_and_out[], STmpVert pTmpVert[], const SMikkTSpaceContext * pContext, const int iL_in, const int iR_in); +static void MergeVertsSlow(int piTriList_in_and_out[], const SMikkTSpaceContext * pContext, const int pTable[], const int iEntries); +static void GenerateSharedVerticesIndexListSlow(int piTriList_in_and_out[], const SMikkTSpaceContext * pContext, const int iNrTrianglesIn); + +static void GenerateSharedVerticesIndexList(int piTriList_in_and_out[], const SMikkTSpaceContext * pContext, const int iNrTrianglesIn) +{ + + // Generate bounding box + int * piHashTable=NULL, * piHashCount=NULL, * piHashOffsets=NULL, * piHashCount2=NULL; + STmpVert * pTmpVert = NULL; + int i=0, iChannel=0, k=0, e=0; + int iMaxCount=0; + SVec3 vMin = GetPosition(pContext, 0), vMax = vMin, vDim; + float fMin, fMax; + for (i=1; i<(iNrTrianglesIn*3); i++) + { + const int index = piTriList_in_and_out[i]; + + const SVec3 vP = GetPosition(pContext, index); + if (vMin.x > vP.x) vMin.x = vP.x; + else if (vMax.x < vP.x) vMax.x = vP.x; + if (vMin.y > vP.y) vMin.y = vP.y; + else if (vMax.y < vP.y) vMax.y = vP.y; + if (vMin.z > vP.z) vMin.z = vP.z; + else if (vMax.z < vP.z) vMax.z = vP.z; + } + + vDim = vsub(vMax,vMin); + iChannel = 0; + fMin = vMin.x; fMax=vMax.x; + if (vDim.y>vDim.x && vDim.y>vDim.z) + { + iChannel=1; + fMin = vMin.y, fMax=vMax.y; + } + else if (vDim.z>vDim.x) + { + iChannel=2; + fMin = vMin.z, fMax=vMax.z; + } + + // make allocations + piHashTable = (int *) malloc(sizeof(int)*iNrTrianglesIn*3); + piHashCount = (int *) malloc(sizeof(int)*g_iCells); + piHashOffsets = (int *) malloc(sizeof(int)*g_iCells); + piHashCount2 = (int *) malloc(sizeof(int)*g_iCells); + + if (piHashTable==NULL || piHashCount==NULL || piHashOffsets==NULL || piHashCount2==NULL) + { + if (piHashTable!=NULL) free(piHashTable); + if (piHashCount!=NULL) free(piHashCount); + if (piHashOffsets!=NULL) free(piHashOffsets); + if (piHashCount2!=NULL) free(piHashCount2); + GenerateSharedVerticesIndexListSlow(piTriList_in_and_out, pContext, iNrTrianglesIn); + return; + } + memset(piHashCount, 0, sizeof(int)*g_iCells); + memset(piHashCount2, 0, sizeof(int)*g_iCells); + + // count amount of elements in each cell unit + for (i=0; i<(iNrTrianglesIn*3); i++) + { + const int index = piTriList_in_and_out[i]; + const SVec3 vP = GetPosition(pContext, index); + const float fVal = iChannel==0 ? vP.x : (iChannel==1 ? vP.y : vP.z); + const int iCell = FindGridCell(fMin, fMax, fVal); + ++piHashCount[iCell]; + } + + // evaluate start index of each cell. + piHashOffsets[0]=0; + for (k=1; kpTmpVert[l].vert[c]) fvMin[c]=pTmpVert[l].vert[c]; + else if (fvMax[c]dx && dy>dz) channel=1; + else if (dz>dx) channel=2; + + fSep = 0.5f*(fvMax[channel]+fvMin[channel]); + + // terminate recursion when the separation/average value + // is no longer strictly between fMin and fMax values. + if (fSep>=fvMax[channel] || fSep<=fvMin[channel]) + { + // complete the weld + for (l=iL_in; l<=iR_in; l++) + { + int i = pTmpVert[l].index; + const int index = piTriList_in_and_out[i]; + const SVec3 vP = GetPosition(pContext, index); + const SVec3 vN = GetNormal(pContext, index); + const SVec3 vT = GetTexCoord(pContext, index); + + tbool bNotFound = TTRUE; + int l2=iL_in, i2rec=-1; + while (l20); // at least 2 entries + + // separate (by fSep) all points between iL_in and iR_in in pTmpVert[] + while (iL < iR) + { + tbool bReadyLeftSwap = TFALSE, bReadyRightSwap = TFALSE; + while ((!bReadyLeftSwap) && iL=iL_in && iL<=iR_in); + bReadyLeftSwap = !(pTmpVert[iL].vert[channel]=iL_in && iR<=iR_in); + bReadyRightSwap = pTmpVert[iR].vert[channel]m_pInterface->m_getNumFaces(pContext); f++) + { + const int verts = pContext->m_pInterface->m_getNumVerticesOfFace(pContext, f); + if (verts!=3 && verts!=4) continue; + + pTriInfos[iDstTriIndex].iOrgFaceNumber = f; + pTriInfos[iDstTriIndex].iTSpacesOffs = iTSpacesOffs; + + if (verts==3) + { + unsigned char * pVerts = pTriInfos[iDstTriIndex].vert_num; + pVerts[0]=0; pVerts[1]=1; pVerts[2]=2; + piTriList_out[iDstTriIndex*3+0] = MakeIndex(f, 0); + piTriList_out[iDstTriIndex*3+1] = MakeIndex(f, 1); + piTriList_out[iDstTriIndex*3+2] = MakeIndex(f, 2); + ++iDstTriIndex; // next + } + else + { + { + pTriInfos[iDstTriIndex+1].iOrgFaceNumber = f; + pTriInfos[iDstTriIndex+1].iTSpacesOffs = iTSpacesOffs; + } + + { + // need an order independent way to evaluate + // tspace on quads. This is done by splitting + // along the shortest diagonal. + const int i0 = MakeIndex(f, 0); + const int i1 = MakeIndex(f, 1); + const int i2 = MakeIndex(f, 2); + const int i3 = MakeIndex(f, 3); + const SVec3 T0 = GetTexCoord(pContext, i0); + const SVec3 T1 = GetTexCoord(pContext, i1); + const SVec3 T2 = GetTexCoord(pContext, i2); + const SVec3 T3 = GetTexCoord(pContext, i3); + const float distSQ_02 = LengthSquared(vsub(T2,T0)); + const float distSQ_13 = LengthSquared(vsub(T3,T1)); + tbool bQuadDiagIs_02; + if (distSQ_02m_pInterface->m_getPosition(pContext, pos, iF, iI); + res.x=pos[0]; res.y=pos[1]; res.z=pos[2]; + return res; +} + +static SVec3 GetNormal(const SMikkTSpaceContext * pContext, const int index) +{ + int iF, iI; + SVec3 res; float norm[3]; + IndexToData(&iF, &iI, index); + pContext->m_pInterface->m_getNormal(pContext, norm, iF, iI); + res.x=norm[0]; res.y=norm[1]; res.z=norm[2]; + return res; +} + +static SVec3 GetTexCoord(const SMikkTSpaceContext * pContext, const int index) +{ + int iF, iI; + SVec3 res; float texc[2]; + IndexToData(&iF, &iI, index); + pContext->m_pInterface->m_getTexCoord(pContext, texc, iF, iI); + res.x=texc[0]; res.y=texc[1]; res.z=1.0f; + return res; +} + +///////////////////////////////////////////////////////////////////////////////////////////////////// +///////////////////////////////////////////////////////////////////////////////////////////////////// + +typedef union { + struct + { + int i0, i1, f; + }; + int array[3]; +} SEdge; + +static void BuildNeighborsFast(STriInfo pTriInfos[], SEdge * pEdges, const int piTriListIn[], const int iNrTrianglesIn); +static void BuildNeighborsSlow(STriInfo pTriInfos[], const int piTriListIn[], const int iNrTrianglesIn); + +// returns the texture area times 2 +static float CalcTexArea(const SMikkTSpaceContext * pContext, const int indices[]) +{ + const SVec3 t1 = GetTexCoord(pContext, indices[0]); + const SVec3 t2 = GetTexCoord(pContext, indices[1]); + const SVec3 t3 = GetTexCoord(pContext, indices[2]); + + const float t21x = t2.x-t1.x; + const float t21y = t2.y-t1.y; + const float t31x = t3.x-t1.x; + const float t31y = t3.y-t1.y; + + const float fSignedAreaSTx2 = t21x*t31y - t21y*t31x; + + return fSignedAreaSTx2<0 ? (-fSignedAreaSTx2) : fSignedAreaSTx2; +} + +static void InitTriInfo(STriInfo pTriInfos[], const int piTriListIn[], const SMikkTSpaceContext * pContext, const int iNrTrianglesIn) +{ + int f=0, i=0, t=0; + // pTriInfos[f].iFlag is cleared in GenerateInitialVerticesIndexList() which is called before this function. + + // generate neighbor info list + for (f=0; f0 ? ORIENT_PRESERVING : 0); + + if ( NotZero(fSignedAreaSTx2) ) + { + const float fAbsArea = fabsf(fSignedAreaSTx2); + const float fLenOs = Length(vOs); + const float fLenOt = Length(vOt); + const float fS = (pTriInfos[f].iFlag&ORIENT_PRESERVING)==0 ? (-1.0f) : 1.0f; + if ( NotZero(fLenOs) ) pTriInfos[f].vOs = vscale(fS/fLenOs, vOs); + if ( NotZero(fLenOt) ) pTriInfos[f].vOt = vscale(fS/fLenOt, vOt); + + // evaluate magnitudes prior to normalization of vOs and vOt + pTriInfos[f].fMagS = fLenOs / fAbsArea; + pTriInfos[f].fMagT = fLenOt / fAbsArea; + + // if this is a good triangle + if ( NotZero(pTriInfos[f].fMagS) && NotZero(pTriInfos[f].fMagT)) + pTriInfos[f].iFlag &= (~GROUP_WITH_ANY); + } + } + + // force otherwise healthy quads to a fixed orientation + while (t<(iNrTrianglesIn-1)) + { + const int iFO_a = pTriInfos[t].iOrgFaceNumber; + const int iFO_b = pTriInfos[t+1].iOrgFaceNumber; + if (iFO_a==iFO_b) // this is a quad + { + const tbool bIsDeg_a = (pTriInfos[t].iFlag&MARK_DEGENERATE)!=0 ? TTRUE : TFALSE; + const tbool bIsDeg_b = (pTriInfos[t+1].iFlag&MARK_DEGENERATE)!=0 ? TTRUE : TFALSE; + + // bad triangles should already have been removed by + // DegenPrologue(), but just in case check bIsDeg_a and bIsDeg_a are false + if ((bIsDeg_a||bIsDeg_b)==TFALSE) + { + const tbool bOrientA = (pTriInfos[t].iFlag&ORIENT_PRESERVING)!=0 ? TTRUE : TFALSE; + const tbool bOrientB = (pTriInfos[t+1].iFlag&ORIENT_PRESERVING)!=0 ? TTRUE : TFALSE; + // if this happens the quad has extremely bad mapping!! + if (bOrientA!=bOrientB) + { + //printf("found quad with bad mapping\n"); + tbool bChooseOrientFirstTri = TFALSE; + if ((pTriInfos[t+1].iFlag&GROUP_WITH_ANY)!=0) bChooseOrientFirstTri = TTRUE; + else if ( CalcTexArea(pContext, &piTriListIn[t*3+0]) >= CalcTexArea(pContext, &piTriListIn[(t+1)*3+0]) ) + bChooseOrientFirstTri = TTRUE; + + // force match + { + const int t0 = bChooseOrientFirstTri ? t : (t+1); + const int t1 = bChooseOrientFirstTri ? (t+1) : t; + pTriInfos[t1].iFlag &= (~ORIENT_PRESERVING); // clear first + pTriInfos[t1].iFlag |= (pTriInfos[t0].iFlag&ORIENT_PRESERVING); // copy bit + } + } + } + t += 2; + } + else + ++t; + } + + // match up edge pairs + { + SEdge * pEdges = (SEdge *) malloc(sizeof(SEdge)*iNrTrianglesIn*3); + if (pEdges==NULL) + BuildNeighborsSlow(pTriInfos, piTriListIn, iNrTrianglesIn); + else + { + BuildNeighborsFast(pTriInfos, pEdges, piTriListIn, iNrTrianglesIn); + + free(pEdges); + } + } +} + +///////////////////////////////////////////////////////////////////////////////////////////////////// +///////////////////////////////////////////////////////////////////////////////////////////////////// + +static tbool AssignRecur(const int piTriListIn[], STriInfo psTriInfos[], const int iMyTriIndex, SGroup * pGroup); +static void AddTriToGroup(SGroup * pGroup, const int iTriIndex); + +static int Build4RuleGroups(STriInfo pTriInfos[], SGroup pGroups[], int piGroupTrianglesBuffer[], const int piTriListIn[], const int iNrTrianglesIn) +{ + const int iNrMaxGroups = iNrTrianglesIn*3; + int iNrActiveGroups = 0; + int iOffset = 0, f=0, i=0; + (void)iNrMaxGroups; /* quiet warnings in non debug mode */ + for (f=0; fiVertexRepresentitive = vert_index; + pTriInfos[f].AssignedGroup[i]->bOrientPreservering = (pTriInfos[f].iFlag&ORIENT_PRESERVING)!=0; + pTriInfos[f].AssignedGroup[i]->iNrFaces = 0; + pTriInfos[f].AssignedGroup[i]->pFaceIndices = &piGroupTrianglesBuffer[iOffset]; + ++iNrActiveGroups; + + AddTriToGroup(pTriInfos[f].AssignedGroup[i], f); + bOrPre = (pTriInfos[f].iFlag&ORIENT_PRESERVING)!=0 ? TTRUE : TFALSE; + neigh_indexL = pTriInfos[f].FaceNeighbors[i]; + neigh_indexR = pTriInfos[f].FaceNeighbors[i>0?(i-1):2]; + if (neigh_indexL>=0) // neighbor + { + const tbool bAnswer = + AssignRecur(piTriListIn, pTriInfos, neigh_indexL, + pTriInfos[f].AssignedGroup[i] ); + + const tbool bOrPre2 = (pTriInfos[neigh_indexL].iFlag&ORIENT_PRESERVING)!=0 ? TTRUE : TFALSE; + const tbool bDiff = bOrPre!=bOrPre2 ? TTRUE : TFALSE; + assert(bAnswer || bDiff); + (void)bAnswer, (void)bDiff; /* quiet warnings in non debug mode */ + } + if (neigh_indexR>=0) // neighbor + { + const tbool bAnswer = + AssignRecur(piTriListIn, pTriInfos, neigh_indexR, + pTriInfos[f].AssignedGroup[i] ); + + const tbool bOrPre2 = (pTriInfos[neigh_indexR].iFlag&ORIENT_PRESERVING)!=0 ? TTRUE : TFALSE; + const tbool bDiff = bOrPre!=bOrPre2 ? TTRUE : TFALSE; + assert(bAnswer || bDiff); + (void)bAnswer, (void)bDiff; /* quiet warnings in non debug mode */ + } + + // update offset + iOffset += pTriInfos[f].AssignedGroup[i]->iNrFaces; + // since the groups are disjoint a triangle can never + // belong to more than 3 groups. Subsequently something + // is completely screwed if this assertion ever hits. + assert(iOffset <= iNrMaxGroups); + } + } + } + + return iNrActiveGroups; +} + +static void AddTriToGroup(SGroup * pGroup, const int iTriIndex) +{ + pGroup->pFaceIndices[pGroup->iNrFaces] = iTriIndex; + ++pGroup->iNrFaces; +} + +static tbool AssignRecur(const int piTriListIn[], STriInfo psTriInfos[], + const int iMyTriIndex, SGroup * pGroup) +{ + STriInfo * pMyTriInfo = &psTriInfos[iMyTriIndex]; + + // track down vertex + const int iVertRep = pGroup->iVertexRepresentitive; + const int * pVerts = &piTriListIn[3*iMyTriIndex+0]; + int i=-1; + if (pVerts[0]==iVertRep) i=0; + else if (pVerts[1]==iVertRep) i=1; + else if (pVerts[2]==iVertRep) i=2; + assert(i>=0 && i<3); + + // early out + if (pMyTriInfo->AssignedGroup[i] == pGroup) return TTRUE; + else if (pMyTriInfo->AssignedGroup[i]!=NULL) return TFALSE; + if ((pMyTriInfo->iFlag&GROUP_WITH_ANY)!=0) + { + // first to group with a group-with-anything triangle + // determines it's orientation. + // This is the only existing order dependency in the code!! + if ( pMyTriInfo->AssignedGroup[0] == NULL && + pMyTriInfo->AssignedGroup[1] == NULL && + pMyTriInfo->AssignedGroup[2] == NULL ) + { + pMyTriInfo->iFlag &= (~ORIENT_PRESERVING); + pMyTriInfo->iFlag |= (pGroup->bOrientPreservering ? ORIENT_PRESERVING : 0); + } + } + { + const tbool bOrient = (pMyTriInfo->iFlag&ORIENT_PRESERVING)!=0 ? TTRUE : TFALSE; + if (bOrient != pGroup->bOrientPreservering) return TFALSE; + } + + AddTriToGroup(pGroup, iMyTriIndex); + pMyTriInfo->AssignedGroup[i] = pGroup; + + { + const int neigh_indexL = pMyTriInfo->FaceNeighbors[i]; + const int neigh_indexR = pMyTriInfo->FaceNeighbors[i>0?(i-1):2]; + if (neigh_indexL>=0) + AssignRecur(piTriListIn, psTriInfos, neigh_indexL, pGroup); + if (neigh_indexR>=0) + AssignRecur(piTriListIn, psTriInfos, neigh_indexR, pGroup); + } + + + + return TTRUE; +} + +///////////////////////////////////////////////////////////////////////////////////////////////////// +///////////////////////////////////////////////////////////////////////////////////////////////////// + +static tbool CompareSubGroups(const SSubGroup * pg1, const SSubGroup * pg2); +static void QuickSort(int* pSortBuffer, int iLeft, int iRight, unsigned int uSeed); +static STSpace EvalTspace(int face_indices[], const int iFaces, const int piTriListIn[], const STriInfo pTriInfos[], const SMikkTSpaceContext * pContext, const int iVertexRepresentitive); + +static tbool GenerateTSpaces(STSpace psTspace[], const STriInfo pTriInfos[], const SGroup pGroups[], + const int iNrActiveGroups, const int piTriListIn[], const float fThresCos, + const SMikkTSpaceContext * pContext) +{ + STSpace * pSubGroupTspace = NULL; + SSubGroup * pUniSubGroups = NULL; + int * pTmpMembers = NULL; + int iMaxNrFaces=0, iUniqueTspaces=0, g=0, i=0; + for (g=0; giNrFaces; i++) // triangles + { + const int f = pGroup->pFaceIndices[i]; // triangle number + int index=-1, iVertIndex=-1, iOF_1=-1, iMembers=0, j=0, l=0; + SSubGroup tmp_group; + tbool bFound; + SVec3 n, vOs, vOt; + if (pTriInfos[f].AssignedGroup[0]==pGroup) index=0; + else if (pTriInfos[f].AssignedGroup[1]==pGroup) index=1; + else if (pTriInfos[f].AssignedGroup[2]==pGroup) index=2; + assert(index>=0 && index<3); + + iVertIndex = piTriListIn[f*3+index]; + assert(iVertIndex==pGroup->iVertexRepresentitive); + + // is normalized already + n = GetNormal(pContext, iVertIndex); + + // project + vOs = vsub(pTriInfos[f].vOs, vscale(vdot(n,pTriInfos[f].vOs), n)); + vOt = vsub(pTriInfos[f].vOt, vscale(vdot(n,pTriInfos[f].vOt), n)); + if ( VNotZero(vOs) ) vOs = Normalize(vOs); + if ( VNotZero(vOt) ) vOt = Normalize(vOt); + + // original face number + iOF_1 = pTriInfos[f].iOrgFaceNumber; + + iMembers = 0; + for (j=0; jiNrFaces; j++) + { + const int t = pGroup->pFaceIndices[j]; // triangle number + const int iOF_2 = pTriInfos[t].iOrgFaceNumber; + + // project + SVec3 vOs2 = vsub(pTriInfos[t].vOs, vscale(vdot(n,pTriInfos[t].vOs), n)); + SVec3 vOt2 = vsub(pTriInfos[t].vOt, vscale(vdot(n,pTriInfos[t].vOt), n)); + if ( VNotZero(vOs2) ) vOs2 = Normalize(vOs2); + if ( VNotZero(vOt2) ) vOt2 = Normalize(vOt2); + + { + const tbool bAny = ( (pTriInfos[f].iFlag | pTriInfos[t].iFlag) & GROUP_WITH_ANY )!=0 ? TTRUE : TFALSE; + // make sure triangles which belong to the same quad are joined. + const tbool bSameOrgFace = iOF_1==iOF_2 ? TTRUE : TFALSE; + + const float fCosS = vdot(vOs,vOs2); + const float fCosT = vdot(vOt,vOt2); + + assert(f!=t || bSameOrgFace); // sanity check + if (bAny || bSameOrgFace || (fCosS>fThresCos && fCosT>fThresCos)) + pTmpMembers[iMembers++] = t; + } + } + + // sort pTmpMembers + tmp_group.iNrFaces = iMembers; + tmp_group.pTriMembers = pTmpMembers; + if (iMembers>1) + { + unsigned int uSeed = INTERNAL_RND_SORT_SEED; // could replace with a random seed? + QuickSort(pTmpMembers, 0, iMembers-1, uSeed); + } + + // look for an existing match + bFound = TFALSE; + l=0; + while (liVertexRepresentitive); + ++iUniqueSubGroups; + } + + // output tspace + { + const int iOffs = pTriInfos[f].iTSpacesOffs; + const int iVert = pTriInfos[f].vert_num[index]; + STSpace * pTS_out = &psTspace[iOffs+iVert]; + assert(pTS_out->iCounter<2); + assert(((pTriInfos[f].iFlag&ORIENT_PRESERVING)!=0) == pGroup->bOrientPreservering); + if (pTS_out->iCounter==1) + { + *pTS_out = AvgTSpace(pTS_out, &pSubGroupTspace[l]); + pTS_out->iCounter = 2; // update counter + pTS_out->bOrient = pGroup->bOrientPreservering; + } + else + { + assert(pTS_out->iCounter==0); + *pTS_out = pSubGroupTspace[l]; + pTS_out->iCounter = 1; // update counter + pTS_out->bOrient = pGroup->bOrientPreservering; + } + } + } + + // clean up and offset iUniqueTspaces + for (s=0; s=0 && i<3); + + // project + index = piTriListIn[3*f+i]; + n = GetNormal(pContext, index); + vOs = vsub(pTriInfos[f].vOs, vscale(vdot(n,pTriInfos[f].vOs), n)); + vOt = vsub(pTriInfos[f].vOt, vscale(vdot(n,pTriInfos[f].vOt), n)); + if ( VNotZero(vOs) ) vOs = Normalize(vOs); + if ( VNotZero(vOt) ) vOt = Normalize(vOt); + + i2 = piTriListIn[3*f + (i<2?(i+1):0)]; + i1 = piTriListIn[3*f + i]; + i0 = piTriListIn[3*f + (i>0?(i-1):2)]; + + p0 = GetPosition(pContext, i0); + p1 = GetPosition(pContext, i1); + p2 = GetPosition(pContext, i2); + v1 = vsub(p0,p1); + v2 = vsub(p2,p1); + + // project + v1 = vsub(v1, vscale(vdot(n,v1),n)); if ( VNotZero(v1) ) v1 = Normalize(v1); + v2 = vsub(v2, vscale(vdot(n,v2),n)); if ( VNotZero(v2) ) v2 = Normalize(v2); + + // weight contribution by the angle + // between the two edge vectors + fCos = vdot(v1,v2); fCos=fCos>1?1:(fCos<(-1) ? (-1) : fCos); + fAngle = (float) acos(fCos); + fMagS = pTriInfos[f].fMagS; + fMagT = pTriInfos[f].fMagT; + + res.vOs=vadd(res.vOs, vscale(fAngle,vOs)); + res.vOt=vadd(res.vOt,vscale(fAngle,vOt)); + res.fMagS+=(fAngle*fMagS); + res.fMagT+=(fAngle*fMagT); + fAngleSum += fAngle; + } + } + + // normalize + if ( VNotZero(res.vOs) ) res.vOs = Normalize(res.vOs); + if ( VNotZero(res.vOt) ) res.vOt = Normalize(res.vOt); + if (fAngleSum>0) + { + res.fMagS /= fAngleSum; + res.fMagT /= fAngleSum; + } + + return res; +} + +static tbool CompareSubGroups(const SSubGroup * pg1, const SSubGroup * pg2) +{ + tbool bStillSame=TTRUE; + int i=0; + if (pg1->iNrFaces!=pg2->iNrFaces) return TFALSE; + while (iiNrFaces && bStillSame) + { + bStillSame = pg1->pTriMembers[i]==pg2->pTriMembers[i] ? TTRUE : TFALSE; + if (bStillSame) ++i; + } + return bStillSame; +} + +static void QuickSort(int* pSortBuffer, int iLeft, int iRight, unsigned int uSeed) +{ + int iL, iR, n, index, iMid, iTmp; + + // Random + unsigned int t=uSeed&31; + t=(uSeed<>(32-t)); + uSeed=uSeed+t+3; + // Random end + + iL=iLeft; iR=iRight; + n = (iR-iL)+1; + assert(n>=0); + index = (int) (uSeed%n); + + iMid=pSortBuffer[index + iL]; + + + do + { + while (pSortBuffer[iL] < iMid) + ++iL; + while (pSortBuffer[iR] > iMid) + --iR; + + if (iL <= iR) + { + iTmp = pSortBuffer[iL]; + pSortBuffer[iL] = pSortBuffer[iR]; + pSortBuffer[iR] = iTmp; + ++iL; --iR; + } + } + while (iL <= iR); + + if (iLeft < iR) + QuickSort(pSortBuffer, iLeft, iR, uSeed); + if (iL < iRight) + QuickSort(pSortBuffer, iL, iRight, uSeed); +} + +///////////////////////////////////////////////////////////////////////////////////////////// +///////////////////////////////////////////////////////////////////////////////////////////// + +static void QuickSortEdges(SEdge * pSortBuffer, int iLeft, int iRight, const int channel, unsigned int uSeed); +static void GetEdge(int * i0_out, int * i1_out, int * edgenum_out, const int indices[], const int i0_in, const int i1_in); + +static void BuildNeighborsFast(STriInfo pTriInfos[], SEdge * pEdges, const int piTriListIn[], const int iNrTrianglesIn) +{ + // build array of edges + unsigned int uSeed = INTERNAL_RND_SORT_SEED; // could replace with a random seed? + int iEntries=0, iCurStartIndex=-1, f=0, i=0; + for (f=0; f pSortBuffer[iRight].array[channel]) + { + sTmp = pSortBuffer[iLeft]; + pSortBuffer[iLeft] = pSortBuffer[iRight]; + pSortBuffer[iRight] = sTmp; + } + return; + } + + // Random + t=uSeed&31; + t=(uSeed<>(32-t)); + uSeed=uSeed+t+3; + // Random end + + iL=iLeft, iR=iRight; + n = (iR-iL)+1; + assert(n>=0); + index = (int) (uSeed%n); + + iMid=pSortBuffer[index + iL].array[channel]; + + do + { + while (pSortBuffer[iL].array[channel] < iMid) + ++iL; + while (pSortBuffer[iR].array[channel] > iMid) + --iR; + + if (iL <= iR) + { + sTmp = pSortBuffer[iL]; + pSortBuffer[iL] = pSortBuffer[iR]; + pSortBuffer[iR] = sTmp; + ++iL; --iR; + } + } + while (iL <= iR); + + if (iLeft < iR) + QuickSortEdges(pSortBuffer, iLeft, iR, channel, uSeed); + if (iL < iRight) + QuickSortEdges(pSortBuffer, iL, iRight, channel, uSeed); +} + +// resolve ordering and edge number +static void GetEdge(int * i0_out, int * i1_out, int * edgenum_out, const int indices[], const int i0_in, const int i1_in) +{ + *edgenum_out = -1; + + // test if first index is on the edge + if (indices[0]==i0_in || indices[0]==i1_in) + { + // test if second index is on the edge + if (indices[1]==i0_in || indices[1]==i1_in) + { + edgenum_out[0]=0; // first edge + i0_out[0]=indices[0]; + i1_out[0]=indices[1]; + } + else + { + edgenum_out[0]=2; // third edge + i0_out[0]=indices[2]; + i1_out[0]=indices[0]; + } + } + else + { + // only second and third index is on the edge + edgenum_out[0]=1; // second edge + i0_out[0]=indices[1]; + i1_out[0]=indices[2]; + } +} + + +///////////////////////////////////////////////////////////////////////////////////////////// +/////////////////////////////////// Degenerate triangles //////////////////////////////////// + +static void DegenPrologue(STriInfo pTriInfos[], int piTriList_out[], const int iNrTrianglesIn, const int iTotTris) +{ + int iNextGoodTriangleSearchIndex=-1; + tbool bStillFindingGoodOnes; + + // locate quads with only one good triangle + int t=0; + while (t<(iTotTris-1)) + { + const int iFO_a = pTriInfos[t].iOrgFaceNumber; + const int iFO_b = pTriInfos[t+1].iOrgFaceNumber; + if (iFO_a==iFO_b) // this is a quad + { + const tbool bIsDeg_a = (pTriInfos[t].iFlag&MARK_DEGENERATE)!=0 ? TTRUE : TFALSE; + const tbool bIsDeg_b = (pTriInfos[t+1].iFlag&MARK_DEGENERATE)!=0 ? TTRUE : TFALSE; + if ((bIsDeg_a^bIsDeg_b)!=0) + { + pTriInfos[t].iFlag |= QUAD_ONE_DEGEN_TRI; + pTriInfos[t+1].iFlag |= QUAD_ONE_DEGEN_TRI; + } + t += 2; + } + else + ++t; + } + + // reorder list so all degen triangles are moved to the back + // without reordering the good triangles + iNextGoodTriangleSearchIndex = 1; + t=0; + bStillFindingGoodOnes = TTRUE; + while (t (t+1)); + + // swap triangle t0 and t1 + if (!bJustADegenerate) + { + int i=0; + for (i=0; i<3; i++) + { + const int index = piTriList_out[t0*3+i]; + piTriList_out[t0*3+i] = piTriList_out[t1*3+i]; + piTriList_out[t1*3+i] = index; + } + { + const STriInfo tri_info = pTriInfos[t0]; + pTriInfos[t0] = pTriInfos[t1]; + pTriInfos[t1] = tri_info; + } + } + else + bStillFindingGoodOnes = TFALSE; // this is not supposed to happen + } + + if (bStillFindingGoodOnes) ++t; + } + + assert(bStillFindingGoodOnes); // code will still work. + assert(iNrTrianglesIn == t); +} + +static void DegenEpilogue(STSpace psTspace[], STriInfo pTriInfos[], int piTriListIn[], const SMikkTSpaceContext * pContext, const int iNrTrianglesIn, const int iTotTris) +{ + int t=0, i=0; + // deal with degenerate triangles + // punishment for degenerate triangles is O(N^2) + for (t=iNrTrianglesIn; t http://image.diku.dk/projects/media/morten.mikkelsen.08.pdf + * Note that though the tangent spaces at the vertices are generated in an order-independent way, + * by this implementation, the interpolated tangent space is still affected by which diagonal is + * chosen to split each quad. A sensible solution is to have your tools pipeline always + * split quads by the shortest diagonal. This choice is order-independent and works with mirroring. + * If these have the same length then compare the diagonals defined by the texture coordinates. + * XNormal which is a tool for baking normal maps allows you to write your own tangent space plugin + * and also quad triangulator plugin. + */ + + +typedef int tbool; +typedef struct SMikkTSpaceContext SMikkTSpaceContext; + +typedef struct { + // Returns the number of faces (triangles/quads) on the mesh to be processed. + int (*m_getNumFaces)(const SMikkTSpaceContext * pContext); + + // Returns the number of vertices on face number iFace + // iFace is a number in the range {0, 1, ..., getNumFaces()-1} + int (*m_getNumVerticesOfFace)(const SMikkTSpaceContext * pContext, const int iFace); + + // returns the position/normal/texcoord of the referenced face of vertex number iVert. + // iVert is in the range {0,1,2} for triangles and {0,1,2,3} for quads. + void (*m_getPosition)(const SMikkTSpaceContext * pContext, float fvPosOut[], const int iFace, const int iVert); + void (*m_getNormal)(const SMikkTSpaceContext * pContext, float fvNormOut[], const int iFace, const int iVert); + void (*m_getTexCoord)(const SMikkTSpaceContext * pContext, float fvTexcOut[], const int iFace, const int iVert); + + // either (or both) of the two setTSpace callbacks can be set. + // The call-back m_setTSpaceBasic() is sufficient for basic normal mapping. + + // This function is used to return the tangent and fSign to the application. + // fvTangent is a unit length vector. + // For normal maps it is sufficient to use the following simplified version of the bitangent which is generated at pixel/vertex level. + // bitangent = fSign * cross(vN, tangent); + // Note that the results are returned unindexed. It is possible to generate a new index list + // But averaging/overwriting tangent spaces by using an already existing index list WILL produce INCRORRECT results. + // DO NOT! use an already existing index list. + void (*m_setTSpaceBasic)(const SMikkTSpaceContext * pContext, const float fvTangent[], const float fSign, const int iFace, const int iVert); + + // This function is used to return tangent space results to the application. + // fvTangent and fvBiTangent are unit length vectors and fMagS and fMagT are their + // true magnitudes which can be used for relief mapping effects. + // fvBiTangent is the "real" bitangent and thus may not be perpendicular to fvTangent. + // However, both are perpendicular to the vertex normal. + // For normal maps it is sufficient to use the following simplified version of the bitangent which is generated at pixel/vertex level. + // fSign = bIsOrientationPreserving ? 1.0f : (-1.0f); + // bitangent = fSign * cross(vN, tangent); + // Note that the results are returned unindexed. It is possible to generate a new index list + // But averaging/overwriting tangent spaces by using an already existing index list WILL produce INCRORRECT results. + // DO NOT! use an already existing index list. + void (*m_setTSpace)(const SMikkTSpaceContext * pContext, const float fvTangent[], const float fvBiTangent[], const float fMagS, const float fMagT, + const tbool bIsOrientationPreserving, const int iFace, const int iVert); +} SMikkTSpaceInterface; + +struct SMikkTSpaceContext +{ + SMikkTSpaceInterface * m_pInterface; // initialized with callback functions + void * m_pUserData; // pointer to client side mesh data etc. (passed as the first parameter with every interface call) +}; + +// these are both thread safe! +tbool genTangSpaceDefault(const SMikkTSpaceContext * pContext); // Default (recommended) fAngularThreshold is 180 degrees (which means threshold disabled) +tbool genTangSpace(const SMikkTSpaceContext * pContext, const float fAngularThreshold); + + +// To avoid visual errors (distortions/unwanted hard edges in lighting), when using sampled normal maps, the +// normal map sampler must use the exact inverse of the pixel shader transformation. +// The most efficient transformation we can possibly do in the pixel shader is +// achieved by using, directly, the "unnormalized" interpolated tangent, bitangent and vertex normal: vT, vB and vN. +// pixel shader (fast transform out) +// vNout = normalize( vNt.x * vT + vNt.y * vB + vNt.z * vN ); +// where vNt is the tangent space normal. The normal map sampler must likewise use the +// interpolated and "unnormalized" tangent, bitangent and vertex normal to be compliant with the pixel shader. +// sampler does (exact inverse of pixel shader): +// float3 row0 = cross(vB, vN); +// float3 row1 = cross(vN, vT); +// float3 row2 = cross(vT, vB); +// float fSign = dot(vT, row0)<0 ? -1 : 1; +// vNt = normalize( fSign * float3(dot(vNout,row0), dot(vNout,row1), dot(vNout,row2)) ); +// where vNout is the sampled normal in some chosen 3D space. +// +// Should you choose to reconstruct the bitangent in the pixel shader instead +// of the vertex shader, as explained earlier, then be sure to do this in the normal map sampler also. +// Finally, beware of quad triangulations. If the normal map sampler doesn't use the same triangulation of +// quads as your renderer then problems will occur since the interpolated tangent spaces will differ +// eventhough the vertex level tangent spaces match. This can be solved either by triangulating before +// sampling/exporting or by using the order-independent choice of diagonal for splitting quads suggested earlier. +// However, this must be used both by the sampler and your tools/rendering pipeline. + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/thirdparty/misc/stb_truetype.h b/thirdparty/misc/stb_truetype.h new file mode 100644 index 0000000000..016972785a --- /dev/null +++ b/thirdparty/misc/stb_truetype.h @@ -0,0 +1,3267 @@ +// stb_truetype.h - v1.11 - public domain +// authored from 2009-2015 by Sean Barrett / RAD Game Tools +// +// This library processes TrueType files: +// parse files +// extract glyph metrics +// extract glyph shapes +// render glyphs to one-channel bitmaps with antialiasing (box filter) +// +// Todo: +// non-MS cmaps +// crashproof on bad data +// hinting? (no longer patented) +// cleartype-style AA? +// optimize: use simple memory allocator for intermediates +// optimize: build edge-list directly from curves +// optimize: rasterize directly from curves? +// +// ADDITIONAL CONTRIBUTORS +// +// Mikko Mononen: compound shape support, more cmap formats +// Tor Andersson: kerning, subpixel rendering +// +// Misc other: +// Ryan Gordon +// Simon Glass +// +// Bug/warning reports/fixes: +// "Zer" on mollyrocket (with fix) +// Cass Everitt +// stoiko (Haemimont Games) +// Brian Hook +// Walter van Niftrik +// David Gow +// David Given +// Ivan-Assen Ivanov +// Anthony Pesch +// Johan Duparc +// Hou Qiming +// Fabian "ryg" Giesen +// Martins Mozeiko +// Cap Petschulat +// Omar Cornut +// github:aloucks +// Peter LaValle +// Sergey Popov +// Giumo X. Clanjor +// Higor Euripedes +// Thomas Fields +// Derek Vinyard +// +// VERSION HISTORY +// +// 1.11 (2016-04-02) fix unused-variable warning +// 1.10 (2016-04-02) user-defined fabs(); rare memory leak; remove duplicate typedef +// 1.09 (2016-01-16) warning fix; avoid crash on outofmem; use allocation userdata properly +// 1.08 (2015-09-13) document stbtt_Rasterize(); fixes for vertical & horizontal edges +// 1.07 (2015-08-01) allow PackFontRanges to accept arrays of sparse codepoints; +// variant PackFontRanges to pack and render in separate phases; +// fix stbtt_GetFontOFfsetForIndex (never worked for non-0 input?); +// fixed an assert() bug in the new rasterizer +// replace assert() with STBTT_assert() in new rasterizer +// 1.06 (2015-07-14) performance improvements (~35% faster on x86 and x64 on test machine) +// also more precise AA rasterizer, except if shapes overlap +// remove need for STBTT_sort +// 1.05 (2015-04-15) fix misplaced definitions for STBTT_STATIC +// 1.04 (2015-04-15) typo in example +// 1.03 (2015-04-12) STBTT_STATIC, fix memory leak in new packing, various fixes +// +// Full history can be found at the end of this file. +// +// LICENSE +// +// This software is dual-licensed to the public domain and under the following +// license: you are granted a perpetual, irrevocable license to copy, modify, +// publish, and distribute this file as you see fit. +// +// USAGE +// +// Include this file in whatever places neeed to refer to it. In ONE C/C++ +// file, write: +// #define STB_TRUETYPE_IMPLEMENTATION +// before the #include of this file. This expands out the actual +// implementation into that C/C++ file. +// +// To make the implementation private to the file that generates the implementation, +// #define STBTT_STATIC +// +// Simple 3D API (don't ship this, but it's fine for tools and quick start) +// stbtt_BakeFontBitmap() -- bake a font to a bitmap for use as texture +// stbtt_GetBakedQuad() -- compute quad to draw for a given char +// +// Improved 3D API (more shippable): +// #include "stb_rect_pack.h" -- optional, but you really want it +// stbtt_PackBegin() +// stbtt_PackSetOversample() -- for improved quality on small fonts +// stbtt_PackFontRanges() -- pack and renders +// stbtt_PackEnd() +// stbtt_GetPackedQuad() +// +// "Load" a font file from a memory buffer (you have to keep the buffer loaded) +// stbtt_InitFont() +// stbtt_GetFontOffsetForIndex() -- use for TTC font collections +// +// Render a unicode codepoint to a bitmap +// stbtt_GetCodepointBitmap() -- allocates and returns a bitmap +// stbtt_MakeCodepointBitmap() -- renders into bitmap you provide +// stbtt_GetCodepointBitmapBox() -- how big the bitmap must be +// +// Character advance/positioning +// stbtt_GetCodepointHMetrics() +// stbtt_GetFontVMetrics() +// stbtt_GetCodepointKernAdvance() +// +// Starting with version 1.06, the rasterizer was replaced with a new, +// faster and generally-more-precise rasterizer. The new rasterizer more +// accurately measures pixel coverage for anti-aliasing, except in the case +// where multiple shapes overlap, in which case it overestimates the AA pixel +// coverage. Thus, anti-aliasing of intersecting shapes may look wrong. If +// this turns out to be a problem, you can re-enable the old rasterizer with +// #define STBTT_RASTERIZER_VERSION 1 +// which will incur about a 15% speed hit. +// +// ADDITIONAL DOCUMENTATION +// +// Immediately after this block comment are a series of sample programs. +// +// After the sample programs is the "header file" section. This section +// includes documentation for each API function. +// +// Some important concepts to understand to use this library: +// +// Codepoint +// Characters are defined by unicode codepoints, e.g. 65 is +// uppercase A, 231 is lowercase c with a cedilla, 0x7e30 is +// the hiragana for "ma". +// +// Glyph +// A visual character shape (every codepoint is rendered as +// some glyph) +// +// Glyph index +// A font-specific integer ID representing a glyph +// +// Baseline +// Glyph shapes are defined relative to a baseline, which is the +// bottom of uppercase characters. Characters extend both above +// and below the baseline. +// +// Current Point +// As you draw text to the screen, you keep track of a "current point" +// which is the origin of each character. The current point's vertical +// position is the baseline. Even "baked fonts" use this model. +// +// Vertical Font Metrics +// The vertical qualities of the font, used to vertically position +// and space the characters. See docs for stbtt_GetFontVMetrics. +// +// Font Size in Pixels or Points +// The preferred interface for specifying font sizes in stb_truetype +// is to specify how tall the font's vertical extent should be in pixels. +// If that sounds good enough, skip the next paragraph. +// +// Most font APIs instead use "points", which are a common typographic +// measurement for describing font size, defined as 72 points per inch. +// stb_truetype provides a point API for compatibility. However, true +// "per inch" conventions don't make much sense on computer displays +// since they different monitors have different number of pixels per +// inch. For example, Windows traditionally uses a convention that +// there are 96 pixels per inch, thus making 'inch' measurements have +// nothing to do with inches, and thus effectively defining a point to +// be 1.333 pixels. Additionally, the TrueType font data provides +// an explicit scale factor to scale a given font's glyphs to points, +// but the author has observed that this scale factor is often wrong +// for non-commercial fonts, thus making fonts scaled in points +// according to the TrueType spec incoherently sized in practice. +// +// ADVANCED USAGE +// +// Quality: +// +// - Use the functions with Subpixel at the end to allow your characters +// to have subpixel positioning. Since the font is anti-aliased, not +// hinted, this is very import for quality. (This is not possible with +// baked fonts.) +// +// - Kerning is now supported, and if you're supporting subpixel rendering +// then kerning is worth using to give your text a polished look. +// +// Performance: +// +// - Convert Unicode codepoints to glyph indexes and operate on the glyphs; +// if you don't do this, stb_truetype is forced to do the conversion on +// every call. +// +// - There are a lot of memory allocations. We should modify it to take +// a temp buffer and allocate from the temp buffer (without freeing), +// should help performance a lot. +// +// NOTES +// +// The system uses the raw data found in the .ttf file without changing it +// and without building auxiliary data structures. This is a bit inefficient +// on little-endian systems (the data is big-endian), but assuming you're +// caching the bitmaps or glyph shapes this shouldn't be a big deal. +// +// It appears to be very hard to programmatically determine what font a +// given file is in a general way. I provide an API for this, but I don't +// recommend it. +// +// +// SOURCE STATISTICS (based on v0.6c, 2050 LOC) +// +// Documentation & header file 520 LOC \___ 660 LOC documentation +// Sample code 140 LOC / +// Truetype parsing 620 LOC ---- 620 LOC TrueType +// Software rasterization 240 LOC \ . +// Curve tesselation 120 LOC \__ 550 LOC Bitmap creation +// Bitmap management 100 LOC / +// Baked bitmap interface 70 LOC / +// Font name matching & access 150 LOC ---- 150 +// C runtime library abstraction 60 LOC ---- 60 +// +// +// PERFORMANCE MEASUREMENTS FOR 1.06: +// +// 32-bit 64-bit +// Previous release: 8.83 s 7.68 s +// Pool allocations: 7.72 s 6.34 s +// Inline sort : 6.54 s 5.65 s +// New rasterizer : 5.63 s 5.00 s + +////////////////////////////////////////////////////////////////////////////// +////////////////////////////////////////////////////////////////////////////// +//// +//// SAMPLE PROGRAMS +//// +// +// Incomplete text-in-3d-api example, which draws quads properly aligned to be lossless +// +#if 0 +#define STB_TRUETYPE_IMPLEMENTATION // force following include to generate implementation +#include "stb_truetype.h" + +unsigned char ttf_buffer[1<<20]; +unsigned char temp_bitmap[512*512]; + +stbtt_bakedchar cdata[96]; // ASCII 32..126 is 95 glyphs +GLuint ftex; + +void my_stbtt_initfont(void) +{ + fread(ttf_buffer, 1, 1<<20, fopen("c:/windows/fonts/times.ttf", "rb")); + stbtt_BakeFontBitmap(ttf_buffer,0, 32.0, temp_bitmap,512,512, 32,96, cdata); // no guarantee this fits! + // can free ttf_buffer at this point + glGenTextures(1, &ftex); + glBindTexture(GL_TEXTURE_2D, ftex); + glTexImage2D(GL_TEXTURE_2D, 0, GL_ALPHA, 512,512, 0, GL_ALPHA, GL_UNSIGNED_BYTE, temp_bitmap); + // can free temp_bitmap at this point + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); +} + +void my_stbtt_print(float x, float y, char *text) +{ + // assume orthographic projection with units = screen pixels, origin at top left + glEnable(GL_TEXTURE_2D); + glBindTexture(GL_TEXTURE_2D, ftex); + glBegin(GL_QUADS); + while (*text) { + if (*text >= 32 && *text < 128) { + stbtt_aligned_quad q; + stbtt_GetBakedQuad(cdata, 512,512, *text-32, &x,&y,&q,1);//1=opengl & d3d10+,0=d3d9 + glTexCoord2f(q.s0,q.t1); glVertex2f(q.x0,q.y0); + glTexCoord2f(q.s1,q.t1); glVertex2f(q.x1,q.y0); + glTexCoord2f(q.s1,q.t0); glVertex2f(q.x1,q.y1); + glTexCoord2f(q.s0,q.t0); glVertex2f(q.x0,q.y1); + } + ++text; + } + glEnd(); +} +#endif +// +// +////////////////////////////////////////////////////////////////////////////// +// +// Complete program (this compiles): get a single bitmap, print as ASCII art +// +#if 0 +#include +#define STB_TRUETYPE_IMPLEMENTATION // force following include to generate implementation +#include "stb_truetype.h" + +char ttf_buffer[1<<25]; + +int main(int argc, char **argv) +{ + stbtt_fontinfo font; + unsigned char *bitmap; + int w,h,i,j,c = (argc > 1 ? atoi(argv[1]) : 'a'), s = (argc > 2 ? atoi(argv[2]) : 20); + + fread(ttf_buffer, 1, 1<<25, fopen(argc > 3 ? argv[3] : "c:/windows/fonts/arialbd.ttf", "rb")); + + stbtt_InitFont(&font, ttf_buffer, stbtt_GetFontOffsetForIndex(ttf_buffer,0)); + bitmap = stbtt_GetCodepointBitmap(&font, 0,stbtt_ScaleForPixelHeight(&font, s), c, &w, &h, 0,0); + + for (j=0; j < h; ++j) { + for (i=0; i < w; ++i) + putchar(" .:ioVM@"[bitmap[j*w+i]>>5]); + putchar('\n'); + } + return 0; +} +#endif +// +// Output: +// +// .ii. +// @@@@@@. +// V@Mio@@o +// :i. V@V +// :oM@@M +// :@@@MM@M +// @@o o@M +// :@@. M@M +// @@@o@@@@ +// :M@@V:@@. +// +////////////////////////////////////////////////////////////////////////////// +// +// Complete program: print "Hello World!" banner, with bugs +// +#if 0 +char buffer[24<<20]; +unsigned char screen[20][79]; + +int main(int arg, char **argv) +{ + stbtt_fontinfo font; + int i,j,ascent,baseline,ch=0; + float scale, xpos=2; // leave a little padding in case the character extends left + char *text = "Heljo World!"; // intentionally misspelled to show 'lj' brokenness + + fread(buffer, 1, 1000000, fopen("c:/windows/fonts/arialbd.ttf", "rb")); + stbtt_InitFont(&font, buffer, 0); + + scale = stbtt_ScaleForPixelHeight(&font, 15); + stbtt_GetFontVMetrics(&font, &ascent,0,0); + baseline = (int) (ascent*scale); + + while (text[ch]) { + int advance,lsb,x0,y0,x1,y1; + float x_shift = xpos - (float) floor(xpos); + stbtt_GetCodepointHMetrics(&font, text[ch], &advance, &lsb); + stbtt_GetCodepointBitmapBoxSubpixel(&font, text[ch], scale,scale,x_shift,0, &x0,&y0,&x1,&y1); + stbtt_MakeCodepointBitmapSubpixel(&font, &screen[baseline + y0][(int) xpos + x0], x1-x0,y1-y0, 79, scale,scale,x_shift,0, text[ch]); + // note that this stomps the old data, so where character boxes overlap (e.g. 'lj') it's wrong + // because this API is really for baking character bitmaps into textures. if you want to render + // a sequence of characters, you really need to render each bitmap to a temp buffer, then + // "alpha blend" that into the working buffer + xpos += (advance * scale); + if (text[ch+1]) + xpos += scale*stbtt_GetCodepointKernAdvance(&font, text[ch],text[ch+1]); + ++ch; + } + + for (j=0; j < 20; ++j) { + for (i=0; i < 78; ++i) + putchar(" .:ioVM@"[screen[j][i]>>5]); + putchar('\n'); + } + + return 0; +} +#endif + + +////////////////////////////////////////////////////////////////////////////// +////////////////////////////////////////////////////////////////////////////// +//// +//// INTEGRATION WITH YOUR CODEBASE +//// +//// The following sections allow you to supply alternate definitions +//// of C library functions used by stb_truetype. + +#ifdef STB_TRUETYPE_IMPLEMENTATION + // #define your own (u)stbtt_int8/16/32 before including to override this + #ifndef stbtt_uint8 + typedef unsigned char stbtt_uint8; + typedef signed char stbtt_int8; + typedef unsigned short stbtt_uint16; + typedef signed short stbtt_int16; + typedef unsigned int stbtt_uint32; + typedef signed int stbtt_int32; + #endif + + typedef char stbtt__check_size32[sizeof(stbtt_int32)==4 ? 1 : -1]; + typedef char stbtt__check_size16[sizeof(stbtt_int16)==2 ? 1 : -1]; + + // #define your own STBTT_ifloor/STBTT_iceil() to avoid math.h + #ifndef STBTT_ifloor + #include + #define STBTT_ifloor(x) ((int) floor(x)) + #define STBTT_iceil(x) ((int) ceil(x)) + #endif + + #ifndef STBTT_sqrt + #include + #define STBTT_sqrt(x) sqrt(x) + #endif + + #ifndef STBTT_fabs + #include + #define STBTT_fabs(x) fabs(x) + #endif + + // #define your own functions "STBTT_malloc" / "STBTT_free" to avoid malloc.h + #ifndef STBTT_malloc + #include + #define STBTT_malloc(x,u) ((void)(u),malloc(x)) + #define STBTT_free(x,u) ((void)(u),free(x)) + #endif + + #ifndef STBTT_assert + #include + #define STBTT_assert(x) assert(x) + #endif + + #ifndef STBTT_strlen + #include + #define STBTT_strlen(x) strlen(x) + #endif + + #ifndef STBTT_memcpy + #include + #define STBTT_memcpy memcpy + #define STBTT_memset memset + #endif +#endif + +/////////////////////////////////////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////// +//// +//// INTERFACE +//// +//// + +#ifndef __STB_INCLUDE_STB_TRUETYPE_H__ +#define __STB_INCLUDE_STB_TRUETYPE_H__ + +#ifdef STBTT_STATIC +#define STBTT_DEF static +#else +#define STBTT_DEF extern +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +////////////////////////////////////////////////////////////////////////////// +// +// TEXTURE BAKING API +// +// If you use this API, you only have to call two functions ever. +// + +typedef struct +{ + unsigned short x0,y0,x1,y1; // coordinates of bbox in bitmap + float xoff,yoff,xadvance; +} stbtt_bakedchar; + +STBTT_DEF int stbtt_BakeFontBitmap(const unsigned char *data, int offset, // font location (use offset=0 for plain .ttf) + float pixel_height, // height of font in pixels + unsigned char *pixels, int pw, int ph, // bitmap to be filled in + int first_char, int num_chars, // characters to bake + stbtt_bakedchar *chardata); // you allocate this, it's num_chars long +// if return is positive, the first unused row of the bitmap +// if return is negative, returns the negative of the number of characters that fit +// if return is 0, no characters fit and no rows were used +// This uses a very crappy packing. + +typedef struct +{ + float x0,y0,s0,t0; // top-left + float x1,y1,s1,t1; // bottom-right +} stbtt_aligned_quad; + +STBTT_DEF void stbtt_GetBakedQuad(stbtt_bakedchar *chardata, int pw, int ph, // same data as above + int char_index, // character to display + float *xpos, float *ypos, // pointers to current position in screen pixel space + stbtt_aligned_quad *q, // output: quad to draw + int opengl_fillrule); // true if opengl fill rule; false if DX9 or earlier +// Call GetBakedQuad with char_index = 'character - first_char', and it +// creates the quad you need to draw and advances the current position. +// +// The coordinate system used assumes y increases downwards. +// +// Characters will extend both above and below the current position; +// see discussion of "BASELINE" above. +// +// It's inefficient; you might want to c&p it and optimize it. + + + +////////////////////////////////////////////////////////////////////////////// +// +// NEW TEXTURE BAKING API +// +// This provides options for packing multiple fonts into one atlas, not +// perfectly but better than nothing. + +typedef struct +{ + unsigned short x0,y0,x1,y1; // coordinates of bbox in bitmap + float xoff,yoff,xadvance; + float xoff2,yoff2; +} stbtt_packedchar; + +typedef struct stbtt_pack_context stbtt_pack_context; +typedef struct stbtt_fontinfo stbtt_fontinfo; +#ifndef STB_RECT_PACK_VERSION +typedef struct stbrp_rect stbrp_rect; +#endif + +STBTT_DEF int stbtt_PackBegin(stbtt_pack_context *spc, unsigned char *pixels, int width, int height, int stride_in_bytes, int padding, void *alloc_context); +// Initializes a packing context stored in the passed-in stbtt_pack_context. +// Future calls using this context will pack characters into the bitmap passed +// in here: a 1-channel bitmap that is weight x height. stride_in_bytes is +// the distance from one row to the next (or 0 to mean they are packed tightly +// together). "padding" is the amount of padding to leave between each +// character (normally you want '1' for bitmaps you'll use as textures with +// bilinear filtering). +// +// Returns 0 on failure, 1 on success. + +STBTT_DEF void stbtt_PackEnd (stbtt_pack_context *spc); +// Cleans up the packing context and frees all memory. + +#define STBTT_POINT_SIZE(x) (-(x)) + +STBTT_DEF int stbtt_PackFontRange(stbtt_pack_context *spc, unsigned char *fontdata, int font_index, float font_size, + int first_unicode_char_in_range, int num_chars_in_range, stbtt_packedchar *chardata_for_range); +// Creates character bitmaps from the font_index'th font found in fontdata (use +// font_index=0 if you don't know what that is). It creates num_chars_in_range +// bitmaps for characters with unicode values starting at first_unicode_char_in_range +// and increasing. Data for how to render them is stored in chardata_for_range; +// pass these to stbtt_GetPackedQuad to get back renderable quads. +// +// font_size is the full height of the character from ascender to descender, +// as computed by stbtt_ScaleForPixelHeight. To use a point size as computed +// by stbtt_ScaleForMappingEmToPixels, wrap the point size in STBTT_POINT_SIZE() +// and pass that result as 'font_size': +// ..., 20 , ... // font max minus min y is 20 pixels tall +// ..., STBTT_POINT_SIZE(20), ... // 'M' is 20 pixels tall + +typedef struct +{ + float font_size; + int first_unicode_codepoint_in_range; // if non-zero, then the chars are continuous, and this is the first codepoint + int *array_of_unicode_codepoints; // if non-zero, then this is an array of unicode codepoints + int num_chars; + stbtt_packedchar *chardata_for_range; // output + unsigned char h_oversample, v_oversample; // don't set these, they're used internally +} stbtt_pack_range; + +STBTT_DEF int stbtt_PackFontRanges(stbtt_pack_context *spc, unsigned char *fontdata, int font_index, stbtt_pack_range *ranges, int num_ranges); +// Creates character bitmaps from multiple ranges of characters stored in +// ranges. This will usually create a better-packed bitmap than multiple +// calls to stbtt_PackFontRange. Note that you can call this multiple +// times within a single PackBegin/PackEnd. + +STBTT_DEF void stbtt_PackSetOversampling(stbtt_pack_context *spc, unsigned int h_oversample, unsigned int v_oversample); +// Oversampling a font increases the quality by allowing higher-quality subpixel +// positioning, and is especially valuable at smaller text sizes. +// +// This function sets the amount of oversampling for all following calls to +// stbtt_PackFontRange(s) or stbtt_PackFontRangesGatherRects for a given +// pack context. The default (no oversampling) is achieved by h_oversample=1 +// and v_oversample=1. The total number of pixels required is +// h_oversample*v_oversample larger than the default; for example, 2x2 +// oversampling requires 4x the storage of 1x1. For best results, render +// oversampled textures with bilinear filtering. Look at the readme in +// stb/tests/oversample for information about oversampled fonts +// +// To use with PackFontRangesGather etc., you must set it before calls +// call to PackFontRangesGatherRects. + +STBTT_DEF void stbtt_GetPackedQuad(stbtt_packedchar *chardata, int pw, int ph, // same data as above + int char_index, // character to display + float *xpos, float *ypos, // pointers to current position in screen pixel space + stbtt_aligned_quad *q, // output: quad to draw + int align_to_integer); + +STBTT_DEF int stbtt_PackFontRangesGatherRects(stbtt_pack_context *spc, stbtt_fontinfo *info, stbtt_pack_range *ranges, int num_ranges, stbrp_rect *rects); +STBTT_DEF void stbtt_PackFontRangesPackRects(stbtt_pack_context *spc, stbrp_rect *rects, int num_rects); +STBTT_DEF int stbtt_PackFontRangesRenderIntoRects(stbtt_pack_context *spc, stbtt_fontinfo *info, stbtt_pack_range *ranges, int num_ranges, stbrp_rect *rects); +// Calling these functions in sequence is roughly equivalent to calling +// stbtt_PackFontRanges(). If you more control over the packing of multiple +// fonts, or if you want to pack custom data into a font texture, take a look +// at the source to of stbtt_PackFontRanges() and create a custom version +// using these functions, e.g. call GatherRects multiple times, +// building up a single array of rects, then call PackRects once, +// then call RenderIntoRects repeatedly. This may result in a +// better packing than calling PackFontRanges multiple times +// (or it may not). + +// this is an opaque structure that you shouldn't mess with which holds +// all the context needed from PackBegin to PackEnd. +struct stbtt_pack_context { + void *user_allocator_context; + void *pack_info; + int width; + int height; + int stride_in_bytes; + int padding; + unsigned int h_oversample, v_oversample; + unsigned char *pixels; + void *nodes; +}; + +////////////////////////////////////////////////////////////////////////////// +// +// FONT LOADING +// +// + +STBTT_DEF int stbtt_GetFontOffsetForIndex(const unsigned char *data, int index); +// Each .ttf/.ttc file may have more than one font. Each font has a sequential +// index number starting from 0. Call this function to get the font offset for +// a given index; it returns -1 if the index is out of range. A regular .ttf +// file will only define one font and it always be at offset 0, so it will +// return '0' for index 0, and -1 for all other indices. You can just skip +// this step if you know it's that kind of font. + + +// The following structure is defined publically so you can declare one on +// the stack or as a global or etc, but you should treat it as opaque. +struct stbtt_fontinfo +{ + void * userdata; + unsigned char * data; // pointer to .ttf file + int fontstart; // offset of start of font + + int numGlyphs; // number of glyphs, needed for range checking + + int loca,head,glyf,hhea,hmtx,kern; // table locations as offset from start of .ttf + int index_map; // a cmap mapping for our chosen character encoding + int indexToLocFormat; // format needed to map from glyph index to glyph +}; + +STBTT_DEF int stbtt_InitFont(stbtt_fontinfo *info, const unsigned char *data, int offset); +// Given an offset into the file that defines a font, this function builds +// the necessary cached info for the rest of the system. You must allocate +// the stbtt_fontinfo yourself, and stbtt_InitFont will fill it out. You don't +// need to do anything special to free it, because the contents are pure +// value data with no additional data structures. Returns 0 on failure. + + +////////////////////////////////////////////////////////////////////////////// +// +// CHARACTER TO GLYPH-INDEX CONVERSIOn + +STBTT_DEF int stbtt_FindGlyphIndex(const stbtt_fontinfo *info, int unicode_codepoint); +// If you're going to perform multiple operations on the same character +// and you want a speed-up, call this function with the character you're +// going to process, then use glyph-based functions instead of the +// codepoint-based functions. + + +////////////////////////////////////////////////////////////////////////////// +// +// CHARACTER PROPERTIES +// + +STBTT_DEF float stbtt_ScaleForPixelHeight(const stbtt_fontinfo *info, float pixels); +// computes a scale factor to produce a font whose "height" is 'pixels' tall. +// Height is measured as the distance from the highest ascender to the lowest +// descender; in other words, it's equivalent to calling stbtt_GetFontVMetrics +// and computing: +// scale = pixels / (ascent - descent) +// so if you prefer to measure height by the ascent only, use a similar calculation. + +STBTT_DEF float stbtt_ScaleForMappingEmToPixels(const stbtt_fontinfo *info, float pixels); +// computes a scale factor to produce a font whose EM size is mapped to +// 'pixels' tall. This is probably what traditional APIs compute, but +// I'm not positive. + +STBTT_DEF void stbtt_GetFontVMetrics(const stbtt_fontinfo *info, int *ascent, int *descent, int *lineGap); +// ascent is the coordinate above the baseline the font extends; descent +// is the coordinate below the baseline the font extends (i.e. it is typically negative) +// lineGap is the spacing between one row's descent and the next row's ascent... +// so you should advance the vertical position by "*ascent - *descent + *lineGap" +// these are expressed in unscaled coordinates, so you must multiply by +// the scale factor for a given size + +STBTT_DEF void stbtt_GetFontBoundingBox(const stbtt_fontinfo *info, int *x0, int *y0, int *x1, int *y1); +// the bounding box around all possible characters + +STBTT_DEF void stbtt_GetCodepointHMetrics(const stbtt_fontinfo *info, int codepoint, int *advanceWidth, int *leftSideBearing); +// leftSideBearing is the offset from the current horizontal position to the left edge of the character +// advanceWidth is the offset from the current horizontal position to the next horizontal position +// these are expressed in unscaled coordinates + +STBTT_DEF int stbtt_GetCodepointKernAdvance(const stbtt_fontinfo *info, int ch1, int ch2); +// an additional amount to add to the 'advance' value between ch1 and ch2 + +STBTT_DEF int stbtt_GetCodepointBox(const stbtt_fontinfo *info, int codepoint, int *x0, int *y0, int *x1, int *y1); +// Gets the bounding box of the visible part of the glyph, in unscaled coordinates + +STBTT_DEF void stbtt_GetGlyphHMetrics(const stbtt_fontinfo *info, int glyph_index, int *advanceWidth, int *leftSideBearing); +STBTT_DEF int stbtt_GetGlyphKernAdvance(const stbtt_fontinfo *info, int glyph1, int glyph2); +STBTT_DEF int stbtt_GetGlyphBox(const stbtt_fontinfo *info, int glyph_index, int *x0, int *y0, int *x1, int *y1); +// as above, but takes one or more glyph indices for greater efficiency + + +////////////////////////////////////////////////////////////////////////////// +// +// GLYPH SHAPES (you probably don't need these, but they have to go before +// the bitmaps for C declaration-order reasons) +// + +#ifndef STBTT_vmove // you can predefine these to use different values (but why?) + enum { + STBTT_vmove=1, + STBTT_vline, + STBTT_vcurve + }; +#endif + +#ifndef stbtt_vertex // you can predefine this to use different values + // (we share this with other code at RAD) + #define stbtt_vertex_type short // can't use stbtt_int16 because that's not visible in the header file + typedef struct + { + stbtt_vertex_type x,y,cx,cy; + unsigned char type,padding; + } stbtt_vertex; +#endif + +STBTT_DEF int stbtt_IsGlyphEmpty(const stbtt_fontinfo *info, int glyph_index); +// returns non-zero if nothing is drawn for this glyph + +STBTT_DEF int stbtt_GetCodepointShape(const stbtt_fontinfo *info, int unicode_codepoint, stbtt_vertex **vertices); +STBTT_DEF int stbtt_GetGlyphShape(const stbtt_fontinfo *info, int glyph_index, stbtt_vertex **vertices); +// returns # of vertices and fills *vertices with the pointer to them +// these are expressed in "unscaled" coordinates +// +// The shape is a series of countours. Each one starts with +// a STBTT_moveto, then consists of a series of mixed +// STBTT_lineto and STBTT_curveto segments. A lineto +// draws a line from previous endpoint to its x,y; a curveto +// draws a quadratic bezier from previous endpoint to +// its x,y, using cx,cy as the bezier control point. + +STBTT_DEF void stbtt_FreeShape(const stbtt_fontinfo *info, stbtt_vertex *vertices); +// frees the data allocated above + +////////////////////////////////////////////////////////////////////////////// +// +// BITMAP RENDERING +// + +STBTT_DEF void stbtt_FreeBitmap(unsigned char *bitmap, void *userdata); +// frees the bitmap allocated below + +STBTT_DEF unsigned char *stbtt_GetCodepointBitmap(const stbtt_fontinfo *info, float scale_x, float scale_y, int codepoint, int *width, int *height, int *xoff, int *yoff); +// allocates a large-enough single-channel 8bpp bitmap and renders the +// specified character/glyph at the specified scale into it, with +// antialiasing. 0 is no coverage (transparent), 255 is fully covered (opaque). +// *width & *height are filled out with the width & height of the bitmap, +// which is stored left-to-right, top-to-bottom. +// +// xoff/yoff are the offset it pixel space from the glyph origin to the top-left of the bitmap + +STBTT_DEF unsigned char *stbtt_GetCodepointBitmapSubpixel(const stbtt_fontinfo *info, float scale_x, float scale_y, float shift_x, float shift_y, int codepoint, int *width, int *height, int *xoff, int *yoff); +// the same as stbtt_GetCodepoitnBitmap, but you can specify a subpixel +// shift for the character + +STBTT_DEF void stbtt_MakeCodepointBitmap(const stbtt_fontinfo *info, unsigned char *output, int out_w, int out_h, int out_stride, float scale_x, float scale_y, int codepoint); +// the same as stbtt_GetCodepointBitmap, but you pass in storage for the bitmap +// in the form of 'output', with row spacing of 'out_stride' bytes. the bitmap +// is clipped to out_w/out_h bytes. Call stbtt_GetCodepointBitmapBox to get the +// width and height and positioning info for it first. + +STBTT_DEF void stbtt_MakeCodepointBitmapSubpixel(const stbtt_fontinfo *info, unsigned char *output, int out_w, int out_h, int out_stride, float scale_x, float scale_y, float shift_x, float shift_y, int codepoint); +// same as stbtt_MakeCodepointBitmap, but you can specify a subpixel +// shift for the character + +STBTT_DEF void stbtt_GetCodepointBitmapBox(const stbtt_fontinfo *font, int codepoint, float scale_x, float scale_y, int *ix0, int *iy0, int *ix1, int *iy1); +// get the bbox of the bitmap centered around the glyph origin; so the +// bitmap width is ix1-ix0, height is iy1-iy0, and location to place +// the bitmap top left is (leftSideBearing*scale,iy0). +// (Note that the bitmap uses y-increases-down, but the shape uses +// y-increases-up, so CodepointBitmapBox and CodepointBox are inverted.) + +STBTT_DEF void stbtt_GetCodepointBitmapBoxSubpixel(const stbtt_fontinfo *font, int codepoint, float scale_x, float scale_y, float shift_x, float shift_y, int *ix0, int *iy0, int *ix1, int *iy1); +// same as stbtt_GetCodepointBitmapBox, but you can specify a subpixel +// shift for the character + +// the following functions are equivalent to the above functions, but operate +// on glyph indices instead of Unicode codepoints (for efficiency) +STBTT_DEF unsigned char *stbtt_GetGlyphBitmap(const stbtt_fontinfo *info, float scale_x, float scale_y, int glyph, int *width, int *height, int *xoff, int *yoff); +STBTT_DEF unsigned char *stbtt_GetGlyphBitmapSubpixel(const stbtt_fontinfo *info, float scale_x, float scale_y, float shift_x, float shift_y, int glyph, int *width, int *height, int *xoff, int *yoff); +STBTT_DEF void stbtt_MakeGlyphBitmap(const stbtt_fontinfo *info, unsigned char *output, int out_w, int out_h, int out_stride, float scale_x, float scale_y, int glyph); +STBTT_DEF void stbtt_MakeGlyphBitmapSubpixel(const stbtt_fontinfo *info, unsigned char *output, int out_w, int out_h, int out_stride, float scale_x, float scale_y, float shift_x, float shift_y, int glyph); +STBTT_DEF void stbtt_GetGlyphBitmapBox(const stbtt_fontinfo *font, int glyph, float scale_x, float scale_y, int *ix0, int *iy0, int *ix1, int *iy1); +STBTT_DEF void stbtt_GetGlyphBitmapBoxSubpixel(const stbtt_fontinfo *font, int glyph, float scale_x, float scale_y,float shift_x, float shift_y, int *ix0, int *iy0, int *ix1, int *iy1); + + +// @TODO: don't expose this structure +typedef struct +{ + int w,h,stride; + unsigned char *pixels; +} stbtt__bitmap; + +// rasterize a shape with quadratic beziers into a bitmap +STBTT_DEF void stbtt_Rasterize(stbtt__bitmap *result, // 1-channel bitmap to draw into + float flatness_in_pixels, // allowable error of curve in pixels + stbtt_vertex *vertices, // array of vertices defining shape + int num_verts, // number of vertices in above array + float scale_x, float scale_y, // scale applied to input vertices + float shift_x, float shift_y, // translation applied to input vertices + int x_off, int y_off, // another translation applied to input + int invert, // if non-zero, vertically flip shape + void *userdata); // context for to STBTT_MALLOC + +////////////////////////////////////////////////////////////////////////////// +// +// Finding the right font... +// +// You should really just solve this offline, keep your own tables +// of what font is what, and don't try to get it out of the .ttf file. +// That's because getting it out of the .ttf file is really hard, because +// the names in the file can appear in many possible encodings, in many +// possible languages, and e.g. if you need a case-insensitive comparison, +// the details of that depend on the encoding & language in a complex way +// (actually underspecified in truetype, but also gigantic). +// +// But you can use the provided functions in two possible ways: +// stbtt_FindMatchingFont() will use *case-sensitive* comparisons on +// unicode-encoded names to try to find the font you want; +// you can run this before calling stbtt_InitFont() +// +// stbtt_GetFontNameString() lets you get any of the various strings +// from the file yourself and do your own comparisons on them. +// You have to have called stbtt_InitFont() first. + + +STBTT_DEF int stbtt_FindMatchingFont(const unsigned char *fontdata, const char *name, int flags); +// returns the offset (not index) of the font that matches, or -1 if none +// if you use STBTT_MACSTYLE_DONTCARE, use a font name like "Arial Bold". +// if you use any other flag, use a font name like "Arial"; this checks +// the 'macStyle' header field; i don't know if fonts set this consistently +#define STBTT_MACSTYLE_DONTCARE 0 +#define STBTT_MACSTYLE_BOLD 1 +#define STBTT_MACSTYLE_ITALIC 2 +#define STBTT_MACSTYLE_UNDERSCORE 4 +#define STBTT_MACSTYLE_NONE 8 // <= not same as 0, this makes us check the bitfield is 0 + +STBTT_DEF int stbtt_CompareUTF8toUTF16_bigendian(const char *s1, int len1, const char *s2, int len2); +// returns 1/0 whether the first string interpreted as utf8 is identical to +// the second string interpreted as big-endian utf16... useful for strings from next func + +STBTT_DEF const char *stbtt_GetFontNameString(const stbtt_fontinfo *font, int *length, int platformID, int encodingID, int languageID, int nameID); +// returns the string (which may be big-endian double byte, e.g. for unicode) +// and puts the length in bytes in *length. +// +// some of the values for the IDs are below; for more see the truetype spec: +// http://developer.apple.com/textfonts/TTRefMan/RM06/Chap6name.html +// http://www.microsoft.com/typography/otspec/name.htm + +enum { // platformID + STBTT_PLATFORM_ID_UNICODE =0, + STBTT_PLATFORM_ID_MAC =1, + STBTT_PLATFORM_ID_ISO =2, + STBTT_PLATFORM_ID_MICROSOFT =3 +}; + +enum { // encodingID for STBTT_PLATFORM_ID_UNICODE + STBTT_UNICODE_EID_UNICODE_1_0 =0, + STBTT_UNICODE_EID_UNICODE_1_1 =1, + STBTT_UNICODE_EID_ISO_10646 =2, + STBTT_UNICODE_EID_UNICODE_2_0_BMP=3, + STBTT_UNICODE_EID_UNICODE_2_0_FULL=4 +}; + +enum { // encodingID for STBTT_PLATFORM_ID_MICROSOFT + STBTT_MS_EID_SYMBOL =0, + STBTT_MS_EID_UNICODE_BMP =1, + STBTT_MS_EID_SHIFTJIS =2, + STBTT_MS_EID_UNICODE_FULL =10 +}; + +enum { // encodingID for STBTT_PLATFORM_ID_MAC; same as Script Manager codes + STBTT_MAC_EID_ROMAN =0, STBTT_MAC_EID_ARABIC =4, + STBTT_MAC_EID_JAPANESE =1, STBTT_MAC_EID_HEBREW =5, + STBTT_MAC_EID_CHINESE_TRAD =2, STBTT_MAC_EID_GREEK =6, + STBTT_MAC_EID_KOREAN =3, STBTT_MAC_EID_RUSSIAN =7 +}; + +enum { // languageID for STBTT_PLATFORM_ID_MICROSOFT; same as LCID... + // problematic because there are e.g. 16 english LCIDs and 16 arabic LCIDs + STBTT_MS_LANG_ENGLISH =0x0409, STBTT_MS_LANG_ITALIAN =0x0410, + STBTT_MS_LANG_CHINESE =0x0804, STBTT_MS_LANG_JAPANESE =0x0411, + STBTT_MS_LANG_DUTCH =0x0413, STBTT_MS_LANG_KOREAN =0x0412, + STBTT_MS_LANG_FRENCH =0x040c, STBTT_MS_LANG_RUSSIAN =0x0419, + STBTT_MS_LANG_GERMAN =0x0407, STBTT_MS_LANG_SPANISH =0x0409, + STBTT_MS_LANG_HEBREW =0x040d, STBTT_MS_LANG_SWEDISH =0x041D +}; + +enum { // languageID for STBTT_PLATFORM_ID_MAC + STBTT_MAC_LANG_ENGLISH =0 , STBTT_MAC_LANG_JAPANESE =11, + STBTT_MAC_LANG_ARABIC =12, STBTT_MAC_LANG_KOREAN =23, + STBTT_MAC_LANG_DUTCH =4 , STBTT_MAC_LANG_RUSSIAN =32, + STBTT_MAC_LANG_FRENCH =1 , STBTT_MAC_LANG_SPANISH =6 , + STBTT_MAC_LANG_GERMAN =2 , STBTT_MAC_LANG_SWEDISH =5 , + STBTT_MAC_LANG_HEBREW =10, STBTT_MAC_LANG_CHINESE_SIMPLIFIED =33, + STBTT_MAC_LANG_ITALIAN =3 , STBTT_MAC_LANG_CHINESE_TRAD =19 +}; + +#ifdef __cplusplus +} +#endif + +#endif // __STB_INCLUDE_STB_TRUETYPE_H__ + +/////////////////////////////////////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////// +//// +//// IMPLEMENTATION +//// +//// + +#ifdef STB_TRUETYPE_IMPLEMENTATION + +#ifndef STBTT_MAX_OVERSAMPLE +#define STBTT_MAX_OVERSAMPLE 8 +#endif + +#if STBTT_MAX_OVERSAMPLE > 255 +#error "STBTT_MAX_OVERSAMPLE cannot be > 255" +#endif + +typedef int stbtt__test_oversample_pow2[(STBTT_MAX_OVERSAMPLE & (STBTT_MAX_OVERSAMPLE-1)) == 0 ? 1 : -1]; + +#ifndef STBTT_RASTERIZER_VERSION +#define STBTT_RASTERIZER_VERSION 2 +#endif + +#ifdef _MSC_VER +#define STBTT__NOTUSED(v) (void)(v) +#else +#define STBTT__NOTUSED(v) (void)sizeof(v) +#endif + +////////////////////////////////////////////////////////////////////////// +// +// accessors to parse data from file +// + +// on platforms that don't allow misaligned reads, if we want to allow +// truetype fonts that aren't padded to alignment, define ALLOW_UNALIGNED_TRUETYPE + +#define ttBYTE(p) (* (stbtt_uint8 *) (p)) +#define ttCHAR(p) (* (stbtt_int8 *) (p)) +#define ttFixed(p) ttLONG(p) + +#if defined(STB_TRUETYPE_BIGENDIAN) && !defined(ALLOW_UNALIGNED_TRUETYPE) + + #define ttUSHORT(p) (* (stbtt_uint16 *) (p)) + #define ttSHORT(p) (* (stbtt_int16 *) (p)) + #define ttULONG(p) (* (stbtt_uint32 *) (p)) + #define ttLONG(p) (* (stbtt_int32 *) (p)) + +#else + + static stbtt_uint16 ttUSHORT(const stbtt_uint8 *p) { return p[0]*256 + p[1]; } + static stbtt_int16 ttSHORT(const stbtt_uint8 *p) { return p[0]*256 + p[1]; } + static stbtt_uint32 ttULONG(const stbtt_uint8 *p) { return (p[0]<<24) + (p[1]<<16) + (p[2]<<8) + p[3]; } + static stbtt_int32 ttLONG(const stbtt_uint8 *p) { return (p[0]<<24) + (p[1]<<16) + (p[2]<<8) + p[3]; } + +#endif + +#define stbtt_tag4(p,c0,c1,c2,c3) ((p)[0] == (c0) && (p)[1] == (c1) && (p)[2] == (c2) && (p)[3] == (c3)) +#define stbtt_tag(p,str) stbtt_tag4(p,str[0],str[1],str[2],str[3]) + +static int stbtt__isfont(const stbtt_uint8 *font) +{ + // check the version number + if (stbtt_tag4(font, '1',0,0,0)) return 1; // TrueType 1 + if (stbtt_tag(font, "typ1")) return 1; // TrueType with type 1 font -- we don't support this! + if (stbtt_tag(font, "OTTO")) return 1; // OpenType with CFF + if (stbtt_tag4(font, 0,1,0,0)) return 1; // OpenType 1.0 + return 0; +} + +// @OPTIMIZE: binary search +static stbtt_uint32 stbtt__find_table(stbtt_uint8 *data, stbtt_uint32 fontstart, const char *tag) +{ + stbtt_int32 num_tables = ttUSHORT(data+fontstart+4); + stbtt_uint32 tabledir = fontstart + 12; + stbtt_int32 i; + for (i=0; i < num_tables; ++i) { + stbtt_uint32 loc = tabledir + 16*i; + if (stbtt_tag(data+loc+0, tag)) + return ttULONG(data+loc+8); + } + return 0; +} + +STBTT_DEF int stbtt_GetFontOffsetForIndex(const unsigned char *font_collection, int index) +{ + // if it's just a font, there's only one valid index + if (stbtt__isfont(font_collection)) + return index == 0 ? 0 : -1; + + // check if it's a TTC + if (stbtt_tag(font_collection, "ttcf")) { + // version 1? + if (ttULONG(font_collection+4) == 0x00010000 || ttULONG(font_collection+4) == 0x00020000) { + stbtt_int32 n = ttLONG(font_collection+8); + if (index >= n) + return -1; + return ttULONG(font_collection+12+index*4); + } + } + return -1; +} + +STBTT_DEF int stbtt_InitFont(stbtt_fontinfo *info, const unsigned char *data2, int fontstart) +{ + stbtt_uint8 *data = (stbtt_uint8 *) data2; + stbtt_uint32 cmap, t; + stbtt_int32 i,numTables; + + info->data = data; + info->fontstart = fontstart; + + cmap = stbtt__find_table(data, fontstart, "cmap"); // required + info->loca = stbtt__find_table(data, fontstart, "loca"); // required + info->head = stbtt__find_table(data, fontstart, "head"); // required + info->glyf = stbtt__find_table(data, fontstart, "glyf"); // required + info->hhea = stbtt__find_table(data, fontstart, "hhea"); // required + info->hmtx = stbtt__find_table(data, fontstart, "hmtx"); // required + info->kern = stbtt__find_table(data, fontstart, "kern"); // not required + if (!cmap || !info->loca || !info->head || !info->glyf || !info->hhea || !info->hmtx) + return 0; + + t = stbtt__find_table(data, fontstart, "maxp"); + if (t) + info->numGlyphs = ttUSHORT(data+t+4); + else + info->numGlyphs = 0xffff; + + // find a cmap encoding table we understand *now* to avoid searching + // later. (todo: could make this installable) + // the same regardless of glyph. + numTables = ttUSHORT(data + cmap + 2); + info->index_map = 0; + for (i=0; i < numTables; ++i) { + stbtt_uint32 encoding_record = cmap + 4 + 8 * i; + // find an encoding we understand: + switch(ttUSHORT(data+encoding_record)) { + case STBTT_PLATFORM_ID_MICROSOFT: + switch (ttUSHORT(data+encoding_record+2)) { + case STBTT_MS_EID_UNICODE_BMP: + case STBTT_MS_EID_UNICODE_FULL: + // MS/Unicode + info->index_map = cmap + ttULONG(data+encoding_record+4); + break; + } + break; + case STBTT_PLATFORM_ID_UNICODE: + // Mac/iOS has these + // all the encodingIDs are unicode, so we don't bother to check it + info->index_map = cmap + ttULONG(data+encoding_record+4); + break; + } + } + if (info->index_map == 0) + return 0; + + info->indexToLocFormat = ttUSHORT(data+info->head + 50); + return 1; +} + +STBTT_DEF int stbtt_FindGlyphIndex(const stbtt_fontinfo *info, int unicode_codepoint) +{ + stbtt_uint8 *data = info->data; + stbtt_uint32 index_map = info->index_map; + + stbtt_uint16 format = ttUSHORT(data + index_map + 0); + if (format == 0) { // apple byte encoding + stbtt_int32 bytes = ttUSHORT(data + index_map + 2); + if (unicode_codepoint < bytes-6) + return ttBYTE(data + index_map + 6 + unicode_codepoint); + return 0; + } else if (format == 6) { + stbtt_uint32 first = ttUSHORT(data + index_map + 6); + stbtt_uint32 count = ttUSHORT(data + index_map + 8); + if ((stbtt_uint32) unicode_codepoint >= first && (stbtt_uint32) unicode_codepoint < first+count) + return ttUSHORT(data + index_map + 10 + (unicode_codepoint - first)*2); + return 0; + } else if (format == 2) { + STBTT_assert(0); // @TODO: high-byte mapping for japanese/chinese/korean + return 0; + } else if (format == 4) { // standard mapping for windows fonts: binary search collection of ranges + stbtt_uint16 segcount = ttUSHORT(data+index_map+6) >> 1; + stbtt_uint16 searchRange = ttUSHORT(data+index_map+8) >> 1; + stbtt_uint16 entrySelector = ttUSHORT(data+index_map+10); + stbtt_uint16 rangeShift = ttUSHORT(data+index_map+12) >> 1; + + // do a binary search of the segments + stbtt_uint32 endCount = index_map + 14; + stbtt_uint32 search = endCount; + + if (unicode_codepoint > 0xffff) + return 0; + + // they lie from endCount .. endCount + segCount + // but searchRange is the nearest power of two, so... + if (unicode_codepoint >= ttUSHORT(data + search + rangeShift*2)) + search += rangeShift*2; + + // now decrement to bias correctly to find smallest + search -= 2; + while (entrySelector) { + stbtt_uint16 end; + searchRange >>= 1; + end = ttUSHORT(data + search + searchRange*2); + if (unicode_codepoint > end) + search += searchRange*2; + --entrySelector; + } + search += 2; + + { + stbtt_uint16 offset, start; + stbtt_uint16 item = (stbtt_uint16) ((search - endCount) >> 1); + + STBTT_assert(unicode_codepoint <= ttUSHORT(data + endCount + 2*item)); + start = ttUSHORT(data + index_map + 14 + segcount*2 + 2 + 2*item); + if (unicode_codepoint < start) + return 0; + + offset = ttUSHORT(data + index_map + 14 + segcount*6 + 2 + 2*item); + if (offset == 0) + return (stbtt_uint16) (unicode_codepoint + ttSHORT(data + index_map + 14 + segcount*4 + 2 + 2*item)); + + return ttUSHORT(data + offset + (unicode_codepoint-start)*2 + index_map + 14 + segcount*6 + 2 + 2*item); + } + } else if (format == 12 || format == 13) { + stbtt_uint32 ngroups = ttULONG(data+index_map+12); + stbtt_int32 low,high; + low = 0; high = (stbtt_int32)ngroups; + // Binary search the right group. + while (low < high) { + stbtt_int32 mid = low + ((high-low) >> 1); // rounds down, so low <= mid < high + stbtt_uint32 start_char = ttULONG(data+index_map+16+mid*12); + stbtt_uint32 end_char = ttULONG(data+index_map+16+mid*12+4); + if ((stbtt_uint32) unicode_codepoint < start_char) + high = mid; + else if ((stbtt_uint32) unicode_codepoint > end_char) + low = mid+1; + else { + stbtt_uint32 start_glyph = ttULONG(data+index_map+16+mid*12+8); + if (format == 12) + return start_glyph + unicode_codepoint-start_char; + else // format == 13 + return start_glyph; + } + } + return 0; // not found + } + // @TODO + STBTT_assert(0); + return 0; +} + +STBTT_DEF int stbtt_GetCodepointShape(const stbtt_fontinfo *info, int unicode_codepoint, stbtt_vertex **vertices) +{ + return stbtt_GetGlyphShape(info, stbtt_FindGlyphIndex(info, unicode_codepoint), vertices); +} + +static void stbtt_setvertex(stbtt_vertex *v, stbtt_uint8 type, stbtt_int32 x, stbtt_int32 y, stbtt_int32 cx, stbtt_int32 cy) +{ + v->type = type; + v->x = (stbtt_int16) x; + v->y = (stbtt_int16) y; + v->cx = (stbtt_int16) cx; + v->cy = (stbtt_int16) cy; +} + +static int stbtt__GetGlyfOffset(const stbtt_fontinfo *info, int glyph_index) +{ + int g1,g2; + + if (glyph_index >= info->numGlyphs) return -1; // glyph index out of range + if (info->indexToLocFormat >= 2) return -1; // unknown index->glyph map format + + if (info->indexToLocFormat == 0) { + g1 = info->glyf + ttUSHORT(info->data + info->loca + glyph_index * 2) * 2; + g2 = info->glyf + ttUSHORT(info->data + info->loca + glyph_index * 2 + 2) * 2; + } else { + g1 = info->glyf + ttULONG (info->data + info->loca + glyph_index * 4); + g2 = info->glyf + ttULONG (info->data + info->loca + glyph_index * 4 + 4); + } + + return g1==g2 ? -1 : g1; // if length is 0, return -1 +} + +STBTT_DEF int stbtt_GetGlyphBox(const stbtt_fontinfo *info, int glyph_index, int *x0, int *y0, int *x1, int *y1) +{ + int g = stbtt__GetGlyfOffset(info, glyph_index); + if (g < 0) return 0; + + if (x0) *x0 = ttSHORT(info->data + g + 2); + if (y0) *y0 = ttSHORT(info->data + g + 4); + if (x1) *x1 = ttSHORT(info->data + g + 6); + if (y1) *y1 = ttSHORT(info->data + g + 8); + return 1; +} + +STBTT_DEF int stbtt_GetCodepointBox(const stbtt_fontinfo *info, int codepoint, int *x0, int *y0, int *x1, int *y1) +{ + return stbtt_GetGlyphBox(info, stbtt_FindGlyphIndex(info,codepoint), x0,y0,x1,y1); +} + +STBTT_DEF int stbtt_IsGlyphEmpty(const stbtt_fontinfo *info, int glyph_index) +{ + stbtt_int16 numberOfContours; + int g = stbtt__GetGlyfOffset(info, glyph_index); + if (g < 0) return 1; + numberOfContours = ttSHORT(info->data + g); + return numberOfContours == 0; +} + +static int stbtt__close_shape(stbtt_vertex *vertices, int num_vertices, int was_off, int start_off, + stbtt_int32 sx, stbtt_int32 sy, stbtt_int32 scx, stbtt_int32 scy, stbtt_int32 cx, stbtt_int32 cy) +{ + if (start_off) { + if (was_off) + stbtt_setvertex(&vertices[num_vertices++], STBTT_vcurve, (cx+scx)>>1, (cy+scy)>>1, cx,cy); + stbtt_setvertex(&vertices[num_vertices++], STBTT_vcurve, sx,sy,scx,scy); + } else { + if (was_off) + stbtt_setvertex(&vertices[num_vertices++], STBTT_vcurve,sx,sy,cx,cy); + else + stbtt_setvertex(&vertices[num_vertices++], STBTT_vline,sx,sy,0,0); + } + return num_vertices; +} + +STBTT_DEF int stbtt_GetGlyphShape(const stbtt_fontinfo *info, int glyph_index, stbtt_vertex **pvertices) +{ + stbtt_int16 numberOfContours; + stbtt_uint8 *endPtsOfContours; + stbtt_uint8 *data = info->data; + stbtt_vertex *vertices=0; + int num_vertices=0; + int g = stbtt__GetGlyfOffset(info, glyph_index); + + *pvertices = NULL; + + if (g < 0) return 0; + + numberOfContours = ttSHORT(data + g); + + if (numberOfContours > 0) { + stbtt_uint8 flags=0,flagcount; + stbtt_int32 ins, i,j=0,m,n, next_move, was_off=0, off, start_off=0; + stbtt_int32 x,y,cx,cy,sx,sy, scx,scy; + stbtt_uint8 *points; + endPtsOfContours = (data + g + 10); + ins = ttUSHORT(data + g + 10 + numberOfContours * 2); + points = data + g + 10 + numberOfContours * 2 + 2 + ins; + + n = 1+ttUSHORT(endPtsOfContours + numberOfContours*2-2); + + m = n + 2*numberOfContours; // a loose bound on how many vertices we might need + vertices = (stbtt_vertex *) STBTT_malloc(m * sizeof(vertices[0]), info->userdata); + if (vertices == 0) + return 0; + + next_move = 0; + flagcount=0; + + // in first pass, we load uninterpreted data into the allocated array + // above, shifted to the end of the array so we won't overwrite it when + // we create our final data starting from the front + + off = m - n; // starting offset for uninterpreted data, regardless of how m ends up being calculated + + // first load flags + + for (i=0; i < n; ++i) { + if (flagcount == 0) { + flags = *points++; + if (flags & 8) + flagcount = *points++; + } else + --flagcount; + vertices[off+i].type = flags; + } + + // now load x coordinates + x=0; + for (i=0; i < n; ++i) { + flags = vertices[off+i].type; + if (flags & 2) { + stbtt_int16 dx = *points++; + x += (flags & 16) ? dx : -dx; // ??? + } else { + if (!(flags & 16)) { + x = x + (stbtt_int16) (points[0]*256 + points[1]); + points += 2; + } + } + vertices[off+i].x = (stbtt_int16) x; + } + + // now load y coordinates + y=0; + for (i=0; i < n; ++i) { + flags = vertices[off+i].type; + if (flags & 4) { + stbtt_int16 dy = *points++; + y += (flags & 32) ? dy : -dy; // ??? + } else { + if (!(flags & 32)) { + y = y + (stbtt_int16) (points[0]*256 + points[1]); + points += 2; + } + } + vertices[off+i].y = (stbtt_int16) y; + } + + // now convert them to our format + num_vertices=0; + sx = sy = cx = cy = scx = scy = 0; + for (i=0; i < n; ++i) { + flags = vertices[off+i].type; + x = (stbtt_int16) vertices[off+i].x; + y = (stbtt_int16) vertices[off+i].y; + + if (next_move == i) { + if (i != 0) + num_vertices = stbtt__close_shape(vertices, num_vertices, was_off, start_off, sx,sy,scx,scy,cx,cy); + + // now start the new one + start_off = !(flags & 1); + if (start_off) { + // if we start off with an off-curve point, then when we need to find a point on the curve + // where we can start, and we need to save some state for when we wraparound. + scx = x; + scy = y; + if (!(vertices[off+i+1].type & 1)) { + // next point is also a curve point, so interpolate an on-point curve + sx = (x + (stbtt_int32) vertices[off+i+1].x) >> 1; + sy = (y + (stbtt_int32) vertices[off+i+1].y) >> 1; + } else { + // otherwise just use the next point as our start point + sx = (stbtt_int32) vertices[off+i+1].x; + sy = (stbtt_int32) vertices[off+i+1].y; + ++i; // we're using point i+1 as the starting point, so skip it + } + } else { + sx = x; + sy = y; + } + stbtt_setvertex(&vertices[num_vertices++], STBTT_vmove,sx,sy,0,0); + was_off = 0; + next_move = 1 + ttUSHORT(endPtsOfContours+j*2); + ++j; + } else { + if (!(flags & 1)) { // if it's a curve + if (was_off) // two off-curve control points in a row means interpolate an on-curve midpoint + stbtt_setvertex(&vertices[num_vertices++], STBTT_vcurve, (cx+x)>>1, (cy+y)>>1, cx, cy); + cx = x; + cy = y; + was_off = 1; + } else { + if (was_off) + stbtt_setvertex(&vertices[num_vertices++], STBTT_vcurve, x,y, cx, cy); + else + stbtt_setvertex(&vertices[num_vertices++], STBTT_vline, x,y,0,0); + was_off = 0; + } + } + } + num_vertices = stbtt__close_shape(vertices, num_vertices, was_off, start_off, sx,sy,scx,scy,cx,cy); + } else if (numberOfContours == -1) { + // Compound shapes. + int more = 1; + stbtt_uint8 *comp = data + g + 10; + num_vertices = 0; + vertices = 0; + while (more) { + stbtt_uint16 flags, gidx; + int comp_num_verts = 0, i; + stbtt_vertex *comp_verts = 0, *tmp = 0; + float mtx[6] = {1,0,0,1,0,0}, m, n; + + flags = ttSHORT(comp); comp+=2; + gidx = ttSHORT(comp); comp+=2; + + if (flags & 2) { // XY values + if (flags & 1) { // shorts + mtx[4] = ttSHORT(comp); comp+=2; + mtx[5] = ttSHORT(comp); comp+=2; + } else { + mtx[4] = ttCHAR(comp); comp+=1; + mtx[5] = ttCHAR(comp); comp+=1; + } + } + else { + // @TODO handle matching point + STBTT_assert(0); + } + if (flags & (1<<3)) { // WE_HAVE_A_SCALE + mtx[0] = mtx[3] = ttSHORT(comp)/16384.0f; comp+=2; + mtx[1] = mtx[2] = 0; + } else if (flags & (1<<6)) { // WE_HAVE_AN_X_AND_YSCALE + mtx[0] = ttSHORT(comp)/16384.0f; comp+=2; + mtx[1] = mtx[2] = 0; + mtx[3] = ttSHORT(comp)/16384.0f; comp+=2; + } else if (flags & (1<<7)) { // WE_HAVE_A_TWO_BY_TWO + mtx[0] = ttSHORT(comp)/16384.0f; comp+=2; + mtx[1] = ttSHORT(comp)/16384.0f; comp+=2; + mtx[2] = ttSHORT(comp)/16384.0f; comp+=2; + mtx[3] = ttSHORT(comp)/16384.0f; comp+=2; + } + + // Find transformation scales. + m = (float) STBTT_sqrt(mtx[0]*mtx[0] + mtx[1]*mtx[1]); + n = (float) STBTT_sqrt(mtx[2]*mtx[2] + mtx[3]*mtx[3]); + + // Get indexed glyph. + comp_num_verts = stbtt_GetGlyphShape(info, gidx, &comp_verts); + if (comp_num_verts > 0) { + // Transform vertices. + for (i = 0; i < comp_num_verts; ++i) { + stbtt_vertex* v = &comp_verts[i]; + stbtt_vertex_type x,y; + x=v->x; y=v->y; + v->x = (stbtt_vertex_type)(m * (mtx[0]*x + mtx[2]*y + mtx[4])); + v->y = (stbtt_vertex_type)(n * (mtx[1]*x + mtx[3]*y + mtx[5])); + x=v->cx; y=v->cy; + v->cx = (stbtt_vertex_type)(m * (mtx[0]*x + mtx[2]*y + mtx[4])); + v->cy = (stbtt_vertex_type)(n * (mtx[1]*x + mtx[3]*y + mtx[5])); + } + // Append vertices. + tmp = (stbtt_vertex*)STBTT_malloc((num_vertices+comp_num_verts)*sizeof(stbtt_vertex), info->userdata); + if (!tmp) { + if (vertices) STBTT_free(vertices, info->userdata); + if (comp_verts) STBTT_free(comp_verts, info->userdata); + return 0; + } + if (num_vertices > 0) STBTT_memcpy(tmp, vertices, num_vertices*sizeof(stbtt_vertex)); + STBTT_memcpy(tmp+num_vertices, comp_verts, comp_num_verts*sizeof(stbtt_vertex)); + if (vertices) STBTT_free(vertices, info->userdata); + vertices = tmp; + STBTT_free(comp_verts, info->userdata); + num_vertices += comp_num_verts; + } + // More components ? + more = flags & (1<<5); + } + } else if (numberOfContours < 0) { + // @TODO other compound variations? + STBTT_assert(0); + } else { + // numberOfCounters == 0, do nothing + } + + *pvertices = vertices; + return num_vertices; +} + +STBTT_DEF void stbtt_GetGlyphHMetrics(const stbtt_fontinfo *info, int glyph_index, int *advanceWidth, int *leftSideBearing) +{ + stbtt_uint16 numOfLongHorMetrics = ttUSHORT(info->data+info->hhea + 34); + if (glyph_index < numOfLongHorMetrics) { + if (advanceWidth) *advanceWidth = ttSHORT(info->data + info->hmtx + 4*glyph_index); + if (leftSideBearing) *leftSideBearing = ttSHORT(info->data + info->hmtx + 4*glyph_index + 2); + } else { + if (advanceWidth) *advanceWidth = ttSHORT(info->data + info->hmtx + 4*(numOfLongHorMetrics-1)); + if (leftSideBearing) *leftSideBearing = ttSHORT(info->data + info->hmtx + 4*numOfLongHorMetrics + 2*(glyph_index - numOfLongHorMetrics)); + } +} + +STBTT_DEF int stbtt_GetGlyphKernAdvance(const stbtt_fontinfo *info, int glyph1, int glyph2) +{ + stbtt_uint8 *data = info->data + info->kern; + stbtt_uint32 needle, straw; + int l, r, m; + + // we only look at the first table. it must be 'horizontal' and format 0. + if (!info->kern) + return 0; + if (ttUSHORT(data+2) < 1) // number of tables, need at least 1 + return 0; + if (ttUSHORT(data+8) != 1) // horizontal flag must be set in format + return 0; + + l = 0; + r = ttUSHORT(data+10) - 1; + needle = glyph1 << 16 | glyph2; + while (l <= r) { + m = (l + r) >> 1; + straw = ttULONG(data+18+(m*6)); // note: unaligned read + if (needle < straw) + r = m - 1; + else if (needle > straw) + l = m + 1; + else + return ttSHORT(data+22+(m*6)); + } + return 0; +} + +STBTT_DEF int stbtt_GetCodepointKernAdvance(const stbtt_fontinfo *info, int ch1, int ch2) +{ + if (!info->kern) // if no kerning table, don't waste time looking up both codepoint->glyphs + return 0; + return stbtt_GetGlyphKernAdvance(info, stbtt_FindGlyphIndex(info,ch1), stbtt_FindGlyphIndex(info,ch2)); +} + +STBTT_DEF void stbtt_GetCodepointHMetrics(const stbtt_fontinfo *info, int codepoint, int *advanceWidth, int *leftSideBearing) +{ + stbtt_GetGlyphHMetrics(info, stbtt_FindGlyphIndex(info,codepoint), advanceWidth, leftSideBearing); +} + +STBTT_DEF void stbtt_GetFontVMetrics(const stbtt_fontinfo *info, int *ascent, int *descent, int *lineGap) +{ + if (ascent ) *ascent = ttSHORT(info->data+info->hhea + 4); + if (descent) *descent = ttSHORT(info->data+info->hhea + 6); + if (lineGap) *lineGap = ttSHORT(info->data+info->hhea + 8); +} + +STBTT_DEF void stbtt_GetFontBoundingBox(const stbtt_fontinfo *info, int *x0, int *y0, int *x1, int *y1) +{ + *x0 = ttSHORT(info->data + info->head + 36); + *y0 = ttSHORT(info->data + info->head + 38); + *x1 = ttSHORT(info->data + info->head + 40); + *y1 = ttSHORT(info->data + info->head + 42); +} + +STBTT_DEF float stbtt_ScaleForPixelHeight(const stbtt_fontinfo *info, float height) +{ + int fheight = ttSHORT(info->data + info->hhea + 4) - ttSHORT(info->data + info->hhea + 6); + return (float) height / fheight; +} + +STBTT_DEF float stbtt_ScaleForMappingEmToPixels(const stbtt_fontinfo *info, float pixels) +{ + int unitsPerEm = ttUSHORT(info->data + info->head + 18); + return pixels / unitsPerEm; +} + +STBTT_DEF void stbtt_FreeShape(const stbtt_fontinfo *info, stbtt_vertex *v) +{ + STBTT_free(v, info->userdata); +} + +////////////////////////////////////////////////////////////////////////////// +// +// antialiasing software rasterizer +// + +STBTT_DEF void stbtt_GetGlyphBitmapBoxSubpixel(const stbtt_fontinfo *font, int glyph, float scale_x, float scale_y,float shift_x, float shift_y, int *ix0, int *iy0, int *ix1, int *iy1) +{ + int x0=0,y0=0,x1,y1; // =0 suppresses compiler warning + if (!stbtt_GetGlyphBox(font, glyph, &x0,&y0,&x1,&y1)) { + // e.g. space character + if (ix0) *ix0 = 0; + if (iy0) *iy0 = 0; + if (ix1) *ix1 = 0; + if (iy1) *iy1 = 0; + } else { + // move to integral bboxes (treating pixels as little squares, what pixels get touched)? + if (ix0) *ix0 = STBTT_ifloor( x0 * scale_x + shift_x); + if (iy0) *iy0 = STBTT_ifloor(-y1 * scale_y + shift_y); + if (ix1) *ix1 = STBTT_iceil ( x1 * scale_x + shift_x); + if (iy1) *iy1 = STBTT_iceil (-y0 * scale_y + shift_y); + } +} + +STBTT_DEF void stbtt_GetGlyphBitmapBox(const stbtt_fontinfo *font, int glyph, float scale_x, float scale_y, int *ix0, int *iy0, int *ix1, int *iy1) +{ + stbtt_GetGlyphBitmapBoxSubpixel(font, glyph, scale_x, scale_y,0.0f,0.0f, ix0, iy0, ix1, iy1); +} + +STBTT_DEF void stbtt_GetCodepointBitmapBoxSubpixel(const stbtt_fontinfo *font, int codepoint, float scale_x, float scale_y, float shift_x, float shift_y, int *ix0, int *iy0, int *ix1, int *iy1) +{ + stbtt_GetGlyphBitmapBoxSubpixel(font, stbtt_FindGlyphIndex(font,codepoint), scale_x, scale_y,shift_x,shift_y, ix0,iy0,ix1,iy1); +} + +STBTT_DEF void stbtt_GetCodepointBitmapBox(const stbtt_fontinfo *font, int codepoint, float scale_x, float scale_y, int *ix0, int *iy0, int *ix1, int *iy1) +{ + stbtt_GetCodepointBitmapBoxSubpixel(font, codepoint, scale_x, scale_y,0.0f,0.0f, ix0,iy0,ix1,iy1); +} + +////////////////////////////////////////////////////////////////////////////// +// +// Rasterizer + +typedef struct stbtt__hheap_chunk +{ + struct stbtt__hheap_chunk *next; +} stbtt__hheap_chunk; + +typedef struct stbtt__hheap +{ + struct stbtt__hheap_chunk *head; + void *first_free; + int num_remaining_in_head_chunk; +} stbtt__hheap; + +static void *stbtt__hheap_alloc(stbtt__hheap *hh, size_t size, void *userdata) +{ + if (hh->first_free) { + void *p = hh->first_free; + hh->first_free = * (void **) p; + return p; + } else { + if (hh->num_remaining_in_head_chunk == 0) { + int count = (size < 32 ? 2000 : size < 128 ? 800 : 100); + stbtt__hheap_chunk *c = (stbtt__hheap_chunk *) STBTT_malloc(sizeof(stbtt__hheap_chunk) + size * count, userdata); + if (c == NULL) + return NULL; + c->next = hh->head; + hh->head = c; + hh->num_remaining_in_head_chunk = count; + } + --hh->num_remaining_in_head_chunk; + return (char *) (hh->head) + size * hh->num_remaining_in_head_chunk; + } +} + +static void stbtt__hheap_free(stbtt__hheap *hh, void *p) +{ + *(void **) p = hh->first_free; + hh->first_free = p; +} + +static void stbtt__hheap_cleanup(stbtt__hheap *hh, void *userdata) +{ + stbtt__hheap_chunk *c = hh->head; + while (c) { + stbtt__hheap_chunk *n = c->next; + STBTT_free(c, userdata); + c = n; + } +} + +typedef struct stbtt__edge { + float x0,y0, x1,y1; + int invert; +} stbtt__edge; + + +typedef struct stbtt__active_edge +{ + struct stbtt__active_edge *next; + #if STBTT_RASTERIZER_VERSION==1 + int x,dx; + float ey; + int direction; + #elif STBTT_RASTERIZER_VERSION==2 + float fx,fdx,fdy; + float direction; + float sy; + float ey; + #else + #error "Unrecognized value of STBTT_RASTERIZER_VERSION" + #endif +} stbtt__active_edge; + +#if STBTT_RASTERIZER_VERSION == 1 +#define STBTT_FIXSHIFT 10 +#define STBTT_FIX (1 << STBTT_FIXSHIFT) +#define STBTT_FIXMASK (STBTT_FIX-1) + +static stbtt__active_edge *stbtt__new_active(stbtt__hheap *hh, stbtt__edge *e, int off_x, float start_point, void *userdata) +{ + stbtt__active_edge *z = (stbtt__active_edge *) stbtt__hheap_alloc(hh, sizeof(*z), userdata); + float dxdy = (e->x1 - e->x0) / (e->y1 - e->y0); + STBTT_assert(z != NULL); + if (!z) return z; + + // round dx down to avoid overshooting + if (dxdy < 0) + z->dx = -STBTT_ifloor(STBTT_FIX * -dxdy); + else + z->dx = STBTT_ifloor(STBTT_FIX * dxdy); + + z->x = STBTT_ifloor(STBTT_FIX * e->x0 + z->dx * (start_point - e->y0)); // use z->dx so when we offset later it's by the same amount + z->x -= off_x * STBTT_FIX; + + z->ey = e->y1; + z->next = 0; + z->direction = e->invert ? 1 : -1; + return z; +} +#elif STBTT_RASTERIZER_VERSION == 2 +static stbtt__active_edge *stbtt__new_active(stbtt__hheap *hh, stbtt__edge *e, int off_x, float start_point, void *userdata) +{ + stbtt__active_edge *z = (stbtt__active_edge *) stbtt__hheap_alloc(hh, sizeof(*z), userdata); + float dxdy = (e->x1 - e->x0) / (e->y1 - e->y0); + STBTT_assert(z != NULL); + //STBTT_assert(e->y0 <= start_point); + if (!z) return z; + z->fdx = dxdy; + z->fdy = dxdy != 0.0f ? (1.0f/dxdy) : 0.0f; + z->fx = e->x0 + dxdy * (start_point - e->y0); + z->fx -= off_x; + z->direction = e->invert ? 1.0f : -1.0f; + z->sy = e->y0; + z->ey = e->y1; + z->next = 0; + return z; +} +#else +#error "Unrecognized value of STBTT_RASTERIZER_VERSION" +#endif + +#if STBTT_RASTERIZER_VERSION == 1 +// note: this routine clips fills that extend off the edges... ideally this +// wouldn't happen, but it could happen if the truetype glyph bounding boxes +// are wrong, or if the user supplies a too-small bitmap +static void stbtt__fill_active_edges(unsigned char *scanline, int len, stbtt__active_edge *e, int max_weight) +{ + // non-zero winding fill + int x0=0, w=0; + + while (e) { + if (w == 0) { + // if we're currently at zero, we need to record the edge start point + x0 = e->x; w += e->direction; + } else { + int x1 = e->x; w += e->direction; + // if we went to zero, we need to draw + if (w == 0) { + int i = x0 >> STBTT_FIXSHIFT; + int j = x1 >> STBTT_FIXSHIFT; + + if (i < len && j >= 0) { + if (i == j) { + // x0,x1 are the same pixel, so compute combined coverage + scanline[i] = scanline[i] + (stbtt_uint8) ((x1 - x0) * max_weight >> STBTT_FIXSHIFT); + } else { + if (i >= 0) // add antialiasing for x0 + scanline[i] = scanline[i] + (stbtt_uint8) (((STBTT_FIX - (x0 & STBTT_FIXMASK)) * max_weight) >> STBTT_FIXSHIFT); + else + i = -1; // clip + + if (j < len) // add antialiasing for x1 + scanline[j] = scanline[j] + (stbtt_uint8) (((x1 & STBTT_FIXMASK) * max_weight) >> STBTT_FIXSHIFT); + else + j = len; // clip + + for (++i; i < j; ++i) // fill pixels between x0 and x1 + scanline[i] = scanline[i] + (stbtt_uint8) max_weight; + } + } + } + } + + e = e->next; + } +} + +static void stbtt__rasterize_sorted_edges(stbtt__bitmap *result, stbtt__edge *e, int n, int vsubsample, int off_x, int off_y, void *userdata) +{ + stbtt__hheap hh = { 0, 0, 0 }; + stbtt__active_edge *active = NULL; + int y,j=0; + int max_weight = (255 / vsubsample); // weight per vertical scanline + int s; // vertical subsample index + unsigned char scanline_data[512], *scanline; + + if (result->w > 512) + scanline = (unsigned char *) STBTT_malloc(result->w, userdata); + else + scanline = scanline_data; + + y = off_y * vsubsample; + e[n].y0 = (off_y + result->h) * (float) vsubsample + 1; + + while (j < result->h) { + STBTT_memset(scanline, 0, result->w); + for (s=0; s < vsubsample; ++s) { + // find center of pixel for this scanline + float scan_y = y + 0.5f; + stbtt__active_edge **step = &active; + + // update all active edges; + // remove all active edges that terminate before the center of this scanline + while (*step) { + stbtt__active_edge * z = *step; + if (z->ey <= scan_y) { + *step = z->next; // delete from list + STBTT_assert(z->direction); + z->direction = 0; + stbtt__hheap_free(&hh, z); + } else { + z->x += z->dx; // advance to position for current scanline + step = &((*step)->next); // advance through list + } + } + + // resort the list if needed + for(;;) { + int changed=0; + step = &active; + while (*step && (*step)->next) { + if ((*step)->x > (*step)->next->x) { + stbtt__active_edge *t = *step; + stbtt__active_edge *q = t->next; + + t->next = q->next; + q->next = t; + *step = q; + changed = 1; + } + step = &(*step)->next; + } + if (!changed) break; + } + + // insert all edges that start before the center of this scanline -- omit ones that also end on this scanline + while (e->y0 <= scan_y) { + if (e->y1 > scan_y) { + stbtt__active_edge *z = stbtt__new_active(&hh, e, off_x, scan_y, userdata); + if (z != NULL) { + // find insertion point + if (active == NULL) + active = z; + else if (z->x < active->x) { + // insert at front + z->next = active; + active = z; + } else { + // find thing to insert AFTER + stbtt__active_edge *p = active; + while (p->next && p->next->x < z->x) + p = p->next; + // at this point, p->next->x is NOT < z->x + z->next = p->next; + p->next = z; + } + } + } + ++e; + } + + // now process all active edges in XOR fashion + if (active) + stbtt__fill_active_edges(scanline, result->w, active, max_weight); + + ++y; + } + STBTT_memcpy(result->pixels + j * result->stride, scanline, result->w); + ++j; + } + + stbtt__hheap_cleanup(&hh, userdata); + + if (scanline != scanline_data) + STBTT_free(scanline, userdata); +} + +#elif STBTT_RASTERIZER_VERSION == 2 + +// the edge passed in here does not cross the vertical line at x or the vertical line at x+1 +// (i.e. it has already been clipped to those) +static void stbtt__handle_clipped_edge(float *scanline, int x, stbtt__active_edge *e, float x0, float y0, float x1, float y1) +{ + if (y0 == y1) return; + STBTT_assert(y0 < y1); + STBTT_assert(e->sy <= e->ey); + if (y0 > e->ey) return; + if (y1 < e->sy) return; + if (y0 < e->sy) { + x0 += (x1-x0) * (e->sy - y0) / (y1-y0); + y0 = e->sy; + } + if (y1 > e->ey) { + x1 += (x1-x0) * (e->ey - y1) / (y1-y0); + y1 = e->ey; + } + + if (x0 == x) + STBTT_assert(x1 <= x+1); + else if (x0 == x+1) + STBTT_assert(x1 >= x); + else if (x0 <= x) + STBTT_assert(x1 <= x); + else if (x0 >= x+1) + STBTT_assert(x1 >= x+1); + else + STBTT_assert(x1 >= x && x1 <= x+1); + + if (x0 <= x && x1 <= x) + scanline[x] += e->direction * (y1-y0); + else if (x0 >= x+1 && x1 >= x+1) + ; + else { + STBTT_assert(x0 >= x && x0 <= x+1 && x1 >= x && x1 <= x+1); + scanline[x] += e->direction * (y1-y0) * (1-((x0-x)+(x1-x))/2); // coverage = 1 - average x position + } +} + +static void stbtt__fill_active_edges_new(float *scanline, float *scanline_fill, int len, stbtt__active_edge *e, float y_top) +{ + float y_bottom = y_top+1; + + while (e) { + // brute force every pixel + + // compute intersection points with top & bottom + STBTT_assert(e->ey >= y_top); + + if (e->fdx == 0) { + float x0 = e->fx; + if (x0 < len) { + if (x0 >= 0) { + stbtt__handle_clipped_edge(scanline,(int) x0,e, x0,y_top, x0,y_bottom); + stbtt__handle_clipped_edge(scanline_fill-1,(int) x0+1,e, x0,y_top, x0,y_bottom); + } else { + stbtt__handle_clipped_edge(scanline_fill-1,0,e, x0,y_top, x0,y_bottom); + } + } + } else { + float x0 = e->fx; + float dx = e->fdx; + float xb = x0 + dx; + float x_top, x_bottom; + float sy0,sy1; + float dy = e->fdy; + STBTT_assert(e->sy <= y_bottom && e->ey >= y_top); + + // compute endpoints of line segment clipped to this scanline (if the + // line segment starts on this scanline. x0 is the intersection of the + // line with y_top, but that may be off the line segment. + if (e->sy > y_top) { + x_top = x0 + dx * (e->sy - y_top); + sy0 = e->sy; + } else { + x_top = x0; + sy0 = y_top; + } + if (e->ey < y_bottom) { + x_bottom = x0 + dx * (e->ey - y_top); + sy1 = e->ey; + } else { + x_bottom = xb; + sy1 = y_bottom; + } + + if (x_top >= 0 && x_bottom >= 0 && x_top < len && x_bottom < len) { + // from here on, we don't have to range check x values + + if ((int) x_top == (int) x_bottom) { + float height; + // simple case, only spans one pixel + int x = (int) x_top; + height = sy1 - sy0; + STBTT_assert(x >= 0 && x < len); + scanline[x] += e->direction * (1-((x_top - x) + (x_bottom-x))/2) * height; + scanline_fill[x] += e->direction * height; // everything right of this pixel is filled + } else { + int x,x1,x2; + float y_crossing, step, sign, area; + // covers 2+ pixels + if (x_top > x_bottom) { + // flip scanline vertically; signed area is the same + float t; + sy0 = y_bottom - (sy0 - y_top); + sy1 = y_bottom - (sy1 - y_top); + t = sy0, sy0 = sy1, sy1 = t; + t = x_bottom, x_bottom = x_top, x_top = t; + dx = -dx; + dy = -dy; + t = x0, x0 = xb, xb = t; + } + + x1 = (int) x_top; + x2 = (int) x_bottom; + // compute intersection with y axis at x1+1 + y_crossing = (x1+1 - x0) * dy + y_top; + + sign = e->direction; + // area of the rectangle covered from y0..y_crossing + area = sign * (y_crossing-sy0); + // area of the triangle (x_top,y0), (x+1,y0), (x+1,y_crossing) + scanline[x1] += area * (1-((x_top - x1)+(x1+1-x1))/2); + + step = sign * dy; + for (x = x1+1; x < x2; ++x) { + scanline[x] += area + step/2; + area += step; + } + y_crossing += dy * (x2 - (x1+1)); + + STBTT_assert(STBTT_fabs(area) <= 1.01f); + + scanline[x2] += area + sign * (1-((x2-x2)+(x_bottom-x2))/2) * (sy1-y_crossing); + + scanline_fill[x2] += sign * (sy1-sy0); + } + } else { + // if edge goes outside of box we're drawing, we require + // clipping logic. since this does not match the intended use + // of this library, we use a different, very slow brute + // force implementation + int x; + for (x=0; x < len; ++x) { + // cases: + // + // there can be up to two intersections with the pixel. any intersection + // with left or right edges can be handled by splitting into two (or three) + // regions. intersections with top & bottom do not necessitate case-wise logic. + // + // the old way of doing this found the intersections with the left & right edges, + // then used some simple logic to produce up to three segments in sorted order + // from top-to-bottom. however, this had a problem: if an x edge was epsilon + // across the x border, then the corresponding y position might not be distinct + // from the other y segment, and it might ignored as an empty segment. to avoid + // that, we need to explicitly produce segments based on x positions. + + // rename variables to clear pairs + float y0 = y_top; + float x1 = (float) (x); + float x2 = (float) (x+1); + float x3 = xb; + float y3 = y_bottom; + float y1,y2; + + // x = e->x + e->dx * (y-y_top) + // (y-y_top) = (x - e->x) / e->dx + // y = (x - e->x) / e->dx + y_top + y1 = (x - x0) / dx + y_top; + y2 = (x+1 - x0) / dx + y_top; + + if (x0 < x1 && x3 > x2) { // three segments descending down-right + stbtt__handle_clipped_edge(scanline,x,e, x0,y0, x1,y1); + stbtt__handle_clipped_edge(scanline,x,e, x1,y1, x2,y2); + stbtt__handle_clipped_edge(scanline,x,e, x2,y2, x3,y3); + } else if (x3 < x1 && x0 > x2) { // three segments descending down-left + stbtt__handle_clipped_edge(scanline,x,e, x0,y0, x2,y2); + stbtt__handle_clipped_edge(scanline,x,e, x2,y2, x1,y1); + stbtt__handle_clipped_edge(scanline,x,e, x1,y1, x3,y3); + } else if (x0 < x1 && x3 > x1) { // two segments across x, down-right + stbtt__handle_clipped_edge(scanline,x,e, x0,y0, x1,y1); + stbtt__handle_clipped_edge(scanline,x,e, x1,y1, x3,y3); + } else if (x3 < x1 && x0 > x1) { // two segments across x, down-left + stbtt__handle_clipped_edge(scanline,x,e, x0,y0, x1,y1); + stbtt__handle_clipped_edge(scanline,x,e, x1,y1, x3,y3); + } else if (x0 < x2 && x3 > x2) { // two segments across x+1, down-right + stbtt__handle_clipped_edge(scanline,x,e, x0,y0, x2,y2); + stbtt__handle_clipped_edge(scanline,x,e, x2,y2, x3,y3); + } else if (x3 < x2 && x0 > x2) { // two segments across x+1, down-left + stbtt__handle_clipped_edge(scanline,x,e, x0,y0, x2,y2); + stbtt__handle_clipped_edge(scanline,x,e, x2,y2, x3,y3); + } else { // one segment + stbtt__handle_clipped_edge(scanline,x,e, x0,y0, x3,y3); + } + } + } + } + e = e->next; + } +} + +// directly AA rasterize edges w/o supersampling +static void stbtt__rasterize_sorted_edges(stbtt__bitmap *result, stbtt__edge *e, int n, int vsubsample, int off_x, int off_y, void *userdata) +{ + stbtt__hheap hh = { 0, 0, 0 }; + stbtt__active_edge *active = NULL; + int y,j=0, i; + float scanline_data[129], *scanline, *scanline2; + + STBTT__NOTUSED(vsubsample); + + if (result->w > 64) + scanline = (float *) STBTT_malloc((result->w*2+1) * sizeof(float), userdata); + else + scanline = scanline_data; + + scanline2 = scanline + result->w; + + y = off_y; + e[n].y0 = (float) (off_y + result->h) + 1; + + while (j < result->h) { + // find center of pixel for this scanline + float scan_y_top = y + 0.0f; + float scan_y_bottom = y + 1.0f; + stbtt__active_edge **step = &active; + + STBTT_memset(scanline , 0, result->w*sizeof(scanline[0])); + STBTT_memset(scanline2, 0, (result->w+1)*sizeof(scanline[0])); + + // update all active edges; + // remove all active edges that terminate before the top of this scanline + while (*step) { + stbtt__active_edge * z = *step; + if (z->ey <= scan_y_top) { + *step = z->next; // delete from list + STBTT_assert(z->direction); + z->direction = 0; + stbtt__hheap_free(&hh, z); + } else { + step = &((*step)->next); // advance through list + } + } + + // insert all edges that start before the bottom of this scanline + while (e->y0 <= scan_y_bottom) { + if (e->y0 != e->y1) { + stbtt__active_edge *z = stbtt__new_active(&hh, e, off_x, scan_y_top, userdata); + if (z != NULL) { + STBTT_assert(z->ey >= scan_y_top); + // insert at front + z->next = active; + active = z; + } + } + ++e; + } + + // now process all active edges + if (active) + stbtt__fill_active_edges_new(scanline, scanline2+1, result->w, active, scan_y_top); + + { + float sum = 0; + for (i=0; i < result->w; ++i) { + float k; + int m; + sum += scanline2[i]; + k = scanline[i] + sum; + k = (float) STBTT_fabs(k)*255 + 0.5f; + m = (int) k; + if (m > 255) m = 255; + result->pixels[j*result->stride + i] = (unsigned char) m; + } + } + // advance all the edges + step = &active; + while (*step) { + stbtt__active_edge *z = *step; + z->fx += z->fdx; // advance to position for current scanline + step = &((*step)->next); // advance through list + } + + ++y; + ++j; + } + + stbtt__hheap_cleanup(&hh, userdata); + + if (scanline != scanline_data) + STBTT_free(scanline, userdata); +} +#else +#error "Unrecognized value of STBTT_RASTERIZER_VERSION" +#endif + +#define STBTT__COMPARE(a,b) ((a)->y0 < (b)->y0) + +static void stbtt__sort_edges_ins_sort(stbtt__edge *p, int n) +{ + int i,j; + for (i=1; i < n; ++i) { + stbtt__edge t = p[i], *a = &t; + j = i; + while (j > 0) { + stbtt__edge *b = &p[j-1]; + int c = STBTT__COMPARE(a,b); + if (!c) break; + p[j] = p[j-1]; + --j; + } + if (i != j) + p[j] = t; + } +} + +static void stbtt__sort_edges_quicksort(stbtt__edge *p, int n) +{ + /* threshhold for transitioning to insertion sort */ + while (n > 12) { + stbtt__edge t; + int c01,c12,c,m,i,j; + + /* compute median of three */ + m = n >> 1; + c01 = STBTT__COMPARE(&p[0],&p[m]); + c12 = STBTT__COMPARE(&p[m],&p[n-1]); + /* if 0 >= mid >= end, or 0 < mid < end, then use mid */ + if (c01 != c12) { + /* otherwise, we'll need to swap something else to middle */ + int z; + c = STBTT__COMPARE(&p[0],&p[n-1]); + /* 0>mid && midn => n; 0 0 */ + /* 0n: 0>n => 0; 0 n */ + z = (c == c12) ? 0 : n-1; + t = p[z]; + p[z] = p[m]; + p[m] = t; + } + /* now p[m] is the median-of-three */ + /* swap it to the beginning so it won't move around */ + t = p[0]; + p[0] = p[m]; + p[m] = t; + + /* partition loop */ + i=1; + j=n-1; + for(;;) { + /* handling of equality is crucial here */ + /* for sentinels & efficiency with duplicates */ + for (;;++i) { + if (!STBTT__COMPARE(&p[i], &p[0])) break; + } + for (;;--j) { + if (!STBTT__COMPARE(&p[0], &p[j])) break; + } + /* make sure we haven't crossed */ + if (i >= j) break; + t = p[i]; + p[i] = p[j]; + p[j] = t; + + ++i; + --j; + } + /* recurse on smaller side, iterate on larger */ + if (j < (n-i)) { + stbtt__sort_edges_quicksort(p,j); + p = p+i; + n = n-i; + } else { + stbtt__sort_edges_quicksort(p+i, n-i); + n = j; + } + } +} + +static void stbtt__sort_edges(stbtt__edge *p, int n) +{ + stbtt__sort_edges_quicksort(p, n); + stbtt__sort_edges_ins_sort(p, n); +} + +typedef struct +{ + float x,y; +} stbtt__point; + +static void stbtt__rasterize(stbtt__bitmap *result, stbtt__point *pts, int *wcount, int windings, float scale_x, float scale_y, float shift_x, float shift_y, int off_x, int off_y, int invert, void *userdata) +{ + float y_scale_inv = invert ? -scale_y : scale_y; + stbtt__edge *e; + int n,i,j,k,m; +#if STBTT_RASTERIZER_VERSION == 1 + int vsubsample = result->h < 8 ? 15 : 5; +#elif STBTT_RASTERIZER_VERSION == 2 + int vsubsample = 1; +#else + #error "Unrecognized value of STBTT_RASTERIZER_VERSION" +#endif + // vsubsample should divide 255 evenly; otherwise we won't reach full opacity + + // now we have to blow out the windings into explicit edge lists + n = 0; + for (i=0; i < windings; ++i) + n += wcount[i]; + + e = (stbtt__edge *) STBTT_malloc(sizeof(*e) * (n+1), userdata); // add an extra one as a sentinel + if (e == 0) return; + n = 0; + + m=0; + for (i=0; i < windings; ++i) { + stbtt__point *p = pts + m; + m += wcount[i]; + j = wcount[i]-1; + for (k=0; k < wcount[i]; j=k++) { + int a=k,b=j; + // skip the edge if horizontal + if (p[j].y == p[k].y) + continue; + // add edge from j to k to the list + e[n].invert = 0; + if (invert ? p[j].y > p[k].y : p[j].y < p[k].y) { + e[n].invert = 1; + a=j,b=k; + } + e[n].x0 = p[a].x * scale_x + shift_x; + e[n].y0 = (p[a].y * y_scale_inv + shift_y) * vsubsample; + e[n].x1 = p[b].x * scale_x + shift_x; + e[n].y1 = (p[b].y * y_scale_inv + shift_y) * vsubsample; + ++n; + } + } + + // now sort the edges by their highest point (should snap to integer, and then by x) + //STBTT_sort(e, n, sizeof(e[0]), stbtt__edge_compare); + stbtt__sort_edges(e, n); + + // now, traverse the scanlines and find the intersections on each scanline, use xor winding rule + stbtt__rasterize_sorted_edges(result, e, n, vsubsample, off_x, off_y, userdata); + + STBTT_free(e, userdata); +} + +static void stbtt__add_point(stbtt__point *points, int n, float x, float y) +{ + if (!points) return; // during first pass, it's unallocated + points[n].x = x; + points[n].y = y; +} + +// tesselate until threshhold p is happy... @TODO warped to compensate for non-linear stretching +static int stbtt__tesselate_curve(stbtt__point *points, int *num_points, float x0, float y0, float x1, float y1, float x2, float y2, float objspace_flatness_squared, int n) +{ + // midpoint + float mx = (x0 + 2*x1 + x2)/4; + float my = (y0 + 2*y1 + y2)/4; + // versus directly drawn line + float dx = (x0+x2)/2 - mx; + float dy = (y0+y2)/2 - my; + if (n > 16) // 65536 segments on one curve better be enough! + return 1; + if (dx*dx+dy*dy > objspace_flatness_squared) { // half-pixel error allowed... need to be smaller if AA + stbtt__tesselate_curve(points, num_points, x0,y0, (x0+x1)/2.0f,(y0+y1)/2.0f, mx,my, objspace_flatness_squared,n+1); + stbtt__tesselate_curve(points, num_points, mx,my, (x1+x2)/2.0f,(y1+y2)/2.0f, x2,y2, objspace_flatness_squared,n+1); + } else { + stbtt__add_point(points, *num_points,x2,y2); + *num_points = *num_points+1; + } + return 1; +} + +// returns number of contours +static stbtt__point *stbtt_FlattenCurves(stbtt_vertex *vertices, int num_verts, float objspace_flatness, int **contour_lengths, int *num_contours, void *userdata) +{ + stbtt__point *points=0; + int num_points=0; + + float objspace_flatness_squared = objspace_flatness * objspace_flatness; + int i,n=0,start=0, pass; + + // count how many "moves" there are to get the contour count + for (i=0; i < num_verts; ++i) + if (vertices[i].type == STBTT_vmove) + ++n; + + *num_contours = n; + if (n == 0) return 0; + + *contour_lengths = (int *) STBTT_malloc(sizeof(**contour_lengths) * n, userdata); + + if (*contour_lengths == 0) { + *num_contours = 0; + return 0; + } + + // make two passes through the points so we don't need to realloc + for (pass=0; pass < 2; ++pass) { + float x=0,y=0; + if (pass == 1) { + points = (stbtt__point *) STBTT_malloc(num_points * sizeof(points[0]), userdata); + if (points == NULL) goto error; + } + num_points = 0; + n= -1; + for (i=0; i < num_verts; ++i) { + switch (vertices[i].type) { + case STBTT_vmove: + // start the next contour + if (n >= 0) + (*contour_lengths)[n] = num_points - start; + ++n; + start = num_points; + + x = vertices[i].x, y = vertices[i].y; + stbtt__add_point(points, num_points++, x,y); + break; + case STBTT_vline: + x = vertices[i].x, y = vertices[i].y; + stbtt__add_point(points, num_points++, x, y); + break; + case STBTT_vcurve: + stbtt__tesselate_curve(points, &num_points, x,y, + vertices[i].cx, vertices[i].cy, + vertices[i].x, vertices[i].y, + objspace_flatness_squared, 0); + x = vertices[i].x, y = vertices[i].y; + break; + } + } + (*contour_lengths)[n] = num_points - start; + } + + return points; +error: + STBTT_free(points, userdata); + STBTT_free(*contour_lengths, userdata); + *contour_lengths = 0; + *num_contours = 0; + return NULL; +} + +STBTT_DEF void stbtt_Rasterize(stbtt__bitmap *result, float flatness_in_pixels, stbtt_vertex *vertices, int num_verts, float scale_x, float scale_y, float shift_x, float shift_y, int x_off, int y_off, int invert, void *userdata) +{ + float scale = scale_x > scale_y ? scale_y : scale_x; + int winding_count, *winding_lengths; + stbtt__point *windings = stbtt_FlattenCurves(vertices, num_verts, flatness_in_pixels / scale, &winding_lengths, &winding_count, userdata); + if (windings) { + stbtt__rasterize(result, windings, winding_lengths, winding_count, scale_x, scale_y, shift_x, shift_y, x_off, y_off, invert, userdata); + STBTT_free(winding_lengths, userdata); + STBTT_free(windings, userdata); + } +} + +STBTT_DEF void stbtt_FreeBitmap(unsigned char *bitmap, void *userdata) +{ + STBTT_free(bitmap, userdata); +} + +STBTT_DEF unsigned char *stbtt_GetGlyphBitmapSubpixel(const stbtt_fontinfo *info, float scale_x, float scale_y, float shift_x, float shift_y, int glyph, int *width, int *height, int *xoff, int *yoff) +{ + int ix0,iy0,ix1,iy1; + stbtt__bitmap gbm; + stbtt_vertex *vertices; + int num_verts = stbtt_GetGlyphShape(info, glyph, &vertices); + + if (scale_x == 0) scale_x = scale_y; + if (scale_y == 0) { + if (scale_x == 0) { + STBTT_free(vertices, info->userdata); + return NULL; + } + scale_y = scale_x; + } + + stbtt_GetGlyphBitmapBoxSubpixel(info, glyph, scale_x, scale_y, shift_x, shift_y, &ix0,&iy0,&ix1,&iy1); + + // now we get the size + gbm.w = (ix1 - ix0); + gbm.h = (iy1 - iy0); + gbm.pixels = NULL; // in case we error + + if (width ) *width = gbm.w; + if (height) *height = gbm.h; + if (xoff ) *xoff = ix0; + if (yoff ) *yoff = iy0; + + if (gbm.w && gbm.h) { + gbm.pixels = (unsigned char *) STBTT_malloc(gbm.w * gbm.h, info->userdata); + if (gbm.pixels) { + gbm.stride = gbm.w; + + stbtt_Rasterize(&gbm, 0.35f, vertices, num_verts, scale_x, scale_y, shift_x, shift_y, ix0, iy0, 1, info->userdata); + } + } + STBTT_free(vertices, info->userdata); + return gbm.pixels; +} + +STBTT_DEF unsigned char *stbtt_GetGlyphBitmap(const stbtt_fontinfo *info, float scale_x, float scale_y, int glyph, int *width, int *height, int *xoff, int *yoff) +{ + return stbtt_GetGlyphBitmapSubpixel(info, scale_x, scale_y, 0.0f, 0.0f, glyph, width, height, xoff, yoff); +} + +STBTT_DEF void stbtt_MakeGlyphBitmapSubpixel(const stbtt_fontinfo *info, unsigned char *output, int out_w, int out_h, int out_stride, float scale_x, float scale_y, float shift_x, float shift_y, int glyph) +{ + int ix0,iy0; + stbtt_vertex *vertices; + int num_verts = stbtt_GetGlyphShape(info, glyph, &vertices); + stbtt__bitmap gbm; + + stbtt_GetGlyphBitmapBoxSubpixel(info, glyph, scale_x, scale_y, shift_x, shift_y, &ix0,&iy0,0,0); + gbm.pixels = output; + gbm.w = out_w; + gbm.h = out_h; + gbm.stride = out_stride; + + if (gbm.w && gbm.h) + stbtt_Rasterize(&gbm, 0.35f, vertices, num_verts, scale_x, scale_y, shift_x, shift_y, ix0,iy0, 1, info->userdata); + + STBTT_free(vertices, info->userdata); +} + +STBTT_DEF void stbtt_MakeGlyphBitmap(const stbtt_fontinfo *info, unsigned char *output, int out_w, int out_h, int out_stride, float scale_x, float scale_y, int glyph) +{ + stbtt_MakeGlyphBitmapSubpixel(info, output, out_w, out_h, out_stride, scale_x, scale_y, 0.0f,0.0f, glyph); +} + +STBTT_DEF unsigned char *stbtt_GetCodepointBitmapSubpixel(const stbtt_fontinfo *info, float scale_x, float scale_y, float shift_x, float shift_y, int codepoint, int *width, int *height, int *xoff, int *yoff) +{ + return stbtt_GetGlyphBitmapSubpixel(info, scale_x, scale_y,shift_x,shift_y, stbtt_FindGlyphIndex(info,codepoint), width,height,xoff,yoff); +} + +STBTT_DEF void stbtt_MakeCodepointBitmapSubpixel(const stbtt_fontinfo *info, unsigned char *output, int out_w, int out_h, int out_stride, float scale_x, float scale_y, float shift_x, float shift_y, int codepoint) +{ + stbtt_MakeGlyphBitmapSubpixel(info, output, out_w, out_h, out_stride, scale_x, scale_y, shift_x, shift_y, stbtt_FindGlyphIndex(info,codepoint)); +} + +STBTT_DEF unsigned char *stbtt_GetCodepointBitmap(const stbtt_fontinfo *info, float scale_x, float scale_y, int codepoint, int *width, int *height, int *xoff, int *yoff) +{ + return stbtt_GetCodepointBitmapSubpixel(info, scale_x, scale_y, 0.0f,0.0f, codepoint, width,height,xoff,yoff); +} + +STBTT_DEF void stbtt_MakeCodepointBitmap(const stbtt_fontinfo *info, unsigned char *output, int out_w, int out_h, int out_stride, float scale_x, float scale_y, int codepoint) +{ + stbtt_MakeCodepointBitmapSubpixel(info, output, out_w, out_h, out_stride, scale_x, scale_y, 0.0f,0.0f, codepoint); +} + +////////////////////////////////////////////////////////////////////////////// +// +// bitmap baking +// +// This is SUPER-CRAPPY packing to keep source code small + +STBTT_DEF int stbtt_BakeFontBitmap(const unsigned char *data, int offset, // font location (use offset=0 for plain .ttf) + float pixel_height, // height of font in pixels + unsigned char *pixels, int pw, int ph, // bitmap to be filled in + int first_char, int num_chars, // characters to bake + stbtt_bakedchar *chardata) +{ + float scale; + int x,y,bottom_y, i; + stbtt_fontinfo f; + f.userdata = NULL; + if (!stbtt_InitFont(&f, data, offset)) + return -1; + STBTT_memset(pixels, 0, pw*ph); // background of 0 around pixels + x=y=1; + bottom_y = 1; + + scale = stbtt_ScaleForPixelHeight(&f, pixel_height); + + for (i=0; i < num_chars; ++i) { + int advance, lsb, x0,y0,x1,y1,gw,gh; + int g = stbtt_FindGlyphIndex(&f, first_char + i); + stbtt_GetGlyphHMetrics(&f, g, &advance, &lsb); + stbtt_GetGlyphBitmapBox(&f, g, scale,scale, &x0,&y0,&x1,&y1); + gw = x1-x0; + gh = y1-y0; + if (x + gw + 1 >= pw) + y = bottom_y, x = 1; // advance to next row + if (y + gh + 1 >= ph) // check if it fits vertically AFTER potentially moving to next row + return -i; + STBTT_assert(x+gw < pw); + STBTT_assert(y+gh < ph); + stbtt_MakeGlyphBitmap(&f, pixels+x+y*pw, gw,gh,pw, scale,scale, g); + chardata[i].x0 = (stbtt_int16) x; + chardata[i].y0 = (stbtt_int16) y; + chardata[i].x1 = (stbtt_int16) (x + gw); + chardata[i].y1 = (stbtt_int16) (y + gh); + chardata[i].xadvance = scale * advance; + chardata[i].xoff = (float) x0; + chardata[i].yoff = (float) y0; + x = x + gw + 1; + if (y+gh+1 > bottom_y) + bottom_y = y+gh+1; + } + return bottom_y; +} + +STBTT_DEF void stbtt_GetBakedQuad(stbtt_bakedchar *chardata, int pw, int ph, int char_index, float *xpos, float *ypos, stbtt_aligned_quad *q, int opengl_fillrule) +{ + float d3d_bias = opengl_fillrule ? 0 : -0.5f; + float ipw = 1.0f / pw, iph = 1.0f / ph; + stbtt_bakedchar *b = chardata + char_index; + int round_x = STBTT_ifloor((*xpos + b->xoff) + 0.5f); + int round_y = STBTT_ifloor((*ypos + b->yoff) + 0.5f); + + q->x0 = round_x + d3d_bias; + q->y0 = round_y + d3d_bias; + q->x1 = round_x + b->x1 - b->x0 + d3d_bias; + q->y1 = round_y + b->y1 - b->y0 + d3d_bias; + + q->s0 = b->x0 * ipw; + q->t0 = b->y0 * iph; + q->s1 = b->x1 * ipw; + q->t1 = b->y1 * iph; + + *xpos += b->xadvance; +} + +////////////////////////////////////////////////////////////////////////////// +// +// rectangle packing replacement routines if you don't have stb_rect_pack.h +// + +#ifndef STB_RECT_PACK_VERSION + +typedef int stbrp_coord; + +//////////////////////////////////////////////////////////////////////////////////// +// // +// // +// COMPILER WARNING ?!?!? // +// // +// // +// if you get a compile warning due to these symbols being defined more than // +// once, move #include "stb_rect_pack.h" before #include "stb_truetype.h" // +// // +//////////////////////////////////////////////////////////////////////////////////// + +typedef struct +{ + int width,height; + int x,y,bottom_y; +} stbrp_context; + +typedef struct +{ + unsigned char x; +} stbrp_node; + +struct stbrp_rect +{ + stbrp_coord x,y; + int id,w,h,was_packed; +}; + +static void stbrp_init_target(stbrp_context *con, int pw, int ph, stbrp_node *nodes, int num_nodes) +{ + con->width = pw; + con->height = ph; + con->x = 0; + con->y = 0; + con->bottom_y = 0; + STBTT__NOTUSED(nodes); + STBTT__NOTUSED(num_nodes); +} + +static void stbrp_pack_rects(stbrp_context *con, stbrp_rect *rects, int num_rects) +{ + int i; + for (i=0; i < num_rects; ++i) { + if (con->x + rects[i].w > con->width) { + con->x = 0; + con->y = con->bottom_y; + } + if (con->y + rects[i].h > con->height) + break; + rects[i].x = con->x; + rects[i].y = con->y; + rects[i].was_packed = 1; + con->x += rects[i].w; + if (con->y + rects[i].h > con->bottom_y) + con->bottom_y = con->y + rects[i].h; + } + for ( ; i < num_rects; ++i) + rects[i].was_packed = 0; +} +#endif + +////////////////////////////////////////////////////////////////////////////// +// +// bitmap baking +// +// This is SUPER-AWESOME (tm Ryan Gordon) packing using stb_rect_pack.h. If +// stb_rect_pack.h isn't available, it uses the BakeFontBitmap strategy. + +STBTT_DEF int stbtt_PackBegin(stbtt_pack_context *spc, unsigned char *pixels, int pw, int ph, int stride_in_bytes, int padding, void *alloc_context) +{ + stbrp_context *context = (stbrp_context *) STBTT_malloc(sizeof(*context) ,alloc_context); + int num_nodes = pw - padding; + stbrp_node *nodes = (stbrp_node *) STBTT_malloc(sizeof(*nodes ) * num_nodes,alloc_context); + + if (context == NULL || nodes == NULL) { + if (context != NULL) STBTT_free(context, alloc_context); + if (nodes != NULL) STBTT_free(nodes , alloc_context); + return 0; + } + + spc->user_allocator_context = alloc_context; + spc->width = pw; + spc->height = ph; + spc->pixels = pixels; + spc->pack_info = context; + spc->nodes = nodes; + spc->padding = padding; + spc->stride_in_bytes = stride_in_bytes != 0 ? stride_in_bytes : pw; + spc->h_oversample = 1; + spc->v_oversample = 1; + + stbrp_init_target(context, pw-padding, ph-padding, nodes, num_nodes); + + if (pixels) + STBTT_memset(pixels, 0, pw*ph); // background of 0 around pixels + + return 1; +} + +STBTT_DEF void stbtt_PackEnd (stbtt_pack_context *spc) +{ + STBTT_free(spc->nodes , spc->user_allocator_context); + STBTT_free(spc->pack_info, spc->user_allocator_context); +} + +STBTT_DEF void stbtt_PackSetOversampling(stbtt_pack_context *spc, unsigned int h_oversample, unsigned int v_oversample) +{ + STBTT_assert(h_oversample <= STBTT_MAX_OVERSAMPLE); + STBTT_assert(v_oversample <= STBTT_MAX_OVERSAMPLE); + if (h_oversample <= STBTT_MAX_OVERSAMPLE) + spc->h_oversample = h_oversample; + if (v_oversample <= STBTT_MAX_OVERSAMPLE) + spc->v_oversample = v_oversample; +} + +#define STBTT__OVER_MASK (STBTT_MAX_OVERSAMPLE-1) + +static void stbtt__h_prefilter(unsigned char *pixels, int w, int h, int stride_in_bytes, unsigned int kernel_width) +{ + unsigned char buffer[STBTT_MAX_OVERSAMPLE]; + int safe_w = w - kernel_width; + int j; + STBTT_memset(buffer, 0, STBTT_MAX_OVERSAMPLE); // suppress bogus warning from VS2013 -analyze + for (j=0; j < h; ++j) { + int i; + unsigned int total; + STBTT_memset(buffer, 0, kernel_width); + + total = 0; + + // make kernel_width a constant in common cases so compiler can optimize out the divide + switch (kernel_width) { + case 2: + for (i=0; i <= safe_w; ++i) { + total += pixels[i] - buffer[i & STBTT__OVER_MASK]; + buffer[(i+kernel_width) & STBTT__OVER_MASK] = pixels[i]; + pixels[i] = (unsigned char) (total / 2); + } + break; + case 3: + for (i=0; i <= safe_w; ++i) { + total += pixels[i] - buffer[i & STBTT__OVER_MASK]; + buffer[(i+kernel_width) & STBTT__OVER_MASK] = pixels[i]; + pixels[i] = (unsigned char) (total / 3); + } + break; + case 4: + for (i=0; i <= safe_w; ++i) { + total += pixels[i] - buffer[i & STBTT__OVER_MASK]; + buffer[(i+kernel_width) & STBTT__OVER_MASK] = pixels[i]; + pixels[i] = (unsigned char) (total / 4); + } + break; + case 5: + for (i=0; i <= safe_w; ++i) { + total += pixels[i] - buffer[i & STBTT__OVER_MASK]; + buffer[(i+kernel_width) & STBTT__OVER_MASK] = pixels[i]; + pixels[i] = (unsigned char) (total / 5); + } + break; + default: + for (i=0; i <= safe_w; ++i) { + total += pixels[i] - buffer[i & STBTT__OVER_MASK]; + buffer[(i+kernel_width) & STBTT__OVER_MASK] = pixels[i]; + pixels[i] = (unsigned char) (total / kernel_width); + } + break; + } + + for (; i < w; ++i) { + STBTT_assert(pixels[i] == 0); + total -= buffer[i & STBTT__OVER_MASK]; + pixels[i] = (unsigned char) (total / kernel_width); + } + + pixels += stride_in_bytes; + } +} + +static void stbtt__v_prefilter(unsigned char *pixels, int w, int h, int stride_in_bytes, unsigned int kernel_width) +{ + unsigned char buffer[STBTT_MAX_OVERSAMPLE]; + int safe_h = h - kernel_width; + int j; + STBTT_memset(buffer, 0, STBTT_MAX_OVERSAMPLE); // suppress bogus warning from VS2013 -analyze + for (j=0; j < w; ++j) { + int i; + unsigned int total; + STBTT_memset(buffer, 0, kernel_width); + + total = 0; + + // make kernel_width a constant in common cases so compiler can optimize out the divide + switch (kernel_width) { + case 2: + for (i=0; i <= safe_h; ++i) { + total += pixels[i*stride_in_bytes] - buffer[i & STBTT__OVER_MASK]; + buffer[(i+kernel_width) & STBTT__OVER_MASK] = pixels[i*stride_in_bytes]; + pixels[i*stride_in_bytes] = (unsigned char) (total / 2); + } + break; + case 3: + for (i=0; i <= safe_h; ++i) { + total += pixels[i*stride_in_bytes] - buffer[i & STBTT__OVER_MASK]; + buffer[(i+kernel_width) & STBTT__OVER_MASK] = pixels[i*stride_in_bytes]; + pixels[i*stride_in_bytes] = (unsigned char) (total / 3); + } + break; + case 4: + for (i=0; i <= safe_h; ++i) { + total += pixels[i*stride_in_bytes] - buffer[i & STBTT__OVER_MASK]; + buffer[(i+kernel_width) & STBTT__OVER_MASK] = pixels[i*stride_in_bytes]; + pixels[i*stride_in_bytes] = (unsigned char) (total / 4); + } + break; + case 5: + for (i=0; i <= safe_h; ++i) { + total += pixels[i*stride_in_bytes] - buffer[i & STBTT__OVER_MASK]; + buffer[(i+kernel_width) & STBTT__OVER_MASK] = pixels[i*stride_in_bytes]; + pixels[i*stride_in_bytes] = (unsigned char) (total / 5); + } + break; + default: + for (i=0; i <= safe_h; ++i) { + total += pixels[i*stride_in_bytes] - buffer[i & STBTT__OVER_MASK]; + buffer[(i+kernel_width) & STBTT__OVER_MASK] = pixels[i*stride_in_bytes]; + pixels[i*stride_in_bytes] = (unsigned char) (total / kernel_width); + } + break; + } + + for (; i < h; ++i) { + STBTT_assert(pixels[i*stride_in_bytes] == 0); + total -= buffer[i & STBTT__OVER_MASK]; + pixels[i*stride_in_bytes] = (unsigned char) (total / kernel_width); + } + + pixels += 1; + } +} + +static float stbtt__oversample_shift(int oversample) +{ + if (!oversample) + return 0.0f; + + // The prefilter is a box filter of width "oversample", + // which shifts phase by (oversample - 1)/2 pixels in + // oversampled space. We want to shift in the opposite + // direction to counter this. + return (float)-(oversample - 1) / (2.0f * (float)oversample); +} + +// rects array must be big enough to accommodate all characters in the given ranges +STBTT_DEF int stbtt_PackFontRangesGatherRects(stbtt_pack_context *spc, stbtt_fontinfo *info, stbtt_pack_range *ranges, int num_ranges, stbrp_rect *rects) +{ + int i,j,k; + + k=0; + for (i=0; i < num_ranges; ++i) { + float fh = ranges[i].font_size; + float scale = fh > 0 ? stbtt_ScaleForPixelHeight(info, fh) : stbtt_ScaleForMappingEmToPixels(info, -fh); + ranges[i].h_oversample = (unsigned char) spc->h_oversample; + ranges[i].v_oversample = (unsigned char) spc->v_oversample; + for (j=0; j < ranges[i].num_chars; ++j) { + int x0,y0,x1,y1; + int codepoint = ranges[i].array_of_unicode_codepoints == NULL ? ranges[i].first_unicode_codepoint_in_range + j : ranges[i].array_of_unicode_codepoints[j]; + int glyph = stbtt_FindGlyphIndex(info, codepoint); + stbtt_GetGlyphBitmapBoxSubpixel(info,glyph, + scale * spc->h_oversample, + scale * spc->v_oversample, + 0,0, + &x0,&y0,&x1,&y1); + rects[k].w = (stbrp_coord) (x1-x0 + spc->padding + spc->h_oversample-1); + rects[k].h = (stbrp_coord) (y1-y0 + spc->padding + spc->v_oversample-1); + ++k; + } + } + + return k; +} + +// rects array must be big enough to accommodate all characters in the given ranges +STBTT_DEF int stbtt_PackFontRangesRenderIntoRects(stbtt_pack_context *spc, stbtt_fontinfo *info, stbtt_pack_range *ranges, int num_ranges, stbrp_rect *rects) +{ + int i,j,k, return_value = 1; + + // save current values + int old_h_over = spc->h_oversample; + int old_v_over = spc->v_oversample; + + k = 0; + for (i=0; i < num_ranges; ++i) { + float fh = ranges[i].font_size; + float scale = fh > 0 ? stbtt_ScaleForPixelHeight(info, fh) : stbtt_ScaleForMappingEmToPixels(info, -fh); + float recip_h,recip_v,sub_x,sub_y; + spc->h_oversample = ranges[i].h_oversample; + spc->v_oversample = ranges[i].v_oversample; + recip_h = 1.0f / spc->h_oversample; + recip_v = 1.0f / spc->v_oversample; + sub_x = stbtt__oversample_shift(spc->h_oversample); + sub_y = stbtt__oversample_shift(spc->v_oversample); + for (j=0; j < ranges[i].num_chars; ++j) { + stbrp_rect *r = &rects[k]; + if (r->was_packed) { + stbtt_packedchar *bc = &ranges[i].chardata_for_range[j]; + int advance, lsb, x0,y0,x1,y1; + int codepoint = ranges[i].array_of_unicode_codepoints == NULL ? ranges[i].first_unicode_codepoint_in_range + j : ranges[i].array_of_unicode_codepoints[j]; + int glyph = stbtt_FindGlyphIndex(info, codepoint); + stbrp_coord pad = (stbrp_coord) spc->padding; + + // pad on left and top + r->x += pad; + r->y += pad; + r->w -= pad; + r->h -= pad; + stbtt_GetGlyphHMetrics(info, glyph, &advance, &lsb); + stbtt_GetGlyphBitmapBox(info, glyph, + scale * spc->h_oversample, + scale * spc->v_oversample, + &x0,&y0,&x1,&y1); + stbtt_MakeGlyphBitmapSubpixel(info, + spc->pixels + r->x + r->y*spc->stride_in_bytes, + r->w - spc->h_oversample+1, + r->h - spc->v_oversample+1, + spc->stride_in_bytes, + scale * spc->h_oversample, + scale * spc->v_oversample, + 0,0, + glyph); + + if (spc->h_oversample > 1) + stbtt__h_prefilter(spc->pixels + r->x + r->y*spc->stride_in_bytes, + r->w, r->h, spc->stride_in_bytes, + spc->h_oversample); + + if (spc->v_oversample > 1) + stbtt__v_prefilter(spc->pixels + r->x + r->y*spc->stride_in_bytes, + r->w, r->h, spc->stride_in_bytes, + spc->v_oversample); + + bc->x0 = (stbtt_int16) r->x; + bc->y0 = (stbtt_int16) r->y; + bc->x1 = (stbtt_int16) (r->x + r->w); + bc->y1 = (stbtt_int16) (r->y + r->h); + bc->xadvance = scale * advance; + bc->xoff = (float) x0 * recip_h + sub_x; + bc->yoff = (float) y0 * recip_v + sub_y; + bc->xoff2 = (x0 + r->w) * recip_h + sub_x; + bc->yoff2 = (y0 + r->h) * recip_v + sub_y; + } else { + return_value = 0; // if any fail, report failure + } + + ++k; + } + } + + // restore original values + spc->h_oversample = old_h_over; + spc->v_oversample = old_v_over; + + return return_value; +} + +STBTT_DEF void stbtt_PackFontRangesPackRects(stbtt_pack_context *spc, stbrp_rect *rects, int num_rects) +{ + stbrp_pack_rects((stbrp_context *) spc->pack_info, rects, num_rects); +} + +STBTT_DEF int stbtt_PackFontRanges(stbtt_pack_context *spc, unsigned char *fontdata, int font_index, stbtt_pack_range *ranges, int num_ranges) +{ + stbtt_fontinfo info; + int i,j,n, return_value = 1; + //stbrp_context *context = (stbrp_context *) spc->pack_info; + stbrp_rect *rects; + + // flag all characters as NOT packed + for (i=0; i < num_ranges; ++i) + for (j=0; j < ranges[i].num_chars; ++j) + ranges[i].chardata_for_range[j].x0 = + ranges[i].chardata_for_range[j].y0 = + ranges[i].chardata_for_range[j].x1 = + ranges[i].chardata_for_range[j].y1 = 0; + + n = 0; + for (i=0; i < num_ranges; ++i) + n += ranges[i].num_chars; + + rects = (stbrp_rect *) STBTT_malloc(sizeof(*rects) * n, spc->user_allocator_context); + if (rects == NULL) + return 0; + + info.userdata = spc->user_allocator_context; + stbtt_InitFont(&info, fontdata, stbtt_GetFontOffsetForIndex(fontdata,font_index)); + + n = stbtt_PackFontRangesGatherRects(spc, &info, ranges, num_ranges, rects); + + stbtt_PackFontRangesPackRects(spc, rects, n); + + return_value = stbtt_PackFontRangesRenderIntoRects(spc, &info, ranges, num_ranges, rects); + + STBTT_free(rects, spc->user_allocator_context); + return return_value; +} + +STBTT_DEF int stbtt_PackFontRange(stbtt_pack_context *spc, unsigned char *fontdata, int font_index, float font_size, + int first_unicode_codepoint_in_range, int num_chars_in_range, stbtt_packedchar *chardata_for_range) +{ + stbtt_pack_range range; + range.first_unicode_codepoint_in_range = first_unicode_codepoint_in_range; + range.array_of_unicode_codepoints = NULL; + range.num_chars = num_chars_in_range; + range.chardata_for_range = chardata_for_range; + range.font_size = font_size; + return stbtt_PackFontRanges(spc, fontdata, font_index, &range, 1); +} + +STBTT_DEF void stbtt_GetPackedQuad(stbtt_packedchar *chardata, int pw, int ph, int char_index, float *xpos, float *ypos, stbtt_aligned_quad *q, int align_to_integer) +{ + float ipw = 1.0f / pw, iph = 1.0f / ph; + stbtt_packedchar *b = chardata + char_index; + + if (align_to_integer) { + float x = (float) STBTT_ifloor((*xpos + b->xoff) + 0.5f); + float y = (float) STBTT_ifloor((*ypos + b->yoff) + 0.5f); + q->x0 = x; + q->y0 = y; + q->x1 = x + b->xoff2 - b->xoff; + q->y1 = y + b->yoff2 - b->yoff; + } else { + q->x0 = *xpos + b->xoff; + q->y0 = *ypos + b->yoff; + q->x1 = *xpos + b->xoff2; + q->y1 = *ypos + b->yoff2; + } + + q->s0 = b->x0 * ipw; + q->t0 = b->y0 * iph; + q->s1 = b->x1 * ipw; + q->t1 = b->y1 * iph; + + *xpos += b->xadvance; +} + + +////////////////////////////////////////////////////////////////////////////// +// +// font name matching -- recommended not to use this +// + +// check if a utf8 string contains a prefix which is the utf16 string; if so return length of matching utf8 string +static stbtt_int32 stbtt__CompareUTF8toUTF16_bigendian_prefix(const stbtt_uint8 *s1, stbtt_int32 len1, const stbtt_uint8 *s2, stbtt_int32 len2) +{ + stbtt_int32 i=0; + + // convert utf16 to utf8 and compare the results while converting + while (len2) { + stbtt_uint16 ch = s2[0]*256 + s2[1]; + if (ch < 0x80) { + if (i >= len1) return -1; + if (s1[i++] != ch) return -1; + } else if (ch < 0x800) { + if (i+1 >= len1) return -1; + if (s1[i++] != 0xc0 + (ch >> 6)) return -1; + if (s1[i++] != 0x80 + (ch & 0x3f)) return -1; + } else if (ch >= 0xd800 && ch < 0xdc00) { + stbtt_uint32 c; + stbtt_uint16 ch2 = s2[2]*256 + s2[3]; + if (i+3 >= len1) return -1; + c = ((ch - 0xd800) << 10) + (ch2 - 0xdc00) + 0x10000; + if (s1[i++] != 0xf0 + (c >> 18)) return -1; + if (s1[i++] != 0x80 + ((c >> 12) & 0x3f)) return -1; + if (s1[i++] != 0x80 + ((c >> 6) & 0x3f)) return -1; + if (s1[i++] != 0x80 + ((c ) & 0x3f)) return -1; + s2 += 2; // plus another 2 below + len2 -= 2; + } else if (ch >= 0xdc00 && ch < 0xe000) { + return -1; + } else { + if (i+2 >= len1) return -1; + if (s1[i++] != 0xe0 + (ch >> 12)) return -1; + if (s1[i++] != 0x80 + ((ch >> 6) & 0x3f)) return -1; + if (s1[i++] != 0x80 + ((ch ) & 0x3f)) return -1; + } + s2 += 2; + len2 -= 2; + } + return i; +} + +STBTT_DEF int stbtt_CompareUTF8toUTF16_bigendian(const char *s1, int len1, const char *s2, int len2) +{ + return len1 == stbtt__CompareUTF8toUTF16_bigendian_prefix((const stbtt_uint8*) s1, len1, (const stbtt_uint8*) s2, len2); +} + +// returns results in whatever encoding you request... but note that 2-byte encodings +// will be BIG-ENDIAN... use stbtt_CompareUTF8toUTF16_bigendian() to compare +STBTT_DEF const char *stbtt_GetFontNameString(const stbtt_fontinfo *font, int *length, int platformID, int encodingID, int languageID, int nameID) +{ + stbtt_int32 i,count,stringOffset; + stbtt_uint8 *fc = font->data; + stbtt_uint32 offset = font->fontstart; + stbtt_uint32 nm = stbtt__find_table(fc, offset, "name"); + if (!nm) return NULL; + + count = ttUSHORT(fc+nm+2); + stringOffset = nm + ttUSHORT(fc+nm+4); + for (i=0; i < count; ++i) { + stbtt_uint32 loc = nm + 6 + 12 * i; + if (platformID == ttUSHORT(fc+loc+0) && encodingID == ttUSHORT(fc+loc+2) + && languageID == ttUSHORT(fc+loc+4) && nameID == ttUSHORT(fc+loc+6)) { + *length = ttUSHORT(fc+loc+8); + return (const char *) (fc+stringOffset+ttUSHORT(fc+loc+10)); + } + } + return NULL; +} + +static int stbtt__matchpair(stbtt_uint8 *fc, stbtt_uint32 nm, stbtt_uint8 *name, stbtt_int32 nlen, stbtt_int32 target_id, stbtt_int32 next_id) +{ + stbtt_int32 i; + stbtt_int32 count = ttUSHORT(fc+nm+2); + stbtt_int32 stringOffset = nm + ttUSHORT(fc+nm+4); + + for (i=0; i < count; ++i) { + stbtt_uint32 loc = nm + 6 + 12 * i; + stbtt_int32 id = ttUSHORT(fc+loc+6); + if (id == target_id) { + // find the encoding + stbtt_int32 platform = ttUSHORT(fc+loc+0), encoding = ttUSHORT(fc+loc+2), language = ttUSHORT(fc+loc+4); + + // is this a Unicode encoding? + if (platform == 0 || (platform == 3 && encoding == 1) || (platform == 3 && encoding == 10)) { + stbtt_int32 slen = ttUSHORT(fc+loc+8); + stbtt_int32 off = ttUSHORT(fc+loc+10); + + // check if there's a prefix match + stbtt_int32 matchlen = stbtt__CompareUTF8toUTF16_bigendian_prefix(name, nlen, fc+stringOffset+off,slen); + if (matchlen >= 0) { + // check for target_id+1 immediately following, with same encoding & language + if (i+1 < count && ttUSHORT(fc+loc+12+6) == next_id && ttUSHORT(fc+loc+12) == platform && ttUSHORT(fc+loc+12+2) == encoding && ttUSHORT(fc+loc+12+4) == language) { + slen = ttUSHORT(fc+loc+12+8); + off = ttUSHORT(fc+loc+12+10); + if (slen == 0) { + if (matchlen == nlen) + return 1; + } else if (matchlen < nlen && name[matchlen] == ' ') { + ++matchlen; + if (stbtt_CompareUTF8toUTF16_bigendian((char*) (name+matchlen), nlen-matchlen, (char*)(fc+stringOffset+off),slen)) + return 1; + } + } else { + // if nothing immediately following + if (matchlen == nlen) + return 1; + } + } + } + + // @TODO handle other encodings + } + } + return 0; +} + +static int stbtt__matches(stbtt_uint8 *fc, stbtt_uint32 offset, stbtt_uint8 *name, stbtt_int32 flags) +{ + stbtt_int32 nlen = (stbtt_int32) STBTT_strlen((char *) name); + stbtt_uint32 nm,hd; + if (!stbtt__isfont(fc+offset)) return 0; + + // check italics/bold/underline flags in macStyle... + if (flags) { + hd = stbtt__find_table(fc, offset, "head"); + if ((ttUSHORT(fc+hd+44) & 7) != (flags & 7)) return 0; + } + + nm = stbtt__find_table(fc, offset, "name"); + if (!nm) return 0; + + if (flags) { + // if we checked the macStyle flags, then just check the family and ignore the subfamily + if (stbtt__matchpair(fc, nm, name, nlen, 16, -1)) return 1; + if (stbtt__matchpair(fc, nm, name, nlen, 1, -1)) return 1; + if (stbtt__matchpair(fc, nm, name, nlen, 3, -1)) return 1; + } else { + if (stbtt__matchpair(fc, nm, name, nlen, 16, 17)) return 1; + if (stbtt__matchpair(fc, nm, name, nlen, 1, 2)) return 1; + if (stbtt__matchpair(fc, nm, name, nlen, 3, -1)) return 1; + } + + return 0; +} + +STBTT_DEF int stbtt_FindMatchingFont(const unsigned char *font_collection, const char *name_utf8, stbtt_int32 flags) +{ + stbtt_int32 i; + for (i=0;;++i) { + stbtt_int32 off = stbtt_GetFontOffsetForIndex(font_collection, i); + if (off < 0) return off; + if (stbtt__matches((stbtt_uint8 *) font_collection, off, (stbtt_uint8*) name_utf8, flags)) + return off; + } +} + +#endif // STB_TRUETYPE_IMPLEMENTATION + + +// FULL VERSION HISTORY +// +// 1.11 (2016-04-02) fix unused-variable warning +// 1.10 (2016-04-02) allow user-defined fabs() replacement +// fix memory leak if fontsize=0.0 +// fix warning from duplicate typedef +// 1.09 (2016-01-16) warning fix; avoid crash on outofmem; use alloc userdata for PackFontRanges +// 1.08 (2015-09-13) document stbtt_Rasterize(); fixes for vertical & horizontal edges +// 1.07 (2015-08-01) allow PackFontRanges to accept arrays of sparse codepoints; +// allow PackFontRanges to pack and render in separate phases; +// fix stbtt_GetFontOFfsetForIndex (never worked for non-0 input?); +// fixed an assert() bug in the new rasterizer +// replace assert() with STBTT_assert() in new rasterizer +// 1.06 (2015-07-14) performance improvements (~35% faster on x86 and x64 on test machine) +// also more precise AA rasterizer, except if shapes overlap +// remove need for STBTT_sort +// 1.05 (2015-04-15) fix misplaced definitions for STBTT_STATIC +// 1.04 (2015-04-15) typo in example +// 1.03 (2015-04-12) STBTT_STATIC, fix memory leak in new packing, various fixes +// 1.02 (2014-12-10) fix various warnings & compile issues w/ stb_rect_pack, C++ +// 1.01 (2014-12-08) fix subpixel position when oversampling to exactly match +// non-oversampled; STBTT_POINT_SIZE for packed case only +// 1.00 (2014-12-06) add new PackBegin etc. API, w/ support for oversampling +// 0.99 (2014-09-18) fix multiple bugs with subpixel rendering (ryg) +// 0.9 (2014-08-07) support certain mac/iOS fonts without an MS platformID +// 0.8b (2014-07-07) fix a warning +// 0.8 (2014-05-25) fix a few more warnings +// 0.7 (2013-09-25) bugfix: subpixel glyph bug fixed in 0.5 had come back +// 0.6c (2012-07-24) improve documentation +// 0.6b (2012-07-20) fix a few more warnings +// 0.6 (2012-07-17) fix warnings; added stbtt_ScaleForMappingEmToPixels, +// stbtt_GetFontBoundingBox, stbtt_IsGlyphEmpty +// 0.5 (2011-12-09) bugfixes: +// subpixel glyph renderer computed wrong bounding box +// first vertex of shape can be off-curve (FreeSans) +// 0.4b (2011-12-03) fixed an error in the font baking example +// 0.4 (2011-12-01) kerning, subpixel rendering (tor) +// bugfixes for: +// codepoint-to-glyph conversion using table fmt=12 +// codepoint-to-glyph conversion using table fmt=4 +// stbtt_GetBakedQuad with non-square texture (Zer) +// updated Hello World! sample to use kerning and subpixel +// fixed some warnings +// 0.3 (2009-06-24) cmap fmt=12, compound shapes (MM) +// userdata, malloc-from-userdata, non-zero fill (stb) +// 0.2 (2009-03-11) Fix unsigned/signed char warnings +// 0.1 (2009-03-09) First public release +// diff --git a/thirdparty/misc/stb_vorbis.c b/thirdparty/misc/stb_vorbis.c new file mode 100644 index 0000000000..c4f24d5898 --- /dev/null +++ b/thirdparty/misc/stb_vorbis.c @@ -0,0 +1,5399 @@ +// Ogg Vorbis audio decoder - v1.09 - public domain +// http://nothings.org/stb_vorbis/ +// +// Original version written by Sean Barrett in 2007. +// +// Originally sponsored by RAD Game Tools. Seeking sponsored +// by Phillip Bennefall, Marc Andersen, Aaron Baker, Elias Software, +// Aras Pranckevicius, and Sean Barrett. +// +// LICENSE +// +// This software is dual-licensed to the public domain and under the following +// license: you are granted a perpetual, irrevocable license to copy, modify, +// publish, and distribute this file as you see fit. +// +// No warranty for any purpose is expressed or implied by the author (nor +// by RAD Game Tools). Report bugs and send enhancements to the author. +// +// Limitations: +// +// - floor 0 not supported (used in old ogg vorbis files pre-2004) +// - lossless sample-truncation at beginning ignored +// - cannot concatenate multiple vorbis streams +// - sample positions are 32-bit, limiting seekable 192Khz +// files to around 6 hours (Ogg supports 64-bit) +// +// Feature contributors: +// Dougall Johnson (sample-exact seeking) +// +// Bugfix/warning contributors: +// Terje Mathisen Niklas Frykholm Andy Hill +// Casey Muratori John Bolton Gargaj +// Laurent Gomila Marc LeBlanc Ronny Chevalier +// Bernhard Wodo Evan Balster alxprd@github +// Tom Beaumont Ingo Leitgeb Nicolas Guillemot +// Phillip Bennefall Rohit Thiago Goulart +// manxorist@github saga musix +// +// Partial history: +// 1.09 - 2016/04/04 - back out 'truncation of last frame' fix from previous version +// 1.08 - 2016/04/02 - warnings; setup memory leaks; truncation of last frame +// 1.07 - 2015/01/16 - fixes for crashes on invalid files; warning fixes; const +// 1.06 - 2015/08/31 - full, correct support for seeking API (Dougall Johnson) +// some crash fixes when out of memory or with corrupt files +// fix some inappropriately signed shifts +// 1.05 - 2015/04/19 - don't define __forceinline if it's redundant +// 1.04 - 2014/08/27 - fix missing const-correct case in API +// 1.03 - 2014/08/07 - warning fixes +// 1.02 - 2014/07/09 - declare qsort comparison as explicitly _cdecl in Windows +// 1.01 - 2014/06/18 - fix stb_vorbis_get_samples_float (interleaved was correct) +// 1.0 - 2014/05/26 - fix memory leaks; fix warnings; fix bugs in >2-channel; +// (API change) report sample rate for decode-full-file funcs +// +// See end of file for full version history. + + +////////////////////////////////////////////////////////////////////////////// +// +// HEADER BEGINS HERE +// + +#ifndef STB_VORBIS_INCLUDE_STB_VORBIS_H +#define STB_VORBIS_INCLUDE_STB_VORBIS_H + +#if defined(STB_VORBIS_NO_CRT) && !defined(STB_VORBIS_NO_STDIO) +#define STB_VORBIS_NO_STDIO 1 +#endif + +#ifndef STB_VORBIS_NO_STDIO +#include +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/////////// THREAD SAFETY + +// Individual stb_vorbis* handles are not thread-safe; you cannot decode from +// them from multiple threads at the same time. However, you can have multiple +// stb_vorbis* handles and decode from them independently in multiple thrads. + + +/////////// MEMORY ALLOCATION + +// normally stb_vorbis uses malloc() to allocate memory at startup, +// and alloca() to allocate temporary memory during a frame on the +// stack. (Memory consumption will depend on the amount of setup +// data in the file and how you set the compile flags for speed +// vs. size. In my test files the maximal-size usage is ~150KB.) +// +// You can modify the wrapper functions in the source (setup_malloc, +// setup_temp_malloc, temp_malloc) to change this behavior, or you +// can use a simpler allocation model: you pass in a buffer from +// which stb_vorbis will allocate _all_ its memory (including the +// temp memory). "open" may fail with a VORBIS_outofmem if you +// do not pass in enough data; there is no way to determine how +// much you do need except to succeed (at which point you can +// query get_info to find the exact amount required. yes I know +// this is lame). +// +// If you pass in a non-NULL buffer of the type below, allocation +// will occur from it as described above. Otherwise just pass NULL +// to use malloc()/alloca() + +typedef struct +{ + char *alloc_buffer; + int alloc_buffer_length_in_bytes; +} stb_vorbis_alloc; + + +/////////// FUNCTIONS USEABLE WITH ALL INPUT MODES + +typedef struct stb_vorbis stb_vorbis; + +typedef struct +{ + unsigned int sample_rate; + int channels; + + unsigned int setup_memory_required; + unsigned int setup_temp_memory_required; + unsigned int temp_memory_required; + + int max_frame_size; +} stb_vorbis_info; + +// get general information about the file +extern stb_vorbis_info stb_vorbis_get_info(stb_vorbis *f); + +// get the last error detected (clears it, too) +extern int stb_vorbis_get_error(stb_vorbis *f); + +// close an ogg vorbis file and free all memory in use +extern void stb_vorbis_close(stb_vorbis *f); + +// this function returns the offset (in samples) from the beginning of the +// file that will be returned by the next decode, if it is known, or -1 +// otherwise. after a flush_pushdata() call, this may take a while before +// it becomes valid again. +// NOT WORKING YET after a seek with PULLDATA API +extern int stb_vorbis_get_sample_offset(stb_vorbis *f); + +// returns the current seek point within the file, or offset from the beginning +// of the memory buffer. In pushdata mode it returns 0. +extern unsigned int stb_vorbis_get_file_offset(stb_vorbis *f); + +/////////// PUSHDATA API + +#ifndef STB_VORBIS_NO_PUSHDATA_API + +// this API allows you to get blocks of data from any source and hand +// them to stb_vorbis. you have to buffer them; stb_vorbis will tell +// you how much it used, and you have to give it the rest next time; +// and stb_vorbis may not have enough data to work with and you will +// need to give it the same data again PLUS more. Note that the Vorbis +// specification does not bound the size of an individual frame. + +extern stb_vorbis *stb_vorbis_open_pushdata( + const unsigned char * datablock, int datablock_length_in_bytes, + int *datablock_memory_consumed_in_bytes, + int *error, + const stb_vorbis_alloc *alloc_buffer); +// create a vorbis decoder by passing in the initial data block containing +// the ogg&vorbis headers (you don't need to do parse them, just provide +// the first N bytes of the file--you're told if it's not enough, see below) +// on success, returns an stb_vorbis *, does not set error, returns the amount of +// data parsed/consumed on this call in *datablock_memory_consumed_in_bytes; +// on failure, returns NULL on error and sets *error, does not change *datablock_memory_consumed +// if returns NULL and *error is VORBIS_need_more_data, then the input block was +// incomplete and you need to pass in a larger block from the start of the file + +extern int stb_vorbis_decode_frame_pushdata( + stb_vorbis *f, + const unsigned char *datablock, int datablock_length_in_bytes, + int *channels, // place to write number of float * buffers + float ***output, // place to write float ** array of float * buffers + int *samples // place to write number of output samples + ); +// decode a frame of audio sample data if possible from the passed-in data block +// +// return value: number of bytes we used from datablock +// +// possible cases: +// 0 bytes used, 0 samples output (need more data) +// N bytes used, 0 samples output (resynching the stream, keep going) +// N bytes used, M samples output (one frame of data) +// note that after opening a file, you will ALWAYS get one N-bytes,0-sample +// frame, because Vorbis always "discards" the first frame. +// +// Note that on resynch, stb_vorbis will rarely consume all of the buffer, +// instead only datablock_length_in_bytes-3 or less. This is because it wants +// to avoid missing parts of a page header if they cross a datablock boundary, +// without writing state-machiney code to record a partial detection. +// +// The number of channels returned are stored in *channels (which can be +// NULL--it is always the same as the number of channels reported by +// get_info). *output will contain an array of float* buffers, one per +// channel. In other words, (*output)[0][0] contains the first sample from +// the first channel, and (*output)[1][0] contains the first sample from +// the second channel. + +extern void stb_vorbis_flush_pushdata(stb_vorbis *f); +// inform stb_vorbis that your next datablock will not be contiguous with +// previous ones (e.g. you've seeked in the data); future attempts to decode +// frames will cause stb_vorbis to resynchronize (as noted above), and +// once it sees a valid Ogg page (typically 4-8KB, as large as 64KB), it +// will begin decoding the _next_ frame. +// +// if you want to seek using pushdata, you need to seek in your file, then +// call stb_vorbis_flush_pushdata(), then start calling decoding, then once +// decoding is returning you data, call stb_vorbis_get_sample_offset, and +// if you don't like the result, seek your file again and repeat. +#endif + + +////////// PULLING INPUT API + +#ifndef STB_VORBIS_NO_PULLDATA_API +// This API assumes stb_vorbis is allowed to pull data from a source-- +// either a block of memory containing the _entire_ vorbis stream, or a +// FILE * that you or it create, or possibly some other reading mechanism +// if you go modify the source to replace the FILE * case with some kind +// of callback to your code. (But if you don't support seeking, you may +// just want to go ahead and use pushdata.) + +#if !defined(STB_VORBIS_NO_STDIO) && !defined(STB_VORBIS_NO_INTEGER_CONVERSION) +extern int stb_vorbis_decode_filename(const char *filename, int *channels, int *sample_rate, short **output); +#endif +#if !defined(STB_VORBIS_NO_INTEGER_CONVERSION) +extern int stb_vorbis_decode_memory(const unsigned char *mem, int len, int *channels, int *sample_rate, short **output); +#endif +// decode an entire file and output the data interleaved into a malloc()ed +// buffer stored in *output. The return value is the number of samples +// decoded, or -1 if the file could not be opened or was not an ogg vorbis file. +// When you're done with it, just free() the pointer returned in *output. + +extern stb_vorbis * stb_vorbis_open_memory(const unsigned char *data, int len, + int *error, const stb_vorbis_alloc *alloc_buffer); +// create an ogg vorbis decoder from an ogg vorbis stream in memory (note +// this must be the entire stream!). on failure, returns NULL and sets *error + +#ifndef STB_VORBIS_NO_STDIO +extern stb_vorbis * stb_vorbis_open_filename(const char *filename, + int *error, const stb_vorbis_alloc *alloc_buffer); +// create an ogg vorbis decoder from a filename via fopen(). on failure, +// returns NULL and sets *error (possibly to VORBIS_file_open_failure). + +extern stb_vorbis * stb_vorbis_open_file(FILE *f, int close_handle_on_close, + int *error, const stb_vorbis_alloc *alloc_buffer); +// create an ogg vorbis decoder from an open FILE *, looking for a stream at +// the _current_ seek point (ftell). on failure, returns NULL and sets *error. +// note that stb_vorbis must "own" this stream; if you seek it in between +// calls to stb_vorbis, it will become confused. Morever, if you attempt to +// perform stb_vorbis_seek_*() operations on this file, it will assume it +// owns the _entire_ rest of the file after the start point. Use the next +// function, stb_vorbis_open_file_section(), to limit it. + +extern stb_vorbis * stb_vorbis_open_file_section(FILE *f, int close_handle_on_close, + int *error, const stb_vorbis_alloc *alloc_buffer, unsigned int len); +// create an ogg vorbis decoder from an open FILE *, looking for a stream at +// the _current_ seek point (ftell); the stream will be of length 'len' bytes. +// on failure, returns NULL and sets *error. note that stb_vorbis must "own" +// this stream; if you seek it in between calls to stb_vorbis, it will become +// confused. +#endif + +extern int stb_vorbis_seek_frame(stb_vorbis *f, unsigned int sample_number); +extern int stb_vorbis_seek(stb_vorbis *f, unsigned int sample_number); +// these functions seek in the Vorbis file to (approximately) 'sample_number'. +// after calling seek_frame(), the next call to get_frame_*() will include +// the specified sample. after calling stb_vorbis_seek(), the next call to +// stb_vorbis_get_samples_* will start with the specified sample. If you +// do not need to seek to EXACTLY the target sample when using get_samples_*, +// you can also use seek_frame(). + +extern void stb_vorbis_seek_start(stb_vorbis *f); +// this function is equivalent to stb_vorbis_seek(f,0) + +extern unsigned int stb_vorbis_stream_length_in_samples(stb_vorbis *f); +extern float stb_vorbis_stream_length_in_seconds(stb_vorbis *f); +// these functions return the total length of the vorbis stream + +extern int stb_vorbis_get_frame_float(stb_vorbis *f, int *channels, float ***output); +// decode the next frame and return the number of samples. the number of +// channels returned are stored in *channels (which can be NULL--it is always +// the same as the number of channels reported by get_info). *output will +// contain an array of float* buffers, one per channel. These outputs will +// be overwritten on the next call to stb_vorbis_get_frame_*. +// +// You generally should not intermix calls to stb_vorbis_get_frame_*() +// and stb_vorbis_get_samples_*(), since the latter calls the former. + +#ifndef STB_VORBIS_NO_INTEGER_CONVERSION +extern int stb_vorbis_get_frame_short_interleaved(stb_vorbis *f, int num_c, short *buffer, int num_shorts); +extern int stb_vorbis_get_frame_short (stb_vorbis *f, int num_c, short **buffer, int num_samples); +#endif +// decode the next frame and return the number of *samples* per channel. +// Note that for interleaved data, you pass in the number of shorts (the +// size of your array), but the return value is the number of samples per +// channel, not the total number of samples. +// +// The data is coerced to the number of channels you request according to the +// channel coercion rules (see below). You must pass in the size of your +// buffer(s) so that stb_vorbis will not overwrite the end of the buffer. +// The maximum buffer size needed can be gotten from get_info(); however, +// the Vorbis I specification implies an absolute maximum of 4096 samples +// per channel. + +// Channel coercion rules: +// Let M be the number of channels requested, and N the number of channels present, +// and Cn be the nth channel; let stereo L be the sum of all L and center channels, +// and stereo R be the sum of all R and center channels (channel assignment from the +// vorbis spec). +// M N output +// 1 k sum(Ck) for all k +// 2 * stereo L, stereo R +// k l k > l, the first l channels, then 0s +// k l k <= l, the first k channels +// Note that this is not _good_ surround etc. mixing at all! It's just so +// you get something useful. + +extern int stb_vorbis_get_samples_float_interleaved(stb_vorbis *f, int channels, float *buffer, int num_floats); +extern int stb_vorbis_get_samples_float(stb_vorbis *f, int channels, float **buffer, int num_samples); +// gets num_samples samples, not necessarily on a frame boundary--this requires +// buffering so you have to supply the buffers. DOES NOT APPLY THE COERCION RULES. +// Returns the number of samples stored per channel; it may be less than requested +// at the end of the file. If there are no more samples in the file, returns 0. + +#ifndef STB_VORBIS_NO_INTEGER_CONVERSION +extern int stb_vorbis_get_samples_short_interleaved(stb_vorbis *f, int channels, short *buffer, int num_shorts); +extern int stb_vorbis_get_samples_short(stb_vorbis *f, int channels, short **buffer, int num_samples); +#endif +// gets num_samples samples, not necessarily on a frame boundary--this requires +// buffering so you have to supply the buffers. Applies the coercion rules above +// to produce 'channels' channels. Returns the number of samples stored per channel; +// it may be less than requested at the end of the file. If there are no more +// samples in the file, returns 0. + +#endif + +//////// ERROR CODES + +enum STBVorbisError +{ + VORBIS__no_error, + + VORBIS_need_more_data=1, // not a real error + + VORBIS_invalid_api_mixing, // can't mix API modes + VORBIS_outofmem, // not enough memory + VORBIS_feature_not_supported, // uses floor 0 + VORBIS_too_many_channels, // STB_VORBIS_MAX_CHANNELS is too small + VORBIS_file_open_failure, // fopen() failed + VORBIS_seek_without_length, // can't seek in unknown-length file + + VORBIS_unexpected_eof=10, // file is truncated? + VORBIS_seek_invalid, // seek past EOF + + // decoding errors (corrupt/invalid stream) -- you probably + // don't care about the exact details of these + + // vorbis errors: + VORBIS_invalid_setup=20, + VORBIS_invalid_stream, + + // ogg errors: + VORBIS_missing_capture_pattern=30, + VORBIS_invalid_stream_structure_version, + VORBIS_continued_packet_flag_invalid, + VORBIS_incorrect_stream_serial_number, + VORBIS_invalid_first_page, + VORBIS_bad_packet_type, + VORBIS_cant_find_last_page, + VORBIS_seek_failed +}; + + +#ifdef __cplusplus +} +#endif + +#endif // STB_VORBIS_INCLUDE_STB_VORBIS_H +// +// HEADER ENDS HERE +// +////////////////////////////////////////////////////////////////////////////// + +#ifndef STB_VORBIS_HEADER_ONLY + +// global configuration settings (e.g. set these in the project/makefile), +// or just set them in this file at the top (although ideally the first few +// should be visible when the header file is compiled too, although it's not +// crucial) + +// STB_VORBIS_NO_PUSHDATA_API +// does not compile the code for the various stb_vorbis_*_pushdata() +// functions +// #define STB_VORBIS_NO_PUSHDATA_API + +// STB_VORBIS_NO_PULLDATA_API +// does not compile the code for the non-pushdata APIs +// #define STB_VORBIS_NO_PULLDATA_API + +// STB_VORBIS_NO_STDIO +// does not compile the code for the APIs that use FILE *s internally +// or externally (implied by STB_VORBIS_NO_PULLDATA_API) +// #define STB_VORBIS_NO_STDIO + +// STB_VORBIS_NO_INTEGER_CONVERSION +// does not compile the code for converting audio sample data from +// float to integer (implied by STB_VORBIS_NO_PULLDATA_API) +// #define STB_VORBIS_NO_INTEGER_CONVERSION + +// STB_VORBIS_NO_FAST_SCALED_FLOAT +// does not use a fast float-to-int trick to accelerate float-to-int on +// most platforms which requires endianness be defined correctly. +//#define STB_VORBIS_NO_FAST_SCALED_FLOAT + + +// STB_VORBIS_MAX_CHANNELS [number] +// globally define this to the maximum number of channels you need. +// The spec does not put a restriction on channels except that +// the count is stored in a byte, so 255 is the hard limit. +// Reducing this saves about 16 bytes per value, so using 16 saves +// (255-16)*16 or around 4KB. Plus anything other memory usage +// I forgot to account for. Can probably go as low as 8 (7.1 audio), +// 6 (5.1 audio), or 2 (stereo only). +#ifndef STB_VORBIS_MAX_CHANNELS +#define STB_VORBIS_MAX_CHANNELS 16 // enough for anyone? +#endif + +// STB_VORBIS_PUSHDATA_CRC_COUNT [number] +// after a flush_pushdata(), stb_vorbis begins scanning for the +// next valid page, without backtracking. when it finds something +// that looks like a page, it streams through it and verifies its +// CRC32. Should that validation fail, it keeps scanning. But it's +// possible that _while_ streaming through to check the CRC32 of +// one candidate page, it sees another candidate page. This #define +// determines how many "overlapping" candidate pages it can search +// at once. Note that "real" pages are typically ~4KB to ~8KB, whereas +// garbage pages could be as big as 64KB, but probably average ~16KB. +// So don't hose ourselves by scanning an apparent 64KB page and +// missing a ton of real ones in the interim; so minimum of 2 +#ifndef STB_VORBIS_PUSHDATA_CRC_COUNT +#define STB_VORBIS_PUSHDATA_CRC_COUNT 4 +#endif + +// STB_VORBIS_FAST_HUFFMAN_LENGTH [number] +// sets the log size of the huffman-acceleration table. Maximum +// supported value is 24. with larger numbers, more decodings are O(1), +// but the table size is larger so worse cache missing, so you'll have +// to probe (and try multiple ogg vorbis files) to find the sweet spot. +#ifndef STB_VORBIS_FAST_HUFFMAN_LENGTH +#define STB_VORBIS_FAST_HUFFMAN_LENGTH 10 +#endif + +// STB_VORBIS_FAST_BINARY_LENGTH [number] +// sets the log size of the binary-search acceleration table. this +// is used in similar fashion to the fast-huffman size to set initial +// parameters for the binary search + +// STB_VORBIS_FAST_HUFFMAN_INT +// The fast huffman tables are much more efficient if they can be +// stored as 16-bit results instead of 32-bit results. This restricts +// the codebooks to having only 65535 possible outcomes, though. +// (At least, accelerated by the huffman table.) +#ifndef STB_VORBIS_FAST_HUFFMAN_INT +#define STB_VORBIS_FAST_HUFFMAN_SHORT +#endif + +// STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH +// If the 'fast huffman' search doesn't succeed, then stb_vorbis falls +// back on binary searching for the correct one. This requires storing +// extra tables with the huffman codes in sorted order. Defining this +// symbol trades off space for speed by forcing a linear search in the +// non-fast case, except for "sparse" codebooks. +// #define STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH + +// STB_VORBIS_DIVIDES_IN_RESIDUE +// stb_vorbis precomputes the result of the scalar residue decoding +// that would otherwise require a divide per chunk. you can trade off +// space for time by defining this symbol. +// #define STB_VORBIS_DIVIDES_IN_RESIDUE + +// STB_VORBIS_DIVIDES_IN_CODEBOOK +// vorbis VQ codebooks can be encoded two ways: with every case explicitly +// stored, or with all elements being chosen from a small range of values, +// and all values possible in all elements. By default, stb_vorbis expands +// this latter kind out to look like the former kind for ease of decoding, +// because otherwise an integer divide-per-vector-element is required to +// unpack the index. If you define STB_VORBIS_DIVIDES_IN_CODEBOOK, you can +// trade off storage for speed. +//#define STB_VORBIS_DIVIDES_IN_CODEBOOK + +#ifdef STB_VORBIS_CODEBOOK_SHORTS +#error "STB_VORBIS_CODEBOOK_SHORTS is no longer supported as it produced incorrect results for some input formats" +#endif + +// STB_VORBIS_DIVIDE_TABLE +// this replaces small integer divides in the floor decode loop with +// table lookups. made less than 1% difference, so disabled by default. + +// STB_VORBIS_NO_INLINE_DECODE +// disables the inlining of the scalar codebook fast-huffman decode. +// might save a little codespace; useful for debugging +// #define STB_VORBIS_NO_INLINE_DECODE + +// STB_VORBIS_NO_DEFER_FLOOR +// Normally we only decode the floor without synthesizing the actual +// full curve. We can instead synthesize the curve immediately. This +// requires more memory and is very likely slower, so I don't think +// you'd ever want to do it except for debugging. +// #define STB_VORBIS_NO_DEFER_FLOOR + + + + +////////////////////////////////////////////////////////////////////////////// + +#ifdef STB_VORBIS_NO_PULLDATA_API + #define STB_VORBIS_NO_INTEGER_CONVERSION + #define STB_VORBIS_NO_STDIO +#endif + +#if defined(STB_VORBIS_NO_CRT) && !defined(STB_VORBIS_NO_STDIO) + #define STB_VORBIS_NO_STDIO 1 +#endif + +#ifndef STB_VORBIS_NO_INTEGER_CONVERSION +#ifndef STB_VORBIS_NO_FAST_SCALED_FLOAT + + // only need endianness for fast-float-to-int, which we don't + // use for pushdata + + #ifndef STB_VORBIS_BIG_ENDIAN + #define STB_VORBIS_ENDIAN 0 + #else + #define STB_VORBIS_ENDIAN 1 + #endif + +#endif +#endif + + +#ifndef STB_VORBIS_NO_STDIO +#include +#endif + +#ifndef STB_VORBIS_NO_CRT + #include + #include + #include + #include + + // find definition of alloca if it's not in stdlib.h: + #ifdef _MSC_VER + #include + #endif + #if defined(__linux__) || defined(__linux) || defined(__EMSCRIPTEN__) + #include + #endif +#else // STB_VORBIS_NO_CRT + #define NULL 0 + #define malloc(s) 0 + #define free(s) ((void) 0) + #define realloc(s) 0 +#endif // STB_VORBIS_NO_CRT + +#include + +#ifdef __MINGW32__ + // eff you mingw: + // "fixed": + // http://sourceforge.net/p/mingw-w64/mailman/message/32882927/ + // "no that broke the build, reverted, who cares about C": + // http://sourceforge.net/p/mingw-w64/mailman/message/32890381/ + #ifdef __forceinline + #undef __forceinline + #endif + #define __forceinline +#elif !defined(_MSC_VER) + #if __GNUC__ + #define __forceinline inline + #else + #define __forceinline + #endif +#endif + +#if STB_VORBIS_MAX_CHANNELS > 256 +#error "Value of STB_VORBIS_MAX_CHANNELS outside of allowed range" +#endif + +#if STB_VORBIS_FAST_HUFFMAN_LENGTH > 24 +#error "Value of STB_VORBIS_FAST_HUFFMAN_LENGTH outside of allowed range" +#endif + + +#if 0 +#include +#define CHECK(f) _CrtIsValidHeapPointer(f->channel_buffers[1]) +#else +#define CHECK(f) ((void) 0) +#endif + +#define MAX_BLOCKSIZE_LOG 13 // from specification +#define MAX_BLOCKSIZE (1 << MAX_BLOCKSIZE_LOG) + + +typedef unsigned char uint8; +typedef signed char int8; +typedef unsigned short uint16; +typedef signed short int16; +typedef unsigned int uint32; +typedef signed int int32; + +#ifndef TRUE +#define TRUE 1 +#define FALSE 0 +#endif + +typedef float codetype; + +// @NOTE +// +// Some arrays below are tagged "//varies", which means it's actually +// a variable-sized piece of data, but rather than malloc I assume it's +// small enough it's better to just allocate it all together with the +// main thing +// +// Most of the variables are specified with the smallest size I could pack +// them into. It might give better performance to make them all full-sized +// integers. It should be safe to freely rearrange the structures or change +// the sizes larger--nothing relies on silently truncating etc., nor the +// order of variables. + +#define FAST_HUFFMAN_TABLE_SIZE (1 << STB_VORBIS_FAST_HUFFMAN_LENGTH) +#define FAST_HUFFMAN_TABLE_MASK (FAST_HUFFMAN_TABLE_SIZE - 1) + +typedef struct +{ + int dimensions, entries; + uint8 *codeword_lengths; + float minimum_value; + float delta_value; + uint8 value_bits; + uint8 lookup_type; + uint8 sequence_p; + uint8 sparse; + uint32 lookup_values; + codetype *multiplicands; + uint32 *codewords; + #ifdef STB_VORBIS_FAST_HUFFMAN_SHORT + int16 fast_huffman[FAST_HUFFMAN_TABLE_SIZE]; + #else + int32 fast_huffman[FAST_HUFFMAN_TABLE_SIZE]; + #endif + uint32 *sorted_codewords; + int *sorted_values; + int sorted_entries; +} Codebook; + +typedef struct +{ + uint8 order; + uint16 rate; + uint16 bark_map_size; + uint8 amplitude_bits; + uint8 amplitude_offset; + uint8 number_of_books; + uint8 book_list[16]; // varies +} Floor0; + +typedef struct +{ + uint8 partitions; + uint8 partition_class_list[32]; // varies + uint8 class_dimensions[16]; // varies + uint8 class_subclasses[16]; // varies + uint8 class_masterbooks[16]; // varies + int16 subclass_books[16][8]; // varies + uint16 Xlist[31*8+2]; // varies + uint8 sorted_order[31*8+2]; + uint8 neighbors[31*8+2][2]; + uint8 floor1_multiplier; + uint8 rangebits; + int values; +} Floor1; + +typedef union +{ + Floor0 floor0; + Floor1 floor1; +} Floor; + +typedef struct +{ + uint32 begin, end; + uint32 part_size; + uint8 classifications; + uint8 classbook; + uint8 **classdata; + int16 (*residue_books)[8]; +} Residue; + +typedef struct +{ + uint8 magnitude; + uint8 angle; + uint8 mux; +} MappingChannel; + +typedef struct +{ + uint16 coupling_steps; + MappingChannel *chan; + uint8 submaps; + uint8 submap_floor[15]; // varies + uint8 submap_residue[15]; // varies +} Mapping; + +typedef struct +{ + uint8 blockflag; + uint8 mapping; + uint16 windowtype; + uint16 transformtype; +} Mode; + +typedef struct +{ + uint32 goal_crc; // expected crc if match + int bytes_left; // bytes left in packet + uint32 crc_so_far; // running crc + int bytes_done; // bytes processed in _current_ chunk + uint32 sample_loc; // granule pos encoded in page +} CRCscan; + +typedef struct +{ + uint32 page_start, page_end; + uint32 last_decoded_sample; +} ProbedPage; + +struct stb_vorbis +{ + // user-accessible info + unsigned int sample_rate; + int channels; + + unsigned int setup_memory_required; + unsigned int temp_memory_required; + unsigned int setup_temp_memory_required; + + // input config +#ifndef STB_VORBIS_NO_STDIO + FILE *f; + uint32 f_start; + int close_on_free; +#endif + + uint8 *stream; + uint8 *stream_start; + uint8 *stream_end; + + uint32 stream_len; + + uint8 push_mode; + + uint32 first_audio_page_offset; + + ProbedPage p_first, p_last; + + // memory management + stb_vorbis_alloc alloc; + int setup_offset; + int temp_offset; + + // run-time results + int eof; + enum STBVorbisError error; + + // user-useful data + + // header info + int blocksize[2]; + int blocksize_0, blocksize_1; + int codebook_count; + Codebook *codebooks; + int floor_count; + uint16 floor_types[64]; // varies + Floor *floor_config; + int residue_count; + uint16 residue_types[64]; // varies + Residue *residue_config; + int mapping_count; + Mapping *mapping; + int mode_count; + Mode mode_config[64]; // varies + + uint32 total_samples; + + // decode buffer + float *channel_buffers[STB_VORBIS_MAX_CHANNELS]; + float *outputs [STB_VORBIS_MAX_CHANNELS]; + + float *previous_window[STB_VORBIS_MAX_CHANNELS]; + int previous_length; + + #ifndef STB_VORBIS_NO_DEFER_FLOOR + int16 *finalY[STB_VORBIS_MAX_CHANNELS]; + #else + float *floor_buffers[STB_VORBIS_MAX_CHANNELS]; + #endif + + uint32 current_loc; // sample location of next frame to decode + int current_loc_valid; + + // per-blocksize precomputed data + + // twiddle factors + float *A[2],*B[2],*C[2]; + float *window[2]; + uint16 *bit_reverse[2]; + + // current page/packet/segment streaming info + uint32 serial; // stream serial number for verification + int last_page; + int segment_count; + uint8 segments[255]; + uint8 page_flag; + uint8 bytes_in_seg; + uint8 first_decode; + int next_seg; + int last_seg; // flag that we're on the last segment + int last_seg_which; // what was the segment number of the last seg? + uint32 acc; + int valid_bits; + int packet_bytes; + int end_seg_with_known_loc; + uint32 known_loc_for_packet; + int discard_samples_deferred; + uint32 samples_output; + + // push mode scanning + int page_crc_tests; // only in push_mode: number of tests active; -1 if not searching +#ifndef STB_VORBIS_NO_PUSHDATA_API + CRCscan scan[STB_VORBIS_PUSHDATA_CRC_COUNT]; +#endif + + // sample-access + int channel_buffer_start; + int channel_buffer_end; +}; + +#if defined(STB_VORBIS_NO_PUSHDATA_API) + #define IS_PUSH_MODE(f) FALSE +#elif defined(STB_VORBIS_NO_PULLDATA_API) + #define IS_PUSH_MODE(f) TRUE +#else + #define IS_PUSH_MODE(f) ((f)->push_mode) +#endif + +typedef struct stb_vorbis vorb; + +static int error(vorb *f, enum STBVorbisError e) +{ + f->error = e; + if (!f->eof && e != VORBIS_need_more_data) { + f->error=e; // breakpoint for debugging + } + return 0; +} + + +// these functions are used for allocating temporary memory +// while decoding. if you can afford the stack space, use +// alloca(); otherwise, provide a temp buffer and it will +// allocate out of those. + +#define array_size_required(count,size) (count*(sizeof(void *)+(size))) + +#define temp_alloc(f,size) (f->alloc.alloc_buffer ? setup_temp_malloc(f,size) : alloca(size)) +#ifdef dealloca +#define temp_free(f,p) (f->alloc.alloc_buffer ? 0 : dealloca(size)) +#else +#define temp_free(f,p) 0 +#endif +#define temp_alloc_save(f) ((f)->temp_offset) +#define temp_alloc_restore(f,p) ((f)->temp_offset = (p)) + +#define temp_block_array(f,count,size) make_block_array(temp_alloc(f,array_size_required(count,size)), count, size) + +// given a sufficiently large block of memory, make an array of pointers to subblocks of it +static void *make_block_array(void *mem, int count, int size) +{ + int i; + void ** p = (void **) mem; + char *q = (char *) (p + count); + for (i=0; i < count; ++i) { + p[i] = q; + q += size; + } + return p; +} + +static void *setup_malloc(vorb *f, int sz) +{ + sz = (sz+3) & ~3; + f->setup_memory_required += sz; + if (f->alloc.alloc_buffer) { + void *p = (char *) f->alloc.alloc_buffer + f->setup_offset; + if (f->setup_offset + sz > f->temp_offset) return NULL; + f->setup_offset += sz; + return p; + } + return sz ? malloc(sz) : NULL; +} + +static void setup_free(vorb *f, void *p) +{ + if (f->alloc.alloc_buffer) return; // do nothing; setup mem is a stack + free(p); +} + +static void *setup_temp_malloc(vorb *f, int sz) +{ + sz = (sz+3) & ~3; + if (f->alloc.alloc_buffer) { + if (f->temp_offset - sz < f->setup_offset) return NULL; + f->temp_offset -= sz; + return (char *) f->alloc.alloc_buffer + f->temp_offset; + } + return malloc(sz); +} + +static void setup_temp_free(vorb *f, void *p, int sz) +{ + if (f->alloc.alloc_buffer) { + f->temp_offset += (sz+3)&~3; + return; + } + free(p); +} + +#define CRC32_POLY 0x04c11db7 // from spec + +static uint32 crc_table[256]; +static void crc32_init(void) +{ + int i,j; + uint32 s; + for(i=0; i < 256; i++) { + for (s=(uint32) i << 24, j=0; j < 8; ++j) + s = (s << 1) ^ (s >= (1U<<31) ? CRC32_POLY : 0); + crc_table[i] = s; + } +} + +static __forceinline uint32 crc32_update(uint32 crc, uint8 byte) +{ + return (crc << 8) ^ crc_table[byte ^ (crc >> 24)]; +} + + +// used in setup, and for huffman that doesn't go fast path +static unsigned int bit_reverse(unsigned int n) +{ + n = ((n & 0xAAAAAAAA) >> 1) | ((n & 0x55555555) << 1); + n = ((n & 0xCCCCCCCC) >> 2) | ((n & 0x33333333) << 2); + n = ((n & 0xF0F0F0F0) >> 4) | ((n & 0x0F0F0F0F) << 4); + n = ((n & 0xFF00FF00) >> 8) | ((n & 0x00FF00FF) << 8); + return (n >> 16) | (n << 16); +} + +static float square(float x) +{ + return x*x; +} + +// this is a weird definition of log2() for which log2(1) = 1, log2(2) = 2, log2(4) = 3 +// as required by the specification. fast(?) implementation from stb.h +// @OPTIMIZE: called multiple times per-packet with "constants"; move to setup +static int ilog(int32 n) +{ + static signed char log2_4[16] = { 0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4 }; + + // 2 compares if n < 16, 3 compares otherwise (4 if signed or n > 1<<29) + if (n < (1 << 14)) + if (n < (1 << 4)) return 0 + log2_4[n ]; + else if (n < (1 << 9)) return 5 + log2_4[n >> 5]; + else return 10 + log2_4[n >> 10]; + else if (n < (1 << 24)) + if (n < (1 << 19)) return 15 + log2_4[n >> 15]; + else return 20 + log2_4[n >> 20]; + else if (n < (1 << 29)) return 25 + log2_4[n >> 25]; + else if (n < (1 << 31)) return 30 + log2_4[n >> 30]; + else return 0; // signed n returns 0 +} + +#ifndef M_PI + #define M_PI 3.14159265358979323846264f // from CRC +#endif + +// code length assigned to a value with no huffman encoding +#define NO_CODE 255 + +/////////////////////// LEAF SETUP FUNCTIONS ////////////////////////// +// +// these functions are only called at setup, and only a few times +// per file + +static float float32_unpack(uint32 x) +{ + // from the specification + uint32 mantissa = x & 0x1fffff; + uint32 sign = x & 0x80000000; + uint32 exp = (x & 0x7fe00000) >> 21; + double res = sign ? -(double)mantissa : (double)mantissa; + return (float) ldexp((float)res, exp-788); +} + + +// zlib & jpeg huffman tables assume that the output symbols +// can either be arbitrarily arranged, or have monotonically +// increasing frequencies--they rely on the lengths being sorted; +// this makes for a very simple generation algorithm. +// vorbis allows a huffman table with non-sorted lengths. This +// requires a more sophisticated construction, since symbols in +// order do not map to huffman codes "in order". +static void add_entry(Codebook *c, uint32 huff_code, int symbol, int count, int len, uint32 *values) +{ + if (!c->sparse) { + c->codewords [symbol] = huff_code; + } else { + c->codewords [count] = huff_code; + c->codeword_lengths[count] = len; + values [count] = symbol; + } +} + +static int compute_codewords(Codebook *c, uint8 *len, int n, uint32 *values) +{ + int i,k,m=0; + uint32 available[32]; + + memset(available, 0, sizeof(available)); + // find the first entry + for (k=0; k < n; ++k) if (len[k] < NO_CODE) break; + if (k == n) { assert(c->sorted_entries == 0); return TRUE; } + // add to the list + add_entry(c, 0, k, m++, len[k], values); + // add all available leaves + for (i=1; i <= len[k]; ++i) + available[i] = 1U << (32-i); + // note that the above code treats the first case specially, + // but it's really the same as the following code, so they + // could probably be combined (except the initial code is 0, + // and I use 0 in available[] to mean 'empty') + for (i=k+1; i < n; ++i) { + uint32 res; + int z = len[i], y; + if (z == NO_CODE) continue; + // find lowest available leaf (should always be earliest, + // which is what the specification calls for) + // note that this property, and the fact we can never have + // more than one free leaf at a given level, isn't totally + // trivial to prove, but it seems true and the assert never + // fires, so! + while (z > 0 && !available[z]) --z; + if (z == 0) { return FALSE; } + res = available[z]; + assert(z >= 0 && z < 32); + available[z] = 0; + add_entry(c, bit_reverse(res), i, m++, len[i], values); + // propogate availability up the tree + if (z != len[i]) { + assert(len[i] >= 0 && len[i] < 32); + for (y=len[i]; y > z; --y) { + assert(available[y] == 0); + available[y] = res + (1 << (32-y)); + } + } + } + return TRUE; +} + +// accelerated huffman table allows fast O(1) match of all symbols +// of length <= STB_VORBIS_FAST_HUFFMAN_LENGTH +static void compute_accelerated_huffman(Codebook *c) +{ + int i, len; + for (i=0; i < FAST_HUFFMAN_TABLE_SIZE; ++i) + c->fast_huffman[i] = -1; + + len = c->sparse ? c->sorted_entries : c->entries; + #ifdef STB_VORBIS_FAST_HUFFMAN_SHORT + if (len > 32767) len = 32767; // largest possible value we can encode! + #endif + for (i=0; i < len; ++i) { + if (c->codeword_lengths[i] <= STB_VORBIS_FAST_HUFFMAN_LENGTH) { + uint32 z = c->sparse ? bit_reverse(c->sorted_codewords[i]) : c->codewords[i]; + // set table entries for all bit combinations in the higher bits + while (z < FAST_HUFFMAN_TABLE_SIZE) { + c->fast_huffman[z] = i; + z += 1 << c->codeword_lengths[i]; + } + } + } +} + +#ifdef _MSC_VER +#define STBV_CDECL __cdecl +#else +#define STBV_CDECL +#endif + +static int STBV_CDECL uint32_compare(const void *p, const void *q) +{ + uint32 x = * (uint32 *) p; + uint32 y = * (uint32 *) q; + return x < y ? -1 : x > y; +} + +static int include_in_sort(Codebook *c, uint8 len) +{ + if (c->sparse) { assert(len != NO_CODE); return TRUE; } + if (len == NO_CODE) return FALSE; + if (len > STB_VORBIS_FAST_HUFFMAN_LENGTH) return TRUE; + return FALSE; +} + +// if the fast table above doesn't work, we want to binary +// search them... need to reverse the bits +static void compute_sorted_huffman(Codebook *c, uint8 *lengths, uint32 *values) +{ + int i, len; + // build a list of all the entries + // OPTIMIZATION: don't include the short ones, since they'll be caught by FAST_HUFFMAN. + // this is kind of a frivolous optimization--I don't see any performance improvement, + // but it's like 4 extra lines of code, so. + if (!c->sparse) { + int k = 0; + for (i=0; i < c->entries; ++i) + if (include_in_sort(c, lengths[i])) + c->sorted_codewords[k++] = bit_reverse(c->codewords[i]); + assert(k == c->sorted_entries); + } else { + for (i=0; i < c->sorted_entries; ++i) + c->sorted_codewords[i] = bit_reverse(c->codewords[i]); + } + + qsort(c->sorted_codewords, c->sorted_entries, sizeof(c->sorted_codewords[0]), uint32_compare); + c->sorted_codewords[c->sorted_entries] = 0xffffffff; + + len = c->sparse ? c->sorted_entries : c->entries; + // now we need to indicate how they correspond; we could either + // #1: sort a different data structure that says who they correspond to + // #2: for each sorted entry, search the original list to find who corresponds + // #3: for each original entry, find the sorted entry + // #1 requires extra storage, #2 is slow, #3 can use binary search! + for (i=0; i < len; ++i) { + int huff_len = c->sparse ? lengths[values[i]] : lengths[i]; + if (include_in_sort(c,huff_len)) { + uint32 code = bit_reverse(c->codewords[i]); + int x=0, n=c->sorted_entries; + while (n > 1) { + // invariant: sc[x] <= code < sc[x+n] + int m = x + (n >> 1); + if (c->sorted_codewords[m] <= code) { + x = m; + n -= (n>>1); + } else { + n >>= 1; + } + } + assert(c->sorted_codewords[x] == code); + if (c->sparse) { + c->sorted_values[x] = values[i]; + c->codeword_lengths[x] = huff_len; + } else { + c->sorted_values[x] = i; + } + } + } +} + +// only run while parsing the header (3 times) +static int vorbis_validate(uint8 *data) +{ + static uint8 vorbis[6] = { 'v', 'o', 'r', 'b', 'i', 's' }; + return memcmp(data, vorbis, 6) == 0; +} + +// called from setup only, once per code book +// (formula implied by specification) +static int lookup1_values(int entries, int dim) +{ + int r = (int) floor(exp((float) log((float) entries) / dim)); + if ((int) floor(pow((float) r+1, dim)) <= entries) // (int) cast for MinGW warning; + ++r; // floor() to avoid _ftol() when non-CRT + assert(pow((float) r+1, dim) > entries); + assert((int) floor(pow((float) r, dim)) <= entries); // (int),floor() as above + return r; +} + +// called twice per file +static void compute_twiddle_factors(int n, float *A, float *B, float *C) +{ + int n4 = n >> 2, n8 = n >> 3; + int k,k2; + + for (k=k2=0; k < n4; ++k,k2+=2) { + A[k2 ] = (float) cos(4*k*M_PI/n); + A[k2+1] = (float) -sin(4*k*M_PI/n); + B[k2 ] = (float) cos((k2+1)*M_PI/n/2) * 0.5f; + B[k2+1] = (float) sin((k2+1)*M_PI/n/2) * 0.5f; + } + for (k=k2=0; k < n8; ++k,k2+=2) { + C[k2 ] = (float) cos(2*(k2+1)*M_PI/n); + C[k2+1] = (float) -sin(2*(k2+1)*M_PI/n); + } +} + +static void compute_window(int n, float *window) +{ + int n2 = n >> 1, i; + for (i=0; i < n2; ++i) + window[i] = (float) sin(0.5 * M_PI * square((float) sin((i - 0 + 0.5) / n2 * 0.5 * M_PI))); +} + +static void compute_bitreverse(int n, uint16 *rev) +{ + int ld = ilog(n) - 1; // ilog is off-by-one from normal definitions + int i, n8 = n >> 3; + for (i=0; i < n8; ++i) + rev[i] = (bit_reverse(i) >> (32-ld+3)) << 2; +} + +static int init_blocksize(vorb *f, int b, int n) +{ + int n2 = n >> 1, n4 = n >> 2, n8 = n >> 3; + f->A[b] = (float *) setup_malloc(f, sizeof(float) * n2); + f->B[b] = (float *) setup_malloc(f, sizeof(float) * n2); + f->C[b] = (float *) setup_malloc(f, sizeof(float) * n4); + if (!f->A[b] || !f->B[b] || !f->C[b]) return error(f, VORBIS_outofmem); + compute_twiddle_factors(n, f->A[b], f->B[b], f->C[b]); + f->window[b] = (float *) setup_malloc(f, sizeof(float) * n2); + if (!f->window[b]) return error(f, VORBIS_outofmem); + compute_window(n, f->window[b]); + f->bit_reverse[b] = (uint16 *) setup_malloc(f, sizeof(uint16) * n8); + if (!f->bit_reverse[b]) return error(f, VORBIS_outofmem); + compute_bitreverse(n, f->bit_reverse[b]); + return TRUE; +} + +static void neighbors(uint16 *x, int n, int *plow, int *phigh) +{ + int low = -1; + int high = 65536; + int i; + for (i=0; i < n; ++i) { + if (x[i] > low && x[i] < x[n]) { *plow = i; low = x[i]; } + if (x[i] < high && x[i] > x[n]) { *phigh = i; high = x[i]; } + } +} + +// this has been repurposed so y is now the original index instead of y +typedef struct +{ + uint16 x,y; +} Point; + +static int STBV_CDECL point_compare(const void *p, const void *q) +{ + Point *a = (Point *) p; + Point *b = (Point *) q; + return a->x < b->x ? -1 : a->x > b->x; +} + +// +/////////////////////// END LEAF SETUP FUNCTIONS ////////////////////////// + + +#if defined(STB_VORBIS_NO_STDIO) + #define USE_MEMORY(z) TRUE +#else + #define USE_MEMORY(z) ((z)->stream) +#endif + +static uint8 get8(vorb *z) +{ + if (USE_MEMORY(z)) { + if (z->stream >= z->stream_end) { z->eof = TRUE; return 0; } + return *z->stream++; + } + + #ifndef STB_VORBIS_NO_STDIO + { + int c = fgetc(z->f); + if (c == EOF) { z->eof = TRUE; return 0; } + return c; + } + #endif +} + +static uint32 get32(vorb *f) +{ + uint32 x; + x = get8(f); + x += get8(f) << 8; + x += get8(f) << 16; + x += (uint32) get8(f) << 24; + return x; +} + +static int getn(vorb *z, uint8 *data, int n) +{ + if (USE_MEMORY(z)) { + if (z->stream+n > z->stream_end) { z->eof = 1; return 0; } + memcpy(data, z->stream, n); + z->stream += n; + return 1; + } + + #ifndef STB_VORBIS_NO_STDIO + if (fread(data, n, 1, z->f) == 1) + return 1; + else { + z->eof = 1; + return 0; + } + #endif +} + +static void skip(vorb *z, int n) +{ + if (USE_MEMORY(z)) { + z->stream += n; + if (z->stream >= z->stream_end) z->eof = 1; + return; + } + #ifndef STB_VORBIS_NO_STDIO + { + long x = ftell(z->f); + fseek(z->f, x+n, SEEK_SET); + } + #endif +} + +static int set_file_offset(stb_vorbis *f, unsigned int loc) +{ + #ifndef STB_VORBIS_NO_PUSHDATA_API + if (f->push_mode) return 0; + #endif + f->eof = 0; + if (USE_MEMORY(f)) { + if (f->stream_start + loc >= f->stream_end || f->stream_start + loc < f->stream_start) { + f->stream = f->stream_end; + f->eof = 1; + return 0; + } else { + f->stream = f->stream_start + loc; + return 1; + } + } + #ifndef STB_VORBIS_NO_STDIO + if (loc + f->f_start < loc || loc >= 0x80000000) { + loc = 0x7fffffff; + f->eof = 1; + } else { + loc += f->f_start; + } + if (!fseek(f->f, loc, SEEK_SET)) + return 1; + f->eof = 1; + fseek(f->f, f->f_start, SEEK_END); + return 0; + #endif +} + + +static uint8 ogg_page_header[4] = { 0x4f, 0x67, 0x67, 0x53 }; + +static int capture_pattern(vorb *f) +{ + if (0x4f != get8(f)) return FALSE; + if (0x67 != get8(f)) return FALSE; + if (0x67 != get8(f)) return FALSE; + if (0x53 != get8(f)) return FALSE; + return TRUE; +} + +#define PAGEFLAG_continued_packet 1 +#define PAGEFLAG_first_page 2 +#define PAGEFLAG_last_page 4 + +static int start_page_no_capturepattern(vorb *f) +{ + uint32 loc0,loc1,n; + // stream structure version + if (0 != get8(f)) return error(f, VORBIS_invalid_stream_structure_version); + // header flag + f->page_flag = get8(f); + // absolute granule position + loc0 = get32(f); + loc1 = get32(f); + // @TODO: validate loc0,loc1 as valid positions? + // stream serial number -- vorbis doesn't interleave, so discard + get32(f); + //if (f->serial != get32(f)) return error(f, VORBIS_incorrect_stream_serial_number); + // page sequence number + n = get32(f); + f->last_page = n; + // CRC32 + get32(f); + // page_segments + f->segment_count = get8(f); + if (!getn(f, f->segments, f->segment_count)) + return error(f, VORBIS_unexpected_eof); + // assume we _don't_ know any the sample position of any segments + f->end_seg_with_known_loc = -2; + if (loc0 != ~0U || loc1 != ~0U) { + int i; + // determine which packet is the last one that will complete + for (i=f->segment_count-1; i >= 0; --i) + if (f->segments[i] < 255) + break; + // 'i' is now the index of the _last_ segment of a packet that ends + if (i >= 0) { + f->end_seg_with_known_loc = i; + f->known_loc_for_packet = loc0; + } + } + if (f->first_decode) { + int i,len; + ProbedPage p; + len = 0; + for (i=0; i < f->segment_count; ++i) + len += f->segments[i]; + len += 27 + f->segment_count; + p.page_start = f->first_audio_page_offset; + p.page_end = p.page_start + len; + p.last_decoded_sample = loc0; + f->p_first = p; + } + f->next_seg = 0; + return TRUE; +} + +static int start_page(vorb *f) +{ + if (!capture_pattern(f)) return error(f, VORBIS_missing_capture_pattern); + return start_page_no_capturepattern(f); +} + +static int start_packet(vorb *f) +{ + while (f->next_seg == -1) { + if (!start_page(f)) return FALSE; + if (f->page_flag & PAGEFLAG_continued_packet) + return error(f, VORBIS_continued_packet_flag_invalid); + } + f->last_seg = FALSE; + f->valid_bits = 0; + f->packet_bytes = 0; + f->bytes_in_seg = 0; + // f->next_seg is now valid + return TRUE; +} + +static int maybe_start_packet(vorb *f) +{ + if (f->next_seg == -1) { + int x = get8(f); + if (f->eof) return FALSE; // EOF at page boundary is not an error! + if (0x4f != x ) return error(f, VORBIS_missing_capture_pattern); + if (0x67 != get8(f)) return error(f, VORBIS_missing_capture_pattern); + if (0x67 != get8(f)) return error(f, VORBIS_missing_capture_pattern); + if (0x53 != get8(f)) return error(f, VORBIS_missing_capture_pattern); + if (!start_page_no_capturepattern(f)) return FALSE; + if (f->page_flag & PAGEFLAG_continued_packet) { + // set up enough state that we can read this packet if we want, + // e.g. during recovery + f->last_seg = FALSE; + f->bytes_in_seg = 0; + return error(f, VORBIS_continued_packet_flag_invalid); + } + } + return start_packet(f); +} + +static int next_segment(vorb *f) +{ + int len; + if (f->last_seg) return 0; + if (f->next_seg == -1) { + f->last_seg_which = f->segment_count-1; // in case start_page fails + if (!start_page(f)) { f->last_seg = 1; return 0; } + if (!(f->page_flag & PAGEFLAG_continued_packet)) return error(f, VORBIS_continued_packet_flag_invalid); + } + len = f->segments[f->next_seg++]; + if (len < 255) { + f->last_seg = TRUE; + f->last_seg_which = f->next_seg-1; + } + if (f->next_seg >= f->segment_count) + f->next_seg = -1; + assert(f->bytes_in_seg == 0); + f->bytes_in_seg = len; + return len; +} + +#define EOP (-1) +#define INVALID_BITS (-1) + +static int get8_packet_raw(vorb *f) +{ + if (!f->bytes_in_seg) { // CLANG! + if (f->last_seg) return EOP; + else if (!next_segment(f)) return EOP; + } + assert(f->bytes_in_seg > 0); + --f->bytes_in_seg; + ++f->packet_bytes; + return get8(f); +} + +static int get8_packet(vorb *f) +{ + int x = get8_packet_raw(f); + f->valid_bits = 0; + return x; +} + +static void flush_packet(vorb *f) +{ + while (get8_packet_raw(f) != EOP); +} + +// @OPTIMIZE: this is the secondary bit decoder, so it's probably not as important +// as the huffman decoder? +static uint32 get_bits(vorb *f, int n) +{ + uint32 z; + + if (f->valid_bits < 0) return 0; + if (f->valid_bits < n) { + if (n > 24) { + // the accumulator technique below would not work correctly in this case + z = get_bits(f, 24); + z += get_bits(f, n-24) << 24; + return z; + } + if (f->valid_bits == 0) f->acc = 0; + while (f->valid_bits < n) { + int z = get8_packet_raw(f); + if (z == EOP) { + f->valid_bits = INVALID_BITS; + return 0; + } + f->acc += z << f->valid_bits; + f->valid_bits += 8; + } + } + if (f->valid_bits < 0) return 0; + z = f->acc & ((1 << n)-1); + f->acc >>= n; + f->valid_bits -= n; + return z; +} + +// @OPTIMIZE: primary accumulator for huffman +// expand the buffer to as many bits as possible without reading off end of packet +// it might be nice to allow f->valid_bits and f->acc to be stored in registers, +// e.g. cache them locally and decode locally +static __forceinline void prep_huffman(vorb *f) +{ + if (f->valid_bits <= 24) { + if (f->valid_bits == 0) f->acc = 0; + do { + int z; + if (f->last_seg && !f->bytes_in_seg) return; + z = get8_packet_raw(f); + if (z == EOP) return; + f->acc += (unsigned) z << f->valid_bits; + f->valid_bits += 8; + } while (f->valid_bits <= 24); + } +} + +enum +{ + VORBIS_packet_id = 1, + VORBIS_packet_comment = 3, + VORBIS_packet_setup = 5 +}; + +static int codebook_decode_scalar_raw(vorb *f, Codebook *c) +{ + int i; + prep_huffman(f); + + if (c->codewords == NULL && c->sorted_codewords == NULL) + return -1; + + // cases to use binary search: sorted_codewords && !c->codewords + // sorted_codewords && c->entries > 8 + if (c->entries > 8 ? c->sorted_codewords!=NULL : !c->codewords) { + // binary search + uint32 code = bit_reverse(f->acc); + int x=0, n=c->sorted_entries, len; + + while (n > 1) { + // invariant: sc[x] <= code < sc[x+n] + int m = x + (n >> 1); + if (c->sorted_codewords[m] <= code) { + x = m; + n -= (n>>1); + } else { + n >>= 1; + } + } + // x is now the sorted index + if (!c->sparse) x = c->sorted_values[x]; + // x is now sorted index if sparse, or symbol otherwise + len = c->codeword_lengths[x]; + if (f->valid_bits >= len) { + f->acc >>= len; + f->valid_bits -= len; + return x; + } + + f->valid_bits = 0; + return -1; + } + + // if small, linear search + assert(!c->sparse); + for (i=0; i < c->entries; ++i) { + if (c->codeword_lengths[i] == NO_CODE) continue; + if (c->codewords[i] == (f->acc & ((1 << c->codeword_lengths[i])-1))) { + if (f->valid_bits >= c->codeword_lengths[i]) { + f->acc >>= c->codeword_lengths[i]; + f->valid_bits -= c->codeword_lengths[i]; + return i; + } + f->valid_bits = 0; + return -1; + } + } + + error(f, VORBIS_invalid_stream); + f->valid_bits = 0; + return -1; +} + +#ifndef STB_VORBIS_NO_INLINE_DECODE + +#define DECODE_RAW(var, f,c) \ + if (f->valid_bits < STB_VORBIS_FAST_HUFFMAN_LENGTH) \ + prep_huffman(f); \ + var = f->acc & FAST_HUFFMAN_TABLE_MASK; \ + var = c->fast_huffman[var]; \ + if (var >= 0) { \ + int n = c->codeword_lengths[var]; \ + f->acc >>= n; \ + f->valid_bits -= n; \ + if (f->valid_bits < 0) { f->valid_bits = 0; var = -1; } \ + } else { \ + var = codebook_decode_scalar_raw(f,c); \ + } + +#else + +static int codebook_decode_scalar(vorb *f, Codebook *c) +{ + int i; + if (f->valid_bits < STB_VORBIS_FAST_HUFFMAN_LENGTH) + prep_huffman(f); + // fast huffman table lookup + i = f->acc & FAST_HUFFMAN_TABLE_MASK; + i = c->fast_huffman[i]; + if (i >= 0) { + f->acc >>= c->codeword_lengths[i]; + f->valid_bits -= c->codeword_lengths[i]; + if (f->valid_bits < 0) { f->valid_bits = 0; return -1; } + return i; + } + return codebook_decode_scalar_raw(f,c); +} + +#define DECODE_RAW(var,f,c) var = codebook_decode_scalar(f,c); + +#endif + +#define DECODE(var,f,c) \ + DECODE_RAW(var,f,c) \ + if (c->sparse) var = c->sorted_values[var]; + +#ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK + #define DECODE_VQ(var,f,c) DECODE_RAW(var,f,c) +#else + #define DECODE_VQ(var,f,c) DECODE(var,f,c) +#endif + + + + + + +// CODEBOOK_ELEMENT_FAST is an optimization for the CODEBOOK_FLOATS case +// where we avoid one addition +#define CODEBOOK_ELEMENT(c,off) (c->multiplicands[off]) +#define CODEBOOK_ELEMENT_FAST(c,off) (c->multiplicands[off]) +#define CODEBOOK_ELEMENT_BASE(c) (0) + +static int codebook_decode_start(vorb *f, Codebook *c) +{ + int z = -1; + + // type 0 is only legal in a scalar context + if (c->lookup_type == 0) + error(f, VORBIS_invalid_stream); + else { + DECODE_VQ(z,f,c); + if (c->sparse) assert(z < c->sorted_entries); + if (z < 0) { // check for EOP + if (!f->bytes_in_seg) + if (f->last_seg) + return z; + error(f, VORBIS_invalid_stream); + } + } + return z; +} + +static int codebook_decode(vorb *f, Codebook *c, float *output, int len) +{ + int i,z = codebook_decode_start(f,c); + if (z < 0) return FALSE; + if (len > c->dimensions) len = c->dimensions; + +#ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK + if (c->lookup_type == 1) { + float last = CODEBOOK_ELEMENT_BASE(c); + int div = 1; + for (i=0; i < len; ++i) { + int off = (z / div) % c->lookup_values; + float val = CODEBOOK_ELEMENT_FAST(c,off) + last; + output[i] += val; + if (c->sequence_p) last = val + c->minimum_value; + div *= c->lookup_values; + } + return TRUE; + } +#endif + + z *= c->dimensions; + if (c->sequence_p) { + float last = CODEBOOK_ELEMENT_BASE(c); + for (i=0; i < len; ++i) { + float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last; + output[i] += val; + last = val + c->minimum_value; + } + } else { + float last = CODEBOOK_ELEMENT_BASE(c); + for (i=0; i < len; ++i) { + output[i] += CODEBOOK_ELEMENT_FAST(c,z+i) + last; + } + } + + return TRUE; +} + +static int codebook_decode_step(vorb *f, Codebook *c, float *output, int len, int step) +{ + int i,z = codebook_decode_start(f,c); + float last = CODEBOOK_ELEMENT_BASE(c); + if (z < 0) return FALSE; + if (len > c->dimensions) len = c->dimensions; + +#ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK + if (c->lookup_type == 1) { + int div = 1; + for (i=0; i < len; ++i) { + int off = (z / div) % c->lookup_values; + float val = CODEBOOK_ELEMENT_FAST(c,off) + last; + output[i*step] += val; + if (c->sequence_p) last = val; + div *= c->lookup_values; + } + return TRUE; + } +#endif + + z *= c->dimensions; + for (i=0; i < len; ++i) { + float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last; + output[i*step] += val; + if (c->sequence_p) last = val; + } + + return TRUE; +} + +static int codebook_decode_deinterleave_repeat(vorb *f, Codebook *c, float **outputs, int ch, int *c_inter_p, int *p_inter_p, int len, int total_decode) +{ + int c_inter = *c_inter_p; + int p_inter = *p_inter_p; + int i,z, effective = c->dimensions; + + // type 0 is only legal in a scalar context + if (c->lookup_type == 0) return error(f, VORBIS_invalid_stream); + + while (total_decode > 0) { + float last = CODEBOOK_ELEMENT_BASE(c); + DECODE_VQ(z,f,c); + #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK + assert(!c->sparse || z < c->sorted_entries); + #endif + if (z < 0) { + if (!f->bytes_in_seg) + if (f->last_seg) return FALSE; + return error(f, VORBIS_invalid_stream); + } + + // if this will take us off the end of the buffers, stop short! + // we check by computing the length of the virtual interleaved + // buffer (len*ch), our current offset within it (p_inter*ch)+(c_inter), + // and the length we'll be using (effective) + if (c_inter + p_inter*ch + effective > len * ch) { + effective = len*ch - (p_inter*ch - c_inter); + } + + #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK + if (c->lookup_type == 1) { + int div = 1; + for (i=0; i < effective; ++i) { + int off = (z / div) % c->lookup_values; + float val = CODEBOOK_ELEMENT_FAST(c,off) + last; + if (outputs[c_inter]) + outputs[c_inter][p_inter] += val; + if (++c_inter == ch) { c_inter = 0; ++p_inter; } + if (c->sequence_p) last = val; + div *= c->lookup_values; + } + } else + #endif + { + z *= c->dimensions; + if (c->sequence_p) { + for (i=0; i < effective; ++i) { + float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last; + if (outputs[c_inter]) + outputs[c_inter][p_inter] += val; + if (++c_inter == ch) { c_inter = 0; ++p_inter; } + last = val; + } + } else { + for (i=0; i < effective; ++i) { + float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last; + if (outputs[c_inter]) + outputs[c_inter][p_inter] += val; + if (++c_inter == ch) { c_inter = 0; ++p_inter; } + } + } + } + + total_decode -= effective; + } + *c_inter_p = c_inter; + *p_inter_p = p_inter; + return TRUE; +} + +static int predict_point(int x, int x0, int x1, int y0, int y1) +{ + int dy = y1 - y0; + int adx = x1 - x0; + // @OPTIMIZE: force int division to round in the right direction... is this necessary on x86? + int err = abs(dy) * (x - x0); + int off = err / adx; + return dy < 0 ? y0 - off : y0 + off; +} + +// the following table is block-copied from the specification +static float inverse_db_table[256] = +{ + 1.0649863e-07f, 1.1341951e-07f, 1.2079015e-07f, 1.2863978e-07f, + 1.3699951e-07f, 1.4590251e-07f, 1.5538408e-07f, 1.6548181e-07f, + 1.7623575e-07f, 1.8768855e-07f, 1.9988561e-07f, 2.1287530e-07f, + 2.2670913e-07f, 2.4144197e-07f, 2.5713223e-07f, 2.7384213e-07f, + 2.9163793e-07f, 3.1059021e-07f, 3.3077411e-07f, 3.5226968e-07f, + 3.7516214e-07f, 3.9954229e-07f, 4.2550680e-07f, 4.5315863e-07f, + 4.8260743e-07f, 5.1396998e-07f, 5.4737065e-07f, 5.8294187e-07f, + 6.2082472e-07f, 6.6116941e-07f, 7.0413592e-07f, 7.4989464e-07f, + 7.9862701e-07f, 8.5052630e-07f, 9.0579828e-07f, 9.6466216e-07f, + 1.0273513e-06f, 1.0941144e-06f, 1.1652161e-06f, 1.2409384e-06f, + 1.3215816e-06f, 1.4074654e-06f, 1.4989305e-06f, 1.5963394e-06f, + 1.7000785e-06f, 1.8105592e-06f, 1.9282195e-06f, 2.0535261e-06f, + 2.1869758e-06f, 2.3290978e-06f, 2.4804557e-06f, 2.6416497e-06f, + 2.8133190e-06f, 2.9961443e-06f, 3.1908506e-06f, 3.3982101e-06f, + 3.6190449e-06f, 3.8542308e-06f, 4.1047004e-06f, 4.3714470e-06f, + 4.6555282e-06f, 4.9580707e-06f, 5.2802740e-06f, 5.6234160e-06f, + 5.9888572e-06f, 6.3780469e-06f, 6.7925283e-06f, 7.2339451e-06f, + 7.7040476e-06f, 8.2047000e-06f, 8.7378876e-06f, 9.3057248e-06f, + 9.9104632e-06f, 1.0554501e-05f, 1.1240392e-05f, 1.1970856e-05f, + 1.2748789e-05f, 1.3577278e-05f, 1.4459606e-05f, 1.5399272e-05f, + 1.6400004e-05f, 1.7465768e-05f, 1.8600792e-05f, 1.9809576e-05f, + 2.1096914e-05f, 2.2467911e-05f, 2.3928002e-05f, 2.5482978e-05f, + 2.7139006e-05f, 2.8902651e-05f, 3.0780908e-05f, 3.2781225e-05f, + 3.4911534e-05f, 3.7180282e-05f, 3.9596466e-05f, 4.2169667e-05f, + 4.4910090e-05f, 4.7828601e-05f, 5.0936773e-05f, 5.4246931e-05f, + 5.7772202e-05f, 6.1526565e-05f, 6.5524908e-05f, 6.9783085e-05f, + 7.4317983e-05f, 7.9147585e-05f, 8.4291040e-05f, 8.9768747e-05f, + 9.5602426e-05f, 0.00010181521f, 0.00010843174f, 0.00011547824f, + 0.00012298267f, 0.00013097477f, 0.00013948625f, 0.00014855085f, + 0.00015820453f, 0.00016848555f, 0.00017943469f, 0.00019109536f, + 0.00020351382f, 0.00021673929f, 0.00023082423f, 0.00024582449f, + 0.00026179955f, 0.00027881276f, 0.00029693158f, 0.00031622787f, + 0.00033677814f, 0.00035866388f, 0.00038197188f, 0.00040679456f, + 0.00043323036f, 0.00046138411f, 0.00049136745f, 0.00052329927f, + 0.00055730621f, 0.00059352311f, 0.00063209358f, 0.00067317058f, + 0.00071691700f, 0.00076350630f, 0.00081312324f, 0.00086596457f, + 0.00092223983f, 0.00098217216f, 0.0010459992f, 0.0011139742f, + 0.0011863665f, 0.0012634633f, 0.0013455702f, 0.0014330129f, + 0.0015261382f, 0.0016253153f, 0.0017309374f, 0.0018434235f, + 0.0019632195f, 0.0020908006f, 0.0022266726f, 0.0023713743f, + 0.0025254795f, 0.0026895994f, 0.0028643847f, 0.0030505286f, + 0.0032487691f, 0.0034598925f, 0.0036847358f, 0.0039241906f, + 0.0041792066f, 0.0044507950f, 0.0047400328f, 0.0050480668f, + 0.0053761186f, 0.0057254891f, 0.0060975636f, 0.0064938176f, + 0.0069158225f, 0.0073652516f, 0.0078438871f, 0.0083536271f, + 0.0088964928f, 0.009474637f, 0.010090352f, 0.010746080f, + 0.011444421f, 0.012188144f, 0.012980198f, 0.013823725f, + 0.014722068f, 0.015678791f, 0.016697687f, 0.017782797f, + 0.018938423f, 0.020169149f, 0.021479854f, 0.022875735f, + 0.024362330f, 0.025945531f, 0.027631618f, 0.029427276f, + 0.031339626f, 0.033376252f, 0.035545228f, 0.037855157f, + 0.040315199f, 0.042935108f, 0.045725273f, 0.048696758f, + 0.051861348f, 0.055231591f, 0.058820850f, 0.062643361f, + 0.066714279f, 0.071049749f, 0.075666962f, 0.080584227f, + 0.085821044f, 0.091398179f, 0.097337747f, 0.10366330f, + 0.11039993f, 0.11757434f, 0.12521498f, 0.13335215f, + 0.14201813f, 0.15124727f, 0.16107617f, 0.17154380f, + 0.18269168f, 0.19456402f, 0.20720788f, 0.22067342f, + 0.23501402f, 0.25028656f, 0.26655159f, 0.28387361f, + 0.30232132f, 0.32196786f, 0.34289114f, 0.36517414f, + 0.38890521f, 0.41417847f, 0.44109412f, 0.46975890f, + 0.50028648f, 0.53279791f, 0.56742212f, 0.60429640f, + 0.64356699f, 0.68538959f, 0.72993007f, 0.77736504f, + 0.82788260f, 0.88168307f, 0.9389798f, 1.0f +}; + + +// @OPTIMIZE: if you want to replace this bresenham line-drawing routine, +// note that you must produce bit-identical output to decode correctly; +// this specific sequence of operations is specified in the spec (it's +// drawing integer-quantized frequency-space lines that the encoder +// expects to be exactly the same) +// ... also, isn't the whole point of Bresenham's algorithm to NOT +// have to divide in the setup? sigh. +#ifndef STB_VORBIS_NO_DEFER_FLOOR +#define LINE_OP(a,b) a *= b +#else +#define LINE_OP(a,b) a = b +#endif + +#ifdef STB_VORBIS_DIVIDE_TABLE +#define DIVTAB_NUMER 32 +#define DIVTAB_DENOM 64 +int8 integer_divide_table[DIVTAB_NUMER][DIVTAB_DENOM]; // 2KB +#endif + +static __forceinline void draw_line(float *output, int x0, int y0, int x1, int y1, int n) +{ + int dy = y1 - y0; + int adx = x1 - x0; + int ady = abs(dy); + int base; + int x=x0,y=y0; + int err = 0; + int sy; + +#ifdef STB_VORBIS_DIVIDE_TABLE + if (adx < DIVTAB_DENOM && ady < DIVTAB_NUMER) { + if (dy < 0) { + base = -integer_divide_table[ady][adx]; + sy = base-1; + } else { + base = integer_divide_table[ady][adx]; + sy = base+1; + } + } else { + base = dy / adx; + if (dy < 0) + sy = base - 1; + else + sy = base+1; + } +#else + base = dy / adx; + if (dy < 0) + sy = base - 1; + else + sy = base+1; +#endif + ady -= abs(base) * adx; + if (x1 > n) x1 = n; + if (x < x1) { + LINE_OP(output[x], inverse_db_table[y]); + for (++x; x < x1; ++x) { + err += ady; + if (err >= adx) { + err -= adx; + y += sy; + } else + y += base; + LINE_OP(output[x], inverse_db_table[y]); + } + } +} + +static int residue_decode(vorb *f, Codebook *book, float *target, int offset, int n, int rtype) +{ + int k; + if (rtype == 0) { + int step = n / book->dimensions; + for (k=0; k < step; ++k) + if (!codebook_decode_step(f, book, target+offset+k, n-offset-k, step)) + return FALSE; + } else { + for (k=0; k < n; ) { + if (!codebook_decode(f, book, target+offset, n-k)) + return FALSE; + k += book->dimensions; + offset += book->dimensions; + } + } + return TRUE; +} + +static void decode_residue(vorb *f, float *residue_buffers[], int ch, int n, int rn, uint8 *do_not_decode) +{ + int i,j,pass; + Residue *r = f->residue_config + rn; + int rtype = f->residue_types[rn]; + int c = r->classbook; + int classwords = f->codebooks[c].dimensions; + int n_read = r->end - r->begin; + int part_read = n_read / r->part_size; + int temp_alloc_point = temp_alloc_save(f); + #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE + uint8 ***part_classdata = (uint8 ***) temp_block_array(f,f->channels, part_read * sizeof(**part_classdata)); + #else + int **classifications = (int **) temp_block_array(f,f->channels, part_read * sizeof(**classifications)); + #endif + + CHECK(f); + + for (i=0; i < ch; ++i) + if (!do_not_decode[i]) + memset(residue_buffers[i], 0, sizeof(float) * n); + + if (rtype == 2 && ch != 1) { + for (j=0; j < ch; ++j) + if (!do_not_decode[j]) + break; + if (j == ch) + goto done; + + for (pass=0; pass < 8; ++pass) { + int pcount = 0, class_set = 0; + if (ch == 2) { + while (pcount < part_read) { + int z = r->begin + pcount*r->part_size; + int c_inter = (z & 1), p_inter = z>>1; + if (pass == 0) { + Codebook *c = f->codebooks+r->classbook; + int q; + DECODE(q,f,c); + if (q == EOP) goto done; + #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE + part_classdata[0][class_set] = r->classdata[q]; + #else + for (i=classwords-1; i >= 0; --i) { + classifications[0][i+pcount] = q % r->classifications; + q /= r->classifications; + } + #endif + } + for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) { + int z = r->begin + pcount*r->part_size; + #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE + int c = part_classdata[0][class_set][i]; + #else + int c = classifications[0][pcount]; + #endif + int b = r->residue_books[c][pass]; + if (b >= 0) { + Codebook *book = f->codebooks + b; + #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK + if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size)) + goto done; + #else + // saves 1% + if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size)) + goto done; + #endif + } else { + z += r->part_size; + c_inter = z & 1; + p_inter = z >> 1; + } + } + #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE + ++class_set; + #endif + } + } else if (ch == 1) { + while (pcount < part_read) { + int z = r->begin + pcount*r->part_size; + int c_inter = 0, p_inter = z; + if (pass == 0) { + Codebook *c = f->codebooks+r->classbook; + int q; + DECODE(q,f,c); + if (q == EOP) goto done; + #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE + part_classdata[0][class_set] = r->classdata[q]; + #else + for (i=classwords-1; i >= 0; --i) { + classifications[0][i+pcount] = q % r->classifications; + q /= r->classifications; + } + #endif + } + for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) { + int z = r->begin + pcount*r->part_size; + #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE + int c = part_classdata[0][class_set][i]; + #else + int c = classifications[0][pcount]; + #endif + int b = r->residue_books[c][pass]; + if (b >= 0) { + Codebook *book = f->codebooks + b; + if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size)) + goto done; + } else { + z += r->part_size; + c_inter = 0; + p_inter = z; + } + } + #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE + ++class_set; + #endif + } + } else { + while (pcount < part_read) { + int z = r->begin + pcount*r->part_size; + int c_inter = z % ch, p_inter = z/ch; + if (pass == 0) { + Codebook *c = f->codebooks+r->classbook; + int q; + DECODE(q,f,c); + if (q == EOP) goto done; + #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE + part_classdata[0][class_set] = r->classdata[q]; + #else + for (i=classwords-1; i >= 0; --i) { + classifications[0][i+pcount] = q % r->classifications; + q /= r->classifications; + } + #endif + } + for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) { + int z = r->begin + pcount*r->part_size; + #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE + int c = part_classdata[0][class_set][i]; + #else + int c = classifications[0][pcount]; + #endif + int b = r->residue_books[c][pass]; + if (b >= 0) { + Codebook *book = f->codebooks + b; + if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size)) + goto done; + } else { + z += r->part_size; + c_inter = z % ch; + p_inter = z / ch; + } + } + #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE + ++class_set; + #endif + } + } + } + goto done; + } + CHECK(f); + + for (pass=0; pass < 8; ++pass) { + int pcount = 0, class_set=0; + while (pcount < part_read) { + if (pass == 0) { + for (j=0; j < ch; ++j) { + if (!do_not_decode[j]) { + Codebook *c = f->codebooks+r->classbook; + int temp; + DECODE(temp,f,c); + if (temp == EOP) goto done; + #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE + part_classdata[j][class_set] = r->classdata[temp]; + #else + for (i=classwords-1; i >= 0; --i) { + classifications[j][i+pcount] = temp % r->classifications; + temp /= r->classifications; + } + #endif + } + } + } + for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) { + for (j=0; j < ch; ++j) { + if (!do_not_decode[j]) { + #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE + int c = part_classdata[j][class_set][i]; + #else + int c = classifications[j][pcount]; + #endif + int b = r->residue_books[c][pass]; + if (b >= 0) { + float *target = residue_buffers[j]; + int offset = r->begin + pcount * r->part_size; + int n = r->part_size; + Codebook *book = f->codebooks + b; + if (!residue_decode(f, book, target, offset, n, rtype)) + goto done; + } + } + } + } + #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE + ++class_set; + #endif + } + } + done: + CHECK(f); + #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE + temp_free(f,part_classdata); + #else + temp_free(f,classifications); + #endif + temp_alloc_restore(f,temp_alloc_point); +} + + +#if 0 +// slow way for debugging +void inverse_mdct_slow(float *buffer, int n) +{ + int i,j; + int n2 = n >> 1; + float *x = (float *) malloc(sizeof(*x) * n2); + memcpy(x, buffer, sizeof(*x) * n2); + for (i=0; i < n; ++i) { + float acc = 0; + for (j=0; j < n2; ++j) + // formula from paper: + //acc += n/4.0f * x[j] * (float) cos(M_PI / 2 / n * (2 * i + 1 + n/2.0)*(2*j+1)); + // formula from wikipedia + //acc += 2.0f / n2 * x[j] * (float) cos(M_PI/n2 * (i + 0.5 + n2/2)*(j + 0.5)); + // these are equivalent, except the formula from the paper inverts the multiplier! + // however, what actually works is NO MULTIPLIER!?! + //acc += 64 * 2.0f / n2 * x[j] * (float) cos(M_PI/n2 * (i + 0.5 + n2/2)*(j + 0.5)); + acc += x[j] * (float) cos(M_PI / 2 / n * (2 * i + 1 + n/2.0)*(2*j+1)); + buffer[i] = acc; + } + free(x); +} +#elif 0 +// same as above, but just barely able to run in real time on modern machines +void inverse_mdct_slow(float *buffer, int n, vorb *f, int blocktype) +{ + float mcos[16384]; + int i,j; + int n2 = n >> 1, nmask = (n << 2) -1; + float *x = (float *) malloc(sizeof(*x) * n2); + memcpy(x, buffer, sizeof(*x) * n2); + for (i=0; i < 4*n; ++i) + mcos[i] = (float) cos(M_PI / 2 * i / n); + + for (i=0; i < n; ++i) { + float acc = 0; + for (j=0; j < n2; ++j) + acc += x[j] * mcos[(2 * i + 1 + n2)*(2*j+1) & nmask]; + buffer[i] = acc; + } + free(x); +} +#elif 0 +// transform to use a slow dct-iv; this is STILL basically trivial, +// but only requires half as many ops +void dct_iv_slow(float *buffer, int n) +{ + float mcos[16384]; + float x[2048]; + int i,j; + int n2 = n >> 1, nmask = (n << 3) - 1; + memcpy(x, buffer, sizeof(*x) * n); + for (i=0; i < 8*n; ++i) + mcos[i] = (float) cos(M_PI / 4 * i / n); + for (i=0; i < n; ++i) { + float acc = 0; + for (j=0; j < n; ++j) + acc += x[j] * mcos[((2 * i + 1)*(2*j+1)) & nmask]; + buffer[i] = acc; + } +} + +void inverse_mdct_slow(float *buffer, int n, vorb *f, int blocktype) +{ + int i, n4 = n >> 2, n2 = n >> 1, n3_4 = n - n4; + float temp[4096]; + + memcpy(temp, buffer, n2 * sizeof(float)); + dct_iv_slow(temp, n2); // returns -c'-d, a-b' + + for (i=0; i < n4 ; ++i) buffer[i] = temp[i+n4]; // a-b' + for ( ; i < n3_4; ++i) buffer[i] = -temp[n3_4 - i - 1]; // b-a', c+d' + for ( ; i < n ; ++i) buffer[i] = -temp[i - n3_4]; // c'+d +} +#endif + +#ifndef LIBVORBIS_MDCT +#define LIBVORBIS_MDCT 0 +#endif + +#if LIBVORBIS_MDCT +// directly call the vorbis MDCT using an interface documented +// by Jeff Roberts... useful for performance comparison +typedef struct +{ + int n; + int log2n; + + float *trig; + int *bitrev; + + float scale; +} mdct_lookup; + +extern void mdct_init(mdct_lookup *lookup, int n); +extern void mdct_clear(mdct_lookup *l); +extern void mdct_backward(mdct_lookup *init, float *in, float *out); + +mdct_lookup M1,M2; + +void inverse_mdct(float *buffer, int n, vorb *f, int blocktype) +{ + mdct_lookup *M; + if (M1.n == n) M = &M1; + else if (M2.n == n) M = &M2; + else if (M1.n == 0) { mdct_init(&M1, n); M = &M1; } + else { + if (M2.n) __asm int 3; + mdct_init(&M2, n); + M = &M2; + } + + mdct_backward(M, buffer, buffer); +} +#endif + + +// the following were split out into separate functions while optimizing; +// they could be pushed back up but eh. __forceinline showed no change; +// they're probably already being inlined. +static void imdct_step3_iter0_loop(int n, float *e, int i_off, int k_off, float *A) +{ + float *ee0 = e + i_off; + float *ee2 = ee0 + k_off; + int i; + + assert((n & 3) == 0); + for (i=(n>>2); i > 0; --i) { + float k00_20, k01_21; + k00_20 = ee0[ 0] - ee2[ 0]; + k01_21 = ee0[-1] - ee2[-1]; + ee0[ 0] += ee2[ 0];//ee0[ 0] = ee0[ 0] + ee2[ 0]; + ee0[-1] += ee2[-1];//ee0[-1] = ee0[-1] + ee2[-1]; + ee2[ 0] = k00_20 * A[0] - k01_21 * A[1]; + ee2[-1] = k01_21 * A[0] + k00_20 * A[1]; + A += 8; + + k00_20 = ee0[-2] - ee2[-2]; + k01_21 = ee0[-3] - ee2[-3]; + ee0[-2] += ee2[-2];//ee0[-2] = ee0[-2] + ee2[-2]; + ee0[-3] += ee2[-3];//ee0[-3] = ee0[-3] + ee2[-3]; + ee2[-2] = k00_20 * A[0] - k01_21 * A[1]; + ee2[-3] = k01_21 * A[0] + k00_20 * A[1]; + A += 8; + + k00_20 = ee0[-4] - ee2[-4]; + k01_21 = ee0[-5] - ee2[-5]; + ee0[-4] += ee2[-4];//ee0[-4] = ee0[-4] + ee2[-4]; + ee0[-5] += ee2[-5];//ee0[-5] = ee0[-5] + ee2[-5]; + ee2[-4] = k00_20 * A[0] - k01_21 * A[1]; + ee2[-5] = k01_21 * A[0] + k00_20 * A[1]; + A += 8; + + k00_20 = ee0[-6] - ee2[-6]; + k01_21 = ee0[-7] - ee2[-7]; + ee0[-6] += ee2[-6];//ee0[-6] = ee0[-6] + ee2[-6]; + ee0[-7] += ee2[-7];//ee0[-7] = ee0[-7] + ee2[-7]; + ee2[-6] = k00_20 * A[0] - k01_21 * A[1]; + ee2[-7] = k01_21 * A[0] + k00_20 * A[1]; + A += 8; + ee0 -= 8; + ee2 -= 8; + } +} + +static void imdct_step3_inner_r_loop(int lim, float *e, int d0, int k_off, float *A, int k1) +{ + int i; + float k00_20, k01_21; + + float *e0 = e + d0; + float *e2 = e0 + k_off; + + for (i=lim >> 2; i > 0; --i) { + k00_20 = e0[-0] - e2[-0]; + k01_21 = e0[-1] - e2[-1]; + e0[-0] += e2[-0];//e0[-0] = e0[-0] + e2[-0]; + e0[-1] += e2[-1];//e0[-1] = e0[-1] + e2[-1]; + e2[-0] = (k00_20)*A[0] - (k01_21) * A[1]; + e2[-1] = (k01_21)*A[0] + (k00_20) * A[1]; + + A += k1; + + k00_20 = e0[-2] - e2[-2]; + k01_21 = e0[-3] - e2[-3]; + e0[-2] += e2[-2];//e0[-2] = e0[-2] + e2[-2]; + e0[-3] += e2[-3];//e0[-3] = e0[-3] + e2[-3]; + e2[-2] = (k00_20)*A[0] - (k01_21) * A[1]; + e2[-3] = (k01_21)*A[0] + (k00_20) * A[1]; + + A += k1; + + k00_20 = e0[-4] - e2[-4]; + k01_21 = e0[-5] - e2[-5]; + e0[-4] += e2[-4];//e0[-4] = e0[-4] + e2[-4]; + e0[-5] += e2[-5];//e0[-5] = e0[-5] + e2[-5]; + e2[-4] = (k00_20)*A[0] - (k01_21) * A[1]; + e2[-5] = (k01_21)*A[0] + (k00_20) * A[1]; + + A += k1; + + k00_20 = e0[-6] - e2[-6]; + k01_21 = e0[-7] - e2[-7]; + e0[-6] += e2[-6];//e0[-6] = e0[-6] + e2[-6]; + e0[-7] += e2[-7];//e0[-7] = e0[-7] + e2[-7]; + e2[-6] = (k00_20)*A[0] - (k01_21) * A[1]; + e2[-7] = (k01_21)*A[0] + (k00_20) * A[1]; + + e0 -= 8; + e2 -= 8; + + A += k1; + } +} + +static void imdct_step3_inner_s_loop(int n, float *e, int i_off, int k_off, float *A, int a_off, int k0) +{ + int i; + float A0 = A[0]; + float A1 = A[0+1]; + float A2 = A[0+a_off]; + float A3 = A[0+a_off+1]; + float A4 = A[0+a_off*2+0]; + float A5 = A[0+a_off*2+1]; + float A6 = A[0+a_off*3+0]; + float A7 = A[0+a_off*3+1]; + + float k00,k11; + + float *ee0 = e +i_off; + float *ee2 = ee0+k_off; + + for (i=n; i > 0; --i) { + k00 = ee0[ 0] - ee2[ 0]; + k11 = ee0[-1] - ee2[-1]; + ee0[ 0] = ee0[ 0] + ee2[ 0]; + ee0[-1] = ee0[-1] + ee2[-1]; + ee2[ 0] = (k00) * A0 - (k11) * A1; + ee2[-1] = (k11) * A0 + (k00) * A1; + + k00 = ee0[-2] - ee2[-2]; + k11 = ee0[-3] - ee2[-3]; + ee0[-2] = ee0[-2] + ee2[-2]; + ee0[-3] = ee0[-3] + ee2[-3]; + ee2[-2] = (k00) * A2 - (k11) * A3; + ee2[-3] = (k11) * A2 + (k00) * A3; + + k00 = ee0[-4] - ee2[-4]; + k11 = ee0[-5] - ee2[-5]; + ee0[-4] = ee0[-4] + ee2[-4]; + ee0[-5] = ee0[-5] + ee2[-5]; + ee2[-4] = (k00) * A4 - (k11) * A5; + ee2[-5] = (k11) * A4 + (k00) * A5; + + k00 = ee0[-6] - ee2[-6]; + k11 = ee0[-7] - ee2[-7]; + ee0[-6] = ee0[-6] + ee2[-6]; + ee0[-7] = ee0[-7] + ee2[-7]; + ee2[-6] = (k00) * A6 - (k11) * A7; + ee2[-7] = (k11) * A6 + (k00) * A7; + + ee0 -= k0; + ee2 -= k0; + } +} + +static __forceinline void iter_54(float *z) +{ + float k00,k11,k22,k33; + float y0,y1,y2,y3; + + k00 = z[ 0] - z[-4]; + y0 = z[ 0] + z[-4]; + y2 = z[-2] + z[-6]; + k22 = z[-2] - z[-6]; + + z[-0] = y0 + y2; // z0 + z4 + z2 + z6 + z[-2] = y0 - y2; // z0 + z4 - z2 - z6 + + // done with y0,y2 + + k33 = z[-3] - z[-7]; + + z[-4] = k00 + k33; // z0 - z4 + z3 - z7 + z[-6] = k00 - k33; // z0 - z4 - z3 + z7 + + // done with k33 + + k11 = z[-1] - z[-5]; + y1 = z[-1] + z[-5]; + y3 = z[-3] + z[-7]; + + z[-1] = y1 + y3; // z1 + z5 + z3 + z7 + z[-3] = y1 - y3; // z1 + z5 - z3 - z7 + z[-5] = k11 - k22; // z1 - z5 + z2 - z6 + z[-7] = k11 + k22; // z1 - z5 - z2 + z6 +} + +static void imdct_step3_inner_s_loop_ld654(int n, float *e, int i_off, float *A, int base_n) +{ + int a_off = base_n >> 3; + float A2 = A[0+a_off]; + float *z = e + i_off; + float *base = z - 16 * n; + + while (z > base) { + float k00,k11; + + k00 = z[-0] - z[-8]; + k11 = z[-1] - z[-9]; + z[-0] = z[-0] + z[-8]; + z[-1] = z[-1] + z[-9]; + z[-8] = k00; + z[-9] = k11 ; + + k00 = z[ -2] - z[-10]; + k11 = z[ -3] - z[-11]; + z[ -2] = z[ -2] + z[-10]; + z[ -3] = z[ -3] + z[-11]; + z[-10] = (k00+k11) * A2; + z[-11] = (k11-k00) * A2; + + k00 = z[-12] - z[ -4]; // reverse to avoid a unary negation + k11 = z[ -5] - z[-13]; + z[ -4] = z[ -4] + z[-12]; + z[ -5] = z[ -5] + z[-13]; + z[-12] = k11; + z[-13] = k00; + + k00 = z[-14] - z[ -6]; // reverse to avoid a unary negation + k11 = z[ -7] - z[-15]; + z[ -6] = z[ -6] + z[-14]; + z[ -7] = z[ -7] + z[-15]; + z[-14] = (k00+k11) * A2; + z[-15] = (k00-k11) * A2; + + iter_54(z); + iter_54(z-8); + z -= 16; + } +} + +static void inverse_mdct(float *buffer, int n, vorb *f, int blocktype) +{ + int n2 = n >> 1, n4 = n >> 2, n8 = n >> 3, l; + int ld; + // @OPTIMIZE: reduce register pressure by using fewer variables? + int save_point = temp_alloc_save(f); + float *buf2 = (float *) temp_alloc(f, n2 * sizeof(*buf2)); + float *u=NULL,*v=NULL; + // twiddle factors + float *A = f->A[blocktype]; + + // IMDCT algorithm from "The use of multirate filter banks for coding of high quality digital audio" + // See notes about bugs in that paper in less-optimal implementation 'inverse_mdct_old' after this function. + + // kernel from paper + + + // merged: + // copy and reflect spectral data + // step 0 + + // note that it turns out that the items added together during + // this step are, in fact, being added to themselves (as reflected + // by step 0). inexplicable inefficiency! this became obvious + // once I combined the passes. + + // so there's a missing 'times 2' here (for adding X to itself). + // this propogates through linearly to the end, where the numbers + // are 1/2 too small, and need to be compensated for. + + { + float *d,*e, *AA, *e_stop; + d = &buf2[n2-2]; + AA = A; + e = &buffer[0]; + e_stop = &buffer[n2]; + while (e != e_stop) { + d[1] = (e[0] * AA[0] - e[2]*AA[1]); + d[0] = (e[0] * AA[1] + e[2]*AA[0]); + d -= 2; + AA += 2; + e += 4; + } + + e = &buffer[n2-3]; + while (d >= buf2) { + d[1] = (-e[2] * AA[0] - -e[0]*AA[1]); + d[0] = (-e[2] * AA[1] + -e[0]*AA[0]); + d -= 2; + AA += 2; + e -= 4; + } + } + + // now we use symbolic names for these, so that we can + // possibly swap their meaning as we change which operations + // are in place + + u = buffer; + v = buf2; + + // step 2 (paper output is w, now u) + // this could be in place, but the data ends up in the wrong + // place... _somebody_'s got to swap it, so this is nominated + { + float *AA = &A[n2-8]; + float *d0,*d1, *e0, *e1; + + e0 = &v[n4]; + e1 = &v[0]; + + d0 = &u[n4]; + d1 = &u[0]; + + while (AA >= A) { + float v40_20, v41_21; + + v41_21 = e0[1] - e1[1]; + v40_20 = e0[0] - e1[0]; + d0[1] = e0[1] + e1[1]; + d0[0] = e0[0] + e1[0]; + d1[1] = v41_21*AA[4] - v40_20*AA[5]; + d1[0] = v40_20*AA[4] + v41_21*AA[5]; + + v41_21 = e0[3] - e1[3]; + v40_20 = e0[2] - e1[2]; + d0[3] = e0[3] + e1[3]; + d0[2] = e0[2] + e1[2]; + d1[3] = v41_21*AA[0] - v40_20*AA[1]; + d1[2] = v40_20*AA[0] + v41_21*AA[1]; + + AA -= 8; + + d0 += 4; + d1 += 4; + e0 += 4; + e1 += 4; + } + } + + // step 3 + ld = ilog(n) - 1; // ilog is off-by-one from normal definitions + + // optimized step 3: + + // the original step3 loop can be nested r inside s or s inside r; + // it's written originally as s inside r, but this is dumb when r + // iterates many times, and s few. So I have two copies of it and + // switch between them halfway. + + // this is iteration 0 of step 3 + imdct_step3_iter0_loop(n >> 4, u, n2-1-n4*0, -(n >> 3), A); + imdct_step3_iter0_loop(n >> 4, u, n2-1-n4*1, -(n >> 3), A); + + // this is iteration 1 of step 3 + imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*0, -(n >> 4), A, 16); + imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*1, -(n >> 4), A, 16); + imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*2, -(n >> 4), A, 16); + imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*3, -(n >> 4), A, 16); + + l=2; + for (; l < (ld-3)>>1; ++l) { + int k0 = n >> (l+2), k0_2 = k0>>1; + int lim = 1 << (l+1); + int i; + for (i=0; i < lim; ++i) + imdct_step3_inner_r_loop(n >> (l+4), u, n2-1 - k0*i, -k0_2, A, 1 << (l+3)); + } + + for (; l < ld-6; ++l) { + int k0 = n >> (l+2), k1 = 1 << (l+3), k0_2 = k0>>1; + int rlim = n >> (l+6), r; + int lim = 1 << (l+1); + int i_off; + float *A0 = A; + i_off = n2-1; + for (r=rlim; r > 0; --r) { + imdct_step3_inner_s_loop(lim, u, i_off, -k0_2, A0, k1, k0); + A0 += k1*4; + i_off -= 8; + } + } + + // iterations with count: + // ld-6,-5,-4 all interleaved together + // the big win comes from getting rid of needless flops + // due to the constants on pass 5 & 4 being all 1 and 0; + // combining them to be simultaneous to improve cache made little difference + imdct_step3_inner_s_loop_ld654(n >> 5, u, n2-1, A, n); + + // output is u + + // step 4, 5, and 6 + // cannot be in-place because of step 5 + { + uint16 *bitrev = f->bit_reverse[blocktype]; + // weirdly, I'd have thought reading sequentially and writing + // erratically would have been better than vice-versa, but in + // fact that's not what my testing showed. (That is, with + // j = bitreverse(i), do you read i and write j, or read j and write i.) + + float *d0 = &v[n4-4]; + float *d1 = &v[n2-4]; + while (d0 >= v) { + int k4; + + k4 = bitrev[0]; + d1[3] = u[k4+0]; + d1[2] = u[k4+1]; + d0[3] = u[k4+2]; + d0[2] = u[k4+3]; + + k4 = bitrev[1]; + d1[1] = u[k4+0]; + d1[0] = u[k4+1]; + d0[1] = u[k4+2]; + d0[0] = u[k4+3]; + + d0 -= 4; + d1 -= 4; + bitrev += 2; + } + } + // (paper output is u, now v) + + + // data must be in buf2 + assert(v == buf2); + + // step 7 (paper output is v, now v) + // this is now in place + { + float *C = f->C[blocktype]; + float *d, *e; + + d = v; + e = v + n2 - 4; + + while (d < e) { + float a02,a11,b0,b1,b2,b3; + + a02 = d[0] - e[2]; + a11 = d[1] + e[3]; + + b0 = C[1]*a02 + C[0]*a11; + b1 = C[1]*a11 - C[0]*a02; + + b2 = d[0] + e[ 2]; + b3 = d[1] - e[ 3]; + + d[0] = b2 + b0; + d[1] = b3 + b1; + e[2] = b2 - b0; + e[3] = b1 - b3; + + a02 = d[2] - e[0]; + a11 = d[3] + e[1]; + + b0 = C[3]*a02 + C[2]*a11; + b1 = C[3]*a11 - C[2]*a02; + + b2 = d[2] + e[ 0]; + b3 = d[3] - e[ 1]; + + d[2] = b2 + b0; + d[3] = b3 + b1; + e[0] = b2 - b0; + e[1] = b1 - b3; + + C += 4; + d += 4; + e -= 4; + } + } + + // data must be in buf2 + + + // step 8+decode (paper output is X, now buffer) + // this generates pairs of data a la 8 and pushes them directly through + // the decode kernel (pushing rather than pulling) to avoid having + // to make another pass later + + // this cannot POSSIBLY be in place, so we refer to the buffers directly + + { + float *d0,*d1,*d2,*d3; + + float *B = f->B[blocktype] + n2 - 8; + float *e = buf2 + n2 - 8; + d0 = &buffer[0]; + d1 = &buffer[n2-4]; + d2 = &buffer[n2]; + d3 = &buffer[n-4]; + while (e >= v) { + float p0,p1,p2,p3; + + p3 = e[6]*B[7] - e[7]*B[6]; + p2 = -e[6]*B[6] - e[7]*B[7]; + + d0[0] = p3; + d1[3] = - p3; + d2[0] = p2; + d3[3] = p2; + + p1 = e[4]*B[5] - e[5]*B[4]; + p0 = -e[4]*B[4] - e[5]*B[5]; + + d0[1] = p1; + d1[2] = - p1; + d2[1] = p0; + d3[2] = p0; + + p3 = e[2]*B[3] - e[3]*B[2]; + p2 = -e[2]*B[2] - e[3]*B[3]; + + d0[2] = p3; + d1[1] = - p3; + d2[2] = p2; + d3[1] = p2; + + p1 = e[0]*B[1] - e[1]*B[0]; + p0 = -e[0]*B[0] - e[1]*B[1]; + + d0[3] = p1; + d1[0] = - p1; + d2[3] = p0; + d3[0] = p0; + + B -= 8; + e -= 8; + d0 += 4; + d2 += 4; + d1 -= 4; + d3 -= 4; + } + } + + temp_free(f,buf2); + temp_alloc_restore(f,save_point); +} + +#if 0 +// this is the original version of the above code, if you want to optimize it from scratch +void inverse_mdct_naive(float *buffer, int n) +{ + float s; + float A[1 << 12], B[1 << 12], C[1 << 11]; + int i,k,k2,k4, n2 = n >> 1, n4 = n >> 2, n8 = n >> 3, l; + int n3_4 = n - n4, ld; + // how can they claim this only uses N words?! + // oh, because they're only used sparsely, whoops + float u[1 << 13], X[1 << 13], v[1 << 13], w[1 << 13]; + // set up twiddle factors + + for (k=k2=0; k < n4; ++k,k2+=2) { + A[k2 ] = (float) cos(4*k*M_PI/n); + A[k2+1] = (float) -sin(4*k*M_PI/n); + B[k2 ] = (float) cos((k2+1)*M_PI/n/2); + B[k2+1] = (float) sin((k2+1)*M_PI/n/2); + } + for (k=k2=0; k < n8; ++k,k2+=2) { + C[k2 ] = (float) cos(2*(k2+1)*M_PI/n); + C[k2+1] = (float) -sin(2*(k2+1)*M_PI/n); + } + + // IMDCT algorithm from "The use of multirate filter banks for coding of high quality digital audio" + // Note there are bugs in that pseudocode, presumably due to them attempting + // to rename the arrays nicely rather than representing the way their actual + // implementation bounces buffers back and forth. As a result, even in the + // "some formulars corrected" version, a direct implementation fails. These + // are noted below as "paper bug". + + // copy and reflect spectral data + for (k=0; k < n2; ++k) u[k] = buffer[k]; + for ( ; k < n ; ++k) u[k] = -buffer[n - k - 1]; + // kernel from paper + // step 1 + for (k=k2=k4=0; k < n4; k+=1, k2+=2, k4+=4) { + v[n-k4-1] = (u[k4] - u[n-k4-1]) * A[k2] - (u[k4+2] - u[n-k4-3])*A[k2+1]; + v[n-k4-3] = (u[k4] - u[n-k4-1]) * A[k2+1] + (u[k4+2] - u[n-k4-3])*A[k2]; + } + // step 2 + for (k=k4=0; k < n8; k+=1, k4+=4) { + w[n2+3+k4] = v[n2+3+k4] + v[k4+3]; + w[n2+1+k4] = v[n2+1+k4] + v[k4+1]; + w[k4+3] = (v[n2+3+k4] - v[k4+3])*A[n2-4-k4] - (v[n2+1+k4]-v[k4+1])*A[n2-3-k4]; + w[k4+1] = (v[n2+1+k4] - v[k4+1])*A[n2-4-k4] + (v[n2+3+k4]-v[k4+3])*A[n2-3-k4]; + } + // step 3 + ld = ilog(n) - 1; // ilog is off-by-one from normal definitions + for (l=0; l < ld-3; ++l) { + int k0 = n >> (l+2), k1 = 1 << (l+3); + int rlim = n >> (l+4), r4, r; + int s2lim = 1 << (l+2), s2; + for (r=r4=0; r < rlim; r4+=4,++r) { + for (s2=0; s2 < s2lim; s2+=2) { + u[n-1-k0*s2-r4] = w[n-1-k0*s2-r4] + w[n-1-k0*(s2+1)-r4]; + u[n-3-k0*s2-r4] = w[n-3-k0*s2-r4] + w[n-3-k0*(s2+1)-r4]; + u[n-1-k0*(s2+1)-r4] = (w[n-1-k0*s2-r4] - w[n-1-k0*(s2+1)-r4]) * A[r*k1] + - (w[n-3-k0*s2-r4] - w[n-3-k0*(s2+1)-r4]) * A[r*k1+1]; + u[n-3-k0*(s2+1)-r4] = (w[n-3-k0*s2-r4] - w[n-3-k0*(s2+1)-r4]) * A[r*k1] + + (w[n-1-k0*s2-r4] - w[n-1-k0*(s2+1)-r4]) * A[r*k1+1]; + } + } + if (l+1 < ld-3) { + // paper bug: ping-ponging of u&w here is omitted + memcpy(w, u, sizeof(u)); + } + } + + // step 4 + for (i=0; i < n8; ++i) { + int j = bit_reverse(i) >> (32-ld+3); + assert(j < n8); + if (i == j) { + // paper bug: original code probably swapped in place; if copying, + // need to directly copy in this case + int i8 = i << 3; + v[i8+1] = u[i8+1]; + v[i8+3] = u[i8+3]; + v[i8+5] = u[i8+5]; + v[i8+7] = u[i8+7]; + } else if (i < j) { + int i8 = i << 3, j8 = j << 3; + v[j8+1] = u[i8+1], v[i8+1] = u[j8 + 1]; + v[j8+3] = u[i8+3], v[i8+3] = u[j8 + 3]; + v[j8+5] = u[i8+5], v[i8+5] = u[j8 + 5]; + v[j8+7] = u[i8+7], v[i8+7] = u[j8 + 7]; + } + } + // step 5 + for (k=0; k < n2; ++k) { + w[k] = v[k*2+1]; + } + // step 6 + for (k=k2=k4=0; k < n8; ++k, k2 += 2, k4 += 4) { + u[n-1-k2] = w[k4]; + u[n-2-k2] = w[k4+1]; + u[n3_4 - 1 - k2] = w[k4+2]; + u[n3_4 - 2 - k2] = w[k4+3]; + } + // step 7 + for (k=k2=0; k < n8; ++k, k2 += 2) { + v[n2 + k2 ] = ( u[n2 + k2] + u[n-2-k2] + C[k2+1]*(u[n2+k2]-u[n-2-k2]) + C[k2]*(u[n2+k2+1]+u[n-2-k2+1]))/2; + v[n-2 - k2] = ( u[n2 + k2] + u[n-2-k2] - C[k2+1]*(u[n2+k2]-u[n-2-k2]) - C[k2]*(u[n2+k2+1]+u[n-2-k2+1]))/2; + v[n2+1+ k2] = ( u[n2+1+k2] - u[n-1-k2] + C[k2+1]*(u[n2+1+k2]+u[n-1-k2]) - C[k2]*(u[n2+k2]-u[n-2-k2]))/2; + v[n-1 - k2] = (-u[n2+1+k2] + u[n-1-k2] + C[k2+1]*(u[n2+1+k2]+u[n-1-k2]) - C[k2]*(u[n2+k2]-u[n-2-k2]))/2; + } + // step 8 + for (k=k2=0; k < n4; ++k,k2 += 2) { + X[k] = v[k2+n2]*B[k2 ] + v[k2+1+n2]*B[k2+1]; + X[n2-1-k] = v[k2+n2]*B[k2+1] - v[k2+1+n2]*B[k2 ]; + } + + // decode kernel to output + // determined the following value experimentally + // (by first figuring out what made inverse_mdct_slow work); then matching that here + // (probably vorbis encoder premultiplies by n or n/2, to save it on the decoder?) + s = 0.5; // theoretically would be n4 + + // [[[ note! the s value of 0.5 is compensated for by the B[] in the current code, + // so it needs to use the "old" B values to behave correctly, or else + // set s to 1.0 ]]] + for (i=0; i < n4 ; ++i) buffer[i] = s * X[i+n4]; + for ( ; i < n3_4; ++i) buffer[i] = -s * X[n3_4 - i - 1]; + for ( ; i < n ; ++i) buffer[i] = -s * X[i - n3_4]; +} +#endif + +static float *get_window(vorb *f, int len) +{ + len <<= 1; + if (len == f->blocksize_0) return f->window[0]; + if (len == f->blocksize_1) return f->window[1]; + assert(0); + return NULL; +} + +#ifndef STB_VORBIS_NO_DEFER_FLOOR +typedef int16 YTYPE; +#else +typedef int YTYPE; +#endif +static int do_floor(vorb *f, Mapping *map, int i, int n, float *target, YTYPE *finalY, uint8 *step2_flag) +{ + int n2 = n >> 1; + int s = map->chan[i].mux, floor; + floor = map->submap_floor[s]; + if (f->floor_types[floor] == 0) { + return error(f, VORBIS_invalid_stream); + } else { + Floor1 *g = &f->floor_config[floor].floor1; + int j,q; + int lx = 0, ly = finalY[0] * g->floor1_multiplier; + for (q=1; q < g->values; ++q) { + j = g->sorted_order[q]; + #ifndef STB_VORBIS_NO_DEFER_FLOOR + if (finalY[j] >= 0) + #else + if (step2_flag[j]) + #endif + { + int hy = finalY[j] * g->floor1_multiplier; + int hx = g->Xlist[j]; + if (lx != hx) + draw_line(target, lx,ly, hx,hy, n2); + CHECK(f); + lx = hx, ly = hy; + } + } + if (lx < n2) { + // optimization of: draw_line(target, lx,ly, n,ly, n2); + for (j=lx; j < n2; ++j) + LINE_OP(target[j], inverse_db_table[ly]); + CHECK(f); + } + } + return TRUE; +} + +// The meaning of "left" and "right" +// +// For a given frame: +// we compute samples from 0..n +// window_center is n/2 +// we'll window and mix the samples from left_start to left_end with data from the previous frame +// all of the samples from left_end to right_start can be output without mixing; however, +// this interval is 0-length except when transitioning between short and long frames +// all of the samples from right_start to right_end need to be mixed with the next frame, +// which we don't have, so those get saved in a buffer +// frame N's right_end-right_start, the number of samples to mix with the next frame, +// has to be the same as frame N+1's left_end-left_start (which they are by +// construction) + +static int vorbis_decode_initial(vorb *f, int *p_left_start, int *p_left_end, int *p_right_start, int *p_right_end, int *mode) +{ + Mode *m; + int i, n, prev, next, window_center; + f->channel_buffer_start = f->channel_buffer_end = 0; + + retry: + if (f->eof) return FALSE; + if (!maybe_start_packet(f)) + return FALSE; + // check packet type + if (get_bits(f,1) != 0) { + if (IS_PUSH_MODE(f)) + return error(f,VORBIS_bad_packet_type); + while (EOP != get8_packet(f)); + goto retry; + } + + if (f->alloc.alloc_buffer) + assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset); + + i = get_bits(f, ilog(f->mode_count-1)); + if (i == EOP) return FALSE; + if (i >= f->mode_count) return FALSE; + *mode = i; + m = f->mode_config + i; + if (m->blockflag) { + n = f->blocksize_1; + prev = get_bits(f,1); + next = get_bits(f,1); + } else { + prev = next = 0; + n = f->blocksize_0; + } + +// WINDOWING + + window_center = n >> 1; + if (m->blockflag && !prev) { + *p_left_start = (n - f->blocksize_0) >> 2; + *p_left_end = (n + f->blocksize_0) >> 2; + } else { + *p_left_start = 0; + *p_left_end = window_center; + } + if (m->blockflag && !next) { + *p_right_start = (n*3 - f->blocksize_0) >> 2; + *p_right_end = (n*3 + f->blocksize_0) >> 2; + } else { + *p_right_start = window_center; + *p_right_end = n; + } + + return TRUE; +} + +static int vorbis_decode_packet_rest(vorb *f, int *len, Mode *m, int left_start, int left_end, int right_start, int right_end, int *p_left) +{ + Mapping *map; + int i,j,k,n,n2; + int zero_channel[256]; + int really_zero_channel[256]; + +// WINDOWING + + n = f->blocksize[m->blockflag]; + map = &f->mapping[m->mapping]; + +// FLOORS + n2 = n >> 1; + + CHECK(f); + + for (i=0; i < f->channels; ++i) { + int s = map->chan[i].mux, floor; + zero_channel[i] = FALSE; + floor = map->submap_floor[s]; + if (f->floor_types[floor] == 0) { + return error(f, VORBIS_invalid_stream); + } else { + Floor1 *g = &f->floor_config[floor].floor1; + if (get_bits(f, 1)) { + short *finalY; + uint8 step2_flag[256]; + static int range_list[4] = { 256, 128, 86, 64 }; + int range = range_list[g->floor1_multiplier-1]; + int offset = 2; + finalY = f->finalY[i]; + finalY[0] = get_bits(f, ilog(range)-1); + finalY[1] = get_bits(f, ilog(range)-1); + for (j=0; j < g->partitions; ++j) { + int pclass = g->partition_class_list[j]; + int cdim = g->class_dimensions[pclass]; + int cbits = g->class_subclasses[pclass]; + int csub = (1 << cbits)-1; + int cval = 0; + if (cbits) { + Codebook *c = f->codebooks + g->class_masterbooks[pclass]; + DECODE(cval,f,c); + } + for (k=0; k < cdim; ++k) { + int book = g->subclass_books[pclass][cval & csub]; + cval = cval >> cbits; + if (book >= 0) { + int temp; + Codebook *c = f->codebooks + book; + DECODE(temp,f,c); + finalY[offset++] = temp; + } else + finalY[offset++] = 0; + } + } + if (f->valid_bits == INVALID_BITS) goto error; // behavior according to spec + step2_flag[0] = step2_flag[1] = 1; + for (j=2; j < g->values; ++j) { + int low, high, pred, highroom, lowroom, room, val; + low = g->neighbors[j][0]; + high = g->neighbors[j][1]; + //neighbors(g->Xlist, j, &low, &high); + pred = predict_point(g->Xlist[j], g->Xlist[low], g->Xlist[high], finalY[low], finalY[high]); + val = finalY[j]; + highroom = range - pred; + lowroom = pred; + if (highroom < lowroom) + room = highroom * 2; + else + room = lowroom * 2; + if (val) { + step2_flag[low] = step2_flag[high] = 1; + step2_flag[j] = 1; + if (val >= room) + if (highroom > lowroom) + finalY[j] = val - lowroom + pred; + else + finalY[j] = pred - val + highroom - 1; + else + if (val & 1) + finalY[j] = pred - ((val+1)>>1); + else + finalY[j] = pred + (val>>1); + } else { + step2_flag[j] = 0; + finalY[j] = pred; + } + } + +#ifdef STB_VORBIS_NO_DEFER_FLOOR + do_floor(f, map, i, n, f->floor_buffers[i], finalY, step2_flag); +#else + // defer final floor computation until _after_ residue + for (j=0; j < g->values; ++j) { + if (!step2_flag[j]) + finalY[j] = -1; + } +#endif + } else { + error: + zero_channel[i] = TRUE; + } + // So we just defer everything else to later + + // at this point we've decoded the floor into buffer + } + } + CHECK(f); + // at this point we've decoded all floors + + if (f->alloc.alloc_buffer) + assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset); + + // re-enable coupled channels if necessary + memcpy(really_zero_channel, zero_channel, sizeof(really_zero_channel[0]) * f->channels); + for (i=0; i < map->coupling_steps; ++i) + if (!zero_channel[map->chan[i].magnitude] || !zero_channel[map->chan[i].angle]) { + zero_channel[map->chan[i].magnitude] = zero_channel[map->chan[i].angle] = FALSE; + } + + CHECK(f); +// RESIDUE DECODE + for (i=0; i < map->submaps; ++i) { + float *residue_buffers[STB_VORBIS_MAX_CHANNELS]; + int r; + uint8 do_not_decode[256]; + int ch = 0; + for (j=0; j < f->channels; ++j) { + if (map->chan[j].mux == i) { + if (zero_channel[j]) { + do_not_decode[ch] = TRUE; + residue_buffers[ch] = NULL; + } else { + do_not_decode[ch] = FALSE; + residue_buffers[ch] = f->channel_buffers[j]; + } + ++ch; + } + } + r = map->submap_residue[i]; + decode_residue(f, residue_buffers, ch, n2, r, do_not_decode); + } + + if (f->alloc.alloc_buffer) + assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset); + CHECK(f); + +// INVERSE COUPLING + for (i = map->coupling_steps-1; i >= 0; --i) { + int n2 = n >> 1; + float *m = f->channel_buffers[map->chan[i].magnitude]; + float *a = f->channel_buffers[map->chan[i].angle ]; + for (j=0; j < n2; ++j) { + float a2,m2; + if (m[j] > 0) + if (a[j] > 0) + m2 = m[j], a2 = m[j] - a[j]; + else + a2 = m[j], m2 = m[j] + a[j]; + else + if (a[j] > 0) + m2 = m[j], a2 = m[j] + a[j]; + else + a2 = m[j], m2 = m[j] - a[j]; + m[j] = m2; + a[j] = a2; + } + } + CHECK(f); + + // finish decoding the floors +#ifndef STB_VORBIS_NO_DEFER_FLOOR + for (i=0; i < f->channels; ++i) { + if (really_zero_channel[i]) { + memset(f->channel_buffers[i], 0, sizeof(*f->channel_buffers[i]) * n2); + } else { + do_floor(f, map, i, n, f->channel_buffers[i], f->finalY[i], NULL); + } + } +#else + for (i=0; i < f->channels; ++i) { + if (really_zero_channel[i]) { + memset(f->channel_buffers[i], 0, sizeof(*f->channel_buffers[i]) * n2); + } else { + for (j=0; j < n2; ++j) + f->channel_buffers[i][j] *= f->floor_buffers[i][j]; + } + } +#endif + +// INVERSE MDCT + CHECK(f); + for (i=0; i < f->channels; ++i) + inverse_mdct(f->channel_buffers[i], n, f, m->blockflag); + CHECK(f); + + // this shouldn't be necessary, unless we exited on an error + // and want to flush to get to the next packet + flush_packet(f); + + if (f->first_decode) { + // assume we start so first non-discarded sample is sample 0 + // this isn't to spec, but spec would require us to read ahead + // and decode the size of all current frames--could be done, + // but presumably it's not a commonly used feature + f->current_loc = -n2; // start of first frame is positioned for discard + // we might have to discard samples "from" the next frame too, + // if we're lapping a large block then a small at the start? + f->discard_samples_deferred = n - right_end; + f->current_loc_valid = TRUE; + f->first_decode = FALSE; + } else if (f->discard_samples_deferred) { + if (f->discard_samples_deferred >= right_start - left_start) { + f->discard_samples_deferred -= (right_start - left_start); + left_start = right_start; + *p_left = left_start; + } else { + left_start += f->discard_samples_deferred; + *p_left = left_start; + f->discard_samples_deferred = 0; + } + } else if (f->previous_length == 0 && f->current_loc_valid) { + // we're recovering from a seek... that means we're going to discard + // the samples from this packet even though we know our position from + // the last page header, so we need to update the position based on + // the discarded samples here + // but wait, the code below is going to add this in itself even + // on a discard, so we don't need to do it here... + } + + // check if we have ogg information about the sample # for this packet + if (f->last_seg_which == f->end_seg_with_known_loc) { + // if we have a valid current loc, and this is final: + if (f->current_loc_valid && (f->page_flag & PAGEFLAG_last_page)) { + uint32 current_end = f->known_loc_for_packet - (n-right_end); + // then let's infer the size of the (probably) short final frame + if (current_end < f->current_loc + (right_end-left_start)) { + if (current_end < f->current_loc) { + // negative truncation, that's impossible! + *len = 0; + } else { + *len = current_end - f->current_loc; + } + *len += left_start; + if (*len > right_end) *len = right_end; // this should never happen + f->current_loc += *len; + return TRUE; + } + } + // otherwise, just set our sample loc + // guess that the ogg granule pos refers to the _middle_ of the + // last frame? + // set f->current_loc to the position of left_start + f->current_loc = f->known_loc_for_packet - (n2-left_start); + f->current_loc_valid = TRUE; + } + if (f->current_loc_valid) + f->current_loc += (right_start - left_start); + + if (f->alloc.alloc_buffer) + assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset); + *len = right_end; // ignore samples after the window goes to 0 + CHECK(f); + + return TRUE; +} + +static int vorbis_decode_packet(vorb *f, int *len, int *p_left, int *p_right) +{ + int mode, left_end, right_end; + if (!vorbis_decode_initial(f, p_left, &left_end, p_right, &right_end, &mode)) return 0; + return vorbis_decode_packet_rest(f, len, f->mode_config + mode, *p_left, left_end, *p_right, right_end, p_left); +} + +static int vorbis_finish_frame(stb_vorbis *f, int len, int left, int right) +{ + int prev,i,j; + // we use right&left (the start of the right- and left-window sin()-regions) + // to determine how much to return, rather than inferring from the rules + // (same result, clearer code); 'left' indicates where our sin() window + // starts, therefore where the previous window's right edge starts, and + // therefore where to start mixing from the previous buffer. 'right' + // indicates where our sin() ending-window starts, therefore that's where + // we start saving, and where our returned-data ends. + + // mixin from previous window + if (f->previous_length) { + int i,j, n = f->previous_length; + float *w = get_window(f, n); + for (i=0; i < f->channels; ++i) { + for (j=0; j < n; ++j) + f->channel_buffers[i][left+j] = + f->channel_buffers[i][left+j]*w[ j] + + f->previous_window[i][ j]*w[n-1-j]; + } + } + + prev = f->previous_length; + + // last half of this data becomes previous window + f->previous_length = len - right; + + // @OPTIMIZE: could avoid this copy by double-buffering the + // output (flipping previous_window with channel_buffers), but + // then previous_window would have to be 2x as large, and + // channel_buffers couldn't be temp mem (although they're NOT + // currently temp mem, they could be (unless we want to level + // performance by spreading out the computation)) + for (i=0; i < f->channels; ++i) + for (j=0; right+j < len; ++j) + f->previous_window[i][j] = f->channel_buffers[i][right+j]; + + if (!prev) + // there was no previous packet, so this data isn't valid... + // this isn't entirely true, only the would-have-overlapped data + // isn't valid, but this seems to be what the spec requires + return 0; + + // truncate a short frame + if (len < right) right = len; + + f->samples_output += right-left; + + return right - left; +} + +static void vorbis_pump_first_frame(stb_vorbis *f) +{ + int len, right, left; + if (vorbis_decode_packet(f, &len, &left, &right)) + vorbis_finish_frame(f, len, left, right); +} + +#ifndef STB_VORBIS_NO_PUSHDATA_API +static int is_whole_packet_present(stb_vorbis *f, int end_page) +{ + // make sure that we have the packet available before continuing... + // this requires a full ogg parse, but we know we can fetch from f->stream + + // instead of coding this out explicitly, we could save the current read state, + // read the next packet with get8() until end-of-packet, check f->eof, then + // reset the state? but that would be slower, esp. since we'd have over 256 bytes + // of state to restore (primarily the page segment table) + + int s = f->next_seg, first = TRUE; + uint8 *p = f->stream; + + if (s != -1) { // if we're not starting the packet with a 'continue on next page' flag + for (; s < f->segment_count; ++s) { + p += f->segments[s]; + if (f->segments[s] < 255) // stop at first short segment + break; + } + // either this continues, or it ends it... + if (end_page) + if (s < f->segment_count-1) return error(f, VORBIS_invalid_stream); + if (s == f->segment_count) + s = -1; // set 'crosses page' flag + if (p > f->stream_end) return error(f, VORBIS_need_more_data); + first = FALSE; + } + for (; s == -1;) { + uint8 *q; + int n; + + // check that we have the page header ready + if (p + 26 >= f->stream_end) return error(f, VORBIS_need_more_data); + // validate the page + if (memcmp(p, ogg_page_header, 4)) return error(f, VORBIS_invalid_stream); + if (p[4] != 0) return error(f, VORBIS_invalid_stream); + if (first) { // the first segment must NOT have 'continued_packet', later ones MUST + if (f->previous_length) + if ((p[5] & PAGEFLAG_continued_packet)) return error(f, VORBIS_invalid_stream); + // if no previous length, we're resynching, so we can come in on a continued-packet, + // which we'll just drop + } else { + if (!(p[5] & PAGEFLAG_continued_packet)) return error(f, VORBIS_invalid_stream); + } + n = p[26]; // segment counts + q = p+27; // q points to segment table + p = q + n; // advance past header + // make sure we've read the segment table + if (p > f->stream_end) return error(f, VORBIS_need_more_data); + for (s=0; s < n; ++s) { + p += q[s]; + if (q[s] < 255) + break; + } + if (end_page) + if (s < n-1) return error(f, VORBIS_invalid_stream); + if (s == n) + s = -1; // set 'crosses page' flag + if (p > f->stream_end) return error(f, VORBIS_need_more_data); + first = FALSE; + } + return TRUE; +} +#endif // !STB_VORBIS_NO_PUSHDATA_API + +static int start_decoder(vorb *f) +{ + uint8 header[6], x,y; + int len,i,j,k, max_submaps = 0; + int longest_floorlist=0; + + // first page, first packet + + if (!start_page(f)) return FALSE; + // validate page flag + if (!(f->page_flag & PAGEFLAG_first_page)) return error(f, VORBIS_invalid_first_page); + if (f->page_flag & PAGEFLAG_last_page) return error(f, VORBIS_invalid_first_page); + if (f->page_flag & PAGEFLAG_continued_packet) return error(f, VORBIS_invalid_first_page); + // check for expected packet length + if (f->segment_count != 1) return error(f, VORBIS_invalid_first_page); + if (f->segments[0] != 30) return error(f, VORBIS_invalid_first_page); + // read packet + // check packet header + if (get8(f) != VORBIS_packet_id) return error(f, VORBIS_invalid_first_page); + if (!getn(f, header, 6)) return error(f, VORBIS_unexpected_eof); + if (!vorbis_validate(header)) return error(f, VORBIS_invalid_first_page); + // vorbis_version + if (get32(f) != 0) return error(f, VORBIS_invalid_first_page); + f->channels = get8(f); if (!f->channels) return error(f, VORBIS_invalid_first_page); + if (f->channels > STB_VORBIS_MAX_CHANNELS) return error(f, VORBIS_too_many_channels); + f->sample_rate = get32(f); if (!f->sample_rate) return error(f, VORBIS_invalid_first_page); + get32(f); // bitrate_maximum + get32(f); // bitrate_nominal + get32(f); // bitrate_minimum + x = get8(f); + { + int log0,log1; + log0 = x & 15; + log1 = x >> 4; + f->blocksize_0 = 1 << log0; + f->blocksize_1 = 1 << log1; + if (log0 < 6 || log0 > 13) return error(f, VORBIS_invalid_setup); + if (log1 < 6 || log1 > 13) return error(f, VORBIS_invalid_setup); + if (log0 > log1) return error(f, VORBIS_invalid_setup); + } + + // framing_flag + x = get8(f); + if (!(x & 1)) return error(f, VORBIS_invalid_first_page); + + // second packet! + if (!start_page(f)) return FALSE; + + if (!start_packet(f)) return FALSE; + do { + len = next_segment(f); + skip(f, len); + f->bytes_in_seg = 0; + } while (len); + + // third packet! + if (!start_packet(f)) return FALSE; + + #ifndef STB_VORBIS_NO_PUSHDATA_API + if (IS_PUSH_MODE(f)) { + if (!is_whole_packet_present(f, TRUE)) { + // convert error in ogg header to write type + if (f->error == VORBIS_invalid_stream) + f->error = VORBIS_invalid_setup; + return FALSE; + } + } + #endif + + crc32_init(); // always init it, to avoid multithread race conditions + + if (get8_packet(f) != VORBIS_packet_setup) return error(f, VORBIS_invalid_setup); + for (i=0; i < 6; ++i) header[i] = get8_packet(f); + if (!vorbis_validate(header)) return error(f, VORBIS_invalid_setup); + + // codebooks + + f->codebook_count = get_bits(f,8) + 1; + f->codebooks = (Codebook *) setup_malloc(f, sizeof(*f->codebooks) * f->codebook_count); + if (f->codebooks == NULL) return error(f, VORBIS_outofmem); + memset(f->codebooks, 0, sizeof(*f->codebooks) * f->codebook_count); + for (i=0; i < f->codebook_count; ++i) { + uint32 *values; + int ordered, sorted_count; + int total=0; + uint8 *lengths; + Codebook *c = f->codebooks+i; + CHECK(f); + x = get_bits(f, 8); if (x != 0x42) return error(f, VORBIS_invalid_setup); + x = get_bits(f, 8); if (x != 0x43) return error(f, VORBIS_invalid_setup); + x = get_bits(f, 8); if (x != 0x56) return error(f, VORBIS_invalid_setup); + x = get_bits(f, 8); + c->dimensions = (get_bits(f, 8)<<8) + x; + x = get_bits(f, 8); + y = get_bits(f, 8); + c->entries = (get_bits(f, 8)<<16) + (y<<8) + x; + ordered = get_bits(f,1); + c->sparse = ordered ? 0 : get_bits(f,1); + + if (c->dimensions == 0 && c->entries != 0) return error(f, VORBIS_invalid_setup); + + if (c->sparse) + lengths = (uint8 *) setup_temp_malloc(f, c->entries); + else + lengths = c->codeword_lengths = (uint8 *) setup_malloc(f, c->entries); + + if (!lengths) return error(f, VORBIS_outofmem); + + if (ordered) { + int current_entry = 0; + int current_length = get_bits(f,5) + 1; + while (current_entry < c->entries) { + int limit = c->entries - current_entry; + int n = get_bits(f, ilog(limit)); + if (current_entry + n > (int) c->entries) { return error(f, VORBIS_invalid_setup); } + memset(lengths + current_entry, current_length, n); + current_entry += n; + ++current_length; + } + } else { + for (j=0; j < c->entries; ++j) { + int present = c->sparse ? get_bits(f,1) : 1; + if (present) { + lengths[j] = get_bits(f, 5) + 1; + ++total; + if (lengths[j] == 32) + return error(f, VORBIS_invalid_setup); + } else { + lengths[j] = NO_CODE; + } + } + } + + if (c->sparse && total >= c->entries >> 2) { + // convert sparse items to non-sparse! + if (c->entries > (int) f->setup_temp_memory_required) + f->setup_temp_memory_required = c->entries; + + c->codeword_lengths = (uint8 *) setup_malloc(f, c->entries); + if (c->codeword_lengths == NULL) return error(f, VORBIS_outofmem); + memcpy(c->codeword_lengths, lengths, c->entries); + setup_temp_free(f, lengths, c->entries); // note this is only safe if there have been no intervening temp mallocs! + lengths = c->codeword_lengths; + c->sparse = 0; + } + + // compute the size of the sorted tables + if (c->sparse) { + sorted_count = total; + } else { + sorted_count = 0; + #ifndef STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH + for (j=0; j < c->entries; ++j) + if (lengths[j] > STB_VORBIS_FAST_HUFFMAN_LENGTH && lengths[j] != NO_CODE) + ++sorted_count; + #endif + } + + c->sorted_entries = sorted_count; + values = NULL; + + CHECK(f); + if (!c->sparse) { + c->codewords = (uint32 *) setup_malloc(f, sizeof(c->codewords[0]) * c->entries); + if (!c->codewords) return error(f, VORBIS_outofmem); + } else { + unsigned int size; + if (c->sorted_entries) { + c->codeword_lengths = (uint8 *) setup_malloc(f, c->sorted_entries); + if (!c->codeword_lengths) return error(f, VORBIS_outofmem); + c->codewords = (uint32 *) setup_temp_malloc(f, sizeof(*c->codewords) * c->sorted_entries); + if (!c->codewords) return error(f, VORBIS_outofmem); + values = (uint32 *) setup_temp_malloc(f, sizeof(*values) * c->sorted_entries); + if (!values) return error(f, VORBIS_outofmem); + } + size = c->entries + (sizeof(*c->codewords) + sizeof(*values)) * c->sorted_entries; + if (size > f->setup_temp_memory_required) + f->setup_temp_memory_required = size; + } + + if (!compute_codewords(c, lengths, c->entries, values)) { + if (c->sparse) setup_temp_free(f, values, 0); + return error(f, VORBIS_invalid_setup); + } + + if (c->sorted_entries) { + // allocate an extra slot for sentinels + c->sorted_codewords = (uint32 *) setup_malloc(f, sizeof(*c->sorted_codewords) * (c->sorted_entries+1)); + if (c->sorted_codewords == NULL) return error(f, VORBIS_outofmem); + // allocate an extra slot at the front so that c->sorted_values[-1] is defined + // so that we can catch that case without an extra if + c->sorted_values = ( int *) setup_malloc(f, sizeof(*c->sorted_values ) * (c->sorted_entries+1)); + if (c->sorted_values == NULL) return error(f, VORBIS_outofmem); + ++c->sorted_values; + c->sorted_values[-1] = -1; + compute_sorted_huffman(c, lengths, values); + } + + if (c->sparse) { + setup_temp_free(f, values, sizeof(*values)*c->sorted_entries); + setup_temp_free(f, c->codewords, sizeof(*c->codewords)*c->sorted_entries); + setup_temp_free(f, lengths, c->entries); + c->codewords = NULL; + } + + compute_accelerated_huffman(c); + + CHECK(f); + c->lookup_type = get_bits(f, 4); + if (c->lookup_type > 2) return error(f, VORBIS_invalid_setup); + if (c->lookup_type > 0) { + uint16 *mults; + c->minimum_value = float32_unpack(get_bits(f, 32)); + c->delta_value = float32_unpack(get_bits(f, 32)); + c->value_bits = get_bits(f, 4)+1; + c->sequence_p = get_bits(f,1); + if (c->lookup_type == 1) { + c->lookup_values = lookup1_values(c->entries, c->dimensions); + } else { + c->lookup_values = c->entries * c->dimensions; + } + if (c->lookup_values == 0) return error(f, VORBIS_invalid_setup); + mults = (uint16 *) setup_temp_malloc(f, sizeof(mults[0]) * c->lookup_values); + if (mults == NULL) return error(f, VORBIS_outofmem); + for (j=0; j < (int) c->lookup_values; ++j) { + int q = get_bits(f, c->value_bits); + if (q == EOP) { setup_temp_free(f,mults,sizeof(mults[0])*c->lookup_values); return error(f, VORBIS_invalid_setup); } + mults[j] = q; + } + +#ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK + if (c->lookup_type == 1) { + int len, sparse = c->sparse; + float last=0; + // pre-expand the lookup1-style multiplicands, to avoid a divide in the inner loop + if (sparse) { + if (c->sorted_entries == 0) goto skip; + c->multiplicands = (codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->sorted_entries * c->dimensions); + } else + c->multiplicands = (codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->entries * c->dimensions); + if (c->multiplicands == NULL) { setup_temp_free(f,mults,sizeof(mults[0])*c->lookup_values); return error(f, VORBIS_outofmem); } + len = sparse ? c->sorted_entries : c->entries; + for (j=0; j < len; ++j) { + unsigned int z = sparse ? c->sorted_values[j] : j; + unsigned int div=1; + for (k=0; k < c->dimensions; ++k) { + int off = (z / div) % c->lookup_values; + float val = mults[off]; + val = mults[off]*c->delta_value + c->minimum_value + last; + c->multiplicands[j*c->dimensions + k] = val; + if (c->sequence_p) + last = val; + if (k+1 < c->dimensions) { + if (div > UINT_MAX / (unsigned int) c->lookup_values) { + setup_temp_free(f, mults,sizeof(mults[0])*c->lookup_values); + return error(f, VORBIS_invalid_setup); + } + div *= c->lookup_values; + } + } + } + c->lookup_type = 2; + } + else +#endif + { + float last=0; + CHECK(f); + c->multiplicands = (codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->lookup_values); + if (c->multiplicands == NULL) { setup_temp_free(f, mults,sizeof(mults[0])*c->lookup_values); return error(f, VORBIS_outofmem); } + for (j=0; j < (int) c->lookup_values; ++j) { + float val = mults[j] * c->delta_value + c->minimum_value + last; + c->multiplicands[j] = val; + if (c->sequence_p) + last = val; + } + } +#ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK + skip:; +#endif + setup_temp_free(f, mults, sizeof(mults[0])*c->lookup_values); + + CHECK(f); + } + CHECK(f); + } + + // time domain transfers (notused) + + x = get_bits(f, 6) + 1; + for (i=0; i < x; ++i) { + uint32 z = get_bits(f, 16); + if (z != 0) return error(f, VORBIS_invalid_setup); + } + + // Floors + f->floor_count = get_bits(f, 6)+1; + f->floor_config = (Floor *) setup_malloc(f, f->floor_count * sizeof(*f->floor_config)); + if (f->floor_config == NULL) return error(f, VORBIS_outofmem); + for (i=0; i < f->floor_count; ++i) { + f->floor_types[i] = get_bits(f, 16); + if (f->floor_types[i] > 1) return error(f, VORBIS_invalid_setup); + if (f->floor_types[i] == 0) { + Floor0 *g = &f->floor_config[i].floor0; + g->order = get_bits(f,8); + g->rate = get_bits(f,16); + g->bark_map_size = get_bits(f,16); + g->amplitude_bits = get_bits(f,6); + g->amplitude_offset = get_bits(f,8); + g->number_of_books = get_bits(f,4) + 1; + for (j=0; j < g->number_of_books; ++j) + g->book_list[j] = get_bits(f,8); + return error(f, VORBIS_feature_not_supported); + } else { + Point p[31*8+2]; + Floor1 *g = &f->floor_config[i].floor1; + int max_class = -1; + g->partitions = get_bits(f, 5); + for (j=0; j < g->partitions; ++j) { + g->partition_class_list[j] = get_bits(f, 4); + if (g->partition_class_list[j] > max_class) + max_class = g->partition_class_list[j]; + } + for (j=0; j <= max_class; ++j) { + g->class_dimensions[j] = get_bits(f, 3)+1; + g->class_subclasses[j] = get_bits(f, 2); + if (g->class_subclasses[j]) { + g->class_masterbooks[j] = get_bits(f, 8); + if (g->class_masterbooks[j] >= f->codebook_count) return error(f, VORBIS_invalid_setup); + } + for (k=0; k < 1 << g->class_subclasses[j]; ++k) { + g->subclass_books[j][k] = get_bits(f,8)-1; + if (g->subclass_books[j][k] >= f->codebook_count) return error(f, VORBIS_invalid_setup); + } + } + g->floor1_multiplier = get_bits(f,2)+1; + g->rangebits = get_bits(f,4); + g->Xlist[0] = 0; + g->Xlist[1] = 1 << g->rangebits; + g->values = 2; + for (j=0; j < g->partitions; ++j) { + int c = g->partition_class_list[j]; + for (k=0; k < g->class_dimensions[c]; ++k) { + g->Xlist[g->values] = get_bits(f, g->rangebits); + ++g->values; + } + } + // precompute the sorting + for (j=0; j < g->values; ++j) { + p[j].x = g->Xlist[j]; + p[j].y = j; + } + qsort(p, g->values, sizeof(p[0]), point_compare); + for (j=0; j < g->values; ++j) + g->sorted_order[j] = (uint8) p[j].y; + // precompute the neighbors + for (j=2; j < g->values; ++j) { + int low,hi; + neighbors(g->Xlist, j, &low,&hi); + g->neighbors[j][0] = low; + g->neighbors[j][1] = hi; + } + + if (g->values > longest_floorlist) + longest_floorlist = g->values; + } + } + + // Residue + f->residue_count = get_bits(f, 6)+1; + f->residue_config = (Residue *) setup_malloc(f, f->residue_count * sizeof(f->residue_config[0])); + if (f->residue_config == NULL) return error(f, VORBIS_outofmem); + memset(f->residue_config, 0, f->residue_count * sizeof(f->residue_config[0])); + for (i=0; i < f->residue_count; ++i) { + uint8 residue_cascade[64]; + Residue *r = f->residue_config+i; + f->residue_types[i] = get_bits(f, 16); + if (f->residue_types[i] > 2) return error(f, VORBIS_invalid_setup); + r->begin = get_bits(f, 24); + r->end = get_bits(f, 24); + if (r->end < r->begin) return error(f, VORBIS_invalid_setup); + r->part_size = get_bits(f,24)+1; + r->classifications = get_bits(f,6)+1; + r->classbook = get_bits(f,8); + if (r->classbook >= f->codebook_count) return error(f, VORBIS_invalid_setup); + for (j=0; j < r->classifications; ++j) { + uint8 high_bits=0; + uint8 low_bits=get_bits(f,3); + if (get_bits(f,1)) + high_bits = get_bits(f,5); + residue_cascade[j] = high_bits*8 + low_bits; + } + r->residue_books = (short (*)[8]) setup_malloc(f, sizeof(r->residue_books[0]) * r->classifications); + if (r->residue_books == NULL) return error(f, VORBIS_outofmem); + for (j=0; j < r->classifications; ++j) { + for (k=0; k < 8; ++k) { + if (residue_cascade[j] & (1 << k)) { + r->residue_books[j][k] = get_bits(f, 8); + if (r->residue_books[j][k] >= f->codebook_count) return error(f, VORBIS_invalid_setup); + } else { + r->residue_books[j][k] = -1; + } + } + } + // precompute the classifications[] array to avoid inner-loop mod/divide + // call it 'classdata' since we already have r->classifications + r->classdata = (uint8 **) setup_malloc(f, sizeof(*r->classdata) * f->codebooks[r->classbook].entries); + if (!r->classdata) return error(f, VORBIS_outofmem); + memset(r->classdata, 0, sizeof(*r->classdata) * f->codebooks[r->classbook].entries); + for (j=0; j < f->codebooks[r->classbook].entries; ++j) { + int classwords = f->codebooks[r->classbook].dimensions; + int temp = j; + r->classdata[j] = (uint8 *) setup_malloc(f, sizeof(r->classdata[j][0]) * classwords); + if (r->classdata[j] == NULL) return error(f, VORBIS_outofmem); + for (k=classwords-1; k >= 0; --k) { + r->classdata[j][k] = temp % r->classifications; + temp /= r->classifications; + } + } + } + + f->mapping_count = get_bits(f,6)+1; + f->mapping = (Mapping *) setup_malloc(f, f->mapping_count * sizeof(*f->mapping)); + if (f->mapping == NULL) return error(f, VORBIS_outofmem); + memset(f->mapping, 0, f->mapping_count * sizeof(*f->mapping)); + for (i=0; i < f->mapping_count; ++i) { + Mapping *m = f->mapping + i; + int mapping_type = get_bits(f,16); + if (mapping_type != 0) return error(f, VORBIS_invalid_setup); + m->chan = (MappingChannel *) setup_malloc(f, f->channels * sizeof(*m->chan)); + if (m->chan == NULL) return error(f, VORBIS_outofmem); + if (get_bits(f,1)) + m->submaps = get_bits(f,4)+1; + else + m->submaps = 1; + if (m->submaps > max_submaps) + max_submaps = m->submaps; + if (get_bits(f,1)) { + m->coupling_steps = get_bits(f,8)+1; + for (k=0; k < m->coupling_steps; ++k) { + m->chan[k].magnitude = get_bits(f, ilog(f->channels-1)); + m->chan[k].angle = get_bits(f, ilog(f->channels-1)); + if (m->chan[k].magnitude >= f->channels) return error(f, VORBIS_invalid_setup); + if (m->chan[k].angle >= f->channels) return error(f, VORBIS_invalid_setup); + if (m->chan[k].magnitude == m->chan[k].angle) return error(f, VORBIS_invalid_setup); + } + } else + m->coupling_steps = 0; + + // reserved field + if (get_bits(f,2)) return error(f, VORBIS_invalid_setup); + if (m->submaps > 1) { + for (j=0; j < f->channels; ++j) { + m->chan[j].mux = get_bits(f, 4); + if (m->chan[j].mux >= m->submaps) return error(f, VORBIS_invalid_setup); + } + } else + // @SPECIFICATION: this case is missing from the spec + for (j=0; j < f->channels; ++j) + m->chan[j].mux = 0; + + for (j=0; j < m->submaps; ++j) { + get_bits(f,8); // discard + m->submap_floor[j] = get_bits(f,8); + m->submap_residue[j] = get_bits(f,8); + if (m->submap_floor[j] >= f->floor_count) return error(f, VORBIS_invalid_setup); + if (m->submap_residue[j] >= f->residue_count) return error(f, VORBIS_invalid_setup); + } + } + + // Modes + f->mode_count = get_bits(f, 6)+1; + for (i=0; i < f->mode_count; ++i) { + Mode *m = f->mode_config+i; + m->blockflag = get_bits(f,1); + m->windowtype = get_bits(f,16); + m->transformtype = get_bits(f,16); + m->mapping = get_bits(f,8); + if (m->windowtype != 0) return error(f, VORBIS_invalid_setup); + if (m->transformtype != 0) return error(f, VORBIS_invalid_setup); + if (m->mapping >= f->mapping_count) return error(f, VORBIS_invalid_setup); + } + + flush_packet(f); + + f->previous_length = 0; + + for (i=0; i < f->channels; ++i) { + f->channel_buffers[i] = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1); + f->previous_window[i] = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1/2); + f->finalY[i] = (int16 *) setup_malloc(f, sizeof(int16) * longest_floorlist); + if (f->channel_buffers[i] == NULL || f->previous_window[i] == NULL || f->finalY[i] == NULL) return error(f, VORBIS_outofmem); + #ifdef STB_VORBIS_NO_DEFER_FLOOR + f->floor_buffers[i] = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1/2); + if (f->floor_buffers[i] == NULL) return error(f, VORBIS_outofmem); + #endif + } + + if (!init_blocksize(f, 0, f->blocksize_0)) return FALSE; + if (!init_blocksize(f, 1, f->blocksize_1)) return FALSE; + f->blocksize[0] = f->blocksize_0; + f->blocksize[1] = f->blocksize_1; + +#ifdef STB_VORBIS_DIVIDE_TABLE + if (integer_divide_table[1][1]==0) + for (i=0; i < DIVTAB_NUMER; ++i) + for (j=1; j < DIVTAB_DENOM; ++j) + integer_divide_table[i][j] = i / j; +#endif + + // compute how much temporary memory is needed + + // 1. + { + uint32 imdct_mem = (f->blocksize_1 * sizeof(float) >> 1); + uint32 classify_mem; + int i,max_part_read=0; + for (i=0; i < f->residue_count; ++i) { + Residue *r = f->residue_config + i; + int n_read = r->end - r->begin; + int part_read = n_read / r->part_size; + if (part_read > max_part_read) + max_part_read = part_read; + } + #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE + classify_mem = f->channels * (sizeof(void*) + max_part_read * sizeof(uint8 *)); + #else + classify_mem = f->channels * (sizeof(void*) + max_part_read * sizeof(int *)); + #endif + + f->temp_memory_required = classify_mem; + if (imdct_mem > f->temp_memory_required) + f->temp_memory_required = imdct_mem; + } + + f->first_decode = TRUE; + + if (f->alloc.alloc_buffer) { + assert(f->temp_offset == f->alloc.alloc_buffer_length_in_bytes); + // check if there's enough temp memory so we don't error later + if (f->setup_offset + sizeof(*f) + f->temp_memory_required > (unsigned) f->temp_offset) + return error(f, VORBIS_outofmem); + } + + f->first_audio_page_offset = stb_vorbis_get_file_offset(f); + + return TRUE; +} + +static void vorbis_deinit(stb_vorbis *p) +{ + int i,j; + if (p->residue_config) { + for (i=0; i < p->residue_count; ++i) { + Residue *r = p->residue_config+i; + if (r->classdata) { + for (j=0; j < p->codebooks[r->classbook].entries; ++j) + setup_free(p, r->classdata[j]); + setup_free(p, r->classdata); + } + setup_free(p, r->residue_books); + } + } + + if (p->codebooks) { + CHECK(p); + for (i=0; i < p->codebook_count; ++i) { + Codebook *c = p->codebooks + i; + setup_free(p, c->codeword_lengths); + setup_free(p, c->multiplicands); + setup_free(p, c->codewords); + setup_free(p, c->sorted_codewords); + // c->sorted_values[-1] is the first entry in the array + setup_free(p, c->sorted_values ? c->sorted_values-1 : NULL); + } + setup_free(p, p->codebooks); + } + setup_free(p, p->floor_config); + setup_free(p, p->residue_config); + if (p->mapping) { + for (i=0; i < p->mapping_count; ++i) + setup_free(p, p->mapping[i].chan); + setup_free(p, p->mapping); + } + CHECK(p); + for (i=0; i < p->channels && i < STB_VORBIS_MAX_CHANNELS; ++i) { + setup_free(p, p->channel_buffers[i]); + setup_free(p, p->previous_window[i]); + #ifdef STB_VORBIS_NO_DEFER_FLOOR + setup_free(p, p->floor_buffers[i]); + #endif + setup_free(p, p->finalY[i]); + } + for (i=0; i < 2; ++i) { + setup_free(p, p->A[i]); + setup_free(p, p->B[i]); + setup_free(p, p->C[i]); + setup_free(p, p->window[i]); + setup_free(p, p->bit_reverse[i]); + } + #ifndef STB_VORBIS_NO_STDIO + if (p->close_on_free) fclose(p->f); + #endif +} + +void stb_vorbis_close(stb_vorbis *p) +{ + if (p == NULL) return; + vorbis_deinit(p); + setup_free(p,p); +} + +static void vorbis_init(stb_vorbis *p, const stb_vorbis_alloc *z) +{ + memset(p, 0, sizeof(*p)); // NULL out all malloc'd pointers to start + if (z) { + p->alloc = *z; + p->alloc.alloc_buffer_length_in_bytes = (p->alloc.alloc_buffer_length_in_bytes+3) & ~3; + p->temp_offset = p->alloc.alloc_buffer_length_in_bytes; + } + p->eof = 0; + p->error = VORBIS__no_error; + p->stream = NULL; + p->codebooks = NULL; + p->page_crc_tests = -1; + #ifndef STB_VORBIS_NO_STDIO + p->close_on_free = FALSE; + p->f = NULL; + #endif +} + +int stb_vorbis_get_sample_offset(stb_vorbis *f) +{ + if (f->current_loc_valid) + return f->current_loc; + else + return -1; +} + +stb_vorbis_info stb_vorbis_get_info(stb_vorbis *f) +{ + stb_vorbis_info d; + d.channels = f->channels; + d.sample_rate = f->sample_rate; + d.setup_memory_required = f->setup_memory_required; + d.setup_temp_memory_required = f->setup_temp_memory_required; + d.temp_memory_required = f->temp_memory_required; + d.max_frame_size = f->blocksize_1 >> 1; + return d; +} + +int stb_vorbis_get_error(stb_vorbis *f) +{ + int e = f->error; + f->error = VORBIS__no_error; + return e; +} + +static stb_vorbis * vorbis_alloc(stb_vorbis *f) +{ + stb_vorbis *p = (stb_vorbis *) setup_malloc(f, sizeof(*p)); + return p; +} + +#ifndef STB_VORBIS_NO_PUSHDATA_API + +void stb_vorbis_flush_pushdata(stb_vorbis *f) +{ + f->previous_length = 0; + f->page_crc_tests = 0; + f->discard_samples_deferred = 0; + f->current_loc_valid = FALSE; + f->first_decode = FALSE; + f->samples_output = 0; + f->channel_buffer_start = 0; + f->channel_buffer_end = 0; +} + +static int vorbis_search_for_page_pushdata(vorb *f, uint8 *data, int data_len) +{ + int i,n; + for (i=0; i < f->page_crc_tests; ++i) + f->scan[i].bytes_done = 0; + + // if we have room for more scans, search for them first, because + // they may cause us to stop early if their header is incomplete + if (f->page_crc_tests < STB_VORBIS_PUSHDATA_CRC_COUNT) { + if (data_len < 4) return 0; + data_len -= 3; // need to look for 4-byte sequence, so don't miss + // one that straddles a boundary + for (i=0; i < data_len; ++i) { + if (data[i] == 0x4f) { + if (0==memcmp(data+i, ogg_page_header, 4)) { + int j,len; + uint32 crc; + // make sure we have the whole page header + if (i+26 >= data_len || i+27+data[i+26] >= data_len) { + // only read up to this page start, so hopefully we'll + // have the whole page header start next time + data_len = i; + break; + } + // ok, we have it all; compute the length of the page + len = 27 + data[i+26]; + for (j=0; j < data[i+26]; ++j) + len += data[i+27+j]; + // scan everything up to the embedded crc (which we must 0) + crc = 0; + for (j=0; j < 22; ++j) + crc = crc32_update(crc, data[i+j]); + // now process 4 0-bytes + for ( ; j < 26; ++j) + crc = crc32_update(crc, 0); + // len is the total number of bytes we need to scan + n = f->page_crc_tests++; + f->scan[n].bytes_left = len-j; + f->scan[n].crc_so_far = crc; + f->scan[n].goal_crc = data[i+22] + (data[i+23] << 8) + (data[i+24]<<16) + (data[i+25]<<24); + // if the last frame on a page is continued to the next, then + // we can't recover the sample_loc immediately + if (data[i+27+data[i+26]-1] == 255) + f->scan[n].sample_loc = ~0; + else + f->scan[n].sample_loc = data[i+6] + (data[i+7] << 8) + (data[i+ 8]<<16) + (data[i+ 9]<<24); + f->scan[n].bytes_done = i+j; + if (f->page_crc_tests == STB_VORBIS_PUSHDATA_CRC_COUNT) + break; + // keep going if we still have room for more + } + } + } + } + + for (i=0; i < f->page_crc_tests;) { + uint32 crc; + int j; + int n = f->scan[i].bytes_done; + int m = f->scan[i].bytes_left; + if (m > data_len - n) m = data_len - n; + // m is the bytes to scan in the current chunk + crc = f->scan[i].crc_so_far; + for (j=0; j < m; ++j) + crc = crc32_update(crc, data[n+j]); + f->scan[i].bytes_left -= m; + f->scan[i].crc_so_far = crc; + if (f->scan[i].bytes_left == 0) { + // does it match? + if (f->scan[i].crc_so_far == f->scan[i].goal_crc) { + // Houston, we have page + data_len = n+m; // consumption amount is wherever that scan ended + f->page_crc_tests = -1; // drop out of page scan mode + f->previous_length = 0; // decode-but-don't-output one frame + f->next_seg = -1; // start a new page + f->current_loc = f->scan[i].sample_loc; // set the current sample location + // to the amount we'd have decoded had we decoded this page + f->current_loc_valid = f->current_loc != ~0U; + return data_len; + } + // delete entry + f->scan[i] = f->scan[--f->page_crc_tests]; + } else { + ++i; + } + } + + return data_len; +} + +// return value: number of bytes we used +int stb_vorbis_decode_frame_pushdata( + stb_vorbis *f, // the file we're decoding + const uint8 *data, int data_len, // the memory available for decoding + int *channels, // place to write number of float * buffers + float ***output, // place to write float ** array of float * buffers + int *samples // place to write number of output samples + ) +{ + int i; + int len,right,left; + + if (!IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing); + + if (f->page_crc_tests >= 0) { + *samples = 0; + return vorbis_search_for_page_pushdata(f, (uint8 *) data, data_len); + } + + f->stream = (uint8 *) data; + f->stream_end = (uint8 *) data + data_len; + f->error = VORBIS__no_error; + + // check that we have the entire packet in memory + if (!is_whole_packet_present(f, FALSE)) { + *samples = 0; + return 0; + } + + if (!vorbis_decode_packet(f, &len, &left, &right)) { + // save the actual error we encountered + enum STBVorbisError error = f->error; + if (error == VORBIS_bad_packet_type) { + // flush and resynch + f->error = VORBIS__no_error; + while (get8_packet(f) != EOP) + if (f->eof) break; + *samples = 0; + return (int) (f->stream - data); + } + if (error == VORBIS_continued_packet_flag_invalid) { + if (f->previous_length == 0) { + // we may be resynching, in which case it's ok to hit one + // of these; just discard the packet + f->error = VORBIS__no_error; + while (get8_packet(f) != EOP) + if (f->eof) break; + *samples = 0; + return (int) (f->stream - data); + } + } + // if we get an error while parsing, what to do? + // well, it DEFINITELY won't work to continue from where we are! + stb_vorbis_flush_pushdata(f); + // restore the error that actually made us bail + f->error = error; + *samples = 0; + return 1; + } + + // success! + len = vorbis_finish_frame(f, len, left, right); + for (i=0; i < f->channels; ++i) + f->outputs[i] = f->channel_buffers[i] + left; + + if (channels) *channels = f->channels; + *samples = len; + *output = f->outputs; + return (int) (f->stream - data); +} + +stb_vorbis *stb_vorbis_open_pushdata( + const unsigned char *data, int data_len, // the memory available for decoding + int *data_used, // only defined if result is not NULL + int *error, const stb_vorbis_alloc *alloc) +{ + stb_vorbis *f, p; + vorbis_init(&p, alloc); + p.stream = (uint8 *) data; + p.stream_end = (uint8 *) data + data_len; + p.push_mode = TRUE; + if (!start_decoder(&p)) { + if (p.eof) + *error = VORBIS_need_more_data; + else + *error = p.error; + return NULL; + } + f = vorbis_alloc(&p); + if (f) { + *f = p; + *data_used = (int) (f->stream - data); + *error = 0; + return f; + } else { + vorbis_deinit(&p); + return NULL; + } +} +#endif // STB_VORBIS_NO_PUSHDATA_API + +unsigned int stb_vorbis_get_file_offset(stb_vorbis *f) +{ + #ifndef STB_VORBIS_NO_PUSHDATA_API + if (f->push_mode) return 0; + #endif + if (USE_MEMORY(f)) return (unsigned int) (f->stream - f->stream_start); + #ifndef STB_VORBIS_NO_STDIO + return (unsigned int) (ftell(f->f) - f->f_start); + #endif +} + +#ifndef STB_VORBIS_NO_PULLDATA_API +// +// DATA-PULLING API +// + +static uint32 vorbis_find_page(stb_vorbis *f, uint32 *end, uint32 *last) +{ + for(;;) { + int n; + if (f->eof) return 0; + n = get8(f); + if (n == 0x4f) { // page header candidate + unsigned int retry_loc = stb_vorbis_get_file_offset(f); + int i; + // check if we're off the end of a file_section stream + if (retry_loc - 25 > f->stream_len) + return 0; + // check the rest of the header + for (i=1; i < 4; ++i) + if (get8(f) != ogg_page_header[i]) + break; + if (f->eof) return 0; + if (i == 4) { + uint8 header[27]; + uint32 i, crc, goal, len; + for (i=0; i < 4; ++i) + header[i] = ogg_page_header[i]; + for (; i < 27; ++i) + header[i] = get8(f); + if (f->eof) return 0; + if (header[4] != 0) goto invalid; + goal = header[22] + (header[23] << 8) + (header[24]<<16) + (header[25]<<24); + for (i=22; i < 26; ++i) + header[i] = 0; + crc = 0; + for (i=0; i < 27; ++i) + crc = crc32_update(crc, header[i]); + len = 0; + for (i=0; i < header[26]; ++i) { + int s = get8(f); + crc = crc32_update(crc, s); + len += s; + } + if (len && f->eof) return 0; + for (i=0; i < len; ++i) + crc = crc32_update(crc, get8(f)); + // finished parsing probable page + if (crc == goal) { + // we could now check that it's either got the last + // page flag set, OR it's followed by the capture + // pattern, but I guess TECHNICALLY you could have + // a file with garbage between each ogg page and recover + // from it automatically? So even though that paranoia + // might decrease the chance of an invalid decode by + // another 2^32, not worth it since it would hose those + // invalid-but-useful files? + if (end) + *end = stb_vorbis_get_file_offset(f); + if (last) { + if (header[5] & 0x04) + *last = 1; + else + *last = 0; + } + set_file_offset(f, retry_loc-1); + return 1; + } + } + invalid: + // not a valid page, so rewind and look for next one + set_file_offset(f, retry_loc); + } + } +} + + +#define SAMPLE_unknown 0xffffffff + +// seeking is implemented with a binary search, which narrows down the range to +// 64K, before using a linear search (because finding the synchronization +// pattern can be expensive, and the chance we'd find the end page again is +// relatively high for small ranges) +// +// two initial interpolation-style probes are used at the start of the search +// to try to bound either side of the binary search sensibly, while still +// working in O(log n) time if they fail. + +static int get_seek_page_info(stb_vorbis *f, ProbedPage *z) +{ + uint8 header[27], lacing[255]; + int i,len; + + // record where the page starts + z->page_start = stb_vorbis_get_file_offset(f); + + // parse the header + getn(f, header, 27); + if (header[0] != 'O' || header[1] != 'g' || header[2] != 'g' || header[3] != 'S') + return 0; + getn(f, lacing, header[26]); + + // determine the length of the payload + len = 0; + for (i=0; i < header[26]; ++i) + len += lacing[i]; + + // this implies where the page ends + z->page_end = z->page_start + 27 + header[26] + len; + + // read the last-decoded sample out of the data + z->last_decoded_sample = header[6] + (header[7] << 8) + (header[8] << 16) + (header[9] << 24); + + // restore file state to where we were + set_file_offset(f, z->page_start); + return 1; +} + +// rarely used function to seek back to the preceeding page while finding the +// start of a packet +static int go_to_page_before(stb_vorbis *f, unsigned int limit_offset) +{ + unsigned int previous_safe, end; + + // now we want to seek back 64K from the limit + if (limit_offset >= 65536 && limit_offset-65536 >= f->first_audio_page_offset) + previous_safe = limit_offset - 65536; + else + previous_safe = f->first_audio_page_offset; + + set_file_offset(f, previous_safe); + + while (vorbis_find_page(f, &end, NULL)) { + if (end >= limit_offset && stb_vorbis_get_file_offset(f) < limit_offset) + return 1; + set_file_offset(f, end); + } + + return 0; +} + +// implements the search logic for finding a page and starting decoding. if +// the function succeeds, current_loc_valid will be true and current_loc will +// be less than or equal to the provided sample number (the closer the +// better). +static int seek_to_sample_coarse(stb_vorbis *f, uint32 sample_number) +{ + ProbedPage left, right, mid; + int i, start_seg_with_known_loc, end_pos, page_start; + uint32 delta, stream_length, padding; + double offset, bytes_per_sample; + int probe = 0; + + // find the last page and validate the target sample + stream_length = stb_vorbis_stream_length_in_samples(f); + if (stream_length == 0) return error(f, VORBIS_seek_without_length); + if (sample_number > stream_length) return error(f, VORBIS_seek_invalid); + + // this is the maximum difference between the window-center (which is the + // actual granule position value), and the right-start (which the spec + // indicates should be the granule position (give or take one)). + padding = ((f->blocksize_1 - f->blocksize_0) >> 2); + if (sample_number < padding) + sample_number = 0; + else + sample_number -= padding; + + left = f->p_first; + while (left.last_decoded_sample == ~0U) { + // (untested) the first page does not have a 'last_decoded_sample' + set_file_offset(f, left.page_end); + if (!get_seek_page_info(f, &left)) goto error; + } + + right = f->p_last; + assert(right.last_decoded_sample != ~0U); + + // starting from the start is handled differently + if (sample_number <= left.last_decoded_sample) { + stb_vorbis_seek_start(f); + return 1; + } + + while (left.page_end != right.page_start) { + assert(left.page_end < right.page_start); + // search range in bytes + delta = right.page_start - left.page_end; + if (delta <= 65536) { + // there's only 64K left to search - handle it linearly + set_file_offset(f, left.page_end); + } else { + if (probe < 2) { + if (probe == 0) { + // first probe (interpolate) + double data_bytes = right.page_end - left.page_start; + bytes_per_sample = data_bytes / right.last_decoded_sample; + offset = left.page_start + bytes_per_sample * (sample_number - left.last_decoded_sample); + } else { + // second probe (try to bound the other side) + double error = ((double) sample_number - mid.last_decoded_sample) * bytes_per_sample; + if (error >= 0 && error < 8000) error = 8000; + if (error < 0 && error > -8000) error = -8000; + offset += error * 2; + } + + // ensure the offset is valid + if (offset < left.page_end) + offset = left.page_end; + if (offset > right.page_start - 65536) + offset = right.page_start - 65536; + + set_file_offset(f, (unsigned int) offset); + } else { + // binary search for large ranges (offset by 32K to ensure + // we don't hit the right page) + set_file_offset(f, left.page_end + (delta / 2) - 32768); + } + + if (!vorbis_find_page(f, NULL, NULL)) goto error; + } + + for (;;) { + if (!get_seek_page_info(f, &mid)) goto error; + if (mid.last_decoded_sample != ~0U) break; + // (untested) no frames end on this page + set_file_offset(f, mid.page_end); + assert(mid.page_start < right.page_start); + } + + // if we've just found the last page again then we're in a tricky file, + // and we're close enough. + if (mid.page_start == right.page_start) + break; + + if (sample_number < mid.last_decoded_sample) + right = mid; + else + left = mid; + + ++probe; + } + + // seek back to start of the last packet + page_start = left.page_start; + set_file_offset(f, page_start); + if (!start_page(f)) return error(f, VORBIS_seek_failed); + end_pos = f->end_seg_with_known_loc; + assert(end_pos >= 0); + + for (;;) { + for (i = end_pos; i > 0; --i) + if (f->segments[i-1] != 255) + break; + + start_seg_with_known_loc = i; + + if (start_seg_with_known_loc > 0 || !(f->page_flag & PAGEFLAG_continued_packet)) + break; + + // (untested) the final packet begins on an earlier page + if (!go_to_page_before(f, page_start)) + goto error; + + page_start = stb_vorbis_get_file_offset(f); + if (!start_page(f)) goto error; + end_pos = f->segment_count - 1; + } + + // prepare to start decoding + f->current_loc_valid = FALSE; + f->last_seg = FALSE; + f->valid_bits = 0; + f->packet_bytes = 0; + f->bytes_in_seg = 0; + f->previous_length = 0; + f->next_seg = start_seg_with_known_loc; + + for (i = 0; i < start_seg_with_known_loc; i++) + skip(f, f->segments[i]); + + // start decoding (optimizable - this frame is generally discarded) + vorbis_pump_first_frame(f); + return 1; + +error: + // try to restore the file to a valid state + stb_vorbis_seek_start(f); + return error(f, VORBIS_seek_failed); +} + +// the same as vorbis_decode_initial, but without advancing +static int peek_decode_initial(vorb *f, int *p_left_start, int *p_left_end, int *p_right_start, int *p_right_end, int *mode) +{ + int bits_read, bytes_read; + + if (!vorbis_decode_initial(f, p_left_start, p_left_end, p_right_start, p_right_end, mode)) + return 0; + + // either 1 or 2 bytes were read, figure out which so we can rewind + bits_read = 1 + ilog(f->mode_count-1); + if (f->mode_config[*mode].blockflag) + bits_read += 2; + bytes_read = (bits_read + 7) / 8; + + f->bytes_in_seg += bytes_read; + f->packet_bytes -= bytes_read; + skip(f, -bytes_read); + if (f->next_seg == -1) + f->next_seg = f->segment_count - 1; + else + f->next_seg--; + f->valid_bits = 0; + + return 1; +} + +int stb_vorbis_seek_frame(stb_vorbis *f, unsigned int sample_number) +{ + uint32 max_frame_samples; + + if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing); + + // fast page-level search + if (!seek_to_sample_coarse(f, sample_number)) + return 0; + + assert(f->current_loc_valid); + assert(f->current_loc <= sample_number); + + // linear search for the relevant packet + max_frame_samples = (f->blocksize_1*3 - f->blocksize_0) >> 2; + while (f->current_loc < sample_number) { + int left_start, left_end, right_start, right_end, mode, frame_samples; + if (!peek_decode_initial(f, &left_start, &left_end, &right_start, &right_end, &mode)) + return error(f, VORBIS_seek_failed); + // calculate the number of samples returned by the next frame + frame_samples = right_start - left_start; + if (f->current_loc + frame_samples > sample_number) { + return 1; // the next frame will contain the sample + } else if (f->current_loc + frame_samples + max_frame_samples > sample_number) { + // there's a chance the frame after this could contain the sample + vorbis_pump_first_frame(f); + } else { + // this frame is too early to be relevant + f->current_loc += frame_samples; + f->previous_length = 0; + maybe_start_packet(f); + flush_packet(f); + } + } + // the next frame will start with the sample + assert(f->current_loc == sample_number); + return 1; +} + +int stb_vorbis_seek(stb_vorbis *f, unsigned int sample_number) +{ + if (!stb_vorbis_seek_frame(f, sample_number)) + return 0; + + if (sample_number != f->current_loc) { + int n; + uint32 frame_start = f->current_loc; + stb_vorbis_get_frame_float(f, &n, NULL); + assert(sample_number > frame_start); + assert(f->channel_buffer_start + (int) (sample_number-frame_start) <= f->channel_buffer_end); + f->channel_buffer_start += (sample_number - frame_start); + } + + return 1; +} + +void stb_vorbis_seek_start(stb_vorbis *f) +{ + if (IS_PUSH_MODE(f)) { error(f, VORBIS_invalid_api_mixing); return; } + set_file_offset(f, f->first_audio_page_offset); + f->previous_length = 0; + f->first_decode = TRUE; + f->next_seg = -1; + vorbis_pump_first_frame(f); +} + +unsigned int stb_vorbis_stream_length_in_samples(stb_vorbis *f) +{ + unsigned int restore_offset, previous_safe; + unsigned int end, last_page_loc; + + if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing); + if (!f->total_samples) { + unsigned int last; + uint32 lo,hi; + char header[6]; + + // first, store the current decode position so we can restore it + restore_offset = stb_vorbis_get_file_offset(f); + + // now we want to seek back 64K from the end (the last page must + // be at most a little less than 64K, but let's allow a little slop) + if (f->stream_len >= 65536 && f->stream_len-65536 >= f->first_audio_page_offset) + previous_safe = f->stream_len - 65536; + else + previous_safe = f->first_audio_page_offset; + + set_file_offset(f, previous_safe); + // previous_safe is now our candidate 'earliest known place that seeking + // to will lead to the final page' + + if (!vorbis_find_page(f, &end, &last)) { + // if we can't find a page, we're hosed! + f->error = VORBIS_cant_find_last_page; + f->total_samples = 0xffffffff; + goto done; + } + + // check if there are more pages + last_page_loc = stb_vorbis_get_file_offset(f); + + // stop when the last_page flag is set, not when we reach eof; + // this allows us to stop short of a 'file_section' end without + // explicitly checking the length of the section + while (!last) { + set_file_offset(f, end); + if (!vorbis_find_page(f, &end, &last)) { + // the last page we found didn't have the 'last page' flag + // set. whoops! + break; + } + previous_safe = last_page_loc+1; + last_page_loc = stb_vorbis_get_file_offset(f); + } + + set_file_offset(f, last_page_loc); + + // parse the header + getn(f, (unsigned char *)header, 6); + // extract the absolute granule position + lo = get32(f); + hi = get32(f); + if (lo == 0xffffffff && hi == 0xffffffff) { + f->error = VORBIS_cant_find_last_page; + f->total_samples = SAMPLE_unknown; + goto done; + } + if (hi) + lo = 0xfffffffe; // saturate + f->total_samples = lo; + + f->p_last.page_start = last_page_loc; + f->p_last.page_end = end; + f->p_last.last_decoded_sample = lo; + + done: + set_file_offset(f, restore_offset); + } + return f->total_samples == SAMPLE_unknown ? 0 : f->total_samples; +} + +float stb_vorbis_stream_length_in_seconds(stb_vorbis *f) +{ + return stb_vorbis_stream_length_in_samples(f) / (float) f->sample_rate; +} + + + +int stb_vorbis_get_frame_float(stb_vorbis *f, int *channels, float ***output) +{ + int len, right,left,i; + if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing); + + if (!vorbis_decode_packet(f, &len, &left, &right)) { + f->channel_buffer_start = f->channel_buffer_end = 0; + return 0; + } + + len = vorbis_finish_frame(f, len, left, right); + for (i=0; i < f->channels; ++i) + f->outputs[i] = f->channel_buffers[i] + left; + + f->channel_buffer_start = left; + f->channel_buffer_end = left+len; + + if (channels) *channels = f->channels; + if (output) *output = f->outputs; + return len; +} + +#ifndef STB_VORBIS_NO_STDIO + +stb_vorbis * stb_vorbis_open_file_section(FILE *file, int close_on_free, int *error, const stb_vorbis_alloc *alloc, unsigned int length) +{ + stb_vorbis *f, p; + vorbis_init(&p, alloc); + p.f = file; + p.f_start = (uint32) ftell(file); + p.stream_len = length; + p.close_on_free = close_on_free; + if (start_decoder(&p)) { + f = vorbis_alloc(&p); + if (f) { + *f = p; + vorbis_pump_first_frame(f); + return f; + } + } + if (error) *error = p.error; + vorbis_deinit(&p); + return NULL; +} + +stb_vorbis * stb_vorbis_open_file(FILE *file, int close_on_free, int *error, const stb_vorbis_alloc *alloc) +{ + unsigned int len, start; + start = (unsigned int) ftell(file); + fseek(file, 0, SEEK_END); + len = (unsigned int) (ftell(file) - start); + fseek(file, start, SEEK_SET); + return stb_vorbis_open_file_section(file, close_on_free, error, alloc, len); +} + +stb_vorbis * stb_vorbis_open_filename(const char *filename, int *error, const stb_vorbis_alloc *alloc) +{ + FILE *f = fopen(filename, "rb"); + if (f) + return stb_vorbis_open_file(f, TRUE, error, alloc); + if (error) *error = VORBIS_file_open_failure; + return NULL; +} +#endif // STB_VORBIS_NO_STDIO + +stb_vorbis * stb_vorbis_open_memory(const unsigned char *data, int len, int *error, const stb_vorbis_alloc *alloc) +{ + stb_vorbis *f, p; + if (data == NULL) return NULL; + vorbis_init(&p, alloc); + p.stream = (uint8 *) data; + p.stream_end = (uint8 *) data + len; + p.stream_start = (uint8 *) p.stream; + p.stream_len = len; + p.push_mode = FALSE; + if (start_decoder(&p)) { + f = vorbis_alloc(&p); + if (f) { + *f = p; + vorbis_pump_first_frame(f); + return f; + } + } + if (error) *error = p.error; + vorbis_deinit(&p); + return NULL; +} + +#ifndef STB_VORBIS_NO_INTEGER_CONVERSION +#define PLAYBACK_MONO 1 +#define PLAYBACK_LEFT 2 +#define PLAYBACK_RIGHT 4 + +#define L (PLAYBACK_LEFT | PLAYBACK_MONO) +#define C (PLAYBACK_LEFT | PLAYBACK_RIGHT | PLAYBACK_MONO) +#define R (PLAYBACK_RIGHT | PLAYBACK_MONO) + +static int8 channel_position[7][6] = +{ + { 0 }, + { C }, + { L, R }, + { L, C, R }, + { L, R, L, R }, + { L, C, R, L, R }, + { L, C, R, L, R, C }, +}; + + +#ifndef STB_VORBIS_NO_FAST_SCALED_FLOAT + typedef union { + float f; + int i; + } float_conv; + typedef char stb_vorbis_float_size_test[sizeof(float)==4 && sizeof(int) == 4]; + #define FASTDEF(x) float_conv x + // add (1<<23) to convert to int, then divide by 2^SHIFT, then add 0.5/2^SHIFT to round + #define MAGIC(SHIFT) (1.5f * (1 << (23-SHIFT)) + 0.5f/(1 << SHIFT)) + #define ADDEND(SHIFT) (((150-SHIFT) << 23) + (1 << 22)) + #define FAST_SCALED_FLOAT_TO_INT(temp,x,s) (temp.f = (x) + MAGIC(s), temp.i - ADDEND(s)) + #define check_endianness() +#else + #define FAST_SCALED_FLOAT_TO_INT(temp,x,s) ((int) ((x) * (1 << (s)))) + #define check_endianness() + #define FASTDEF(x) +#endif + +static void copy_samples(short *dest, float *src, int len) +{ + int i; + check_endianness(); + for (i=0; i < len; ++i) { + FASTDEF(temp); + int v = FAST_SCALED_FLOAT_TO_INT(temp, src[i],15); + if ((unsigned int) (v + 32768) > 65535) + v = v < 0 ? -32768 : 32767; + dest[i] = v; + } +} + +static void compute_samples(int mask, short *output, int num_c, float **data, int d_offset, int len) +{ + #define BUFFER_SIZE 32 + float buffer[BUFFER_SIZE]; + int i,j,o,n = BUFFER_SIZE; + check_endianness(); + for (o = 0; o < len; o += BUFFER_SIZE) { + memset(buffer, 0, sizeof(buffer)); + if (o + n > len) n = len - o; + for (j=0; j < num_c; ++j) { + if (channel_position[num_c][j] & mask) { + for (i=0; i < n; ++i) + buffer[i] += data[j][d_offset+o+i]; + } + } + for (i=0; i < n; ++i) { + FASTDEF(temp); + int v = FAST_SCALED_FLOAT_TO_INT(temp,buffer[i],15); + if ((unsigned int) (v + 32768) > 65535) + v = v < 0 ? -32768 : 32767; + output[o+i] = v; + } + } +} + +static void compute_stereo_samples(short *output, int num_c, float **data, int d_offset, int len) +{ + #define BUFFER_SIZE 32 + float buffer[BUFFER_SIZE]; + int i,j,o,n = BUFFER_SIZE >> 1; + // o is the offset in the source data + check_endianness(); + for (o = 0; o < len; o += BUFFER_SIZE >> 1) { + // o2 is the offset in the output data + int o2 = o << 1; + memset(buffer, 0, sizeof(buffer)); + if (o + n > len) n = len - o; + for (j=0; j < num_c; ++j) { + int m = channel_position[num_c][j] & (PLAYBACK_LEFT | PLAYBACK_RIGHT); + if (m == (PLAYBACK_LEFT | PLAYBACK_RIGHT)) { + for (i=0; i < n; ++i) { + buffer[i*2+0] += data[j][d_offset+o+i]; + buffer[i*2+1] += data[j][d_offset+o+i]; + } + } else if (m == PLAYBACK_LEFT) { + for (i=0; i < n; ++i) { + buffer[i*2+0] += data[j][d_offset+o+i]; + } + } else if (m == PLAYBACK_RIGHT) { + for (i=0; i < n; ++i) { + buffer[i*2+1] += data[j][d_offset+o+i]; + } + } + } + for (i=0; i < (n<<1); ++i) { + FASTDEF(temp); + int v = FAST_SCALED_FLOAT_TO_INT(temp,buffer[i],15); + if ((unsigned int) (v + 32768) > 65535) + v = v < 0 ? -32768 : 32767; + output[o2+i] = v; + } + } +} + +static void convert_samples_short(int buf_c, short **buffer, int b_offset, int data_c, float **data, int d_offset, int samples) +{ + int i; + if (buf_c != data_c && buf_c <= 2 && data_c <= 6) { + static int channel_selector[3][2] = { {0}, {PLAYBACK_MONO}, {PLAYBACK_LEFT, PLAYBACK_RIGHT} }; + for (i=0; i < buf_c; ++i) + compute_samples(channel_selector[buf_c][i], buffer[i]+b_offset, data_c, data, d_offset, samples); + } else { + int limit = buf_c < data_c ? buf_c : data_c; + for (i=0; i < limit; ++i) + copy_samples(buffer[i]+b_offset, data[i]+d_offset, samples); + for ( ; i < buf_c; ++i) + memset(buffer[i]+b_offset, 0, sizeof(short) * samples); + } +} + +int stb_vorbis_get_frame_short(stb_vorbis *f, int num_c, short **buffer, int num_samples) +{ + float **output; + int len = stb_vorbis_get_frame_float(f, NULL, &output); + if (len > num_samples) len = num_samples; + if (len) + convert_samples_short(num_c, buffer, 0, f->channels, output, 0, len); + return len; +} + +static void convert_channels_short_interleaved(int buf_c, short *buffer, int data_c, float **data, int d_offset, int len) +{ + int i; + check_endianness(); + if (buf_c != data_c && buf_c <= 2 && data_c <= 6) { + assert(buf_c == 2); + for (i=0; i < buf_c; ++i) + compute_stereo_samples(buffer, data_c, data, d_offset, len); + } else { + int limit = buf_c < data_c ? buf_c : data_c; + int j; + for (j=0; j < len; ++j) { + for (i=0; i < limit; ++i) { + FASTDEF(temp); + float f = data[i][d_offset+j]; + int v = FAST_SCALED_FLOAT_TO_INT(temp, f,15);//data[i][d_offset+j],15); + if ((unsigned int) (v + 32768) > 65535) + v = v < 0 ? -32768 : 32767; + *buffer++ = v; + } + for ( ; i < buf_c; ++i) + *buffer++ = 0; + } + } +} + +int stb_vorbis_get_frame_short_interleaved(stb_vorbis *f, int num_c, short *buffer, int num_shorts) +{ + float **output; + int len; + if (num_c == 1) return stb_vorbis_get_frame_short(f,num_c,&buffer, num_shorts); + len = stb_vorbis_get_frame_float(f, NULL, &output); + if (len) { + if (len*num_c > num_shorts) len = num_shorts / num_c; + convert_channels_short_interleaved(num_c, buffer, f->channels, output, 0, len); + } + return len; +} + +int stb_vorbis_get_samples_short_interleaved(stb_vorbis *f, int channels, short *buffer, int num_shorts) +{ + float **outputs; + int len = num_shorts / channels; + int n=0; + int z = f->channels; + if (z > channels) z = channels; + while (n < len) { + int k = f->channel_buffer_end - f->channel_buffer_start; + if (n+k >= len) k = len - n; + if (k) + convert_channels_short_interleaved(channels, buffer, f->channels, f->channel_buffers, f->channel_buffer_start, k); + buffer += k*channels; + n += k; + f->channel_buffer_start += k; + if (n == len) break; + if (!stb_vorbis_get_frame_float(f, NULL, &outputs)) break; + } + return n; +} + +int stb_vorbis_get_samples_short(stb_vorbis *f, int channels, short **buffer, int len) +{ + float **outputs; + int n=0; + int z = f->channels; + if (z > channels) z = channels; + while (n < len) { + int k = f->channel_buffer_end - f->channel_buffer_start; + if (n+k >= len) k = len - n; + if (k) + convert_samples_short(channels, buffer, n, f->channels, f->channel_buffers, f->channel_buffer_start, k); + n += k; + f->channel_buffer_start += k; + if (n == len) break; + if (!stb_vorbis_get_frame_float(f, NULL, &outputs)) break; + } + return n; +} + +#ifndef STB_VORBIS_NO_STDIO +int stb_vorbis_decode_filename(const char *filename, int *channels, int *sample_rate, short **output) +{ + int data_len, offset, total, limit, error; + short *data; + stb_vorbis *v = stb_vorbis_open_filename(filename, &error, NULL); + if (v == NULL) return -1; + limit = v->channels * 4096; + *channels = v->channels; + if (sample_rate) + *sample_rate = v->sample_rate; + offset = data_len = 0; + total = limit; + data = (short *) malloc(total * sizeof(*data)); + if (data == NULL) { + stb_vorbis_close(v); + return -2; + } + for (;;) { + int n = stb_vorbis_get_frame_short_interleaved(v, v->channels, data+offset, total-offset); + if (n == 0) break; + data_len += n; + offset += n * v->channels; + if (offset + limit > total) { + short *data2; + total *= 2; + data2 = (short *) realloc(data, total * sizeof(*data)); + if (data2 == NULL) { + free(data); + stb_vorbis_close(v); + return -2; + } + data = data2; + } + } + *output = data; + stb_vorbis_close(v); + return data_len; +} +#endif // NO_STDIO + +int stb_vorbis_decode_memory(const uint8 *mem, int len, int *channels, int *sample_rate, short **output) +{ + int data_len, offset, total, limit, error; + short *data; + stb_vorbis *v = stb_vorbis_open_memory(mem, len, &error, NULL); + if (v == NULL) return -1; + limit = v->channels * 4096; + *channels = v->channels; + if (sample_rate) + *sample_rate = v->sample_rate; + offset = data_len = 0; + total = limit; + data = (short *) malloc(total * sizeof(*data)); + if (data == NULL) { + stb_vorbis_close(v); + return -2; + } + for (;;) { + int n = stb_vorbis_get_frame_short_interleaved(v, v->channels, data+offset, total-offset); + if (n == 0) break; + data_len += n; + offset += n * v->channels; + if (offset + limit > total) { + short *data2; + total *= 2; + data2 = (short *) realloc(data, total * sizeof(*data)); + if (data2 == NULL) { + free(data); + stb_vorbis_close(v); + return -2; + } + data = data2; + } + } + *output = data; + stb_vorbis_close(v); + return data_len; +} +#endif // STB_VORBIS_NO_INTEGER_CONVERSION + +int stb_vorbis_get_samples_float_interleaved(stb_vorbis *f, int channels, float *buffer, int num_floats) +{ + float **outputs; + int len = num_floats / channels; + int n=0; + int z = f->channels; + if (z > channels) z = channels; + while (n < len) { + int i,j; + int k = f->channel_buffer_end - f->channel_buffer_start; + if (n+k >= len) k = len - n; + for (j=0; j < k; ++j) { + for (i=0; i < z; ++i) + *buffer++ = f->channel_buffers[i][f->channel_buffer_start+j]; + for ( ; i < channels; ++i) + *buffer++ = 0; + } + n += k; + f->channel_buffer_start += k; + if (n == len) + break; + if (!stb_vorbis_get_frame_float(f, NULL, &outputs)) + break; + } + return n; +} + +int stb_vorbis_get_samples_float(stb_vorbis *f, int channels, float **buffer, int num_samples) +{ + float **outputs; + int n=0; + int z = f->channels; + if (z > channels) z = channels; + while (n < num_samples) { + int i; + int k = f->channel_buffer_end - f->channel_buffer_start; + if (n+k >= num_samples) k = num_samples - n; + if (k) { + for (i=0; i < z; ++i) + memcpy(buffer[i]+n, f->channel_buffers[i]+f->channel_buffer_start, sizeof(float)*k); + for ( ; i < channels; ++i) + memset(buffer[i]+n, 0, sizeof(float) * k); + } + n += k; + f->channel_buffer_start += k; + if (n == num_samples) + break; + if (!stb_vorbis_get_frame_float(f, NULL, &outputs)) + break; + } + return n; +} +#endif // STB_VORBIS_NO_PULLDATA_API + +/* Version history + 1.09 - 2016/04/04 - back out 'avoid discarding last frame' fix from previous version + 1.08 - 2016/04/02 - fixed multiple warnings; fix setup memory leaks; + avoid discarding last frame of audio data + 1.07 - 2015/01/16 - fixed some warnings, fix mingw, const-correct API + some more crash fixes when out of memory or with corrupt files + 1.06 - 2015/08/31 - full, correct support for seeking API (Dougall Johnson) + some crash fixes when out of memory or with corrupt files + 1.05 - 2015/04/19 - don't define __forceinline if it's redundant + 1.04 - 2014/08/27 - fix missing const-correct case in API + 1.03 - 2014/08/07 - Warning fixes + 1.02 - 2014/07/09 - Declare qsort compare function _cdecl on windows + 1.01 - 2014/06/18 - fix stb_vorbis_get_samples_float + 1.0 - 2014/05/26 - fix memory leaks; fix warnings; fix bugs in multichannel + (API change) report sample rate for decode-full-file funcs + 0.99996 - bracket #include for macintosh compilation by Laurent Gomila + 0.99995 - use union instead of pointer-cast for fast-float-to-int to avoid alias-optimization problem + 0.99994 - change fast-float-to-int to work in single-precision FPU mode, remove endian-dependence + 0.99993 - remove assert that fired on legal files with empty tables + 0.99992 - rewind-to-start + 0.99991 - bugfix to stb_vorbis_get_samples_short by Bernhard Wodo + 0.9999 - (should have been 0.99990) fix no-CRT support, compiling as C++ + 0.9998 - add a full-decode function with a memory source + 0.9997 - fix a bug in the read-from-FILE case in 0.9996 addition + 0.9996 - query length of vorbis stream in samples/seconds + 0.9995 - bugfix to another optimization that only happened in certain files + 0.9994 - bugfix to one of the optimizations that caused significant (but inaudible?) errors + 0.9993 - performance improvements; runs in 99% to 104% of time of reference implementation + 0.9992 - performance improvement of IMDCT; now performs close to reference implementation + 0.9991 - performance improvement of IMDCT + 0.999 - (should have been 0.9990) performance improvement of IMDCT + 0.998 - no-CRT support from Casey Muratori + 0.997 - bugfixes for bugs found by Terje Mathisen + 0.996 - bugfix: fast-huffman decode initialized incorrectly for sparse codebooks; fixing gives 10% speedup - found by Terje Mathisen + 0.995 - bugfix: fix to 'effective' overrun detection - found by Terje Mathisen + 0.994 - bugfix: garbage decode on final VQ symbol of a non-multiple - found by Terje Mathisen + 0.993 - bugfix: pushdata API required 1 extra byte for empty page (failed to consume final page if empty) - found by Terje Mathisen + 0.992 - fixes for MinGW warning + 0.991 - turn fast-float-conversion on by default + 0.990 - fix push-mode seek recovery if you seek into the headers + 0.98b - fix to bad release of 0.98 + 0.98 - fix push-mode seek recovery; robustify float-to-int and support non-fast mode + 0.97 - builds under c++ (typecasting, don't use 'class' keyword) + 0.96 - somehow MY 0.95 was right, but the web one was wrong, so here's my 0.95 rereleased as 0.96, fixes a typo in the clamping code + 0.95 - clamping code for 16-bit functions + 0.94 - not publically released + 0.93 - fixed all-zero-floor case (was decoding garbage) + 0.92 - fixed a memory leak + 0.91 - conditional compiles to omit parts of the API and the infrastructure to support them: STB_VORBIS_NO_PULLDATA_API, STB_VORBIS_NO_PUSHDATA_API, STB_VORBIS_NO_STDIO, STB_VORBIS_NO_INTEGER_CONVERSION + 0.90 - first public release +*/ + +#endif // STB_VORBIS_HEADER_ONLY diff --git a/thirdparty/misc/yuv2rgb.h b/thirdparty/misc/yuv2rgb.h new file mode 100644 index 0000000000..a9bef76da8 --- /dev/null +++ b/thirdparty/misc/yuv2rgb.h @@ -0,0 +1,1123 @@ +/* Thirdparty code presumably from http://wss.co.uk/pinknoise/yuv2rgb/ */ +/* FIXME: Move to thirdparty dir */ + +#ifndef YUV2RGB_H +#define YUV2RGB_H + +#include "typedefs.h" + +static const uint32_t tables[256*3] = { + /* y_table */ + 0x7FFFFFEDU, + 0x7FFFFFEFU, + 0x7FFFFFF0U, + 0x7FFFFFF1U, + 0x7FFFFFF2U, + 0x7FFFFFF3U, + 0x7FFFFFF4U, + 0x7FFFFFF6U, + 0x7FFFFFF7U, + 0x7FFFFFF8U, + 0x7FFFFFF9U, + 0x7FFFFFFAU, + 0x7FFFFFFBU, + 0x7FFFFFFDU, + 0x7FFFFFFEU, + 0x7FFFFFFFU, + 0x80000000U, + 0x80400801U, + 0x80A01002U, + 0x80E01803U, + 0x81202805U, + 0x81803006U, + 0x81C03807U, + 0x82004008U, + 0x82604809U, + 0x82A0500AU, + 0x82E0600CU, + 0x8340680DU, + 0x8380700EU, + 0x83C0780FU, + 0x84208010U, + 0x84608811U, + 0x84A09813U, + 0x8500A014U, + 0x8540A815U, + 0x8580B016U, + 0x85E0B817U, + 0x8620C018U, + 0x8660D01AU, + 0x86C0D81BU, + 0x8700E01CU, + 0x8740E81DU, + 0x87A0F01EU, + 0x87E0F81FU, + 0x88210821U, + 0x88811022U, + 0x88C11823U, + 0x89012024U, + 0x89412825U, + 0x89A13026U, + 0x89E14028U, + 0x8A214829U, + 0x8A81502AU, + 0x8AC1582BU, + 0x8B01602CU, + 0x8B61682DU, + 0x8BA1782FU, + 0x8BE18030U, + 0x8C418831U, + 0x8C819032U, + 0x8CC19833U, + 0x8D21A034U, + 0x8D61B036U, + 0x8DA1B837U, + 0x8E01C038U, + 0x8E41C839U, + 0x8E81D03AU, + 0x8EE1D83BU, + 0x8F21E83DU, + 0x8F61F03EU, + 0x8FC1F83FU, + 0x90020040U, + 0x90420841U, + 0x90A21042U, + 0x90E22044U, + 0x91222845U, + 0x91823046U, + 0x91C23847U, + 0x92024048U, + 0x92624849U, + 0x92A2504AU, + 0x92E2604CU, + 0x9342684DU, + 0x9382704EU, + 0x93C2784FU, + 0x94228050U, + 0x94628851U, + 0x94A29853U, + 0x9502A054U, + 0x9542A855U, + 0x9582B056U, + 0x95E2B857U, + 0x9622C058U, + 0x9662D05AU, + 0x96C2D85BU, + 0x9702E05CU, + 0x9742E85DU, + 0x97A2F05EU, + 0x97E2F85FU, + 0x98230861U, + 0x98831062U, + 0x98C31863U, + 0x99032064U, + 0x99632865U, + 0x99A33066U, + 0x99E34068U, + 0x9A434869U, + 0x9A83506AU, + 0x9AC3586BU, + 0x9B23606CU, + 0x9B63686DU, + 0x9BA3786FU, + 0x9BE38070U, + 0x9C438871U, + 0x9C839072U, + 0x9CC39873U, + 0x9D23A074U, + 0x9D63B076U, + 0x9DA3B877U, + 0x9E03C078U, + 0x9E43C879U, + 0x9E83D07AU, + 0x9EE3D87BU, + 0x9F23E87DU, + 0x9F63F07EU, + 0x9FC3F87FU, + 0xA0040080U, + 0xA0440881U, + 0xA0A41082U, + 0xA0E42084U, + 0xA1242885U, + 0xA1843086U, + 0xA1C43887U, + 0xA2044088U, + 0xA2644889U, + 0xA2A4588BU, + 0xA2E4608CU, + 0xA344688DU, + 0xA384708EU, + 0xA3C4788FU, + 0xA4248090U, + 0xA4649092U, + 0xA4A49893U, + 0xA504A094U, + 0xA544A895U, + 0xA584B096U, + 0xA5E4B897U, + 0xA624C098U, + 0xA664D09AU, + 0xA6C4D89BU, + 0xA704E09CU, + 0xA744E89DU, + 0xA7A4F09EU, + 0xA7E4F89FU, + 0xA82508A1U, + 0xA88510A2U, + 0xA8C518A3U, + 0xA90520A4U, + 0xA96528A5U, + 0xA9A530A6U, + 0xA9E540A8U, + 0xAA4548A9U, + 0xAA8550AAU, + 0xAAC558ABU, + 0xAB2560ACU, + 0xAB6568ADU, + 0xABA578AFU, + 0xAC0580B0U, + 0xAC4588B1U, + 0xAC8590B2U, + 0xACE598B3U, + 0xAD25A0B4U, + 0xAD65B0B6U, + 0xADA5B8B7U, + 0xAE05C0B8U, + 0xAE45C8B9U, + 0xAE85D0BAU, + 0xAEE5D8BBU, + 0xAF25E8BDU, + 0xAF65F0BEU, + 0xAFC5F8BFU, + 0xB00600C0U, + 0xB04608C1U, + 0xB0A610C2U, + 0xB0E620C4U, + 0xB12628C5U, + 0xB18630C6U, + 0xB1C638C7U, + 0xB20640C8U, + 0xB26648C9U, + 0xB2A658CBU, + 0xB2E660CCU, + 0xB34668CDU, + 0xB38670CEU, + 0xB3C678CFU, + 0xB42680D0U, + 0xB46690D2U, + 0xB4A698D3U, + 0xB506A0D4U, + 0xB546A8D5U, + 0xB586B0D6U, + 0xB5E6B8D7U, + 0xB626C8D9U, + 0xB666D0DAU, + 0xB6C6D8DBU, + 0xB706E0DCU, + 0xB746E8DDU, + 0xB7A6F0DEU, + 0xB7E6F8DFU, + 0xB82708E1U, + 0xB88710E2U, + 0xB8C718E3U, + 0xB90720E4U, + 0xB96728E5U, + 0xB9A730E6U, + 0xB9E740E8U, + 0xBA4748E9U, + 0xBA8750EAU, + 0xBAC758EBU, + 0xBB2760ECU, + 0xBB6768EDU, + 0xBBA778EFU, + 0xBC0780F0U, + 0xBC4788F1U, + 0xBC8790F2U, + 0xBCE798F3U, + 0xBD27A0F4U, + 0xBD67B0F6U, + 0xBDC7B8F7U, + 0xBE07C0F8U, + 0xBE47C8F9U, + 0xBEA7D0FAU, + 0xBEE7D8FBU, + 0xBF27E8FDU, + 0xBF87F0FEU, + 0xBFC7F8FFU, + 0xC0080100U, + 0xC0480901U, + 0xC0A81102U, + 0xC0E82104U, + 0xC0E82104U, + 0xC0E82104U, + 0xC0E82104U, + 0xC0E82104U, + 0xC0E82104U, + 0xC0E82104U, + 0xC0E82104U, + 0xC0E82104U, + 0xC0E82104U, + 0xC0E82104U, + 0xC0E82104U, + 0xC0E82104U, + 0xC0E82104U, + 0xC0E82104U, + 0xC0E82104U, + 0xC0E82104U, + /* u_table */ + 0x0C400103U, + 0x0C200105U, + 0x0C200107U, + 0x0C000109U, + 0x0BE0010BU, + 0x0BC0010DU, + 0x0BA0010FU, + 0x0BA00111U, + 0x0B800113U, + 0x0B600115U, + 0x0B400117U, + 0x0B400119U, + 0x0B20011BU, + 0x0B00011DU, + 0x0AE0011FU, + 0x0AE00121U, + 0x0AC00123U, + 0x0AA00125U, + 0x0A800127U, + 0x0A600129U, + 0x0A60012BU, + 0x0A40012DU, + 0x0A20012FU, + 0x0A000131U, + 0x0A000132U, + 0x09E00134U, + 0x09C00136U, + 0x09A00138U, + 0x09A0013AU, + 0x0980013CU, + 0x0960013EU, + 0x09400140U, + 0x09400142U, + 0x09200144U, + 0x09000146U, + 0x08E00148U, + 0x08C0014AU, + 0x08C0014CU, + 0x08A0014EU, + 0x08800150U, + 0x08600152U, + 0x08600154U, + 0x08400156U, + 0x08200158U, + 0x0800015AU, + 0x0800015CU, + 0x07E0015EU, + 0x07C00160U, + 0x07A00162U, + 0x07A00164U, + 0x07800166U, + 0x07600168U, + 0x0740016AU, + 0x0720016CU, + 0x0720016EU, + 0x07000170U, + 0x06E00172U, + 0x06C00174U, + 0x06C00176U, + 0x06A00178U, + 0x0680017AU, + 0x0660017CU, + 0x0660017EU, + 0x06400180U, + 0x06200182U, + 0x06000184U, + 0x05E00185U, + 0x05E00187U, + 0x05C00189U, + 0x05A0018BU, + 0x0580018DU, + 0x0580018FU, + 0x05600191U, + 0x05400193U, + 0x05200195U, + 0x05200197U, + 0x05000199U, + 0x04E0019BU, + 0x04C0019DU, + 0x04C0019FU, + 0x04A001A1U, + 0x048001A3U, + 0x046001A5U, + 0x044001A7U, + 0x044001A9U, + 0x042001ABU, + 0x040001ADU, + 0x03E001AFU, + 0x03E001B1U, + 0x03C001B3U, + 0x03A001B5U, + 0x038001B7U, + 0x038001B9U, + 0x036001BBU, + 0x034001BDU, + 0x032001BFU, + 0x032001C1U, + 0x030001C3U, + 0x02E001C5U, + 0x02C001C7U, + 0x02A001C9U, + 0x02A001CBU, + 0x028001CDU, + 0x026001CFU, + 0x024001D1U, + 0x024001D3U, + 0x022001D5U, + 0x020001D7U, + 0x01E001D8U, + 0x01E001DAU, + 0x01C001DCU, + 0x01A001DEU, + 0x018001E0U, + 0x016001E2U, + 0x016001E4U, + 0x014001E6U, + 0x012001E8U, + 0x010001EAU, + 0x010001ECU, + 0x00E001EEU, + 0x00C001F0U, + 0x00A001F2U, + 0x00A001F4U, + 0x008001F6U, + 0x006001F8U, + 0x004001FAU, + 0x004001FCU, + 0x002001FEU, + 0x00000200U, + 0xFFE00202U, + 0xFFC00204U, + 0xFFC00206U, + 0xFFA00208U, + 0xFF80020AU, + 0xFF60020CU, + 0xFF60020EU, + 0xFF400210U, + 0xFF200212U, + 0xFF000214U, + 0xFF000216U, + 0xFEE00218U, + 0xFEC0021AU, + 0xFEA0021CU, + 0xFEA0021EU, + 0xFE800220U, + 0xFE600222U, + 0xFE400224U, + 0xFE200226U, + 0xFE200228U, + 0xFE000229U, + 0xFDE0022BU, + 0xFDC0022DU, + 0xFDC0022FU, + 0xFDA00231U, + 0xFD800233U, + 0xFD600235U, + 0xFD600237U, + 0xFD400239U, + 0xFD20023BU, + 0xFD00023DU, + 0xFCE0023FU, + 0xFCE00241U, + 0xFCC00243U, + 0xFCA00245U, + 0xFC800247U, + 0xFC800249U, + 0xFC60024BU, + 0xFC40024DU, + 0xFC20024FU, + 0xFC200251U, + 0xFC000253U, + 0xFBE00255U, + 0xFBC00257U, + 0xFBC00259U, + 0xFBA0025BU, + 0xFB80025DU, + 0xFB60025FU, + 0xFB400261U, + 0xFB400263U, + 0xFB200265U, + 0xFB000267U, + 0xFAE00269U, + 0xFAE0026BU, + 0xFAC0026DU, + 0xFAA0026FU, + 0xFA800271U, + 0xFA800273U, + 0xFA600275U, + 0xFA400277U, + 0xFA200279U, + 0xFA20027BU, + 0xFA00027CU, + 0xF9E0027EU, + 0xF9C00280U, + 0xF9A00282U, + 0xF9A00284U, + 0xF9800286U, + 0xF9600288U, + 0xF940028AU, + 0xF940028CU, + 0xF920028EU, + 0xF9000290U, + 0xF8E00292U, + 0xF8E00294U, + 0xF8C00296U, + 0xF8A00298U, + 0xF880029AU, + 0xF860029CU, + 0xF860029EU, + 0xF84002A0U, + 0xF82002A2U, + 0xF80002A4U, + 0xF80002A6U, + 0xF7E002A8U, + 0xF7C002AAU, + 0xF7A002ACU, + 0xF7A002AEU, + 0xF78002B0U, + 0xF76002B2U, + 0xF74002B4U, + 0xF74002B6U, + 0xF72002B8U, + 0xF70002BAU, + 0xF6E002BCU, + 0xF6C002BEU, + 0xF6C002C0U, + 0xF6A002C2U, + 0xF68002C4U, + 0xF66002C6U, + 0xF66002C8U, + 0xF64002CAU, + 0xF62002CCU, + 0xF60002CEU, + 0xF60002CFU, + 0xF5E002D1U, + 0xF5C002D3U, + 0xF5A002D5U, + 0xF5A002D7U, + 0xF58002D9U, + 0xF56002DBU, + 0xF54002DDU, + 0xF52002DFU, + 0xF52002E1U, + 0xF50002E3U, + 0xF4E002E5U, + 0xF4C002E7U, + 0xF4C002E9U, + 0xF4A002EBU, + 0xF48002EDU, + 0xF46002EFU, + 0xF46002F1U, + 0xF44002F3U, + 0xF42002F5U, + 0xF40002F7U, + 0xF3E002F9U, + 0xF3E002FBU, + /* v_table */ + 0x1A09A000U, + 0x19E9A800U, + 0x19A9B800U, + 0x1969C800U, + 0x1949D000U, + 0x1909E000U, + 0x18C9E800U, + 0x18A9F800U, + 0x186A0000U, + 0x182A1000U, + 0x180A2000U, + 0x17CA2800U, + 0x17AA3800U, + 0x176A4000U, + 0x172A5000U, + 0x170A6000U, + 0x16CA6800U, + 0x168A7800U, + 0x166A8000U, + 0x162A9000U, + 0x160AA000U, + 0x15CAA800U, + 0x158AB800U, + 0x156AC000U, + 0x152AD000U, + 0x14EAE000U, + 0x14CAE800U, + 0x148AF800U, + 0x146B0000U, + 0x142B1000U, + 0x13EB2000U, + 0x13CB2800U, + 0x138B3800U, + 0x134B4000U, + 0x132B5000U, + 0x12EB6000U, + 0x12CB6800U, + 0x128B7800U, + 0x124B8000U, + 0x122B9000U, + 0x11EBA000U, + 0x11ABA800U, + 0x118BB800U, + 0x114BC000U, + 0x112BD000U, + 0x10EBE000U, + 0x10ABE800U, + 0x108BF800U, + 0x104C0000U, + 0x100C1000U, + 0x0FEC2000U, + 0x0FAC2800U, + 0x0F8C3800U, + 0x0F4C4000U, + 0x0F0C5000U, + 0x0EEC5800U, + 0x0EAC6800U, + 0x0E6C7800U, + 0x0E4C8000U, + 0x0E0C9000U, + 0x0DEC9800U, + 0x0DACA800U, + 0x0D6CB800U, + 0x0D4CC000U, + 0x0D0CD000U, + 0x0CCCD800U, + 0x0CACE800U, + 0x0C6CF800U, + 0x0C4D0000U, + 0x0C0D1000U, + 0x0BCD1800U, + 0x0BAD2800U, + 0x0B6D3800U, + 0x0B2D4000U, + 0x0B0D5000U, + 0x0ACD5800U, + 0x0AAD6800U, + 0x0A6D7800U, + 0x0A2D8000U, + 0x0A0D9000U, + 0x09CD9800U, + 0x098DA800U, + 0x096DB800U, + 0x092DC000U, + 0x090DD000U, + 0x08CDD800U, + 0x088DE800U, + 0x086DF800U, + 0x082E0000U, + 0x07EE1000U, + 0x07CE1800U, + 0x078E2800U, + 0x076E3800U, + 0x072E4000U, + 0x06EE5000U, + 0x06CE5800U, + 0x068E6800U, + 0x064E7800U, + 0x062E8000U, + 0x05EE9000U, + 0x05CE9800U, + 0x058EA800U, + 0x054EB800U, + 0x052EC000U, + 0x04EED000U, + 0x04AED800U, + 0x048EE800U, + 0x044EF000U, + 0x042F0000U, + 0x03EF1000U, + 0x03AF1800U, + 0x038F2800U, + 0x034F3000U, + 0x030F4000U, + 0x02EF5000U, + 0x02AF5800U, + 0x028F6800U, + 0x024F7000U, + 0x020F8000U, + 0x01EF9000U, + 0x01AF9800U, + 0x016FA800U, + 0x014FB000U, + 0x010FC000U, + 0x00EFD000U, + 0x00AFD800U, + 0x006FE800U, + 0x004FF000U, + 0x00100000U, + 0xFFD01000U, + 0xFFB01800U, + 0xFF702800U, + 0xFF303000U, + 0xFF104000U, + 0xFED05000U, + 0xFEB05800U, + 0xFE706800U, + 0xFE307000U, + 0xFE108000U, + 0xFDD09000U, + 0xFD909800U, + 0xFD70A800U, + 0xFD30B000U, + 0xFD10C000U, + 0xFCD0D000U, + 0xFC90D800U, + 0xFC70E800U, + 0xFC30F000U, + 0xFBF10000U, + 0xFBD11000U, + 0xFB911800U, + 0xFB712800U, + 0xFB313000U, + 0xFAF14000U, + 0xFAD14800U, + 0xFA915800U, + 0xFA516800U, + 0xFA317000U, + 0xF9F18000U, + 0xF9D18800U, + 0xF9919800U, + 0xF951A800U, + 0xF931B000U, + 0xF8F1C000U, + 0xF8B1C800U, + 0xF891D800U, + 0xF851E800U, + 0xF831F000U, + 0xF7F20000U, + 0xF7B20800U, + 0xF7921800U, + 0xF7522800U, + 0xF7123000U, + 0xF6F24000U, + 0xF6B24800U, + 0xF6925800U, + 0xF6526800U, + 0xF6127000U, + 0xF5F28000U, + 0xF5B28800U, + 0xF5729800U, + 0xF552A800U, + 0xF512B000U, + 0xF4F2C000U, + 0xF4B2C800U, + 0xF472D800U, + 0xF452E800U, + 0xF412F000U, + 0xF3D30000U, + 0xF3B30800U, + 0xF3731800U, + 0xF3532800U, + 0xF3133000U, + 0xF2D34000U, + 0xF2B34800U, + 0xF2735800U, + 0xF2336800U, + 0xF2137000U, + 0xF1D38000U, + 0xF1B38800U, + 0xF1739800U, + 0xF133A800U, + 0xF113B000U, + 0xF0D3C000U, + 0xF093C800U, + 0xF073D800U, + 0xF033E000U, + 0xF013F000U, + 0xEFD40000U, + 0xEF940800U, + 0xEF741800U, + 0xEF342000U, + 0xEEF43000U, + 0xEED44000U, + 0xEE944800U, + 0xEE745800U, + 0xEE346000U, + 0xEDF47000U, + 0xEDD48000U, + 0xED948800U, + 0xED549800U, + 0xED34A000U, + 0xECF4B000U, + 0xECD4C000U, + 0xEC94C800U, + 0xEC54D800U, + 0xEC34E000U, + 0xEBF4F000U, + 0xEBB50000U, + 0xEB950800U, + 0xEB551800U, + 0xEB352000U, + 0xEAF53000U, + 0xEAB54000U, + 0xEA954800U, + 0xEA555800U, + 0xEA156000U, + 0xE9F57000U, + 0xE9B58000U, + 0xE9958800U, + 0xE9559800U, + 0xE915A000U, + 0xE8F5B000U, + 0xE8B5C000U, + 0xE875C800U, + 0xE855D800U, + 0xE815E000U, + 0xE7F5F000U, + 0xE7B60000U, + 0xE7760800U, + 0xE7561800U, + 0xE7162000U, + 0xE6D63000U, + 0xE6B64000U, + 0xE6764800U, + 0xE6365800U +}; + +#define FLAGS 0x40080100 +#define READUV(U,V) (tables[256 + (U)] + tables[512 + (V)]) +#define READY(Y) tables[Y] +#define FIXUP(Y) \ +do { \ + int tmp = (Y) & FLAGS; \ + if (tmp != 0) \ + { \ + tmp -= tmp>>8; \ + (Y) |= tmp; \ + tmp = FLAGS & ~(Y>>1); \ + (Y) += tmp>>8; \ + } \ +} while (0 == 1) + +#define STORE(Y,DSTPTR) \ +do { \ + *(DSTPTR)++ = (Y); \ + *(DSTPTR)++ = (Y)>>22; \ + *(DSTPTR)++ = (Y)>>11; \ + *(DSTPTR)++ = 255; \ +} while (0 == 1) + +static void yuv422_2_rgb8888(uint8_t *dst_ptr, + const uint8_t *y_ptr, + const uint8_t *u_ptr, + const uint8_t *v_ptr, + int32_t width, + int32_t height, + int32_t y_span, + int32_t uv_span, + int32_t dst_span, + int32_t dither) +{ + height -= 1; + while (height > 0) + { + height -= width<<16; + height += 1<<16; + while (height < 0) + { + /* Do top row pair */ + uint32_t uv, y0, y1; + + uv = READUV(*u_ptr++,*v_ptr++); + y0 = uv + READY(*y_ptr++); + y1 = uv + READY(*y_ptr++); + FIXUP(y0); + FIXUP(y1); + STORE(y0, dst_ptr); + STORE(y1, dst_ptr); + height += (2<<16); + } + if ((height>>16) == 0) + { + /* Trailing top row pix */ + uint32_t uv, y0; + + uv = READUV(*u_ptr,*v_ptr); + y0 = uv + READY(*y_ptr++); + FIXUP(y0); + STORE(y0, dst_ptr); + } + dst_ptr += dst_span-width*4; + y_ptr += y_span-width; + u_ptr += uv_span-(width>>1); + v_ptr += uv_span-(width>>1); + height = (height<<16)>>16; + height -= 1; + if (height == 0) + break; + height -= width<<16; + height += 1<<16; + while (height < 0) + { + /* Do second row pair */ + uint32_t uv, y0, y1; + + uv = READUV(*u_ptr++,*v_ptr++); + y0 = uv + READY(*y_ptr++); + y1 = uv + READY(*y_ptr++); + FIXUP(y0); + FIXUP(y1); + STORE(y0, dst_ptr); + STORE(y1, dst_ptr); + height += (2<<16); + } + if ((height>>16) == 0) + { + /* Trailing bottom row pix */ + uint32_t uv, y0; + + uv = READUV(*u_ptr,*v_ptr); + y0 = uv + READY(*y_ptr++); + FIXUP(y0); + STORE(y0, dst_ptr); + } + dst_ptr += dst_span-width*4; + y_ptr += y_span-width; + u_ptr += uv_span-(width>>1); + v_ptr += uv_span-(width>>1); + height = (height<<16)>>16; + height -= 1; + } +} + + +#undef FLAGS +#undef READUV +#undef READY +#undef FIXUP +#undef STORE + + +#define FLAGS 0x40080100 +#define READUV(U,V) (tables[256 + (U)] + tables[512 + (V)]) +#define READY(Y) tables[Y] +#define FIXUP(Y) \ +do { \ + int tmp = (Y) & FLAGS; \ + if (tmp != 0) \ + { \ + tmp -= tmp>>8; \ + (Y) |= tmp; \ + tmp = FLAGS & ~(Y>>1); \ + (Y) += tmp>>8; \ + } \ +} while (0 == 1) + +#define STORE(Y,DSTPTR) \ +do { \ + (DSTPTR) = 0xFF000000 | (Y & 0xFF) | (0xFF00 & (Y>>14)) | (0xFF0000 & (Y<<5));\ +} while (0 == 1) + +static void yuv420_2_rgb8888(uint8_t *dst_ptr_, + const uint8_t *y_ptr, + const uint8_t *u_ptr, + const uint8_t *v_ptr, + int32_t width, + int32_t height, + int32_t y_span, + int32_t uv_span, + int32_t dst_span, + int32_t dither) +{ + uint32_t *dst_ptr = (uint32_t *)(void *)dst_ptr_; + dst_span >>= 2; + + height -= 1; + while (height > 0) + { + height -= width<<16; + height += 1<<16; + while (height < 0) + { + /* Do 2 column pairs */ + uint32_t uv, y0, y1; + + uv = READUV(*u_ptr++,*v_ptr++); + y1 = uv + READY(y_ptr[y_span]); + y0 = uv + READY(*y_ptr++); + FIXUP(y1); + FIXUP(y0); + STORE(y1, dst_ptr[dst_span]); + STORE(y0, *dst_ptr++); + y1 = uv + READY(y_ptr[y_span]); + y0 = uv + READY(*y_ptr++); + FIXUP(y1); + FIXUP(y0); + STORE(y1, dst_ptr[dst_span]); + STORE(y0, *dst_ptr++); + height += (2<<16); + } + if ((height>>16) == 0) + { + /* Trailing column pair */ + uint32_t uv, y0, y1; + + uv = READUV(*u_ptr,*v_ptr); + y1 = uv + READY(y_ptr[y_span]); + y0 = uv + READY(*y_ptr++); + FIXUP(y1); + FIXUP(y0); + STORE(y0, dst_ptr[dst_span]); + STORE(y1, *dst_ptr++); + } + dst_ptr += dst_span*2-width; + y_ptr += y_span*2-width; + u_ptr += uv_span-(width>>1); + v_ptr += uv_span-(width>>1); + height = (height<<16)>>16; + height -= 2; + } + if (height == 0) + { + /* Trail row */ + height -= width<<16; + height += 1<<16; + while (height < 0) + { + /* Do a row pair */ + uint32_t uv, y0, y1; + + uv = READUV(*u_ptr++,*v_ptr++); + y1 = uv + READY(*y_ptr++); + y0 = uv + READY(*y_ptr++); + FIXUP(y1); + FIXUP(y0); + STORE(y1, *dst_ptr++); + STORE(y0, *dst_ptr++); + height += (2<<16); + } + if ((height>>16) == 0) + { + /* Trailing pix */ + uint32_t uv, y0; + + uv = READUV(*u_ptr++,*v_ptr++); + y0 = uv + READY(*y_ptr++); + FIXUP(y0); + STORE(y0, *dst_ptr++); + } + } +} + + + +#undef FLAGS +#undef READUV +#undef READY +#undef FIXUP +#undef STORE + +#define FLAGS 0x40080100 +#define READUV(U,V) (tables[256 + (U)] + tables[512 + (V)]) +#define READY(Y) tables[Y] +#define FIXUP(Y) \ +do { \ + int tmp = (Y) & FLAGS; \ + if (tmp != 0) \ + { \ + tmp -= tmp>>8; \ + (Y) |= tmp; \ + tmp = FLAGS & ~(Y>>1); \ + (Y) += tmp>>8; \ + } \ +} while (0 == 1) + +#define STORE(Y,DSTPTR) \ +do { \ + *(DSTPTR)++ = (Y); \ + *(DSTPTR)++ = (Y)>>22; \ + *(DSTPTR)++ = (Y)>>11; \ + *(DSTPTR)++ = 255; \ +} while (0 == 1) + +static void yuv444_2_rgb8888(uint8_t *dst_ptr, + const uint8_t *y_ptr, + const uint8_t *u_ptr, + const uint8_t *v_ptr, + int32_t width, + int32_t height, + int32_t y_span, + int32_t uv_span, + int32_t dst_span, + int32_t dither) +{ + height -= 1; + while (height > 0) + { + height -= width<<16; + height += 1<<16; + while (height < 0) + { + /* Do top row pair */ + uint32_t uv, y0, y1; + + uv = READUV(*u_ptr++,*v_ptr++); + y0 = uv + READY(*y_ptr++); + FIXUP(y0); + STORE(y0, dst_ptr); + uv = READUV(*u_ptr++,*v_ptr++); + y1 = uv + READY(*y_ptr++); + FIXUP(y1); + STORE(y1, dst_ptr); + height += (2<<16); + } + if ((height>>16) == 0) + { + /* Trailing top row pix */ + uint32_t uv, y0; + + uv = READUV(*u_ptr++,*v_ptr++); + y0 = uv + READY(*y_ptr++); + FIXUP(y0); + STORE(y0, dst_ptr); + } + dst_ptr += dst_span-width*4; + y_ptr += y_span-width; + u_ptr += uv_span-width; + v_ptr += uv_span-width; + height = (height<<16)>>16; + height -= 1; + if (height == 0) + break; + height -= width<<16; + height += 1<<16; + while (height < 0) + { + /* Do second row pair */ + uint32_t uv, y0, y1; + + uv = READUV(*u_ptr++,*v_ptr++); + y0 = uv + READY(*y_ptr++); + FIXUP(y0); + STORE(y0, dst_ptr); + uv = READUV(*u_ptr++,*v_ptr++); + y1 = uv + READY(*y_ptr++); + FIXUP(y1); + STORE(y1, dst_ptr); + height += (2<<16); + } + if ((height>>16) == 0) + { + /* Trailing bottom row pix */ + uint32_t uv, y0; + + uv = READUV(*u_ptr++,*v_ptr++); + y0 = uv + READY(*y_ptr++); + FIXUP(y0); + STORE(y0, dst_ptr); + } + dst_ptr += dst_span-width*4; + y_ptr += y_span-width; + u_ptr += uv_span-width; + v_ptr += uv_span-width; + height = (height<<16)>>16; + height -= 1; + } +} +#endif // YUV2RGB_H diff --git a/thirdparty/stb_vorbis/stb_vorbis.c b/thirdparty/stb_vorbis/stb_vorbis.c deleted file mode 100644 index c4f24d5898..0000000000 --- a/thirdparty/stb_vorbis/stb_vorbis.c +++ /dev/null @@ -1,5399 +0,0 @@ -// Ogg Vorbis audio decoder - v1.09 - public domain -// http://nothings.org/stb_vorbis/ -// -// Original version written by Sean Barrett in 2007. -// -// Originally sponsored by RAD Game Tools. Seeking sponsored -// by Phillip Bennefall, Marc Andersen, Aaron Baker, Elias Software, -// Aras Pranckevicius, and Sean Barrett. -// -// LICENSE -// -// This software is dual-licensed to the public domain and under the following -// license: you are granted a perpetual, irrevocable license to copy, modify, -// publish, and distribute this file as you see fit. -// -// No warranty for any purpose is expressed or implied by the author (nor -// by RAD Game Tools). Report bugs and send enhancements to the author. -// -// Limitations: -// -// - floor 0 not supported (used in old ogg vorbis files pre-2004) -// - lossless sample-truncation at beginning ignored -// - cannot concatenate multiple vorbis streams -// - sample positions are 32-bit, limiting seekable 192Khz -// files to around 6 hours (Ogg supports 64-bit) -// -// Feature contributors: -// Dougall Johnson (sample-exact seeking) -// -// Bugfix/warning contributors: -// Terje Mathisen Niklas Frykholm Andy Hill -// Casey Muratori John Bolton Gargaj -// Laurent Gomila Marc LeBlanc Ronny Chevalier -// Bernhard Wodo Evan Balster alxprd@github -// Tom Beaumont Ingo Leitgeb Nicolas Guillemot -// Phillip Bennefall Rohit Thiago Goulart -// manxorist@github saga musix -// -// Partial history: -// 1.09 - 2016/04/04 - back out 'truncation of last frame' fix from previous version -// 1.08 - 2016/04/02 - warnings; setup memory leaks; truncation of last frame -// 1.07 - 2015/01/16 - fixes for crashes on invalid files; warning fixes; const -// 1.06 - 2015/08/31 - full, correct support for seeking API (Dougall Johnson) -// some crash fixes when out of memory or with corrupt files -// fix some inappropriately signed shifts -// 1.05 - 2015/04/19 - don't define __forceinline if it's redundant -// 1.04 - 2014/08/27 - fix missing const-correct case in API -// 1.03 - 2014/08/07 - warning fixes -// 1.02 - 2014/07/09 - declare qsort comparison as explicitly _cdecl in Windows -// 1.01 - 2014/06/18 - fix stb_vorbis_get_samples_float (interleaved was correct) -// 1.0 - 2014/05/26 - fix memory leaks; fix warnings; fix bugs in >2-channel; -// (API change) report sample rate for decode-full-file funcs -// -// See end of file for full version history. - - -////////////////////////////////////////////////////////////////////////////// -// -// HEADER BEGINS HERE -// - -#ifndef STB_VORBIS_INCLUDE_STB_VORBIS_H -#define STB_VORBIS_INCLUDE_STB_VORBIS_H - -#if defined(STB_VORBIS_NO_CRT) && !defined(STB_VORBIS_NO_STDIO) -#define STB_VORBIS_NO_STDIO 1 -#endif - -#ifndef STB_VORBIS_NO_STDIO -#include -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -/////////// THREAD SAFETY - -// Individual stb_vorbis* handles are not thread-safe; you cannot decode from -// them from multiple threads at the same time. However, you can have multiple -// stb_vorbis* handles and decode from them independently in multiple thrads. - - -/////////// MEMORY ALLOCATION - -// normally stb_vorbis uses malloc() to allocate memory at startup, -// and alloca() to allocate temporary memory during a frame on the -// stack. (Memory consumption will depend on the amount of setup -// data in the file and how you set the compile flags for speed -// vs. size. In my test files the maximal-size usage is ~150KB.) -// -// You can modify the wrapper functions in the source (setup_malloc, -// setup_temp_malloc, temp_malloc) to change this behavior, or you -// can use a simpler allocation model: you pass in a buffer from -// which stb_vorbis will allocate _all_ its memory (including the -// temp memory). "open" may fail with a VORBIS_outofmem if you -// do not pass in enough data; there is no way to determine how -// much you do need except to succeed (at which point you can -// query get_info to find the exact amount required. yes I know -// this is lame). -// -// If you pass in a non-NULL buffer of the type below, allocation -// will occur from it as described above. Otherwise just pass NULL -// to use malloc()/alloca() - -typedef struct -{ - char *alloc_buffer; - int alloc_buffer_length_in_bytes; -} stb_vorbis_alloc; - - -/////////// FUNCTIONS USEABLE WITH ALL INPUT MODES - -typedef struct stb_vorbis stb_vorbis; - -typedef struct -{ - unsigned int sample_rate; - int channels; - - unsigned int setup_memory_required; - unsigned int setup_temp_memory_required; - unsigned int temp_memory_required; - - int max_frame_size; -} stb_vorbis_info; - -// get general information about the file -extern stb_vorbis_info stb_vorbis_get_info(stb_vorbis *f); - -// get the last error detected (clears it, too) -extern int stb_vorbis_get_error(stb_vorbis *f); - -// close an ogg vorbis file and free all memory in use -extern void stb_vorbis_close(stb_vorbis *f); - -// this function returns the offset (in samples) from the beginning of the -// file that will be returned by the next decode, if it is known, or -1 -// otherwise. after a flush_pushdata() call, this may take a while before -// it becomes valid again. -// NOT WORKING YET after a seek with PULLDATA API -extern int stb_vorbis_get_sample_offset(stb_vorbis *f); - -// returns the current seek point within the file, or offset from the beginning -// of the memory buffer. In pushdata mode it returns 0. -extern unsigned int stb_vorbis_get_file_offset(stb_vorbis *f); - -/////////// PUSHDATA API - -#ifndef STB_VORBIS_NO_PUSHDATA_API - -// this API allows you to get blocks of data from any source and hand -// them to stb_vorbis. you have to buffer them; stb_vorbis will tell -// you how much it used, and you have to give it the rest next time; -// and stb_vorbis may not have enough data to work with and you will -// need to give it the same data again PLUS more. Note that the Vorbis -// specification does not bound the size of an individual frame. - -extern stb_vorbis *stb_vorbis_open_pushdata( - const unsigned char * datablock, int datablock_length_in_bytes, - int *datablock_memory_consumed_in_bytes, - int *error, - const stb_vorbis_alloc *alloc_buffer); -// create a vorbis decoder by passing in the initial data block containing -// the ogg&vorbis headers (you don't need to do parse them, just provide -// the first N bytes of the file--you're told if it's not enough, see below) -// on success, returns an stb_vorbis *, does not set error, returns the amount of -// data parsed/consumed on this call in *datablock_memory_consumed_in_bytes; -// on failure, returns NULL on error and sets *error, does not change *datablock_memory_consumed -// if returns NULL and *error is VORBIS_need_more_data, then the input block was -// incomplete and you need to pass in a larger block from the start of the file - -extern int stb_vorbis_decode_frame_pushdata( - stb_vorbis *f, - const unsigned char *datablock, int datablock_length_in_bytes, - int *channels, // place to write number of float * buffers - float ***output, // place to write float ** array of float * buffers - int *samples // place to write number of output samples - ); -// decode a frame of audio sample data if possible from the passed-in data block -// -// return value: number of bytes we used from datablock -// -// possible cases: -// 0 bytes used, 0 samples output (need more data) -// N bytes used, 0 samples output (resynching the stream, keep going) -// N bytes used, M samples output (one frame of data) -// note that after opening a file, you will ALWAYS get one N-bytes,0-sample -// frame, because Vorbis always "discards" the first frame. -// -// Note that on resynch, stb_vorbis will rarely consume all of the buffer, -// instead only datablock_length_in_bytes-3 or less. This is because it wants -// to avoid missing parts of a page header if they cross a datablock boundary, -// without writing state-machiney code to record a partial detection. -// -// The number of channels returned are stored in *channels (which can be -// NULL--it is always the same as the number of channels reported by -// get_info). *output will contain an array of float* buffers, one per -// channel. In other words, (*output)[0][0] contains the first sample from -// the first channel, and (*output)[1][0] contains the first sample from -// the second channel. - -extern void stb_vorbis_flush_pushdata(stb_vorbis *f); -// inform stb_vorbis that your next datablock will not be contiguous with -// previous ones (e.g. you've seeked in the data); future attempts to decode -// frames will cause stb_vorbis to resynchronize (as noted above), and -// once it sees a valid Ogg page (typically 4-8KB, as large as 64KB), it -// will begin decoding the _next_ frame. -// -// if you want to seek using pushdata, you need to seek in your file, then -// call stb_vorbis_flush_pushdata(), then start calling decoding, then once -// decoding is returning you data, call stb_vorbis_get_sample_offset, and -// if you don't like the result, seek your file again and repeat. -#endif - - -////////// PULLING INPUT API - -#ifndef STB_VORBIS_NO_PULLDATA_API -// This API assumes stb_vorbis is allowed to pull data from a source-- -// either a block of memory containing the _entire_ vorbis stream, or a -// FILE * that you or it create, or possibly some other reading mechanism -// if you go modify the source to replace the FILE * case with some kind -// of callback to your code. (But if you don't support seeking, you may -// just want to go ahead and use pushdata.) - -#if !defined(STB_VORBIS_NO_STDIO) && !defined(STB_VORBIS_NO_INTEGER_CONVERSION) -extern int stb_vorbis_decode_filename(const char *filename, int *channels, int *sample_rate, short **output); -#endif -#if !defined(STB_VORBIS_NO_INTEGER_CONVERSION) -extern int stb_vorbis_decode_memory(const unsigned char *mem, int len, int *channels, int *sample_rate, short **output); -#endif -// decode an entire file and output the data interleaved into a malloc()ed -// buffer stored in *output. The return value is the number of samples -// decoded, or -1 if the file could not be opened or was not an ogg vorbis file. -// When you're done with it, just free() the pointer returned in *output. - -extern stb_vorbis * stb_vorbis_open_memory(const unsigned char *data, int len, - int *error, const stb_vorbis_alloc *alloc_buffer); -// create an ogg vorbis decoder from an ogg vorbis stream in memory (note -// this must be the entire stream!). on failure, returns NULL and sets *error - -#ifndef STB_VORBIS_NO_STDIO -extern stb_vorbis * stb_vorbis_open_filename(const char *filename, - int *error, const stb_vorbis_alloc *alloc_buffer); -// create an ogg vorbis decoder from a filename via fopen(). on failure, -// returns NULL and sets *error (possibly to VORBIS_file_open_failure). - -extern stb_vorbis * stb_vorbis_open_file(FILE *f, int close_handle_on_close, - int *error, const stb_vorbis_alloc *alloc_buffer); -// create an ogg vorbis decoder from an open FILE *, looking for a stream at -// the _current_ seek point (ftell). on failure, returns NULL and sets *error. -// note that stb_vorbis must "own" this stream; if you seek it in between -// calls to stb_vorbis, it will become confused. Morever, if you attempt to -// perform stb_vorbis_seek_*() operations on this file, it will assume it -// owns the _entire_ rest of the file after the start point. Use the next -// function, stb_vorbis_open_file_section(), to limit it. - -extern stb_vorbis * stb_vorbis_open_file_section(FILE *f, int close_handle_on_close, - int *error, const stb_vorbis_alloc *alloc_buffer, unsigned int len); -// create an ogg vorbis decoder from an open FILE *, looking for a stream at -// the _current_ seek point (ftell); the stream will be of length 'len' bytes. -// on failure, returns NULL and sets *error. note that stb_vorbis must "own" -// this stream; if you seek it in between calls to stb_vorbis, it will become -// confused. -#endif - -extern int stb_vorbis_seek_frame(stb_vorbis *f, unsigned int sample_number); -extern int stb_vorbis_seek(stb_vorbis *f, unsigned int sample_number); -// these functions seek in the Vorbis file to (approximately) 'sample_number'. -// after calling seek_frame(), the next call to get_frame_*() will include -// the specified sample. after calling stb_vorbis_seek(), the next call to -// stb_vorbis_get_samples_* will start with the specified sample. If you -// do not need to seek to EXACTLY the target sample when using get_samples_*, -// you can also use seek_frame(). - -extern void stb_vorbis_seek_start(stb_vorbis *f); -// this function is equivalent to stb_vorbis_seek(f,0) - -extern unsigned int stb_vorbis_stream_length_in_samples(stb_vorbis *f); -extern float stb_vorbis_stream_length_in_seconds(stb_vorbis *f); -// these functions return the total length of the vorbis stream - -extern int stb_vorbis_get_frame_float(stb_vorbis *f, int *channels, float ***output); -// decode the next frame and return the number of samples. the number of -// channels returned are stored in *channels (which can be NULL--it is always -// the same as the number of channels reported by get_info). *output will -// contain an array of float* buffers, one per channel. These outputs will -// be overwritten on the next call to stb_vorbis_get_frame_*. -// -// You generally should not intermix calls to stb_vorbis_get_frame_*() -// and stb_vorbis_get_samples_*(), since the latter calls the former. - -#ifndef STB_VORBIS_NO_INTEGER_CONVERSION -extern int stb_vorbis_get_frame_short_interleaved(stb_vorbis *f, int num_c, short *buffer, int num_shorts); -extern int stb_vorbis_get_frame_short (stb_vorbis *f, int num_c, short **buffer, int num_samples); -#endif -// decode the next frame and return the number of *samples* per channel. -// Note that for interleaved data, you pass in the number of shorts (the -// size of your array), but the return value is the number of samples per -// channel, not the total number of samples. -// -// The data is coerced to the number of channels you request according to the -// channel coercion rules (see below). You must pass in the size of your -// buffer(s) so that stb_vorbis will not overwrite the end of the buffer. -// The maximum buffer size needed can be gotten from get_info(); however, -// the Vorbis I specification implies an absolute maximum of 4096 samples -// per channel. - -// Channel coercion rules: -// Let M be the number of channels requested, and N the number of channels present, -// and Cn be the nth channel; let stereo L be the sum of all L and center channels, -// and stereo R be the sum of all R and center channels (channel assignment from the -// vorbis spec). -// M N output -// 1 k sum(Ck) for all k -// 2 * stereo L, stereo R -// k l k > l, the first l channels, then 0s -// k l k <= l, the first k channels -// Note that this is not _good_ surround etc. mixing at all! It's just so -// you get something useful. - -extern int stb_vorbis_get_samples_float_interleaved(stb_vorbis *f, int channels, float *buffer, int num_floats); -extern int stb_vorbis_get_samples_float(stb_vorbis *f, int channels, float **buffer, int num_samples); -// gets num_samples samples, not necessarily on a frame boundary--this requires -// buffering so you have to supply the buffers. DOES NOT APPLY THE COERCION RULES. -// Returns the number of samples stored per channel; it may be less than requested -// at the end of the file. If there are no more samples in the file, returns 0. - -#ifndef STB_VORBIS_NO_INTEGER_CONVERSION -extern int stb_vorbis_get_samples_short_interleaved(stb_vorbis *f, int channels, short *buffer, int num_shorts); -extern int stb_vorbis_get_samples_short(stb_vorbis *f, int channels, short **buffer, int num_samples); -#endif -// gets num_samples samples, not necessarily on a frame boundary--this requires -// buffering so you have to supply the buffers. Applies the coercion rules above -// to produce 'channels' channels. Returns the number of samples stored per channel; -// it may be less than requested at the end of the file. If there are no more -// samples in the file, returns 0. - -#endif - -//////// ERROR CODES - -enum STBVorbisError -{ - VORBIS__no_error, - - VORBIS_need_more_data=1, // not a real error - - VORBIS_invalid_api_mixing, // can't mix API modes - VORBIS_outofmem, // not enough memory - VORBIS_feature_not_supported, // uses floor 0 - VORBIS_too_many_channels, // STB_VORBIS_MAX_CHANNELS is too small - VORBIS_file_open_failure, // fopen() failed - VORBIS_seek_without_length, // can't seek in unknown-length file - - VORBIS_unexpected_eof=10, // file is truncated? - VORBIS_seek_invalid, // seek past EOF - - // decoding errors (corrupt/invalid stream) -- you probably - // don't care about the exact details of these - - // vorbis errors: - VORBIS_invalid_setup=20, - VORBIS_invalid_stream, - - // ogg errors: - VORBIS_missing_capture_pattern=30, - VORBIS_invalid_stream_structure_version, - VORBIS_continued_packet_flag_invalid, - VORBIS_incorrect_stream_serial_number, - VORBIS_invalid_first_page, - VORBIS_bad_packet_type, - VORBIS_cant_find_last_page, - VORBIS_seek_failed -}; - - -#ifdef __cplusplus -} -#endif - -#endif // STB_VORBIS_INCLUDE_STB_VORBIS_H -// -// HEADER ENDS HERE -// -////////////////////////////////////////////////////////////////////////////// - -#ifndef STB_VORBIS_HEADER_ONLY - -// global configuration settings (e.g. set these in the project/makefile), -// or just set them in this file at the top (although ideally the first few -// should be visible when the header file is compiled too, although it's not -// crucial) - -// STB_VORBIS_NO_PUSHDATA_API -// does not compile the code for the various stb_vorbis_*_pushdata() -// functions -// #define STB_VORBIS_NO_PUSHDATA_API - -// STB_VORBIS_NO_PULLDATA_API -// does not compile the code for the non-pushdata APIs -// #define STB_VORBIS_NO_PULLDATA_API - -// STB_VORBIS_NO_STDIO -// does not compile the code for the APIs that use FILE *s internally -// or externally (implied by STB_VORBIS_NO_PULLDATA_API) -// #define STB_VORBIS_NO_STDIO - -// STB_VORBIS_NO_INTEGER_CONVERSION -// does not compile the code for converting audio sample data from -// float to integer (implied by STB_VORBIS_NO_PULLDATA_API) -// #define STB_VORBIS_NO_INTEGER_CONVERSION - -// STB_VORBIS_NO_FAST_SCALED_FLOAT -// does not use a fast float-to-int trick to accelerate float-to-int on -// most platforms which requires endianness be defined correctly. -//#define STB_VORBIS_NO_FAST_SCALED_FLOAT - - -// STB_VORBIS_MAX_CHANNELS [number] -// globally define this to the maximum number of channels you need. -// The spec does not put a restriction on channels except that -// the count is stored in a byte, so 255 is the hard limit. -// Reducing this saves about 16 bytes per value, so using 16 saves -// (255-16)*16 or around 4KB. Plus anything other memory usage -// I forgot to account for. Can probably go as low as 8 (7.1 audio), -// 6 (5.1 audio), or 2 (stereo only). -#ifndef STB_VORBIS_MAX_CHANNELS -#define STB_VORBIS_MAX_CHANNELS 16 // enough for anyone? -#endif - -// STB_VORBIS_PUSHDATA_CRC_COUNT [number] -// after a flush_pushdata(), stb_vorbis begins scanning for the -// next valid page, without backtracking. when it finds something -// that looks like a page, it streams through it and verifies its -// CRC32. Should that validation fail, it keeps scanning. But it's -// possible that _while_ streaming through to check the CRC32 of -// one candidate page, it sees another candidate page. This #define -// determines how many "overlapping" candidate pages it can search -// at once. Note that "real" pages are typically ~4KB to ~8KB, whereas -// garbage pages could be as big as 64KB, but probably average ~16KB. -// So don't hose ourselves by scanning an apparent 64KB page and -// missing a ton of real ones in the interim; so minimum of 2 -#ifndef STB_VORBIS_PUSHDATA_CRC_COUNT -#define STB_VORBIS_PUSHDATA_CRC_COUNT 4 -#endif - -// STB_VORBIS_FAST_HUFFMAN_LENGTH [number] -// sets the log size of the huffman-acceleration table. Maximum -// supported value is 24. with larger numbers, more decodings are O(1), -// but the table size is larger so worse cache missing, so you'll have -// to probe (and try multiple ogg vorbis files) to find the sweet spot. -#ifndef STB_VORBIS_FAST_HUFFMAN_LENGTH -#define STB_VORBIS_FAST_HUFFMAN_LENGTH 10 -#endif - -// STB_VORBIS_FAST_BINARY_LENGTH [number] -// sets the log size of the binary-search acceleration table. this -// is used in similar fashion to the fast-huffman size to set initial -// parameters for the binary search - -// STB_VORBIS_FAST_HUFFMAN_INT -// The fast huffman tables are much more efficient if they can be -// stored as 16-bit results instead of 32-bit results. This restricts -// the codebooks to having only 65535 possible outcomes, though. -// (At least, accelerated by the huffman table.) -#ifndef STB_VORBIS_FAST_HUFFMAN_INT -#define STB_VORBIS_FAST_HUFFMAN_SHORT -#endif - -// STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH -// If the 'fast huffman' search doesn't succeed, then stb_vorbis falls -// back on binary searching for the correct one. This requires storing -// extra tables with the huffman codes in sorted order. Defining this -// symbol trades off space for speed by forcing a linear search in the -// non-fast case, except for "sparse" codebooks. -// #define STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH - -// STB_VORBIS_DIVIDES_IN_RESIDUE -// stb_vorbis precomputes the result of the scalar residue decoding -// that would otherwise require a divide per chunk. you can trade off -// space for time by defining this symbol. -// #define STB_VORBIS_DIVIDES_IN_RESIDUE - -// STB_VORBIS_DIVIDES_IN_CODEBOOK -// vorbis VQ codebooks can be encoded two ways: with every case explicitly -// stored, or with all elements being chosen from a small range of values, -// and all values possible in all elements. By default, stb_vorbis expands -// this latter kind out to look like the former kind for ease of decoding, -// because otherwise an integer divide-per-vector-element is required to -// unpack the index. If you define STB_VORBIS_DIVIDES_IN_CODEBOOK, you can -// trade off storage for speed. -//#define STB_VORBIS_DIVIDES_IN_CODEBOOK - -#ifdef STB_VORBIS_CODEBOOK_SHORTS -#error "STB_VORBIS_CODEBOOK_SHORTS is no longer supported as it produced incorrect results for some input formats" -#endif - -// STB_VORBIS_DIVIDE_TABLE -// this replaces small integer divides in the floor decode loop with -// table lookups. made less than 1% difference, so disabled by default. - -// STB_VORBIS_NO_INLINE_DECODE -// disables the inlining of the scalar codebook fast-huffman decode. -// might save a little codespace; useful for debugging -// #define STB_VORBIS_NO_INLINE_DECODE - -// STB_VORBIS_NO_DEFER_FLOOR -// Normally we only decode the floor without synthesizing the actual -// full curve. We can instead synthesize the curve immediately. This -// requires more memory and is very likely slower, so I don't think -// you'd ever want to do it except for debugging. -// #define STB_VORBIS_NO_DEFER_FLOOR - - - - -////////////////////////////////////////////////////////////////////////////// - -#ifdef STB_VORBIS_NO_PULLDATA_API - #define STB_VORBIS_NO_INTEGER_CONVERSION - #define STB_VORBIS_NO_STDIO -#endif - -#if defined(STB_VORBIS_NO_CRT) && !defined(STB_VORBIS_NO_STDIO) - #define STB_VORBIS_NO_STDIO 1 -#endif - -#ifndef STB_VORBIS_NO_INTEGER_CONVERSION -#ifndef STB_VORBIS_NO_FAST_SCALED_FLOAT - - // only need endianness for fast-float-to-int, which we don't - // use for pushdata - - #ifndef STB_VORBIS_BIG_ENDIAN - #define STB_VORBIS_ENDIAN 0 - #else - #define STB_VORBIS_ENDIAN 1 - #endif - -#endif -#endif - - -#ifndef STB_VORBIS_NO_STDIO -#include -#endif - -#ifndef STB_VORBIS_NO_CRT - #include - #include - #include - #include - - // find definition of alloca if it's not in stdlib.h: - #ifdef _MSC_VER - #include - #endif - #if defined(__linux__) || defined(__linux) || defined(__EMSCRIPTEN__) - #include - #endif -#else // STB_VORBIS_NO_CRT - #define NULL 0 - #define malloc(s) 0 - #define free(s) ((void) 0) - #define realloc(s) 0 -#endif // STB_VORBIS_NO_CRT - -#include - -#ifdef __MINGW32__ - // eff you mingw: - // "fixed": - // http://sourceforge.net/p/mingw-w64/mailman/message/32882927/ - // "no that broke the build, reverted, who cares about C": - // http://sourceforge.net/p/mingw-w64/mailman/message/32890381/ - #ifdef __forceinline - #undef __forceinline - #endif - #define __forceinline -#elif !defined(_MSC_VER) - #if __GNUC__ - #define __forceinline inline - #else - #define __forceinline - #endif -#endif - -#if STB_VORBIS_MAX_CHANNELS > 256 -#error "Value of STB_VORBIS_MAX_CHANNELS outside of allowed range" -#endif - -#if STB_VORBIS_FAST_HUFFMAN_LENGTH > 24 -#error "Value of STB_VORBIS_FAST_HUFFMAN_LENGTH outside of allowed range" -#endif - - -#if 0 -#include -#define CHECK(f) _CrtIsValidHeapPointer(f->channel_buffers[1]) -#else -#define CHECK(f) ((void) 0) -#endif - -#define MAX_BLOCKSIZE_LOG 13 // from specification -#define MAX_BLOCKSIZE (1 << MAX_BLOCKSIZE_LOG) - - -typedef unsigned char uint8; -typedef signed char int8; -typedef unsigned short uint16; -typedef signed short int16; -typedef unsigned int uint32; -typedef signed int int32; - -#ifndef TRUE -#define TRUE 1 -#define FALSE 0 -#endif - -typedef float codetype; - -// @NOTE -// -// Some arrays below are tagged "//varies", which means it's actually -// a variable-sized piece of data, but rather than malloc I assume it's -// small enough it's better to just allocate it all together with the -// main thing -// -// Most of the variables are specified with the smallest size I could pack -// them into. It might give better performance to make them all full-sized -// integers. It should be safe to freely rearrange the structures or change -// the sizes larger--nothing relies on silently truncating etc., nor the -// order of variables. - -#define FAST_HUFFMAN_TABLE_SIZE (1 << STB_VORBIS_FAST_HUFFMAN_LENGTH) -#define FAST_HUFFMAN_TABLE_MASK (FAST_HUFFMAN_TABLE_SIZE - 1) - -typedef struct -{ - int dimensions, entries; - uint8 *codeword_lengths; - float minimum_value; - float delta_value; - uint8 value_bits; - uint8 lookup_type; - uint8 sequence_p; - uint8 sparse; - uint32 lookup_values; - codetype *multiplicands; - uint32 *codewords; - #ifdef STB_VORBIS_FAST_HUFFMAN_SHORT - int16 fast_huffman[FAST_HUFFMAN_TABLE_SIZE]; - #else - int32 fast_huffman[FAST_HUFFMAN_TABLE_SIZE]; - #endif - uint32 *sorted_codewords; - int *sorted_values; - int sorted_entries; -} Codebook; - -typedef struct -{ - uint8 order; - uint16 rate; - uint16 bark_map_size; - uint8 amplitude_bits; - uint8 amplitude_offset; - uint8 number_of_books; - uint8 book_list[16]; // varies -} Floor0; - -typedef struct -{ - uint8 partitions; - uint8 partition_class_list[32]; // varies - uint8 class_dimensions[16]; // varies - uint8 class_subclasses[16]; // varies - uint8 class_masterbooks[16]; // varies - int16 subclass_books[16][8]; // varies - uint16 Xlist[31*8+2]; // varies - uint8 sorted_order[31*8+2]; - uint8 neighbors[31*8+2][2]; - uint8 floor1_multiplier; - uint8 rangebits; - int values; -} Floor1; - -typedef union -{ - Floor0 floor0; - Floor1 floor1; -} Floor; - -typedef struct -{ - uint32 begin, end; - uint32 part_size; - uint8 classifications; - uint8 classbook; - uint8 **classdata; - int16 (*residue_books)[8]; -} Residue; - -typedef struct -{ - uint8 magnitude; - uint8 angle; - uint8 mux; -} MappingChannel; - -typedef struct -{ - uint16 coupling_steps; - MappingChannel *chan; - uint8 submaps; - uint8 submap_floor[15]; // varies - uint8 submap_residue[15]; // varies -} Mapping; - -typedef struct -{ - uint8 blockflag; - uint8 mapping; - uint16 windowtype; - uint16 transformtype; -} Mode; - -typedef struct -{ - uint32 goal_crc; // expected crc if match - int bytes_left; // bytes left in packet - uint32 crc_so_far; // running crc - int bytes_done; // bytes processed in _current_ chunk - uint32 sample_loc; // granule pos encoded in page -} CRCscan; - -typedef struct -{ - uint32 page_start, page_end; - uint32 last_decoded_sample; -} ProbedPage; - -struct stb_vorbis -{ - // user-accessible info - unsigned int sample_rate; - int channels; - - unsigned int setup_memory_required; - unsigned int temp_memory_required; - unsigned int setup_temp_memory_required; - - // input config -#ifndef STB_VORBIS_NO_STDIO - FILE *f; - uint32 f_start; - int close_on_free; -#endif - - uint8 *stream; - uint8 *stream_start; - uint8 *stream_end; - - uint32 stream_len; - - uint8 push_mode; - - uint32 first_audio_page_offset; - - ProbedPage p_first, p_last; - - // memory management - stb_vorbis_alloc alloc; - int setup_offset; - int temp_offset; - - // run-time results - int eof; - enum STBVorbisError error; - - // user-useful data - - // header info - int blocksize[2]; - int blocksize_0, blocksize_1; - int codebook_count; - Codebook *codebooks; - int floor_count; - uint16 floor_types[64]; // varies - Floor *floor_config; - int residue_count; - uint16 residue_types[64]; // varies - Residue *residue_config; - int mapping_count; - Mapping *mapping; - int mode_count; - Mode mode_config[64]; // varies - - uint32 total_samples; - - // decode buffer - float *channel_buffers[STB_VORBIS_MAX_CHANNELS]; - float *outputs [STB_VORBIS_MAX_CHANNELS]; - - float *previous_window[STB_VORBIS_MAX_CHANNELS]; - int previous_length; - - #ifndef STB_VORBIS_NO_DEFER_FLOOR - int16 *finalY[STB_VORBIS_MAX_CHANNELS]; - #else - float *floor_buffers[STB_VORBIS_MAX_CHANNELS]; - #endif - - uint32 current_loc; // sample location of next frame to decode - int current_loc_valid; - - // per-blocksize precomputed data - - // twiddle factors - float *A[2],*B[2],*C[2]; - float *window[2]; - uint16 *bit_reverse[2]; - - // current page/packet/segment streaming info - uint32 serial; // stream serial number for verification - int last_page; - int segment_count; - uint8 segments[255]; - uint8 page_flag; - uint8 bytes_in_seg; - uint8 first_decode; - int next_seg; - int last_seg; // flag that we're on the last segment - int last_seg_which; // what was the segment number of the last seg? - uint32 acc; - int valid_bits; - int packet_bytes; - int end_seg_with_known_loc; - uint32 known_loc_for_packet; - int discard_samples_deferred; - uint32 samples_output; - - // push mode scanning - int page_crc_tests; // only in push_mode: number of tests active; -1 if not searching -#ifndef STB_VORBIS_NO_PUSHDATA_API - CRCscan scan[STB_VORBIS_PUSHDATA_CRC_COUNT]; -#endif - - // sample-access - int channel_buffer_start; - int channel_buffer_end; -}; - -#if defined(STB_VORBIS_NO_PUSHDATA_API) - #define IS_PUSH_MODE(f) FALSE -#elif defined(STB_VORBIS_NO_PULLDATA_API) - #define IS_PUSH_MODE(f) TRUE -#else - #define IS_PUSH_MODE(f) ((f)->push_mode) -#endif - -typedef struct stb_vorbis vorb; - -static int error(vorb *f, enum STBVorbisError e) -{ - f->error = e; - if (!f->eof && e != VORBIS_need_more_data) { - f->error=e; // breakpoint for debugging - } - return 0; -} - - -// these functions are used for allocating temporary memory -// while decoding. if you can afford the stack space, use -// alloca(); otherwise, provide a temp buffer and it will -// allocate out of those. - -#define array_size_required(count,size) (count*(sizeof(void *)+(size))) - -#define temp_alloc(f,size) (f->alloc.alloc_buffer ? setup_temp_malloc(f,size) : alloca(size)) -#ifdef dealloca -#define temp_free(f,p) (f->alloc.alloc_buffer ? 0 : dealloca(size)) -#else -#define temp_free(f,p) 0 -#endif -#define temp_alloc_save(f) ((f)->temp_offset) -#define temp_alloc_restore(f,p) ((f)->temp_offset = (p)) - -#define temp_block_array(f,count,size) make_block_array(temp_alloc(f,array_size_required(count,size)), count, size) - -// given a sufficiently large block of memory, make an array of pointers to subblocks of it -static void *make_block_array(void *mem, int count, int size) -{ - int i; - void ** p = (void **) mem; - char *q = (char *) (p + count); - for (i=0; i < count; ++i) { - p[i] = q; - q += size; - } - return p; -} - -static void *setup_malloc(vorb *f, int sz) -{ - sz = (sz+3) & ~3; - f->setup_memory_required += sz; - if (f->alloc.alloc_buffer) { - void *p = (char *) f->alloc.alloc_buffer + f->setup_offset; - if (f->setup_offset + sz > f->temp_offset) return NULL; - f->setup_offset += sz; - return p; - } - return sz ? malloc(sz) : NULL; -} - -static void setup_free(vorb *f, void *p) -{ - if (f->alloc.alloc_buffer) return; // do nothing; setup mem is a stack - free(p); -} - -static void *setup_temp_malloc(vorb *f, int sz) -{ - sz = (sz+3) & ~3; - if (f->alloc.alloc_buffer) { - if (f->temp_offset - sz < f->setup_offset) return NULL; - f->temp_offset -= sz; - return (char *) f->alloc.alloc_buffer + f->temp_offset; - } - return malloc(sz); -} - -static void setup_temp_free(vorb *f, void *p, int sz) -{ - if (f->alloc.alloc_buffer) { - f->temp_offset += (sz+3)&~3; - return; - } - free(p); -} - -#define CRC32_POLY 0x04c11db7 // from spec - -static uint32 crc_table[256]; -static void crc32_init(void) -{ - int i,j; - uint32 s; - for(i=0; i < 256; i++) { - for (s=(uint32) i << 24, j=0; j < 8; ++j) - s = (s << 1) ^ (s >= (1U<<31) ? CRC32_POLY : 0); - crc_table[i] = s; - } -} - -static __forceinline uint32 crc32_update(uint32 crc, uint8 byte) -{ - return (crc << 8) ^ crc_table[byte ^ (crc >> 24)]; -} - - -// used in setup, and for huffman that doesn't go fast path -static unsigned int bit_reverse(unsigned int n) -{ - n = ((n & 0xAAAAAAAA) >> 1) | ((n & 0x55555555) << 1); - n = ((n & 0xCCCCCCCC) >> 2) | ((n & 0x33333333) << 2); - n = ((n & 0xF0F0F0F0) >> 4) | ((n & 0x0F0F0F0F) << 4); - n = ((n & 0xFF00FF00) >> 8) | ((n & 0x00FF00FF) << 8); - return (n >> 16) | (n << 16); -} - -static float square(float x) -{ - return x*x; -} - -// this is a weird definition of log2() for which log2(1) = 1, log2(2) = 2, log2(4) = 3 -// as required by the specification. fast(?) implementation from stb.h -// @OPTIMIZE: called multiple times per-packet with "constants"; move to setup -static int ilog(int32 n) -{ - static signed char log2_4[16] = { 0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4 }; - - // 2 compares if n < 16, 3 compares otherwise (4 if signed or n > 1<<29) - if (n < (1 << 14)) - if (n < (1 << 4)) return 0 + log2_4[n ]; - else if (n < (1 << 9)) return 5 + log2_4[n >> 5]; - else return 10 + log2_4[n >> 10]; - else if (n < (1 << 24)) - if (n < (1 << 19)) return 15 + log2_4[n >> 15]; - else return 20 + log2_4[n >> 20]; - else if (n < (1 << 29)) return 25 + log2_4[n >> 25]; - else if (n < (1 << 31)) return 30 + log2_4[n >> 30]; - else return 0; // signed n returns 0 -} - -#ifndef M_PI - #define M_PI 3.14159265358979323846264f // from CRC -#endif - -// code length assigned to a value with no huffman encoding -#define NO_CODE 255 - -/////////////////////// LEAF SETUP FUNCTIONS ////////////////////////// -// -// these functions are only called at setup, and only a few times -// per file - -static float float32_unpack(uint32 x) -{ - // from the specification - uint32 mantissa = x & 0x1fffff; - uint32 sign = x & 0x80000000; - uint32 exp = (x & 0x7fe00000) >> 21; - double res = sign ? -(double)mantissa : (double)mantissa; - return (float) ldexp((float)res, exp-788); -} - - -// zlib & jpeg huffman tables assume that the output symbols -// can either be arbitrarily arranged, or have monotonically -// increasing frequencies--they rely on the lengths being sorted; -// this makes for a very simple generation algorithm. -// vorbis allows a huffman table with non-sorted lengths. This -// requires a more sophisticated construction, since symbols in -// order do not map to huffman codes "in order". -static void add_entry(Codebook *c, uint32 huff_code, int symbol, int count, int len, uint32 *values) -{ - if (!c->sparse) { - c->codewords [symbol] = huff_code; - } else { - c->codewords [count] = huff_code; - c->codeword_lengths[count] = len; - values [count] = symbol; - } -} - -static int compute_codewords(Codebook *c, uint8 *len, int n, uint32 *values) -{ - int i,k,m=0; - uint32 available[32]; - - memset(available, 0, sizeof(available)); - // find the first entry - for (k=0; k < n; ++k) if (len[k] < NO_CODE) break; - if (k == n) { assert(c->sorted_entries == 0); return TRUE; } - // add to the list - add_entry(c, 0, k, m++, len[k], values); - // add all available leaves - for (i=1; i <= len[k]; ++i) - available[i] = 1U << (32-i); - // note that the above code treats the first case specially, - // but it's really the same as the following code, so they - // could probably be combined (except the initial code is 0, - // and I use 0 in available[] to mean 'empty') - for (i=k+1; i < n; ++i) { - uint32 res; - int z = len[i], y; - if (z == NO_CODE) continue; - // find lowest available leaf (should always be earliest, - // which is what the specification calls for) - // note that this property, and the fact we can never have - // more than one free leaf at a given level, isn't totally - // trivial to prove, but it seems true and the assert never - // fires, so! - while (z > 0 && !available[z]) --z; - if (z == 0) { return FALSE; } - res = available[z]; - assert(z >= 0 && z < 32); - available[z] = 0; - add_entry(c, bit_reverse(res), i, m++, len[i], values); - // propogate availability up the tree - if (z != len[i]) { - assert(len[i] >= 0 && len[i] < 32); - for (y=len[i]; y > z; --y) { - assert(available[y] == 0); - available[y] = res + (1 << (32-y)); - } - } - } - return TRUE; -} - -// accelerated huffman table allows fast O(1) match of all symbols -// of length <= STB_VORBIS_FAST_HUFFMAN_LENGTH -static void compute_accelerated_huffman(Codebook *c) -{ - int i, len; - for (i=0; i < FAST_HUFFMAN_TABLE_SIZE; ++i) - c->fast_huffman[i] = -1; - - len = c->sparse ? c->sorted_entries : c->entries; - #ifdef STB_VORBIS_FAST_HUFFMAN_SHORT - if (len > 32767) len = 32767; // largest possible value we can encode! - #endif - for (i=0; i < len; ++i) { - if (c->codeword_lengths[i] <= STB_VORBIS_FAST_HUFFMAN_LENGTH) { - uint32 z = c->sparse ? bit_reverse(c->sorted_codewords[i]) : c->codewords[i]; - // set table entries for all bit combinations in the higher bits - while (z < FAST_HUFFMAN_TABLE_SIZE) { - c->fast_huffman[z] = i; - z += 1 << c->codeword_lengths[i]; - } - } - } -} - -#ifdef _MSC_VER -#define STBV_CDECL __cdecl -#else -#define STBV_CDECL -#endif - -static int STBV_CDECL uint32_compare(const void *p, const void *q) -{ - uint32 x = * (uint32 *) p; - uint32 y = * (uint32 *) q; - return x < y ? -1 : x > y; -} - -static int include_in_sort(Codebook *c, uint8 len) -{ - if (c->sparse) { assert(len != NO_CODE); return TRUE; } - if (len == NO_CODE) return FALSE; - if (len > STB_VORBIS_FAST_HUFFMAN_LENGTH) return TRUE; - return FALSE; -} - -// if the fast table above doesn't work, we want to binary -// search them... need to reverse the bits -static void compute_sorted_huffman(Codebook *c, uint8 *lengths, uint32 *values) -{ - int i, len; - // build a list of all the entries - // OPTIMIZATION: don't include the short ones, since they'll be caught by FAST_HUFFMAN. - // this is kind of a frivolous optimization--I don't see any performance improvement, - // but it's like 4 extra lines of code, so. - if (!c->sparse) { - int k = 0; - for (i=0; i < c->entries; ++i) - if (include_in_sort(c, lengths[i])) - c->sorted_codewords[k++] = bit_reverse(c->codewords[i]); - assert(k == c->sorted_entries); - } else { - for (i=0; i < c->sorted_entries; ++i) - c->sorted_codewords[i] = bit_reverse(c->codewords[i]); - } - - qsort(c->sorted_codewords, c->sorted_entries, sizeof(c->sorted_codewords[0]), uint32_compare); - c->sorted_codewords[c->sorted_entries] = 0xffffffff; - - len = c->sparse ? c->sorted_entries : c->entries; - // now we need to indicate how they correspond; we could either - // #1: sort a different data structure that says who they correspond to - // #2: for each sorted entry, search the original list to find who corresponds - // #3: for each original entry, find the sorted entry - // #1 requires extra storage, #2 is slow, #3 can use binary search! - for (i=0; i < len; ++i) { - int huff_len = c->sparse ? lengths[values[i]] : lengths[i]; - if (include_in_sort(c,huff_len)) { - uint32 code = bit_reverse(c->codewords[i]); - int x=0, n=c->sorted_entries; - while (n > 1) { - // invariant: sc[x] <= code < sc[x+n] - int m = x + (n >> 1); - if (c->sorted_codewords[m] <= code) { - x = m; - n -= (n>>1); - } else { - n >>= 1; - } - } - assert(c->sorted_codewords[x] == code); - if (c->sparse) { - c->sorted_values[x] = values[i]; - c->codeword_lengths[x] = huff_len; - } else { - c->sorted_values[x] = i; - } - } - } -} - -// only run while parsing the header (3 times) -static int vorbis_validate(uint8 *data) -{ - static uint8 vorbis[6] = { 'v', 'o', 'r', 'b', 'i', 's' }; - return memcmp(data, vorbis, 6) == 0; -} - -// called from setup only, once per code book -// (formula implied by specification) -static int lookup1_values(int entries, int dim) -{ - int r = (int) floor(exp((float) log((float) entries) / dim)); - if ((int) floor(pow((float) r+1, dim)) <= entries) // (int) cast for MinGW warning; - ++r; // floor() to avoid _ftol() when non-CRT - assert(pow((float) r+1, dim) > entries); - assert((int) floor(pow((float) r, dim)) <= entries); // (int),floor() as above - return r; -} - -// called twice per file -static void compute_twiddle_factors(int n, float *A, float *B, float *C) -{ - int n4 = n >> 2, n8 = n >> 3; - int k,k2; - - for (k=k2=0; k < n4; ++k,k2+=2) { - A[k2 ] = (float) cos(4*k*M_PI/n); - A[k2+1] = (float) -sin(4*k*M_PI/n); - B[k2 ] = (float) cos((k2+1)*M_PI/n/2) * 0.5f; - B[k2+1] = (float) sin((k2+1)*M_PI/n/2) * 0.5f; - } - for (k=k2=0; k < n8; ++k,k2+=2) { - C[k2 ] = (float) cos(2*(k2+1)*M_PI/n); - C[k2+1] = (float) -sin(2*(k2+1)*M_PI/n); - } -} - -static void compute_window(int n, float *window) -{ - int n2 = n >> 1, i; - for (i=0; i < n2; ++i) - window[i] = (float) sin(0.5 * M_PI * square((float) sin((i - 0 + 0.5) / n2 * 0.5 * M_PI))); -} - -static void compute_bitreverse(int n, uint16 *rev) -{ - int ld = ilog(n) - 1; // ilog is off-by-one from normal definitions - int i, n8 = n >> 3; - for (i=0; i < n8; ++i) - rev[i] = (bit_reverse(i) >> (32-ld+3)) << 2; -} - -static int init_blocksize(vorb *f, int b, int n) -{ - int n2 = n >> 1, n4 = n >> 2, n8 = n >> 3; - f->A[b] = (float *) setup_malloc(f, sizeof(float) * n2); - f->B[b] = (float *) setup_malloc(f, sizeof(float) * n2); - f->C[b] = (float *) setup_malloc(f, sizeof(float) * n4); - if (!f->A[b] || !f->B[b] || !f->C[b]) return error(f, VORBIS_outofmem); - compute_twiddle_factors(n, f->A[b], f->B[b], f->C[b]); - f->window[b] = (float *) setup_malloc(f, sizeof(float) * n2); - if (!f->window[b]) return error(f, VORBIS_outofmem); - compute_window(n, f->window[b]); - f->bit_reverse[b] = (uint16 *) setup_malloc(f, sizeof(uint16) * n8); - if (!f->bit_reverse[b]) return error(f, VORBIS_outofmem); - compute_bitreverse(n, f->bit_reverse[b]); - return TRUE; -} - -static void neighbors(uint16 *x, int n, int *plow, int *phigh) -{ - int low = -1; - int high = 65536; - int i; - for (i=0; i < n; ++i) { - if (x[i] > low && x[i] < x[n]) { *plow = i; low = x[i]; } - if (x[i] < high && x[i] > x[n]) { *phigh = i; high = x[i]; } - } -} - -// this has been repurposed so y is now the original index instead of y -typedef struct -{ - uint16 x,y; -} Point; - -static int STBV_CDECL point_compare(const void *p, const void *q) -{ - Point *a = (Point *) p; - Point *b = (Point *) q; - return a->x < b->x ? -1 : a->x > b->x; -} - -// -/////////////////////// END LEAF SETUP FUNCTIONS ////////////////////////// - - -#if defined(STB_VORBIS_NO_STDIO) - #define USE_MEMORY(z) TRUE -#else - #define USE_MEMORY(z) ((z)->stream) -#endif - -static uint8 get8(vorb *z) -{ - if (USE_MEMORY(z)) { - if (z->stream >= z->stream_end) { z->eof = TRUE; return 0; } - return *z->stream++; - } - - #ifndef STB_VORBIS_NO_STDIO - { - int c = fgetc(z->f); - if (c == EOF) { z->eof = TRUE; return 0; } - return c; - } - #endif -} - -static uint32 get32(vorb *f) -{ - uint32 x; - x = get8(f); - x += get8(f) << 8; - x += get8(f) << 16; - x += (uint32) get8(f) << 24; - return x; -} - -static int getn(vorb *z, uint8 *data, int n) -{ - if (USE_MEMORY(z)) { - if (z->stream+n > z->stream_end) { z->eof = 1; return 0; } - memcpy(data, z->stream, n); - z->stream += n; - return 1; - } - - #ifndef STB_VORBIS_NO_STDIO - if (fread(data, n, 1, z->f) == 1) - return 1; - else { - z->eof = 1; - return 0; - } - #endif -} - -static void skip(vorb *z, int n) -{ - if (USE_MEMORY(z)) { - z->stream += n; - if (z->stream >= z->stream_end) z->eof = 1; - return; - } - #ifndef STB_VORBIS_NO_STDIO - { - long x = ftell(z->f); - fseek(z->f, x+n, SEEK_SET); - } - #endif -} - -static int set_file_offset(stb_vorbis *f, unsigned int loc) -{ - #ifndef STB_VORBIS_NO_PUSHDATA_API - if (f->push_mode) return 0; - #endif - f->eof = 0; - if (USE_MEMORY(f)) { - if (f->stream_start + loc >= f->stream_end || f->stream_start + loc < f->stream_start) { - f->stream = f->stream_end; - f->eof = 1; - return 0; - } else { - f->stream = f->stream_start + loc; - return 1; - } - } - #ifndef STB_VORBIS_NO_STDIO - if (loc + f->f_start < loc || loc >= 0x80000000) { - loc = 0x7fffffff; - f->eof = 1; - } else { - loc += f->f_start; - } - if (!fseek(f->f, loc, SEEK_SET)) - return 1; - f->eof = 1; - fseek(f->f, f->f_start, SEEK_END); - return 0; - #endif -} - - -static uint8 ogg_page_header[4] = { 0x4f, 0x67, 0x67, 0x53 }; - -static int capture_pattern(vorb *f) -{ - if (0x4f != get8(f)) return FALSE; - if (0x67 != get8(f)) return FALSE; - if (0x67 != get8(f)) return FALSE; - if (0x53 != get8(f)) return FALSE; - return TRUE; -} - -#define PAGEFLAG_continued_packet 1 -#define PAGEFLAG_first_page 2 -#define PAGEFLAG_last_page 4 - -static int start_page_no_capturepattern(vorb *f) -{ - uint32 loc0,loc1,n; - // stream structure version - if (0 != get8(f)) return error(f, VORBIS_invalid_stream_structure_version); - // header flag - f->page_flag = get8(f); - // absolute granule position - loc0 = get32(f); - loc1 = get32(f); - // @TODO: validate loc0,loc1 as valid positions? - // stream serial number -- vorbis doesn't interleave, so discard - get32(f); - //if (f->serial != get32(f)) return error(f, VORBIS_incorrect_stream_serial_number); - // page sequence number - n = get32(f); - f->last_page = n; - // CRC32 - get32(f); - // page_segments - f->segment_count = get8(f); - if (!getn(f, f->segments, f->segment_count)) - return error(f, VORBIS_unexpected_eof); - // assume we _don't_ know any the sample position of any segments - f->end_seg_with_known_loc = -2; - if (loc0 != ~0U || loc1 != ~0U) { - int i; - // determine which packet is the last one that will complete - for (i=f->segment_count-1; i >= 0; --i) - if (f->segments[i] < 255) - break; - // 'i' is now the index of the _last_ segment of a packet that ends - if (i >= 0) { - f->end_seg_with_known_loc = i; - f->known_loc_for_packet = loc0; - } - } - if (f->first_decode) { - int i,len; - ProbedPage p; - len = 0; - for (i=0; i < f->segment_count; ++i) - len += f->segments[i]; - len += 27 + f->segment_count; - p.page_start = f->first_audio_page_offset; - p.page_end = p.page_start + len; - p.last_decoded_sample = loc0; - f->p_first = p; - } - f->next_seg = 0; - return TRUE; -} - -static int start_page(vorb *f) -{ - if (!capture_pattern(f)) return error(f, VORBIS_missing_capture_pattern); - return start_page_no_capturepattern(f); -} - -static int start_packet(vorb *f) -{ - while (f->next_seg == -1) { - if (!start_page(f)) return FALSE; - if (f->page_flag & PAGEFLAG_continued_packet) - return error(f, VORBIS_continued_packet_flag_invalid); - } - f->last_seg = FALSE; - f->valid_bits = 0; - f->packet_bytes = 0; - f->bytes_in_seg = 0; - // f->next_seg is now valid - return TRUE; -} - -static int maybe_start_packet(vorb *f) -{ - if (f->next_seg == -1) { - int x = get8(f); - if (f->eof) return FALSE; // EOF at page boundary is not an error! - if (0x4f != x ) return error(f, VORBIS_missing_capture_pattern); - if (0x67 != get8(f)) return error(f, VORBIS_missing_capture_pattern); - if (0x67 != get8(f)) return error(f, VORBIS_missing_capture_pattern); - if (0x53 != get8(f)) return error(f, VORBIS_missing_capture_pattern); - if (!start_page_no_capturepattern(f)) return FALSE; - if (f->page_flag & PAGEFLAG_continued_packet) { - // set up enough state that we can read this packet if we want, - // e.g. during recovery - f->last_seg = FALSE; - f->bytes_in_seg = 0; - return error(f, VORBIS_continued_packet_flag_invalid); - } - } - return start_packet(f); -} - -static int next_segment(vorb *f) -{ - int len; - if (f->last_seg) return 0; - if (f->next_seg == -1) { - f->last_seg_which = f->segment_count-1; // in case start_page fails - if (!start_page(f)) { f->last_seg = 1; return 0; } - if (!(f->page_flag & PAGEFLAG_continued_packet)) return error(f, VORBIS_continued_packet_flag_invalid); - } - len = f->segments[f->next_seg++]; - if (len < 255) { - f->last_seg = TRUE; - f->last_seg_which = f->next_seg-1; - } - if (f->next_seg >= f->segment_count) - f->next_seg = -1; - assert(f->bytes_in_seg == 0); - f->bytes_in_seg = len; - return len; -} - -#define EOP (-1) -#define INVALID_BITS (-1) - -static int get8_packet_raw(vorb *f) -{ - if (!f->bytes_in_seg) { // CLANG! - if (f->last_seg) return EOP; - else if (!next_segment(f)) return EOP; - } - assert(f->bytes_in_seg > 0); - --f->bytes_in_seg; - ++f->packet_bytes; - return get8(f); -} - -static int get8_packet(vorb *f) -{ - int x = get8_packet_raw(f); - f->valid_bits = 0; - return x; -} - -static void flush_packet(vorb *f) -{ - while (get8_packet_raw(f) != EOP); -} - -// @OPTIMIZE: this is the secondary bit decoder, so it's probably not as important -// as the huffman decoder? -static uint32 get_bits(vorb *f, int n) -{ - uint32 z; - - if (f->valid_bits < 0) return 0; - if (f->valid_bits < n) { - if (n > 24) { - // the accumulator technique below would not work correctly in this case - z = get_bits(f, 24); - z += get_bits(f, n-24) << 24; - return z; - } - if (f->valid_bits == 0) f->acc = 0; - while (f->valid_bits < n) { - int z = get8_packet_raw(f); - if (z == EOP) { - f->valid_bits = INVALID_BITS; - return 0; - } - f->acc += z << f->valid_bits; - f->valid_bits += 8; - } - } - if (f->valid_bits < 0) return 0; - z = f->acc & ((1 << n)-1); - f->acc >>= n; - f->valid_bits -= n; - return z; -} - -// @OPTIMIZE: primary accumulator for huffman -// expand the buffer to as many bits as possible without reading off end of packet -// it might be nice to allow f->valid_bits and f->acc to be stored in registers, -// e.g. cache them locally and decode locally -static __forceinline void prep_huffman(vorb *f) -{ - if (f->valid_bits <= 24) { - if (f->valid_bits == 0) f->acc = 0; - do { - int z; - if (f->last_seg && !f->bytes_in_seg) return; - z = get8_packet_raw(f); - if (z == EOP) return; - f->acc += (unsigned) z << f->valid_bits; - f->valid_bits += 8; - } while (f->valid_bits <= 24); - } -} - -enum -{ - VORBIS_packet_id = 1, - VORBIS_packet_comment = 3, - VORBIS_packet_setup = 5 -}; - -static int codebook_decode_scalar_raw(vorb *f, Codebook *c) -{ - int i; - prep_huffman(f); - - if (c->codewords == NULL && c->sorted_codewords == NULL) - return -1; - - // cases to use binary search: sorted_codewords && !c->codewords - // sorted_codewords && c->entries > 8 - if (c->entries > 8 ? c->sorted_codewords!=NULL : !c->codewords) { - // binary search - uint32 code = bit_reverse(f->acc); - int x=0, n=c->sorted_entries, len; - - while (n > 1) { - // invariant: sc[x] <= code < sc[x+n] - int m = x + (n >> 1); - if (c->sorted_codewords[m] <= code) { - x = m; - n -= (n>>1); - } else { - n >>= 1; - } - } - // x is now the sorted index - if (!c->sparse) x = c->sorted_values[x]; - // x is now sorted index if sparse, or symbol otherwise - len = c->codeword_lengths[x]; - if (f->valid_bits >= len) { - f->acc >>= len; - f->valid_bits -= len; - return x; - } - - f->valid_bits = 0; - return -1; - } - - // if small, linear search - assert(!c->sparse); - for (i=0; i < c->entries; ++i) { - if (c->codeword_lengths[i] == NO_CODE) continue; - if (c->codewords[i] == (f->acc & ((1 << c->codeword_lengths[i])-1))) { - if (f->valid_bits >= c->codeword_lengths[i]) { - f->acc >>= c->codeword_lengths[i]; - f->valid_bits -= c->codeword_lengths[i]; - return i; - } - f->valid_bits = 0; - return -1; - } - } - - error(f, VORBIS_invalid_stream); - f->valid_bits = 0; - return -1; -} - -#ifndef STB_VORBIS_NO_INLINE_DECODE - -#define DECODE_RAW(var, f,c) \ - if (f->valid_bits < STB_VORBIS_FAST_HUFFMAN_LENGTH) \ - prep_huffman(f); \ - var = f->acc & FAST_HUFFMAN_TABLE_MASK; \ - var = c->fast_huffman[var]; \ - if (var >= 0) { \ - int n = c->codeword_lengths[var]; \ - f->acc >>= n; \ - f->valid_bits -= n; \ - if (f->valid_bits < 0) { f->valid_bits = 0; var = -1; } \ - } else { \ - var = codebook_decode_scalar_raw(f,c); \ - } - -#else - -static int codebook_decode_scalar(vorb *f, Codebook *c) -{ - int i; - if (f->valid_bits < STB_VORBIS_FAST_HUFFMAN_LENGTH) - prep_huffman(f); - // fast huffman table lookup - i = f->acc & FAST_HUFFMAN_TABLE_MASK; - i = c->fast_huffman[i]; - if (i >= 0) { - f->acc >>= c->codeword_lengths[i]; - f->valid_bits -= c->codeword_lengths[i]; - if (f->valid_bits < 0) { f->valid_bits = 0; return -1; } - return i; - } - return codebook_decode_scalar_raw(f,c); -} - -#define DECODE_RAW(var,f,c) var = codebook_decode_scalar(f,c); - -#endif - -#define DECODE(var,f,c) \ - DECODE_RAW(var,f,c) \ - if (c->sparse) var = c->sorted_values[var]; - -#ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK - #define DECODE_VQ(var,f,c) DECODE_RAW(var,f,c) -#else - #define DECODE_VQ(var,f,c) DECODE(var,f,c) -#endif - - - - - - -// CODEBOOK_ELEMENT_FAST is an optimization for the CODEBOOK_FLOATS case -// where we avoid one addition -#define CODEBOOK_ELEMENT(c,off) (c->multiplicands[off]) -#define CODEBOOK_ELEMENT_FAST(c,off) (c->multiplicands[off]) -#define CODEBOOK_ELEMENT_BASE(c) (0) - -static int codebook_decode_start(vorb *f, Codebook *c) -{ - int z = -1; - - // type 0 is only legal in a scalar context - if (c->lookup_type == 0) - error(f, VORBIS_invalid_stream); - else { - DECODE_VQ(z,f,c); - if (c->sparse) assert(z < c->sorted_entries); - if (z < 0) { // check for EOP - if (!f->bytes_in_seg) - if (f->last_seg) - return z; - error(f, VORBIS_invalid_stream); - } - } - return z; -} - -static int codebook_decode(vorb *f, Codebook *c, float *output, int len) -{ - int i,z = codebook_decode_start(f,c); - if (z < 0) return FALSE; - if (len > c->dimensions) len = c->dimensions; - -#ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK - if (c->lookup_type == 1) { - float last = CODEBOOK_ELEMENT_BASE(c); - int div = 1; - for (i=0; i < len; ++i) { - int off = (z / div) % c->lookup_values; - float val = CODEBOOK_ELEMENT_FAST(c,off) + last; - output[i] += val; - if (c->sequence_p) last = val + c->minimum_value; - div *= c->lookup_values; - } - return TRUE; - } -#endif - - z *= c->dimensions; - if (c->sequence_p) { - float last = CODEBOOK_ELEMENT_BASE(c); - for (i=0; i < len; ++i) { - float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last; - output[i] += val; - last = val + c->minimum_value; - } - } else { - float last = CODEBOOK_ELEMENT_BASE(c); - for (i=0; i < len; ++i) { - output[i] += CODEBOOK_ELEMENT_FAST(c,z+i) + last; - } - } - - return TRUE; -} - -static int codebook_decode_step(vorb *f, Codebook *c, float *output, int len, int step) -{ - int i,z = codebook_decode_start(f,c); - float last = CODEBOOK_ELEMENT_BASE(c); - if (z < 0) return FALSE; - if (len > c->dimensions) len = c->dimensions; - -#ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK - if (c->lookup_type == 1) { - int div = 1; - for (i=0; i < len; ++i) { - int off = (z / div) % c->lookup_values; - float val = CODEBOOK_ELEMENT_FAST(c,off) + last; - output[i*step] += val; - if (c->sequence_p) last = val; - div *= c->lookup_values; - } - return TRUE; - } -#endif - - z *= c->dimensions; - for (i=0; i < len; ++i) { - float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last; - output[i*step] += val; - if (c->sequence_p) last = val; - } - - return TRUE; -} - -static int codebook_decode_deinterleave_repeat(vorb *f, Codebook *c, float **outputs, int ch, int *c_inter_p, int *p_inter_p, int len, int total_decode) -{ - int c_inter = *c_inter_p; - int p_inter = *p_inter_p; - int i,z, effective = c->dimensions; - - // type 0 is only legal in a scalar context - if (c->lookup_type == 0) return error(f, VORBIS_invalid_stream); - - while (total_decode > 0) { - float last = CODEBOOK_ELEMENT_BASE(c); - DECODE_VQ(z,f,c); - #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK - assert(!c->sparse || z < c->sorted_entries); - #endif - if (z < 0) { - if (!f->bytes_in_seg) - if (f->last_seg) return FALSE; - return error(f, VORBIS_invalid_stream); - } - - // if this will take us off the end of the buffers, stop short! - // we check by computing the length of the virtual interleaved - // buffer (len*ch), our current offset within it (p_inter*ch)+(c_inter), - // and the length we'll be using (effective) - if (c_inter + p_inter*ch + effective > len * ch) { - effective = len*ch - (p_inter*ch - c_inter); - } - - #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK - if (c->lookup_type == 1) { - int div = 1; - for (i=0; i < effective; ++i) { - int off = (z / div) % c->lookup_values; - float val = CODEBOOK_ELEMENT_FAST(c,off) + last; - if (outputs[c_inter]) - outputs[c_inter][p_inter] += val; - if (++c_inter == ch) { c_inter = 0; ++p_inter; } - if (c->sequence_p) last = val; - div *= c->lookup_values; - } - } else - #endif - { - z *= c->dimensions; - if (c->sequence_p) { - for (i=0; i < effective; ++i) { - float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last; - if (outputs[c_inter]) - outputs[c_inter][p_inter] += val; - if (++c_inter == ch) { c_inter = 0; ++p_inter; } - last = val; - } - } else { - for (i=0; i < effective; ++i) { - float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last; - if (outputs[c_inter]) - outputs[c_inter][p_inter] += val; - if (++c_inter == ch) { c_inter = 0; ++p_inter; } - } - } - } - - total_decode -= effective; - } - *c_inter_p = c_inter; - *p_inter_p = p_inter; - return TRUE; -} - -static int predict_point(int x, int x0, int x1, int y0, int y1) -{ - int dy = y1 - y0; - int adx = x1 - x0; - // @OPTIMIZE: force int division to round in the right direction... is this necessary on x86? - int err = abs(dy) * (x - x0); - int off = err / adx; - return dy < 0 ? y0 - off : y0 + off; -} - -// the following table is block-copied from the specification -static float inverse_db_table[256] = -{ - 1.0649863e-07f, 1.1341951e-07f, 1.2079015e-07f, 1.2863978e-07f, - 1.3699951e-07f, 1.4590251e-07f, 1.5538408e-07f, 1.6548181e-07f, - 1.7623575e-07f, 1.8768855e-07f, 1.9988561e-07f, 2.1287530e-07f, - 2.2670913e-07f, 2.4144197e-07f, 2.5713223e-07f, 2.7384213e-07f, - 2.9163793e-07f, 3.1059021e-07f, 3.3077411e-07f, 3.5226968e-07f, - 3.7516214e-07f, 3.9954229e-07f, 4.2550680e-07f, 4.5315863e-07f, - 4.8260743e-07f, 5.1396998e-07f, 5.4737065e-07f, 5.8294187e-07f, - 6.2082472e-07f, 6.6116941e-07f, 7.0413592e-07f, 7.4989464e-07f, - 7.9862701e-07f, 8.5052630e-07f, 9.0579828e-07f, 9.6466216e-07f, - 1.0273513e-06f, 1.0941144e-06f, 1.1652161e-06f, 1.2409384e-06f, - 1.3215816e-06f, 1.4074654e-06f, 1.4989305e-06f, 1.5963394e-06f, - 1.7000785e-06f, 1.8105592e-06f, 1.9282195e-06f, 2.0535261e-06f, - 2.1869758e-06f, 2.3290978e-06f, 2.4804557e-06f, 2.6416497e-06f, - 2.8133190e-06f, 2.9961443e-06f, 3.1908506e-06f, 3.3982101e-06f, - 3.6190449e-06f, 3.8542308e-06f, 4.1047004e-06f, 4.3714470e-06f, - 4.6555282e-06f, 4.9580707e-06f, 5.2802740e-06f, 5.6234160e-06f, - 5.9888572e-06f, 6.3780469e-06f, 6.7925283e-06f, 7.2339451e-06f, - 7.7040476e-06f, 8.2047000e-06f, 8.7378876e-06f, 9.3057248e-06f, - 9.9104632e-06f, 1.0554501e-05f, 1.1240392e-05f, 1.1970856e-05f, - 1.2748789e-05f, 1.3577278e-05f, 1.4459606e-05f, 1.5399272e-05f, - 1.6400004e-05f, 1.7465768e-05f, 1.8600792e-05f, 1.9809576e-05f, - 2.1096914e-05f, 2.2467911e-05f, 2.3928002e-05f, 2.5482978e-05f, - 2.7139006e-05f, 2.8902651e-05f, 3.0780908e-05f, 3.2781225e-05f, - 3.4911534e-05f, 3.7180282e-05f, 3.9596466e-05f, 4.2169667e-05f, - 4.4910090e-05f, 4.7828601e-05f, 5.0936773e-05f, 5.4246931e-05f, - 5.7772202e-05f, 6.1526565e-05f, 6.5524908e-05f, 6.9783085e-05f, - 7.4317983e-05f, 7.9147585e-05f, 8.4291040e-05f, 8.9768747e-05f, - 9.5602426e-05f, 0.00010181521f, 0.00010843174f, 0.00011547824f, - 0.00012298267f, 0.00013097477f, 0.00013948625f, 0.00014855085f, - 0.00015820453f, 0.00016848555f, 0.00017943469f, 0.00019109536f, - 0.00020351382f, 0.00021673929f, 0.00023082423f, 0.00024582449f, - 0.00026179955f, 0.00027881276f, 0.00029693158f, 0.00031622787f, - 0.00033677814f, 0.00035866388f, 0.00038197188f, 0.00040679456f, - 0.00043323036f, 0.00046138411f, 0.00049136745f, 0.00052329927f, - 0.00055730621f, 0.00059352311f, 0.00063209358f, 0.00067317058f, - 0.00071691700f, 0.00076350630f, 0.00081312324f, 0.00086596457f, - 0.00092223983f, 0.00098217216f, 0.0010459992f, 0.0011139742f, - 0.0011863665f, 0.0012634633f, 0.0013455702f, 0.0014330129f, - 0.0015261382f, 0.0016253153f, 0.0017309374f, 0.0018434235f, - 0.0019632195f, 0.0020908006f, 0.0022266726f, 0.0023713743f, - 0.0025254795f, 0.0026895994f, 0.0028643847f, 0.0030505286f, - 0.0032487691f, 0.0034598925f, 0.0036847358f, 0.0039241906f, - 0.0041792066f, 0.0044507950f, 0.0047400328f, 0.0050480668f, - 0.0053761186f, 0.0057254891f, 0.0060975636f, 0.0064938176f, - 0.0069158225f, 0.0073652516f, 0.0078438871f, 0.0083536271f, - 0.0088964928f, 0.009474637f, 0.010090352f, 0.010746080f, - 0.011444421f, 0.012188144f, 0.012980198f, 0.013823725f, - 0.014722068f, 0.015678791f, 0.016697687f, 0.017782797f, - 0.018938423f, 0.020169149f, 0.021479854f, 0.022875735f, - 0.024362330f, 0.025945531f, 0.027631618f, 0.029427276f, - 0.031339626f, 0.033376252f, 0.035545228f, 0.037855157f, - 0.040315199f, 0.042935108f, 0.045725273f, 0.048696758f, - 0.051861348f, 0.055231591f, 0.058820850f, 0.062643361f, - 0.066714279f, 0.071049749f, 0.075666962f, 0.080584227f, - 0.085821044f, 0.091398179f, 0.097337747f, 0.10366330f, - 0.11039993f, 0.11757434f, 0.12521498f, 0.13335215f, - 0.14201813f, 0.15124727f, 0.16107617f, 0.17154380f, - 0.18269168f, 0.19456402f, 0.20720788f, 0.22067342f, - 0.23501402f, 0.25028656f, 0.26655159f, 0.28387361f, - 0.30232132f, 0.32196786f, 0.34289114f, 0.36517414f, - 0.38890521f, 0.41417847f, 0.44109412f, 0.46975890f, - 0.50028648f, 0.53279791f, 0.56742212f, 0.60429640f, - 0.64356699f, 0.68538959f, 0.72993007f, 0.77736504f, - 0.82788260f, 0.88168307f, 0.9389798f, 1.0f -}; - - -// @OPTIMIZE: if you want to replace this bresenham line-drawing routine, -// note that you must produce bit-identical output to decode correctly; -// this specific sequence of operations is specified in the spec (it's -// drawing integer-quantized frequency-space lines that the encoder -// expects to be exactly the same) -// ... also, isn't the whole point of Bresenham's algorithm to NOT -// have to divide in the setup? sigh. -#ifndef STB_VORBIS_NO_DEFER_FLOOR -#define LINE_OP(a,b) a *= b -#else -#define LINE_OP(a,b) a = b -#endif - -#ifdef STB_VORBIS_DIVIDE_TABLE -#define DIVTAB_NUMER 32 -#define DIVTAB_DENOM 64 -int8 integer_divide_table[DIVTAB_NUMER][DIVTAB_DENOM]; // 2KB -#endif - -static __forceinline void draw_line(float *output, int x0, int y0, int x1, int y1, int n) -{ - int dy = y1 - y0; - int adx = x1 - x0; - int ady = abs(dy); - int base; - int x=x0,y=y0; - int err = 0; - int sy; - -#ifdef STB_VORBIS_DIVIDE_TABLE - if (adx < DIVTAB_DENOM && ady < DIVTAB_NUMER) { - if (dy < 0) { - base = -integer_divide_table[ady][adx]; - sy = base-1; - } else { - base = integer_divide_table[ady][adx]; - sy = base+1; - } - } else { - base = dy / adx; - if (dy < 0) - sy = base - 1; - else - sy = base+1; - } -#else - base = dy / adx; - if (dy < 0) - sy = base - 1; - else - sy = base+1; -#endif - ady -= abs(base) * adx; - if (x1 > n) x1 = n; - if (x < x1) { - LINE_OP(output[x], inverse_db_table[y]); - for (++x; x < x1; ++x) { - err += ady; - if (err >= adx) { - err -= adx; - y += sy; - } else - y += base; - LINE_OP(output[x], inverse_db_table[y]); - } - } -} - -static int residue_decode(vorb *f, Codebook *book, float *target, int offset, int n, int rtype) -{ - int k; - if (rtype == 0) { - int step = n / book->dimensions; - for (k=0; k < step; ++k) - if (!codebook_decode_step(f, book, target+offset+k, n-offset-k, step)) - return FALSE; - } else { - for (k=0; k < n; ) { - if (!codebook_decode(f, book, target+offset, n-k)) - return FALSE; - k += book->dimensions; - offset += book->dimensions; - } - } - return TRUE; -} - -static void decode_residue(vorb *f, float *residue_buffers[], int ch, int n, int rn, uint8 *do_not_decode) -{ - int i,j,pass; - Residue *r = f->residue_config + rn; - int rtype = f->residue_types[rn]; - int c = r->classbook; - int classwords = f->codebooks[c].dimensions; - int n_read = r->end - r->begin; - int part_read = n_read / r->part_size; - int temp_alloc_point = temp_alloc_save(f); - #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE - uint8 ***part_classdata = (uint8 ***) temp_block_array(f,f->channels, part_read * sizeof(**part_classdata)); - #else - int **classifications = (int **) temp_block_array(f,f->channels, part_read * sizeof(**classifications)); - #endif - - CHECK(f); - - for (i=0; i < ch; ++i) - if (!do_not_decode[i]) - memset(residue_buffers[i], 0, sizeof(float) * n); - - if (rtype == 2 && ch != 1) { - for (j=0; j < ch; ++j) - if (!do_not_decode[j]) - break; - if (j == ch) - goto done; - - for (pass=0; pass < 8; ++pass) { - int pcount = 0, class_set = 0; - if (ch == 2) { - while (pcount < part_read) { - int z = r->begin + pcount*r->part_size; - int c_inter = (z & 1), p_inter = z>>1; - if (pass == 0) { - Codebook *c = f->codebooks+r->classbook; - int q; - DECODE(q,f,c); - if (q == EOP) goto done; - #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE - part_classdata[0][class_set] = r->classdata[q]; - #else - for (i=classwords-1; i >= 0; --i) { - classifications[0][i+pcount] = q % r->classifications; - q /= r->classifications; - } - #endif - } - for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) { - int z = r->begin + pcount*r->part_size; - #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE - int c = part_classdata[0][class_set][i]; - #else - int c = classifications[0][pcount]; - #endif - int b = r->residue_books[c][pass]; - if (b >= 0) { - Codebook *book = f->codebooks + b; - #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK - if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size)) - goto done; - #else - // saves 1% - if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size)) - goto done; - #endif - } else { - z += r->part_size; - c_inter = z & 1; - p_inter = z >> 1; - } - } - #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE - ++class_set; - #endif - } - } else if (ch == 1) { - while (pcount < part_read) { - int z = r->begin + pcount*r->part_size; - int c_inter = 0, p_inter = z; - if (pass == 0) { - Codebook *c = f->codebooks+r->classbook; - int q; - DECODE(q,f,c); - if (q == EOP) goto done; - #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE - part_classdata[0][class_set] = r->classdata[q]; - #else - for (i=classwords-1; i >= 0; --i) { - classifications[0][i+pcount] = q % r->classifications; - q /= r->classifications; - } - #endif - } - for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) { - int z = r->begin + pcount*r->part_size; - #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE - int c = part_classdata[0][class_set][i]; - #else - int c = classifications[0][pcount]; - #endif - int b = r->residue_books[c][pass]; - if (b >= 0) { - Codebook *book = f->codebooks + b; - if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size)) - goto done; - } else { - z += r->part_size; - c_inter = 0; - p_inter = z; - } - } - #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE - ++class_set; - #endif - } - } else { - while (pcount < part_read) { - int z = r->begin + pcount*r->part_size; - int c_inter = z % ch, p_inter = z/ch; - if (pass == 0) { - Codebook *c = f->codebooks+r->classbook; - int q; - DECODE(q,f,c); - if (q == EOP) goto done; - #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE - part_classdata[0][class_set] = r->classdata[q]; - #else - for (i=classwords-1; i >= 0; --i) { - classifications[0][i+pcount] = q % r->classifications; - q /= r->classifications; - } - #endif - } - for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) { - int z = r->begin + pcount*r->part_size; - #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE - int c = part_classdata[0][class_set][i]; - #else - int c = classifications[0][pcount]; - #endif - int b = r->residue_books[c][pass]; - if (b >= 0) { - Codebook *book = f->codebooks + b; - if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size)) - goto done; - } else { - z += r->part_size; - c_inter = z % ch; - p_inter = z / ch; - } - } - #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE - ++class_set; - #endif - } - } - } - goto done; - } - CHECK(f); - - for (pass=0; pass < 8; ++pass) { - int pcount = 0, class_set=0; - while (pcount < part_read) { - if (pass == 0) { - for (j=0; j < ch; ++j) { - if (!do_not_decode[j]) { - Codebook *c = f->codebooks+r->classbook; - int temp; - DECODE(temp,f,c); - if (temp == EOP) goto done; - #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE - part_classdata[j][class_set] = r->classdata[temp]; - #else - for (i=classwords-1; i >= 0; --i) { - classifications[j][i+pcount] = temp % r->classifications; - temp /= r->classifications; - } - #endif - } - } - } - for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) { - for (j=0; j < ch; ++j) { - if (!do_not_decode[j]) { - #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE - int c = part_classdata[j][class_set][i]; - #else - int c = classifications[j][pcount]; - #endif - int b = r->residue_books[c][pass]; - if (b >= 0) { - float *target = residue_buffers[j]; - int offset = r->begin + pcount * r->part_size; - int n = r->part_size; - Codebook *book = f->codebooks + b; - if (!residue_decode(f, book, target, offset, n, rtype)) - goto done; - } - } - } - } - #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE - ++class_set; - #endif - } - } - done: - CHECK(f); - #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE - temp_free(f,part_classdata); - #else - temp_free(f,classifications); - #endif - temp_alloc_restore(f,temp_alloc_point); -} - - -#if 0 -// slow way for debugging -void inverse_mdct_slow(float *buffer, int n) -{ - int i,j; - int n2 = n >> 1; - float *x = (float *) malloc(sizeof(*x) * n2); - memcpy(x, buffer, sizeof(*x) * n2); - for (i=0; i < n; ++i) { - float acc = 0; - for (j=0; j < n2; ++j) - // formula from paper: - //acc += n/4.0f * x[j] * (float) cos(M_PI / 2 / n * (2 * i + 1 + n/2.0)*(2*j+1)); - // formula from wikipedia - //acc += 2.0f / n2 * x[j] * (float) cos(M_PI/n2 * (i + 0.5 + n2/2)*(j + 0.5)); - // these are equivalent, except the formula from the paper inverts the multiplier! - // however, what actually works is NO MULTIPLIER!?! - //acc += 64 * 2.0f / n2 * x[j] * (float) cos(M_PI/n2 * (i + 0.5 + n2/2)*(j + 0.5)); - acc += x[j] * (float) cos(M_PI / 2 / n * (2 * i + 1 + n/2.0)*(2*j+1)); - buffer[i] = acc; - } - free(x); -} -#elif 0 -// same as above, but just barely able to run in real time on modern machines -void inverse_mdct_slow(float *buffer, int n, vorb *f, int blocktype) -{ - float mcos[16384]; - int i,j; - int n2 = n >> 1, nmask = (n << 2) -1; - float *x = (float *) malloc(sizeof(*x) * n2); - memcpy(x, buffer, sizeof(*x) * n2); - for (i=0; i < 4*n; ++i) - mcos[i] = (float) cos(M_PI / 2 * i / n); - - for (i=0; i < n; ++i) { - float acc = 0; - for (j=0; j < n2; ++j) - acc += x[j] * mcos[(2 * i + 1 + n2)*(2*j+1) & nmask]; - buffer[i] = acc; - } - free(x); -} -#elif 0 -// transform to use a slow dct-iv; this is STILL basically trivial, -// but only requires half as many ops -void dct_iv_slow(float *buffer, int n) -{ - float mcos[16384]; - float x[2048]; - int i,j; - int n2 = n >> 1, nmask = (n << 3) - 1; - memcpy(x, buffer, sizeof(*x) * n); - for (i=0; i < 8*n; ++i) - mcos[i] = (float) cos(M_PI / 4 * i / n); - for (i=0; i < n; ++i) { - float acc = 0; - for (j=0; j < n; ++j) - acc += x[j] * mcos[((2 * i + 1)*(2*j+1)) & nmask]; - buffer[i] = acc; - } -} - -void inverse_mdct_slow(float *buffer, int n, vorb *f, int blocktype) -{ - int i, n4 = n >> 2, n2 = n >> 1, n3_4 = n - n4; - float temp[4096]; - - memcpy(temp, buffer, n2 * sizeof(float)); - dct_iv_slow(temp, n2); // returns -c'-d, a-b' - - for (i=0; i < n4 ; ++i) buffer[i] = temp[i+n4]; // a-b' - for ( ; i < n3_4; ++i) buffer[i] = -temp[n3_4 - i - 1]; // b-a', c+d' - for ( ; i < n ; ++i) buffer[i] = -temp[i - n3_4]; // c'+d -} -#endif - -#ifndef LIBVORBIS_MDCT -#define LIBVORBIS_MDCT 0 -#endif - -#if LIBVORBIS_MDCT -// directly call the vorbis MDCT using an interface documented -// by Jeff Roberts... useful for performance comparison -typedef struct -{ - int n; - int log2n; - - float *trig; - int *bitrev; - - float scale; -} mdct_lookup; - -extern void mdct_init(mdct_lookup *lookup, int n); -extern void mdct_clear(mdct_lookup *l); -extern void mdct_backward(mdct_lookup *init, float *in, float *out); - -mdct_lookup M1,M2; - -void inverse_mdct(float *buffer, int n, vorb *f, int blocktype) -{ - mdct_lookup *M; - if (M1.n == n) M = &M1; - else if (M2.n == n) M = &M2; - else if (M1.n == 0) { mdct_init(&M1, n); M = &M1; } - else { - if (M2.n) __asm int 3; - mdct_init(&M2, n); - M = &M2; - } - - mdct_backward(M, buffer, buffer); -} -#endif - - -// the following were split out into separate functions while optimizing; -// they could be pushed back up but eh. __forceinline showed no change; -// they're probably already being inlined. -static void imdct_step3_iter0_loop(int n, float *e, int i_off, int k_off, float *A) -{ - float *ee0 = e + i_off; - float *ee2 = ee0 + k_off; - int i; - - assert((n & 3) == 0); - for (i=(n>>2); i > 0; --i) { - float k00_20, k01_21; - k00_20 = ee0[ 0] - ee2[ 0]; - k01_21 = ee0[-1] - ee2[-1]; - ee0[ 0] += ee2[ 0];//ee0[ 0] = ee0[ 0] + ee2[ 0]; - ee0[-1] += ee2[-1];//ee0[-1] = ee0[-1] + ee2[-1]; - ee2[ 0] = k00_20 * A[0] - k01_21 * A[1]; - ee2[-1] = k01_21 * A[0] + k00_20 * A[1]; - A += 8; - - k00_20 = ee0[-2] - ee2[-2]; - k01_21 = ee0[-3] - ee2[-3]; - ee0[-2] += ee2[-2];//ee0[-2] = ee0[-2] + ee2[-2]; - ee0[-3] += ee2[-3];//ee0[-3] = ee0[-3] + ee2[-3]; - ee2[-2] = k00_20 * A[0] - k01_21 * A[1]; - ee2[-3] = k01_21 * A[0] + k00_20 * A[1]; - A += 8; - - k00_20 = ee0[-4] - ee2[-4]; - k01_21 = ee0[-5] - ee2[-5]; - ee0[-4] += ee2[-4];//ee0[-4] = ee0[-4] + ee2[-4]; - ee0[-5] += ee2[-5];//ee0[-5] = ee0[-5] + ee2[-5]; - ee2[-4] = k00_20 * A[0] - k01_21 * A[1]; - ee2[-5] = k01_21 * A[0] + k00_20 * A[1]; - A += 8; - - k00_20 = ee0[-6] - ee2[-6]; - k01_21 = ee0[-7] - ee2[-7]; - ee0[-6] += ee2[-6];//ee0[-6] = ee0[-6] + ee2[-6]; - ee0[-7] += ee2[-7];//ee0[-7] = ee0[-7] + ee2[-7]; - ee2[-6] = k00_20 * A[0] - k01_21 * A[1]; - ee2[-7] = k01_21 * A[0] + k00_20 * A[1]; - A += 8; - ee0 -= 8; - ee2 -= 8; - } -} - -static void imdct_step3_inner_r_loop(int lim, float *e, int d0, int k_off, float *A, int k1) -{ - int i; - float k00_20, k01_21; - - float *e0 = e + d0; - float *e2 = e0 + k_off; - - for (i=lim >> 2; i > 0; --i) { - k00_20 = e0[-0] - e2[-0]; - k01_21 = e0[-1] - e2[-1]; - e0[-0] += e2[-0];//e0[-0] = e0[-0] + e2[-0]; - e0[-1] += e2[-1];//e0[-1] = e0[-1] + e2[-1]; - e2[-0] = (k00_20)*A[0] - (k01_21) * A[1]; - e2[-1] = (k01_21)*A[0] + (k00_20) * A[1]; - - A += k1; - - k00_20 = e0[-2] - e2[-2]; - k01_21 = e0[-3] - e2[-3]; - e0[-2] += e2[-2];//e0[-2] = e0[-2] + e2[-2]; - e0[-3] += e2[-3];//e0[-3] = e0[-3] + e2[-3]; - e2[-2] = (k00_20)*A[0] - (k01_21) * A[1]; - e2[-3] = (k01_21)*A[0] + (k00_20) * A[1]; - - A += k1; - - k00_20 = e0[-4] - e2[-4]; - k01_21 = e0[-5] - e2[-5]; - e0[-4] += e2[-4];//e0[-4] = e0[-4] + e2[-4]; - e0[-5] += e2[-5];//e0[-5] = e0[-5] + e2[-5]; - e2[-4] = (k00_20)*A[0] - (k01_21) * A[1]; - e2[-5] = (k01_21)*A[0] + (k00_20) * A[1]; - - A += k1; - - k00_20 = e0[-6] - e2[-6]; - k01_21 = e0[-7] - e2[-7]; - e0[-6] += e2[-6];//e0[-6] = e0[-6] + e2[-6]; - e0[-7] += e2[-7];//e0[-7] = e0[-7] + e2[-7]; - e2[-6] = (k00_20)*A[0] - (k01_21) * A[1]; - e2[-7] = (k01_21)*A[0] + (k00_20) * A[1]; - - e0 -= 8; - e2 -= 8; - - A += k1; - } -} - -static void imdct_step3_inner_s_loop(int n, float *e, int i_off, int k_off, float *A, int a_off, int k0) -{ - int i; - float A0 = A[0]; - float A1 = A[0+1]; - float A2 = A[0+a_off]; - float A3 = A[0+a_off+1]; - float A4 = A[0+a_off*2+0]; - float A5 = A[0+a_off*2+1]; - float A6 = A[0+a_off*3+0]; - float A7 = A[0+a_off*3+1]; - - float k00,k11; - - float *ee0 = e +i_off; - float *ee2 = ee0+k_off; - - for (i=n; i > 0; --i) { - k00 = ee0[ 0] - ee2[ 0]; - k11 = ee0[-1] - ee2[-1]; - ee0[ 0] = ee0[ 0] + ee2[ 0]; - ee0[-1] = ee0[-1] + ee2[-1]; - ee2[ 0] = (k00) * A0 - (k11) * A1; - ee2[-1] = (k11) * A0 + (k00) * A1; - - k00 = ee0[-2] - ee2[-2]; - k11 = ee0[-3] - ee2[-3]; - ee0[-2] = ee0[-2] + ee2[-2]; - ee0[-3] = ee0[-3] + ee2[-3]; - ee2[-2] = (k00) * A2 - (k11) * A3; - ee2[-3] = (k11) * A2 + (k00) * A3; - - k00 = ee0[-4] - ee2[-4]; - k11 = ee0[-5] - ee2[-5]; - ee0[-4] = ee0[-4] + ee2[-4]; - ee0[-5] = ee0[-5] + ee2[-5]; - ee2[-4] = (k00) * A4 - (k11) * A5; - ee2[-5] = (k11) * A4 + (k00) * A5; - - k00 = ee0[-6] - ee2[-6]; - k11 = ee0[-7] - ee2[-7]; - ee0[-6] = ee0[-6] + ee2[-6]; - ee0[-7] = ee0[-7] + ee2[-7]; - ee2[-6] = (k00) * A6 - (k11) * A7; - ee2[-7] = (k11) * A6 + (k00) * A7; - - ee0 -= k0; - ee2 -= k0; - } -} - -static __forceinline void iter_54(float *z) -{ - float k00,k11,k22,k33; - float y0,y1,y2,y3; - - k00 = z[ 0] - z[-4]; - y0 = z[ 0] + z[-4]; - y2 = z[-2] + z[-6]; - k22 = z[-2] - z[-6]; - - z[-0] = y0 + y2; // z0 + z4 + z2 + z6 - z[-2] = y0 - y2; // z0 + z4 - z2 - z6 - - // done with y0,y2 - - k33 = z[-3] - z[-7]; - - z[-4] = k00 + k33; // z0 - z4 + z3 - z7 - z[-6] = k00 - k33; // z0 - z4 - z3 + z7 - - // done with k33 - - k11 = z[-1] - z[-5]; - y1 = z[-1] + z[-5]; - y3 = z[-3] + z[-7]; - - z[-1] = y1 + y3; // z1 + z5 + z3 + z7 - z[-3] = y1 - y3; // z1 + z5 - z3 - z7 - z[-5] = k11 - k22; // z1 - z5 + z2 - z6 - z[-7] = k11 + k22; // z1 - z5 - z2 + z6 -} - -static void imdct_step3_inner_s_loop_ld654(int n, float *e, int i_off, float *A, int base_n) -{ - int a_off = base_n >> 3; - float A2 = A[0+a_off]; - float *z = e + i_off; - float *base = z - 16 * n; - - while (z > base) { - float k00,k11; - - k00 = z[-0] - z[-8]; - k11 = z[-1] - z[-9]; - z[-0] = z[-0] + z[-8]; - z[-1] = z[-1] + z[-9]; - z[-8] = k00; - z[-9] = k11 ; - - k00 = z[ -2] - z[-10]; - k11 = z[ -3] - z[-11]; - z[ -2] = z[ -2] + z[-10]; - z[ -3] = z[ -3] + z[-11]; - z[-10] = (k00+k11) * A2; - z[-11] = (k11-k00) * A2; - - k00 = z[-12] - z[ -4]; // reverse to avoid a unary negation - k11 = z[ -5] - z[-13]; - z[ -4] = z[ -4] + z[-12]; - z[ -5] = z[ -5] + z[-13]; - z[-12] = k11; - z[-13] = k00; - - k00 = z[-14] - z[ -6]; // reverse to avoid a unary negation - k11 = z[ -7] - z[-15]; - z[ -6] = z[ -6] + z[-14]; - z[ -7] = z[ -7] + z[-15]; - z[-14] = (k00+k11) * A2; - z[-15] = (k00-k11) * A2; - - iter_54(z); - iter_54(z-8); - z -= 16; - } -} - -static void inverse_mdct(float *buffer, int n, vorb *f, int blocktype) -{ - int n2 = n >> 1, n4 = n >> 2, n8 = n >> 3, l; - int ld; - // @OPTIMIZE: reduce register pressure by using fewer variables? - int save_point = temp_alloc_save(f); - float *buf2 = (float *) temp_alloc(f, n2 * sizeof(*buf2)); - float *u=NULL,*v=NULL; - // twiddle factors - float *A = f->A[blocktype]; - - // IMDCT algorithm from "The use of multirate filter banks for coding of high quality digital audio" - // See notes about bugs in that paper in less-optimal implementation 'inverse_mdct_old' after this function. - - // kernel from paper - - - // merged: - // copy and reflect spectral data - // step 0 - - // note that it turns out that the items added together during - // this step are, in fact, being added to themselves (as reflected - // by step 0). inexplicable inefficiency! this became obvious - // once I combined the passes. - - // so there's a missing 'times 2' here (for adding X to itself). - // this propogates through linearly to the end, where the numbers - // are 1/2 too small, and need to be compensated for. - - { - float *d,*e, *AA, *e_stop; - d = &buf2[n2-2]; - AA = A; - e = &buffer[0]; - e_stop = &buffer[n2]; - while (e != e_stop) { - d[1] = (e[0] * AA[0] - e[2]*AA[1]); - d[0] = (e[0] * AA[1] + e[2]*AA[0]); - d -= 2; - AA += 2; - e += 4; - } - - e = &buffer[n2-3]; - while (d >= buf2) { - d[1] = (-e[2] * AA[0] - -e[0]*AA[1]); - d[0] = (-e[2] * AA[1] + -e[0]*AA[0]); - d -= 2; - AA += 2; - e -= 4; - } - } - - // now we use symbolic names for these, so that we can - // possibly swap their meaning as we change which operations - // are in place - - u = buffer; - v = buf2; - - // step 2 (paper output is w, now u) - // this could be in place, but the data ends up in the wrong - // place... _somebody_'s got to swap it, so this is nominated - { - float *AA = &A[n2-8]; - float *d0,*d1, *e0, *e1; - - e0 = &v[n4]; - e1 = &v[0]; - - d0 = &u[n4]; - d1 = &u[0]; - - while (AA >= A) { - float v40_20, v41_21; - - v41_21 = e0[1] - e1[1]; - v40_20 = e0[0] - e1[0]; - d0[1] = e0[1] + e1[1]; - d0[0] = e0[0] + e1[0]; - d1[1] = v41_21*AA[4] - v40_20*AA[5]; - d1[0] = v40_20*AA[4] + v41_21*AA[5]; - - v41_21 = e0[3] - e1[3]; - v40_20 = e0[2] - e1[2]; - d0[3] = e0[3] + e1[3]; - d0[2] = e0[2] + e1[2]; - d1[3] = v41_21*AA[0] - v40_20*AA[1]; - d1[2] = v40_20*AA[0] + v41_21*AA[1]; - - AA -= 8; - - d0 += 4; - d1 += 4; - e0 += 4; - e1 += 4; - } - } - - // step 3 - ld = ilog(n) - 1; // ilog is off-by-one from normal definitions - - // optimized step 3: - - // the original step3 loop can be nested r inside s or s inside r; - // it's written originally as s inside r, but this is dumb when r - // iterates many times, and s few. So I have two copies of it and - // switch between them halfway. - - // this is iteration 0 of step 3 - imdct_step3_iter0_loop(n >> 4, u, n2-1-n4*0, -(n >> 3), A); - imdct_step3_iter0_loop(n >> 4, u, n2-1-n4*1, -(n >> 3), A); - - // this is iteration 1 of step 3 - imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*0, -(n >> 4), A, 16); - imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*1, -(n >> 4), A, 16); - imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*2, -(n >> 4), A, 16); - imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*3, -(n >> 4), A, 16); - - l=2; - for (; l < (ld-3)>>1; ++l) { - int k0 = n >> (l+2), k0_2 = k0>>1; - int lim = 1 << (l+1); - int i; - for (i=0; i < lim; ++i) - imdct_step3_inner_r_loop(n >> (l+4), u, n2-1 - k0*i, -k0_2, A, 1 << (l+3)); - } - - for (; l < ld-6; ++l) { - int k0 = n >> (l+2), k1 = 1 << (l+3), k0_2 = k0>>1; - int rlim = n >> (l+6), r; - int lim = 1 << (l+1); - int i_off; - float *A0 = A; - i_off = n2-1; - for (r=rlim; r > 0; --r) { - imdct_step3_inner_s_loop(lim, u, i_off, -k0_2, A0, k1, k0); - A0 += k1*4; - i_off -= 8; - } - } - - // iterations with count: - // ld-6,-5,-4 all interleaved together - // the big win comes from getting rid of needless flops - // due to the constants on pass 5 & 4 being all 1 and 0; - // combining them to be simultaneous to improve cache made little difference - imdct_step3_inner_s_loop_ld654(n >> 5, u, n2-1, A, n); - - // output is u - - // step 4, 5, and 6 - // cannot be in-place because of step 5 - { - uint16 *bitrev = f->bit_reverse[blocktype]; - // weirdly, I'd have thought reading sequentially and writing - // erratically would have been better than vice-versa, but in - // fact that's not what my testing showed. (That is, with - // j = bitreverse(i), do you read i and write j, or read j and write i.) - - float *d0 = &v[n4-4]; - float *d1 = &v[n2-4]; - while (d0 >= v) { - int k4; - - k4 = bitrev[0]; - d1[3] = u[k4+0]; - d1[2] = u[k4+1]; - d0[3] = u[k4+2]; - d0[2] = u[k4+3]; - - k4 = bitrev[1]; - d1[1] = u[k4+0]; - d1[0] = u[k4+1]; - d0[1] = u[k4+2]; - d0[0] = u[k4+3]; - - d0 -= 4; - d1 -= 4; - bitrev += 2; - } - } - // (paper output is u, now v) - - - // data must be in buf2 - assert(v == buf2); - - // step 7 (paper output is v, now v) - // this is now in place - { - float *C = f->C[blocktype]; - float *d, *e; - - d = v; - e = v + n2 - 4; - - while (d < e) { - float a02,a11,b0,b1,b2,b3; - - a02 = d[0] - e[2]; - a11 = d[1] + e[3]; - - b0 = C[1]*a02 + C[0]*a11; - b1 = C[1]*a11 - C[0]*a02; - - b2 = d[0] + e[ 2]; - b3 = d[1] - e[ 3]; - - d[0] = b2 + b0; - d[1] = b3 + b1; - e[2] = b2 - b0; - e[3] = b1 - b3; - - a02 = d[2] - e[0]; - a11 = d[3] + e[1]; - - b0 = C[3]*a02 + C[2]*a11; - b1 = C[3]*a11 - C[2]*a02; - - b2 = d[2] + e[ 0]; - b3 = d[3] - e[ 1]; - - d[2] = b2 + b0; - d[3] = b3 + b1; - e[0] = b2 - b0; - e[1] = b1 - b3; - - C += 4; - d += 4; - e -= 4; - } - } - - // data must be in buf2 - - - // step 8+decode (paper output is X, now buffer) - // this generates pairs of data a la 8 and pushes them directly through - // the decode kernel (pushing rather than pulling) to avoid having - // to make another pass later - - // this cannot POSSIBLY be in place, so we refer to the buffers directly - - { - float *d0,*d1,*d2,*d3; - - float *B = f->B[blocktype] + n2 - 8; - float *e = buf2 + n2 - 8; - d0 = &buffer[0]; - d1 = &buffer[n2-4]; - d2 = &buffer[n2]; - d3 = &buffer[n-4]; - while (e >= v) { - float p0,p1,p2,p3; - - p3 = e[6]*B[7] - e[7]*B[6]; - p2 = -e[6]*B[6] - e[7]*B[7]; - - d0[0] = p3; - d1[3] = - p3; - d2[0] = p2; - d3[3] = p2; - - p1 = e[4]*B[5] - e[5]*B[4]; - p0 = -e[4]*B[4] - e[5]*B[5]; - - d0[1] = p1; - d1[2] = - p1; - d2[1] = p0; - d3[2] = p0; - - p3 = e[2]*B[3] - e[3]*B[2]; - p2 = -e[2]*B[2] - e[3]*B[3]; - - d0[2] = p3; - d1[1] = - p3; - d2[2] = p2; - d3[1] = p2; - - p1 = e[0]*B[1] - e[1]*B[0]; - p0 = -e[0]*B[0] - e[1]*B[1]; - - d0[3] = p1; - d1[0] = - p1; - d2[3] = p0; - d3[0] = p0; - - B -= 8; - e -= 8; - d0 += 4; - d2 += 4; - d1 -= 4; - d3 -= 4; - } - } - - temp_free(f,buf2); - temp_alloc_restore(f,save_point); -} - -#if 0 -// this is the original version of the above code, if you want to optimize it from scratch -void inverse_mdct_naive(float *buffer, int n) -{ - float s; - float A[1 << 12], B[1 << 12], C[1 << 11]; - int i,k,k2,k4, n2 = n >> 1, n4 = n >> 2, n8 = n >> 3, l; - int n3_4 = n - n4, ld; - // how can they claim this only uses N words?! - // oh, because they're only used sparsely, whoops - float u[1 << 13], X[1 << 13], v[1 << 13], w[1 << 13]; - // set up twiddle factors - - for (k=k2=0; k < n4; ++k,k2+=2) { - A[k2 ] = (float) cos(4*k*M_PI/n); - A[k2+1] = (float) -sin(4*k*M_PI/n); - B[k2 ] = (float) cos((k2+1)*M_PI/n/2); - B[k2+1] = (float) sin((k2+1)*M_PI/n/2); - } - for (k=k2=0; k < n8; ++k,k2+=2) { - C[k2 ] = (float) cos(2*(k2+1)*M_PI/n); - C[k2+1] = (float) -sin(2*(k2+1)*M_PI/n); - } - - // IMDCT algorithm from "The use of multirate filter banks for coding of high quality digital audio" - // Note there are bugs in that pseudocode, presumably due to them attempting - // to rename the arrays nicely rather than representing the way their actual - // implementation bounces buffers back and forth. As a result, even in the - // "some formulars corrected" version, a direct implementation fails. These - // are noted below as "paper bug". - - // copy and reflect spectral data - for (k=0; k < n2; ++k) u[k] = buffer[k]; - for ( ; k < n ; ++k) u[k] = -buffer[n - k - 1]; - // kernel from paper - // step 1 - for (k=k2=k4=0; k < n4; k+=1, k2+=2, k4+=4) { - v[n-k4-1] = (u[k4] - u[n-k4-1]) * A[k2] - (u[k4+2] - u[n-k4-3])*A[k2+1]; - v[n-k4-3] = (u[k4] - u[n-k4-1]) * A[k2+1] + (u[k4+2] - u[n-k4-3])*A[k2]; - } - // step 2 - for (k=k4=0; k < n8; k+=1, k4+=4) { - w[n2+3+k4] = v[n2+3+k4] + v[k4+3]; - w[n2+1+k4] = v[n2+1+k4] + v[k4+1]; - w[k4+3] = (v[n2+3+k4] - v[k4+3])*A[n2-4-k4] - (v[n2+1+k4]-v[k4+1])*A[n2-3-k4]; - w[k4+1] = (v[n2+1+k4] - v[k4+1])*A[n2-4-k4] + (v[n2+3+k4]-v[k4+3])*A[n2-3-k4]; - } - // step 3 - ld = ilog(n) - 1; // ilog is off-by-one from normal definitions - for (l=0; l < ld-3; ++l) { - int k0 = n >> (l+2), k1 = 1 << (l+3); - int rlim = n >> (l+4), r4, r; - int s2lim = 1 << (l+2), s2; - for (r=r4=0; r < rlim; r4+=4,++r) { - for (s2=0; s2 < s2lim; s2+=2) { - u[n-1-k0*s2-r4] = w[n-1-k0*s2-r4] + w[n-1-k0*(s2+1)-r4]; - u[n-3-k0*s2-r4] = w[n-3-k0*s2-r4] + w[n-3-k0*(s2+1)-r4]; - u[n-1-k0*(s2+1)-r4] = (w[n-1-k0*s2-r4] - w[n-1-k0*(s2+1)-r4]) * A[r*k1] - - (w[n-3-k0*s2-r4] - w[n-3-k0*(s2+1)-r4]) * A[r*k1+1]; - u[n-3-k0*(s2+1)-r4] = (w[n-3-k0*s2-r4] - w[n-3-k0*(s2+1)-r4]) * A[r*k1] - + (w[n-1-k0*s2-r4] - w[n-1-k0*(s2+1)-r4]) * A[r*k1+1]; - } - } - if (l+1 < ld-3) { - // paper bug: ping-ponging of u&w here is omitted - memcpy(w, u, sizeof(u)); - } - } - - // step 4 - for (i=0; i < n8; ++i) { - int j = bit_reverse(i) >> (32-ld+3); - assert(j < n8); - if (i == j) { - // paper bug: original code probably swapped in place; if copying, - // need to directly copy in this case - int i8 = i << 3; - v[i8+1] = u[i8+1]; - v[i8+3] = u[i8+3]; - v[i8+5] = u[i8+5]; - v[i8+7] = u[i8+7]; - } else if (i < j) { - int i8 = i << 3, j8 = j << 3; - v[j8+1] = u[i8+1], v[i8+1] = u[j8 + 1]; - v[j8+3] = u[i8+3], v[i8+3] = u[j8 + 3]; - v[j8+5] = u[i8+5], v[i8+5] = u[j8 + 5]; - v[j8+7] = u[i8+7], v[i8+7] = u[j8 + 7]; - } - } - // step 5 - for (k=0; k < n2; ++k) { - w[k] = v[k*2+1]; - } - // step 6 - for (k=k2=k4=0; k < n8; ++k, k2 += 2, k4 += 4) { - u[n-1-k2] = w[k4]; - u[n-2-k2] = w[k4+1]; - u[n3_4 - 1 - k2] = w[k4+2]; - u[n3_4 - 2 - k2] = w[k4+3]; - } - // step 7 - for (k=k2=0; k < n8; ++k, k2 += 2) { - v[n2 + k2 ] = ( u[n2 + k2] + u[n-2-k2] + C[k2+1]*(u[n2+k2]-u[n-2-k2]) + C[k2]*(u[n2+k2+1]+u[n-2-k2+1]))/2; - v[n-2 - k2] = ( u[n2 + k2] + u[n-2-k2] - C[k2+1]*(u[n2+k2]-u[n-2-k2]) - C[k2]*(u[n2+k2+1]+u[n-2-k2+1]))/2; - v[n2+1+ k2] = ( u[n2+1+k2] - u[n-1-k2] + C[k2+1]*(u[n2+1+k2]+u[n-1-k2]) - C[k2]*(u[n2+k2]-u[n-2-k2]))/2; - v[n-1 - k2] = (-u[n2+1+k2] + u[n-1-k2] + C[k2+1]*(u[n2+1+k2]+u[n-1-k2]) - C[k2]*(u[n2+k2]-u[n-2-k2]))/2; - } - // step 8 - for (k=k2=0; k < n4; ++k,k2 += 2) { - X[k] = v[k2+n2]*B[k2 ] + v[k2+1+n2]*B[k2+1]; - X[n2-1-k] = v[k2+n2]*B[k2+1] - v[k2+1+n2]*B[k2 ]; - } - - // decode kernel to output - // determined the following value experimentally - // (by first figuring out what made inverse_mdct_slow work); then matching that here - // (probably vorbis encoder premultiplies by n or n/2, to save it on the decoder?) - s = 0.5; // theoretically would be n4 - - // [[[ note! the s value of 0.5 is compensated for by the B[] in the current code, - // so it needs to use the "old" B values to behave correctly, or else - // set s to 1.0 ]]] - for (i=0; i < n4 ; ++i) buffer[i] = s * X[i+n4]; - for ( ; i < n3_4; ++i) buffer[i] = -s * X[n3_4 - i - 1]; - for ( ; i < n ; ++i) buffer[i] = -s * X[i - n3_4]; -} -#endif - -static float *get_window(vorb *f, int len) -{ - len <<= 1; - if (len == f->blocksize_0) return f->window[0]; - if (len == f->blocksize_1) return f->window[1]; - assert(0); - return NULL; -} - -#ifndef STB_VORBIS_NO_DEFER_FLOOR -typedef int16 YTYPE; -#else -typedef int YTYPE; -#endif -static int do_floor(vorb *f, Mapping *map, int i, int n, float *target, YTYPE *finalY, uint8 *step2_flag) -{ - int n2 = n >> 1; - int s = map->chan[i].mux, floor; - floor = map->submap_floor[s]; - if (f->floor_types[floor] == 0) { - return error(f, VORBIS_invalid_stream); - } else { - Floor1 *g = &f->floor_config[floor].floor1; - int j,q; - int lx = 0, ly = finalY[0] * g->floor1_multiplier; - for (q=1; q < g->values; ++q) { - j = g->sorted_order[q]; - #ifndef STB_VORBIS_NO_DEFER_FLOOR - if (finalY[j] >= 0) - #else - if (step2_flag[j]) - #endif - { - int hy = finalY[j] * g->floor1_multiplier; - int hx = g->Xlist[j]; - if (lx != hx) - draw_line(target, lx,ly, hx,hy, n2); - CHECK(f); - lx = hx, ly = hy; - } - } - if (lx < n2) { - // optimization of: draw_line(target, lx,ly, n,ly, n2); - for (j=lx; j < n2; ++j) - LINE_OP(target[j], inverse_db_table[ly]); - CHECK(f); - } - } - return TRUE; -} - -// The meaning of "left" and "right" -// -// For a given frame: -// we compute samples from 0..n -// window_center is n/2 -// we'll window and mix the samples from left_start to left_end with data from the previous frame -// all of the samples from left_end to right_start can be output without mixing; however, -// this interval is 0-length except when transitioning between short and long frames -// all of the samples from right_start to right_end need to be mixed with the next frame, -// which we don't have, so those get saved in a buffer -// frame N's right_end-right_start, the number of samples to mix with the next frame, -// has to be the same as frame N+1's left_end-left_start (which they are by -// construction) - -static int vorbis_decode_initial(vorb *f, int *p_left_start, int *p_left_end, int *p_right_start, int *p_right_end, int *mode) -{ - Mode *m; - int i, n, prev, next, window_center; - f->channel_buffer_start = f->channel_buffer_end = 0; - - retry: - if (f->eof) return FALSE; - if (!maybe_start_packet(f)) - return FALSE; - // check packet type - if (get_bits(f,1) != 0) { - if (IS_PUSH_MODE(f)) - return error(f,VORBIS_bad_packet_type); - while (EOP != get8_packet(f)); - goto retry; - } - - if (f->alloc.alloc_buffer) - assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset); - - i = get_bits(f, ilog(f->mode_count-1)); - if (i == EOP) return FALSE; - if (i >= f->mode_count) return FALSE; - *mode = i; - m = f->mode_config + i; - if (m->blockflag) { - n = f->blocksize_1; - prev = get_bits(f,1); - next = get_bits(f,1); - } else { - prev = next = 0; - n = f->blocksize_0; - } - -// WINDOWING - - window_center = n >> 1; - if (m->blockflag && !prev) { - *p_left_start = (n - f->blocksize_0) >> 2; - *p_left_end = (n + f->blocksize_0) >> 2; - } else { - *p_left_start = 0; - *p_left_end = window_center; - } - if (m->blockflag && !next) { - *p_right_start = (n*3 - f->blocksize_0) >> 2; - *p_right_end = (n*3 + f->blocksize_0) >> 2; - } else { - *p_right_start = window_center; - *p_right_end = n; - } - - return TRUE; -} - -static int vorbis_decode_packet_rest(vorb *f, int *len, Mode *m, int left_start, int left_end, int right_start, int right_end, int *p_left) -{ - Mapping *map; - int i,j,k,n,n2; - int zero_channel[256]; - int really_zero_channel[256]; - -// WINDOWING - - n = f->blocksize[m->blockflag]; - map = &f->mapping[m->mapping]; - -// FLOORS - n2 = n >> 1; - - CHECK(f); - - for (i=0; i < f->channels; ++i) { - int s = map->chan[i].mux, floor; - zero_channel[i] = FALSE; - floor = map->submap_floor[s]; - if (f->floor_types[floor] == 0) { - return error(f, VORBIS_invalid_stream); - } else { - Floor1 *g = &f->floor_config[floor].floor1; - if (get_bits(f, 1)) { - short *finalY; - uint8 step2_flag[256]; - static int range_list[4] = { 256, 128, 86, 64 }; - int range = range_list[g->floor1_multiplier-1]; - int offset = 2; - finalY = f->finalY[i]; - finalY[0] = get_bits(f, ilog(range)-1); - finalY[1] = get_bits(f, ilog(range)-1); - for (j=0; j < g->partitions; ++j) { - int pclass = g->partition_class_list[j]; - int cdim = g->class_dimensions[pclass]; - int cbits = g->class_subclasses[pclass]; - int csub = (1 << cbits)-1; - int cval = 0; - if (cbits) { - Codebook *c = f->codebooks + g->class_masterbooks[pclass]; - DECODE(cval,f,c); - } - for (k=0; k < cdim; ++k) { - int book = g->subclass_books[pclass][cval & csub]; - cval = cval >> cbits; - if (book >= 0) { - int temp; - Codebook *c = f->codebooks + book; - DECODE(temp,f,c); - finalY[offset++] = temp; - } else - finalY[offset++] = 0; - } - } - if (f->valid_bits == INVALID_BITS) goto error; // behavior according to spec - step2_flag[0] = step2_flag[1] = 1; - for (j=2; j < g->values; ++j) { - int low, high, pred, highroom, lowroom, room, val; - low = g->neighbors[j][0]; - high = g->neighbors[j][1]; - //neighbors(g->Xlist, j, &low, &high); - pred = predict_point(g->Xlist[j], g->Xlist[low], g->Xlist[high], finalY[low], finalY[high]); - val = finalY[j]; - highroom = range - pred; - lowroom = pred; - if (highroom < lowroom) - room = highroom * 2; - else - room = lowroom * 2; - if (val) { - step2_flag[low] = step2_flag[high] = 1; - step2_flag[j] = 1; - if (val >= room) - if (highroom > lowroom) - finalY[j] = val - lowroom + pred; - else - finalY[j] = pred - val + highroom - 1; - else - if (val & 1) - finalY[j] = pred - ((val+1)>>1); - else - finalY[j] = pred + (val>>1); - } else { - step2_flag[j] = 0; - finalY[j] = pred; - } - } - -#ifdef STB_VORBIS_NO_DEFER_FLOOR - do_floor(f, map, i, n, f->floor_buffers[i], finalY, step2_flag); -#else - // defer final floor computation until _after_ residue - for (j=0; j < g->values; ++j) { - if (!step2_flag[j]) - finalY[j] = -1; - } -#endif - } else { - error: - zero_channel[i] = TRUE; - } - // So we just defer everything else to later - - // at this point we've decoded the floor into buffer - } - } - CHECK(f); - // at this point we've decoded all floors - - if (f->alloc.alloc_buffer) - assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset); - - // re-enable coupled channels if necessary - memcpy(really_zero_channel, zero_channel, sizeof(really_zero_channel[0]) * f->channels); - for (i=0; i < map->coupling_steps; ++i) - if (!zero_channel[map->chan[i].magnitude] || !zero_channel[map->chan[i].angle]) { - zero_channel[map->chan[i].magnitude] = zero_channel[map->chan[i].angle] = FALSE; - } - - CHECK(f); -// RESIDUE DECODE - for (i=0; i < map->submaps; ++i) { - float *residue_buffers[STB_VORBIS_MAX_CHANNELS]; - int r; - uint8 do_not_decode[256]; - int ch = 0; - for (j=0; j < f->channels; ++j) { - if (map->chan[j].mux == i) { - if (zero_channel[j]) { - do_not_decode[ch] = TRUE; - residue_buffers[ch] = NULL; - } else { - do_not_decode[ch] = FALSE; - residue_buffers[ch] = f->channel_buffers[j]; - } - ++ch; - } - } - r = map->submap_residue[i]; - decode_residue(f, residue_buffers, ch, n2, r, do_not_decode); - } - - if (f->alloc.alloc_buffer) - assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset); - CHECK(f); - -// INVERSE COUPLING - for (i = map->coupling_steps-1; i >= 0; --i) { - int n2 = n >> 1; - float *m = f->channel_buffers[map->chan[i].magnitude]; - float *a = f->channel_buffers[map->chan[i].angle ]; - for (j=0; j < n2; ++j) { - float a2,m2; - if (m[j] > 0) - if (a[j] > 0) - m2 = m[j], a2 = m[j] - a[j]; - else - a2 = m[j], m2 = m[j] + a[j]; - else - if (a[j] > 0) - m2 = m[j], a2 = m[j] + a[j]; - else - a2 = m[j], m2 = m[j] - a[j]; - m[j] = m2; - a[j] = a2; - } - } - CHECK(f); - - // finish decoding the floors -#ifndef STB_VORBIS_NO_DEFER_FLOOR - for (i=0; i < f->channels; ++i) { - if (really_zero_channel[i]) { - memset(f->channel_buffers[i], 0, sizeof(*f->channel_buffers[i]) * n2); - } else { - do_floor(f, map, i, n, f->channel_buffers[i], f->finalY[i], NULL); - } - } -#else - for (i=0; i < f->channels; ++i) { - if (really_zero_channel[i]) { - memset(f->channel_buffers[i], 0, sizeof(*f->channel_buffers[i]) * n2); - } else { - for (j=0; j < n2; ++j) - f->channel_buffers[i][j] *= f->floor_buffers[i][j]; - } - } -#endif - -// INVERSE MDCT - CHECK(f); - for (i=0; i < f->channels; ++i) - inverse_mdct(f->channel_buffers[i], n, f, m->blockflag); - CHECK(f); - - // this shouldn't be necessary, unless we exited on an error - // and want to flush to get to the next packet - flush_packet(f); - - if (f->first_decode) { - // assume we start so first non-discarded sample is sample 0 - // this isn't to spec, but spec would require us to read ahead - // and decode the size of all current frames--could be done, - // but presumably it's not a commonly used feature - f->current_loc = -n2; // start of first frame is positioned for discard - // we might have to discard samples "from" the next frame too, - // if we're lapping a large block then a small at the start? - f->discard_samples_deferred = n - right_end; - f->current_loc_valid = TRUE; - f->first_decode = FALSE; - } else if (f->discard_samples_deferred) { - if (f->discard_samples_deferred >= right_start - left_start) { - f->discard_samples_deferred -= (right_start - left_start); - left_start = right_start; - *p_left = left_start; - } else { - left_start += f->discard_samples_deferred; - *p_left = left_start; - f->discard_samples_deferred = 0; - } - } else if (f->previous_length == 0 && f->current_loc_valid) { - // we're recovering from a seek... that means we're going to discard - // the samples from this packet even though we know our position from - // the last page header, so we need to update the position based on - // the discarded samples here - // but wait, the code below is going to add this in itself even - // on a discard, so we don't need to do it here... - } - - // check if we have ogg information about the sample # for this packet - if (f->last_seg_which == f->end_seg_with_known_loc) { - // if we have a valid current loc, and this is final: - if (f->current_loc_valid && (f->page_flag & PAGEFLAG_last_page)) { - uint32 current_end = f->known_loc_for_packet - (n-right_end); - // then let's infer the size of the (probably) short final frame - if (current_end < f->current_loc + (right_end-left_start)) { - if (current_end < f->current_loc) { - // negative truncation, that's impossible! - *len = 0; - } else { - *len = current_end - f->current_loc; - } - *len += left_start; - if (*len > right_end) *len = right_end; // this should never happen - f->current_loc += *len; - return TRUE; - } - } - // otherwise, just set our sample loc - // guess that the ogg granule pos refers to the _middle_ of the - // last frame? - // set f->current_loc to the position of left_start - f->current_loc = f->known_loc_for_packet - (n2-left_start); - f->current_loc_valid = TRUE; - } - if (f->current_loc_valid) - f->current_loc += (right_start - left_start); - - if (f->alloc.alloc_buffer) - assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset); - *len = right_end; // ignore samples after the window goes to 0 - CHECK(f); - - return TRUE; -} - -static int vorbis_decode_packet(vorb *f, int *len, int *p_left, int *p_right) -{ - int mode, left_end, right_end; - if (!vorbis_decode_initial(f, p_left, &left_end, p_right, &right_end, &mode)) return 0; - return vorbis_decode_packet_rest(f, len, f->mode_config + mode, *p_left, left_end, *p_right, right_end, p_left); -} - -static int vorbis_finish_frame(stb_vorbis *f, int len, int left, int right) -{ - int prev,i,j; - // we use right&left (the start of the right- and left-window sin()-regions) - // to determine how much to return, rather than inferring from the rules - // (same result, clearer code); 'left' indicates where our sin() window - // starts, therefore where the previous window's right edge starts, and - // therefore where to start mixing from the previous buffer. 'right' - // indicates where our sin() ending-window starts, therefore that's where - // we start saving, and where our returned-data ends. - - // mixin from previous window - if (f->previous_length) { - int i,j, n = f->previous_length; - float *w = get_window(f, n); - for (i=0; i < f->channels; ++i) { - for (j=0; j < n; ++j) - f->channel_buffers[i][left+j] = - f->channel_buffers[i][left+j]*w[ j] + - f->previous_window[i][ j]*w[n-1-j]; - } - } - - prev = f->previous_length; - - // last half of this data becomes previous window - f->previous_length = len - right; - - // @OPTIMIZE: could avoid this copy by double-buffering the - // output (flipping previous_window with channel_buffers), but - // then previous_window would have to be 2x as large, and - // channel_buffers couldn't be temp mem (although they're NOT - // currently temp mem, they could be (unless we want to level - // performance by spreading out the computation)) - for (i=0; i < f->channels; ++i) - for (j=0; right+j < len; ++j) - f->previous_window[i][j] = f->channel_buffers[i][right+j]; - - if (!prev) - // there was no previous packet, so this data isn't valid... - // this isn't entirely true, only the would-have-overlapped data - // isn't valid, but this seems to be what the spec requires - return 0; - - // truncate a short frame - if (len < right) right = len; - - f->samples_output += right-left; - - return right - left; -} - -static void vorbis_pump_first_frame(stb_vorbis *f) -{ - int len, right, left; - if (vorbis_decode_packet(f, &len, &left, &right)) - vorbis_finish_frame(f, len, left, right); -} - -#ifndef STB_VORBIS_NO_PUSHDATA_API -static int is_whole_packet_present(stb_vorbis *f, int end_page) -{ - // make sure that we have the packet available before continuing... - // this requires a full ogg parse, but we know we can fetch from f->stream - - // instead of coding this out explicitly, we could save the current read state, - // read the next packet with get8() until end-of-packet, check f->eof, then - // reset the state? but that would be slower, esp. since we'd have over 256 bytes - // of state to restore (primarily the page segment table) - - int s = f->next_seg, first = TRUE; - uint8 *p = f->stream; - - if (s != -1) { // if we're not starting the packet with a 'continue on next page' flag - for (; s < f->segment_count; ++s) { - p += f->segments[s]; - if (f->segments[s] < 255) // stop at first short segment - break; - } - // either this continues, or it ends it... - if (end_page) - if (s < f->segment_count-1) return error(f, VORBIS_invalid_stream); - if (s == f->segment_count) - s = -1; // set 'crosses page' flag - if (p > f->stream_end) return error(f, VORBIS_need_more_data); - first = FALSE; - } - for (; s == -1;) { - uint8 *q; - int n; - - // check that we have the page header ready - if (p + 26 >= f->stream_end) return error(f, VORBIS_need_more_data); - // validate the page - if (memcmp(p, ogg_page_header, 4)) return error(f, VORBIS_invalid_stream); - if (p[4] != 0) return error(f, VORBIS_invalid_stream); - if (first) { // the first segment must NOT have 'continued_packet', later ones MUST - if (f->previous_length) - if ((p[5] & PAGEFLAG_continued_packet)) return error(f, VORBIS_invalid_stream); - // if no previous length, we're resynching, so we can come in on a continued-packet, - // which we'll just drop - } else { - if (!(p[5] & PAGEFLAG_continued_packet)) return error(f, VORBIS_invalid_stream); - } - n = p[26]; // segment counts - q = p+27; // q points to segment table - p = q + n; // advance past header - // make sure we've read the segment table - if (p > f->stream_end) return error(f, VORBIS_need_more_data); - for (s=0; s < n; ++s) { - p += q[s]; - if (q[s] < 255) - break; - } - if (end_page) - if (s < n-1) return error(f, VORBIS_invalid_stream); - if (s == n) - s = -1; // set 'crosses page' flag - if (p > f->stream_end) return error(f, VORBIS_need_more_data); - first = FALSE; - } - return TRUE; -} -#endif // !STB_VORBIS_NO_PUSHDATA_API - -static int start_decoder(vorb *f) -{ - uint8 header[6], x,y; - int len,i,j,k, max_submaps = 0; - int longest_floorlist=0; - - // first page, first packet - - if (!start_page(f)) return FALSE; - // validate page flag - if (!(f->page_flag & PAGEFLAG_first_page)) return error(f, VORBIS_invalid_first_page); - if (f->page_flag & PAGEFLAG_last_page) return error(f, VORBIS_invalid_first_page); - if (f->page_flag & PAGEFLAG_continued_packet) return error(f, VORBIS_invalid_first_page); - // check for expected packet length - if (f->segment_count != 1) return error(f, VORBIS_invalid_first_page); - if (f->segments[0] != 30) return error(f, VORBIS_invalid_first_page); - // read packet - // check packet header - if (get8(f) != VORBIS_packet_id) return error(f, VORBIS_invalid_first_page); - if (!getn(f, header, 6)) return error(f, VORBIS_unexpected_eof); - if (!vorbis_validate(header)) return error(f, VORBIS_invalid_first_page); - // vorbis_version - if (get32(f) != 0) return error(f, VORBIS_invalid_first_page); - f->channels = get8(f); if (!f->channels) return error(f, VORBIS_invalid_first_page); - if (f->channels > STB_VORBIS_MAX_CHANNELS) return error(f, VORBIS_too_many_channels); - f->sample_rate = get32(f); if (!f->sample_rate) return error(f, VORBIS_invalid_first_page); - get32(f); // bitrate_maximum - get32(f); // bitrate_nominal - get32(f); // bitrate_minimum - x = get8(f); - { - int log0,log1; - log0 = x & 15; - log1 = x >> 4; - f->blocksize_0 = 1 << log0; - f->blocksize_1 = 1 << log1; - if (log0 < 6 || log0 > 13) return error(f, VORBIS_invalid_setup); - if (log1 < 6 || log1 > 13) return error(f, VORBIS_invalid_setup); - if (log0 > log1) return error(f, VORBIS_invalid_setup); - } - - // framing_flag - x = get8(f); - if (!(x & 1)) return error(f, VORBIS_invalid_first_page); - - // second packet! - if (!start_page(f)) return FALSE; - - if (!start_packet(f)) return FALSE; - do { - len = next_segment(f); - skip(f, len); - f->bytes_in_seg = 0; - } while (len); - - // third packet! - if (!start_packet(f)) return FALSE; - - #ifndef STB_VORBIS_NO_PUSHDATA_API - if (IS_PUSH_MODE(f)) { - if (!is_whole_packet_present(f, TRUE)) { - // convert error in ogg header to write type - if (f->error == VORBIS_invalid_stream) - f->error = VORBIS_invalid_setup; - return FALSE; - } - } - #endif - - crc32_init(); // always init it, to avoid multithread race conditions - - if (get8_packet(f) != VORBIS_packet_setup) return error(f, VORBIS_invalid_setup); - for (i=0; i < 6; ++i) header[i] = get8_packet(f); - if (!vorbis_validate(header)) return error(f, VORBIS_invalid_setup); - - // codebooks - - f->codebook_count = get_bits(f,8) + 1; - f->codebooks = (Codebook *) setup_malloc(f, sizeof(*f->codebooks) * f->codebook_count); - if (f->codebooks == NULL) return error(f, VORBIS_outofmem); - memset(f->codebooks, 0, sizeof(*f->codebooks) * f->codebook_count); - for (i=0; i < f->codebook_count; ++i) { - uint32 *values; - int ordered, sorted_count; - int total=0; - uint8 *lengths; - Codebook *c = f->codebooks+i; - CHECK(f); - x = get_bits(f, 8); if (x != 0x42) return error(f, VORBIS_invalid_setup); - x = get_bits(f, 8); if (x != 0x43) return error(f, VORBIS_invalid_setup); - x = get_bits(f, 8); if (x != 0x56) return error(f, VORBIS_invalid_setup); - x = get_bits(f, 8); - c->dimensions = (get_bits(f, 8)<<8) + x; - x = get_bits(f, 8); - y = get_bits(f, 8); - c->entries = (get_bits(f, 8)<<16) + (y<<8) + x; - ordered = get_bits(f,1); - c->sparse = ordered ? 0 : get_bits(f,1); - - if (c->dimensions == 0 && c->entries != 0) return error(f, VORBIS_invalid_setup); - - if (c->sparse) - lengths = (uint8 *) setup_temp_malloc(f, c->entries); - else - lengths = c->codeword_lengths = (uint8 *) setup_malloc(f, c->entries); - - if (!lengths) return error(f, VORBIS_outofmem); - - if (ordered) { - int current_entry = 0; - int current_length = get_bits(f,5) + 1; - while (current_entry < c->entries) { - int limit = c->entries - current_entry; - int n = get_bits(f, ilog(limit)); - if (current_entry + n > (int) c->entries) { return error(f, VORBIS_invalid_setup); } - memset(lengths + current_entry, current_length, n); - current_entry += n; - ++current_length; - } - } else { - for (j=0; j < c->entries; ++j) { - int present = c->sparse ? get_bits(f,1) : 1; - if (present) { - lengths[j] = get_bits(f, 5) + 1; - ++total; - if (lengths[j] == 32) - return error(f, VORBIS_invalid_setup); - } else { - lengths[j] = NO_CODE; - } - } - } - - if (c->sparse && total >= c->entries >> 2) { - // convert sparse items to non-sparse! - if (c->entries > (int) f->setup_temp_memory_required) - f->setup_temp_memory_required = c->entries; - - c->codeword_lengths = (uint8 *) setup_malloc(f, c->entries); - if (c->codeword_lengths == NULL) return error(f, VORBIS_outofmem); - memcpy(c->codeword_lengths, lengths, c->entries); - setup_temp_free(f, lengths, c->entries); // note this is only safe if there have been no intervening temp mallocs! - lengths = c->codeword_lengths; - c->sparse = 0; - } - - // compute the size of the sorted tables - if (c->sparse) { - sorted_count = total; - } else { - sorted_count = 0; - #ifndef STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH - for (j=0; j < c->entries; ++j) - if (lengths[j] > STB_VORBIS_FAST_HUFFMAN_LENGTH && lengths[j] != NO_CODE) - ++sorted_count; - #endif - } - - c->sorted_entries = sorted_count; - values = NULL; - - CHECK(f); - if (!c->sparse) { - c->codewords = (uint32 *) setup_malloc(f, sizeof(c->codewords[0]) * c->entries); - if (!c->codewords) return error(f, VORBIS_outofmem); - } else { - unsigned int size; - if (c->sorted_entries) { - c->codeword_lengths = (uint8 *) setup_malloc(f, c->sorted_entries); - if (!c->codeword_lengths) return error(f, VORBIS_outofmem); - c->codewords = (uint32 *) setup_temp_malloc(f, sizeof(*c->codewords) * c->sorted_entries); - if (!c->codewords) return error(f, VORBIS_outofmem); - values = (uint32 *) setup_temp_malloc(f, sizeof(*values) * c->sorted_entries); - if (!values) return error(f, VORBIS_outofmem); - } - size = c->entries + (sizeof(*c->codewords) + sizeof(*values)) * c->sorted_entries; - if (size > f->setup_temp_memory_required) - f->setup_temp_memory_required = size; - } - - if (!compute_codewords(c, lengths, c->entries, values)) { - if (c->sparse) setup_temp_free(f, values, 0); - return error(f, VORBIS_invalid_setup); - } - - if (c->sorted_entries) { - // allocate an extra slot for sentinels - c->sorted_codewords = (uint32 *) setup_malloc(f, sizeof(*c->sorted_codewords) * (c->sorted_entries+1)); - if (c->sorted_codewords == NULL) return error(f, VORBIS_outofmem); - // allocate an extra slot at the front so that c->sorted_values[-1] is defined - // so that we can catch that case without an extra if - c->sorted_values = ( int *) setup_malloc(f, sizeof(*c->sorted_values ) * (c->sorted_entries+1)); - if (c->sorted_values == NULL) return error(f, VORBIS_outofmem); - ++c->sorted_values; - c->sorted_values[-1] = -1; - compute_sorted_huffman(c, lengths, values); - } - - if (c->sparse) { - setup_temp_free(f, values, sizeof(*values)*c->sorted_entries); - setup_temp_free(f, c->codewords, sizeof(*c->codewords)*c->sorted_entries); - setup_temp_free(f, lengths, c->entries); - c->codewords = NULL; - } - - compute_accelerated_huffman(c); - - CHECK(f); - c->lookup_type = get_bits(f, 4); - if (c->lookup_type > 2) return error(f, VORBIS_invalid_setup); - if (c->lookup_type > 0) { - uint16 *mults; - c->minimum_value = float32_unpack(get_bits(f, 32)); - c->delta_value = float32_unpack(get_bits(f, 32)); - c->value_bits = get_bits(f, 4)+1; - c->sequence_p = get_bits(f,1); - if (c->lookup_type == 1) { - c->lookup_values = lookup1_values(c->entries, c->dimensions); - } else { - c->lookup_values = c->entries * c->dimensions; - } - if (c->lookup_values == 0) return error(f, VORBIS_invalid_setup); - mults = (uint16 *) setup_temp_malloc(f, sizeof(mults[0]) * c->lookup_values); - if (mults == NULL) return error(f, VORBIS_outofmem); - for (j=0; j < (int) c->lookup_values; ++j) { - int q = get_bits(f, c->value_bits); - if (q == EOP) { setup_temp_free(f,mults,sizeof(mults[0])*c->lookup_values); return error(f, VORBIS_invalid_setup); } - mults[j] = q; - } - -#ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK - if (c->lookup_type == 1) { - int len, sparse = c->sparse; - float last=0; - // pre-expand the lookup1-style multiplicands, to avoid a divide in the inner loop - if (sparse) { - if (c->sorted_entries == 0) goto skip; - c->multiplicands = (codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->sorted_entries * c->dimensions); - } else - c->multiplicands = (codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->entries * c->dimensions); - if (c->multiplicands == NULL) { setup_temp_free(f,mults,sizeof(mults[0])*c->lookup_values); return error(f, VORBIS_outofmem); } - len = sparse ? c->sorted_entries : c->entries; - for (j=0; j < len; ++j) { - unsigned int z = sparse ? c->sorted_values[j] : j; - unsigned int div=1; - for (k=0; k < c->dimensions; ++k) { - int off = (z / div) % c->lookup_values; - float val = mults[off]; - val = mults[off]*c->delta_value + c->minimum_value + last; - c->multiplicands[j*c->dimensions + k] = val; - if (c->sequence_p) - last = val; - if (k+1 < c->dimensions) { - if (div > UINT_MAX / (unsigned int) c->lookup_values) { - setup_temp_free(f, mults,sizeof(mults[0])*c->lookup_values); - return error(f, VORBIS_invalid_setup); - } - div *= c->lookup_values; - } - } - } - c->lookup_type = 2; - } - else -#endif - { - float last=0; - CHECK(f); - c->multiplicands = (codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->lookup_values); - if (c->multiplicands == NULL) { setup_temp_free(f, mults,sizeof(mults[0])*c->lookup_values); return error(f, VORBIS_outofmem); } - for (j=0; j < (int) c->lookup_values; ++j) { - float val = mults[j] * c->delta_value + c->minimum_value + last; - c->multiplicands[j] = val; - if (c->sequence_p) - last = val; - } - } -#ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK - skip:; -#endif - setup_temp_free(f, mults, sizeof(mults[0])*c->lookup_values); - - CHECK(f); - } - CHECK(f); - } - - // time domain transfers (notused) - - x = get_bits(f, 6) + 1; - for (i=0; i < x; ++i) { - uint32 z = get_bits(f, 16); - if (z != 0) return error(f, VORBIS_invalid_setup); - } - - // Floors - f->floor_count = get_bits(f, 6)+1; - f->floor_config = (Floor *) setup_malloc(f, f->floor_count * sizeof(*f->floor_config)); - if (f->floor_config == NULL) return error(f, VORBIS_outofmem); - for (i=0; i < f->floor_count; ++i) { - f->floor_types[i] = get_bits(f, 16); - if (f->floor_types[i] > 1) return error(f, VORBIS_invalid_setup); - if (f->floor_types[i] == 0) { - Floor0 *g = &f->floor_config[i].floor0; - g->order = get_bits(f,8); - g->rate = get_bits(f,16); - g->bark_map_size = get_bits(f,16); - g->amplitude_bits = get_bits(f,6); - g->amplitude_offset = get_bits(f,8); - g->number_of_books = get_bits(f,4) + 1; - for (j=0; j < g->number_of_books; ++j) - g->book_list[j] = get_bits(f,8); - return error(f, VORBIS_feature_not_supported); - } else { - Point p[31*8+2]; - Floor1 *g = &f->floor_config[i].floor1; - int max_class = -1; - g->partitions = get_bits(f, 5); - for (j=0; j < g->partitions; ++j) { - g->partition_class_list[j] = get_bits(f, 4); - if (g->partition_class_list[j] > max_class) - max_class = g->partition_class_list[j]; - } - for (j=0; j <= max_class; ++j) { - g->class_dimensions[j] = get_bits(f, 3)+1; - g->class_subclasses[j] = get_bits(f, 2); - if (g->class_subclasses[j]) { - g->class_masterbooks[j] = get_bits(f, 8); - if (g->class_masterbooks[j] >= f->codebook_count) return error(f, VORBIS_invalid_setup); - } - for (k=0; k < 1 << g->class_subclasses[j]; ++k) { - g->subclass_books[j][k] = get_bits(f,8)-1; - if (g->subclass_books[j][k] >= f->codebook_count) return error(f, VORBIS_invalid_setup); - } - } - g->floor1_multiplier = get_bits(f,2)+1; - g->rangebits = get_bits(f,4); - g->Xlist[0] = 0; - g->Xlist[1] = 1 << g->rangebits; - g->values = 2; - for (j=0; j < g->partitions; ++j) { - int c = g->partition_class_list[j]; - for (k=0; k < g->class_dimensions[c]; ++k) { - g->Xlist[g->values] = get_bits(f, g->rangebits); - ++g->values; - } - } - // precompute the sorting - for (j=0; j < g->values; ++j) { - p[j].x = g->Xlist[j]; - p[j].y = j; - } - qsort(p, g->values, sizeof(p[0]), point_compare); - for (j=0; j < g->values; ++j) - g->sorted_order[j] = (uint8) p[j].y; - // precompute the neighbors - for (j=2; j < g->values; ++j) { - int low,hi; - neighbors(g->Xlist, j, &low,&hi); - g->neighbors[j][0] = low; - g->neighbors[j][1] = hi; - } - - if (g->values > longest_floorlist) - longest_floorlist = g->values; - } - } - - // Residue - f->residue_count = get_bits(f, 6)+1; - f->residue_config = (Residue *) setup_malloc(f, f->residue_count * sizeof(f->residue_config[0])); - if (f->residue_config == NULL) return error(f, VORBIS_outofmem); - memset(f->residue_config, 0, f->residue_count * sizeof(f->residue_config[0])); - for (i=0; i < f->residue_count; ++i) { - uint8 residue_cascade[64]; - Residue *r = f->residue_config+i; - f->residue_types[i] = get_bits(f, 16); - if (f->residue_types[i] > 2) return error(f, VORBIS_invalid_setup); - r->begin = get_bits(f, 24); - r->end = get_bits(f, 24); - if (r->end < r->begin) return error(f, VORBIS_invalid_setup); - r->part_size = get_bits(f,24)+1; - r->classifications = get_bits(f,6)+1; - r->classbook = get_bits(f,8); - if (r->classbook >= f->codebook_count) return error(f, VORBIS_invalid_setup); - for (j=0; j < r->classifications; ++j) { - uint8 high_bits=0; - uint8 low_bits=get_bits(f,3); - if (get_bits(f,1)) - high_bits = get_bits(f,5); - residue_cascade[j] = high_bits*8 + low_bits; - } - r->residue_books = (short (*)[8]) setup_malloc(f, sizeof(r->residue_books[0]) * r->classifications); - if (r->residue_books == NULL) return error(f, VORBIS_outofmem); - for (j=0; j < r->classifications; ++j) { - for (k=0; k < 8; ++k) { - if (residue_cascade[j] & (1 << k)) { - r->residue_books[j][k] = get_bits(f, 8); - if (r->residue_books[j][k] >= f->codebook_count) return error(f, VORBIS_invalid_setup); - } else { - r->residue_books[j][k] = -1; - } - } - } - // precompute the classifications[] array to avoid inner-loop mod/divide - // call it 'classdata' since we already have r->classifications - r->classdata = (uint8 **) setup_malloc(f, sizeof(*r->classdata) * f->codebooks[r->classbook].entries); - if (!r->classdata) return error(f, VORBIS_outofmem); - memset(r->classdata, 0, sizeof(*r->classdata) * f->codebooks[r->classbook].entries); - for (j=0; j < f->codebooks[r->classbook].entries; ++j) { - int classwords = f->codebooks[r->classbook].dimensions; - int temp = j; - r->classdata[j] = (uint8 *) setup_malloc(f, sizeof(r->classdata[j][0]) * classwords); - if (r->classdata[j] == NULL) return error(f, VORBIS_outofmem); - for (k=classwords-1; k >= 0; --k) { - r->classdata[j][k] = temp % r->classifications; - temp /= r->classifications; - } - } - } - - f->mapping_count = get_bits(f,6)+1; - f->mapping = (Mapping *) setup_malloc(f, f->mapping_count * sizeof(*f->mapping)); - if (f->mapping == NULL) return error(f, VORBIS_outofmem); - memset(f->mapping, 0, f->mapping_count * sizeof(*f->mapping)); - for (i=0; i < f->mapping_count; ++i) { - Mapping *m = f->mapping + i; - int mapping_type = get_bits(f,16); - if (mapping_type != 0) return error(f, VORBIS_invalid_setup); - m->chan = (MappingChannel *) setup_malloc(f, f->channels * sizeof(*m->chan)); - if (m->chan == NULL) return error(f, VORBIS_outofmem); - if (get_bits(f,1)) - m->submaps = get_bits(f,4)+1; - else - m->submaps = 1; - if (m->submaps > max_submaps) - max_submaps = m->submaps; - if (get_bits(f,1)) { - m->coupling_steps = get_bits(f,8)+1; - for (k=0; k < m->coupling_steps; ++k) { - m->chan[k].magnitude = get_bits(f, ilog(f->channels-1)); - m->chan[k].angle = get_bits(f, ilog(f->channels-1)); - if (m->chan[k].magnitude >= f->channels) return error(f, VORBIS_invalid_setup); - if (m->chan[k].angle >= f->channels) return error(f, VORBIS_invalid_setup); - if (m->chan[k].magnitude == m->chan[k].angle) return error(f, VORBIS_invalid_setup); - } - } else - m->coupling_steps = 0; - - // reserved field - if (get_bits(f,2)) return error(f, VORBIS_invalid_setup); - if (m->submaps > 1) { - for (j=0; j < f->channels; ++j) { - m->chan[j].mux = get_bits(f, 4); - if (m->chan[j].mux >= m->submaps) return error(f, VORBIS_invalid_setup); - } - } else - // @SPECIFICATION: this case is missing from the spec - for (j=0; j < f->channels; ++j) - m->chan[j].mux = 0; - - for (j=0; j < m->submaps; ++j) { - get_bits(f,8); // discard - m->submap_floor[j] = get_bits(f,8); - m->submap_residue[j] = get_bits(f,8); - if (m->submap_floor[j] >= f->floor_count) return error(f, VORBIS_invalid_setup); - if (m->submap_residue[j] >= f->residue_count) return error(f, VORBIS_invalid_setup); - } - } - - // Modes - f->mode_count = get_bits(f, 6)+1; - for (i=0; i < f->mode_count; ++i) { - Mode *m = f->mode_config+i; - m->blockflag = get_bits(f,1); - m->windowtype = get_bits(f,16); - m->transformtype = get_bits(f,16); - m->mapping = get_bits(f,8); - if (m->windowtype != 0) return error(f, VORBIS_invalid_setup); - if (m->transformtype != 0) return error(f, VORBIS_invalid_setup); - if (m->mapping >= f->mapping_count) return error(f, VORBIS_invalid_setup); - } - - flush_packet(f); - - f->previous_length = 0; - - for (i=0; i < f->channels; ++i) { - f->channel_buffers[i] = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1); - f->previous_window[i] = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1/2); - f->finalY[i] = (int16 *) setup_malloc(f, sizeof(int16) * longest_floorlist); - if (f->channel_buffers[i] == NULL || f->previous_window[i] == NULL || f->finalY[i] == NULL) return error(f, VORBIS_outofmem); - #ifdef STB_VORBIS_NO_DEFER_FLOOR - f->floor_buffers[i] = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1/2); - if (f->floor_buffers[i] == NULL) return error(f, VORBIS_outofmem); - #endif - } - - if (!init_blocksize(f, 0, f->blocksize_0)) return FALSE; - if (!init_blocksize(f, 1, f->blocksize_1)) return FALSE; - f->blocksize[0] = f->blocksize_0; - f->blocksize[1] = f->blocksize_1; - -#ifdef STB_VORBIS_DIVIDE_TABLE - if (integer_divide_table[1][1]==0) - for (i=0; i < DIVTAB_NUMER; ++i) - for (j=1; j < DIVTAB_DENOM; ++j) - integer_divide_table[i][j] = i / j; -#endif - - // compute how much temporary memory is needed - - // 1. - { - uint32 imdct_mem = (f->blocksize_1 * sizeof(float) >> 1); - uint32 classify_mem; - int i,max_part_read=0; - for (i=0; i < f->residue_count; ++i) { - Residue *r = f->residue_config + i; - int n_read = r->end - r->begin; - int part_read = n_read / r->part_size; - if (part_read > max_part_read) - max_part_read = part_read; - } - #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE - classify_mem = f->channels * (sizeof(void*) + max_part_read * sizeof(uint8 *)); - #else - classify_mem = f->channels * (sizeof(void*) + max_part_read * sizeof(int *)); - #endif - - f->temp_memory_required = classify_mem; - if (imdct_mem > f->temp_memory_required) - f->temp_memory_required = imdct_mem; - } - - f->first_decode = TRUE; - - if (f->alloc.alloc_buffer) { - assert(f->temp_offset == f->alloc.alloc_buffer_length_in_bytes); - // check if there's enough temp memory so we don't error later - if (f->setup_offset + sizeof(*f) + f->temp_memory_required > (unsigned) f->temp_offset) - return error(f, VORBIS_outofmem); - } - - f->first_audio_page_offset = stb_vorbis_get_file_offset(f); - - return TRUE; -} - -static void vorbis_deinit(stb_vorbis *p) -{ - int i,j; - if (p->residue_config) { - for (i=0; i < p->residue_count; ++i) { - Residue *r = p->residue_config+i; - if (r->classdata) { - for (j=0; j < p->codebooks[r->classbook].entries; ++j) - setup_free(p, r->classdata[j]); - setup_free(p, r->classdata); - } - setup_free(p, r->residue_books); - } - } - - if (p->codebooks) { - CHECK(p); - for (i=0; i < p->codebook_count; ++i) { - Codebook *c = p->codebooks + i; - setup_free(p, c->codeword_lengths); - setup_free(p, c->multiplicands); - setup_free(p, c->codewords); - setup_free(p, c->sorted_codewords); - // c->sorted_values[-1] is the first entry in the array - setup_free(p, c->sorted_values ? c->sorted_values-1 : NULL); - } - setup_free(p, p->codebooks); - } - setup_free(p, p->floor_config); - setup_free(p, p->residue_config); - if (p->mapping) { - for (i=0; i < p->mapping_count; ++i) - setup_free(p, p->mapping[i].chan); - setup_free(p, p->mapping); - } - CHECK(p); - for (i=0; i < p->channels && i < STB_VORBIS_MAX_CHANNELS; ++i) { - setup_free(p, p->channel_buffers[i]); - setup_free(p, p->previous_window[i]); - #ifdef STB_VORBIS_NO_DEFER_FLOOR - setup_free(p, p->floor_buffers[i]); - #endif - setup_free(p, p->finalY[i]); - } - for (i=0; i < 2; ++i) { - setup_free(p, p->A[i]); - setup_free(p, p->B[i]); - setup_free(p, p->C[i]); - setup_free(p, p->window[i]); - setup_free(p, p->bit_reverse[i]); - } - #ifndef STB_VORBIS_NO_STDIO - if (p->close_on_free) fclose(p->f); - #endif -} - -void stb_vorbis_close(stb_vorbis *p) -{ - if (p == NULL) return; - vorbis_deinit(p); - setup_free(p,p); -} - -static void vorbis_init(stb_vorbis *p, const stb_vorbis_alloc *z) -{ - memset(p, 0, sizeof(*p)); // NULL out all malloc'd pointers to start - if (z) { - p->alloc = *z; - p->alloc.alloc_buffer_length_in_bytes = (p->alloc.alloc_buffer_length_in_bytes+3) & ~3; - p->temp_offset = p->alloc.alloc_buffer_length_in_bytes; - } - p->eof = 0; - p->error = VORBIS__no_error; - p->stream = NULL; - p->codebooks = NULL; - p->page_crc_tests = -1; - #ifndef STB_VORBIS_NO_STDIO - p->close_on_free = FALSE; - p->f = NULL; - #endif -} - -int stb_vorbis_get_sample_offset(stb_vorbis *f) -{ - if (f->current_loc_valid) - return f->current_loc; - else - return -1; -} - -stb_vorbis_info stb_vorbis_get_info(stb_vorbis *f) -{ - stb_vorbis_info d; - d.channels = f->channels; - d.sample_rate = f->sample_rate; - d.setup_memory_required = f->setup_memory_required; - d.setup_temp_memory_required = f->setup_temp_memory_required; - d.temp_memory_required = f->temp_memory_required; - d.max_frame_size = f->blocksize_1 >> 1; - return d; -} - -int stb_vorbis_get_error(stb_vorbis *f) -{ - int e = f->error; - f->error = VORBIS__no_error; - return e; -} - -static stb_vorbis * vorbis_alloc(stb_vorbis *f) -{ - stb_vorbis *p = (stb_vorbis *) setup_malloc(f, sizeof(*p)); - return p; -} - -#ifndef STB_VORBIS_NO_PUSHDATA_API - -void stb_vorbis_flush_pushdata(stb_vorbis *f) -{ - f->previous_length = 0; - f->page_crc_tests = 0; - f->discard_samples_deferred = 0; - f->current_loc_valid = FALSE; - f->first_decode = FALSE; - f->samples_output = 0; - f->channel_buffer_start = 0; - f->channel_buffer_end = 0; -} - -static int vorbis_search_for_page_pushdata(vorb *f, uint8 *data, int data_len) -{ - int i,n; - for (i=0; i < f->page_crc_tests; ++i) - f->scan[i].bytes_done = 0; - - // if we have room for more scans, search for them first, because - // they may cause us to stop early if their header is incomplete - if (f->page_crc_tests < STB_VORBIS_PUSHDATA_CRC_COUNT) { - if (data_len < 4) return 0; - data_len -= 3; // need to look for 4-byte sequence, so don't miss - // one that straddles a boundary - for (i=0; i < data_len; ++i) { - if (data[i] == 0x4f) { - if (0==memcmp(data+i, ogg_page_header, 4)) { - int j,len; - uint32 crc; - // make sure we have the whole page header - if (i+26 >= data_len || i+27+data[i+26] >= data_len) { - // only read up to this page start, so hopefully we'll - // have the whole page header start next time - data_len = i; - break; - } - // ok, we have it all; compute the length of the page - len = 27 + data[i+26]; - for (j=0; j < data[i+26]; ++j) - len += data[i+27+j]; - // scan everything up to the embedded crc (which we must 0) - crc = 0; - for (j=0; j < 22; ++j) - crc = crc32_update(crc, data[i+j]); - // now process 4 0-bytes - for ( ; j < 26; ++j) - crc = crc32_update(crc, 0); - // len is the total number of bytes we need to scan - n = f->page_crc_tests++; - f->scan[n].bytes_left = len-j; - f->scan[n].crc_so_far = crc; - f->scan[n].goal_crc = data[i+22] + (data[i+23] << 8) + (data[i+24]<<16) + (data[i+25]<<24); - // if the last frame on a page is continued to the next, then - // we can't recover the sample_loc immediately - if (data[i+27+data[i+26]-1] == 255) - f->scan[n].sample_loc = ~0; - else - f->scan[n].sample_loc = data[i+6] + (data[i+7] << 8) + (data[i+ 8]<<16) + (data[i+ 9]<<24); - f->scan[n].bytes_done = i+j; - if (f->page_crc_tests == STB_VORBIS_PUSHDATA_CRC_COUNT) - break; - // keep going if we still have room for more - } - } - } - } - - for (i=0; i < f->page_crc_tests;) { - uint32 crc; - int j; - int n = f->scan[i].bytes_done; - int m = f->scan[i].bytes_left; - if (m > data_len - n) m = data_len - n; - // m is the bytes to scan in the current chunk - crc = f->scan[i].crc_so_far; - for (j=0; j < m; ++j) - crc = crc32_update(crc, data[n+j]); - f->scan[i].bytes_left -= m; - f->scan[i].crc_so_far = crc; - if (f->scan[i].bytes_left == 0) { - // does it match? - if (f->scan[i].crc_so_far == f->scan[i].goal_crc) { - // Houston, we have page - data_len = n+m; // consumption amount is wherever that scan ended - f->page_crc_tests = -1; // drop out of page scan mode - f->previous_length = 0; // decode-but-don't-output one frame - f->next_seg = -1; // start a new page - f->current_loc = f->scan[i].sample_loc; // set the current sample location - // to the amount we'd have decoded had we decoded this page - f->current_loc_valid = f->current_loc != ~0U; - return data_len; - } - // delete entry - f->scan[i] = f->scan[--f->page_crc_tests]; - } else { - ++i; - } - } - - return data_len; -} - -// return value: number of bytes we used -int stb_vorbis_decode_frame_pushdata( - stb_vorbis *f, // the file we're decoding - const uint8 *data, int data_len, // the memory available for decoding - int *channels, // place to write number of float * buffers - float ***output, // place to write float ** array of float * buffers - int *samples // place to write number of output samples - ) -{ - int i; - int len,right,left; - - if (!IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing); - - if (f->page_crc_tests >= 0) { - *samples = 0; - return vorbis_search_for_page_pushdata(f, (uint8 *) data, data_len); - } - - f->stream = (uint8 *) data; - f->stream_end = (uint8 *) data + data_len; - f->error = VORBIS__no_error; - - // check that we have the entire packet in memory - if (!is_whole_packet_present(f, FALSE)) { - *samples = 0; - return 0; - } - - if (!vorbis_decode_packet(f, &len, &left, &right)) { - // save the actual error we encountered - enum STBVorbisError error = f->error; - if (error == VORBIS_bad_packet_type) { - // flush and resynch - f->error = VORBIS__no_error; - while (get8_packet(f) != EOP) - if (f->eof) break; - *samples = 0; - return (int) (f->stream - data); - } - if (error == VORBIS_continued_packet_flag_invalid) { - if (f->previous_length == 0) { - // we may be resynching, in which case it's ok to hit one - // of these; just discard the packet - f->error = VORBIS__no_error; - while (get8_packet(f) != EOP) - if (f->eof) break; - *samples = 0; - return (int) (f->stream - data); - } - } - // if we get an error while parsing, what to do? - // well, it DEFINITELY won't work to continue from where we are! - stb_vorbis_flush_pushdata(f); - // restore the error that actually made us bail - f->error = error; - *samples = 0; - return 1; - } - - // success! - len = vorbis_finish_frame(f, len, left, right); - for (i=0; i < f->channels; ++i) - f->outputs[i] = f->channel_buffers[i] + left; - - if (channels) *channels = f->channels; - *samples = len; - *output = f->outputs; - return (int) (f->stream - data); -} - -stb_vorbis *stb_vorbis_open_pushdata( - const unsigned char *data, int data_len, // the memory available for decoding - int *data_used, // only defined if result is not NULL - int *error, const stb_vorbis_alloc *alloc) -{ - stb_vorbis *f, p; - vorbis_init(&p, alloc); - p.stream = (uint8 *) data; - p.stream_end = (uint8 *) data + data_len; - p.push_mode = TRUE; - if (!start_decoder(&p)) { - if (p.eof) - *error = VORBIS_need_more_data; - else - *error = p.error; - return NULL; - } - f = vorbis_alloc(&p); - if (f) { - *f = p; - *data_used = (int) (f->stream - data); - *error = 0; - return f; - } else { - vorbis_deinit(&p); - return NULL; - } -} -#endif // STB_VORBIS_NO_PUSHDATA_API - -unsigned int stb_vorbis_get_file_offset(stb_vorbis *f) -{ - #ifndef STB_VORBIS_NO_PUSHDATA_API - if (f->push_mode) return 0; - #endif - if (USE_MEMORY(f)) return (unsigned int) (f->stream - f->stream_start); - #ifndef STB_VORBIS_NO_STDIO - return (unsigned int) (ftell(f->f) - f->f_start); - #endif -} - -#ifndef STB_VORBIS_NO_PULLDATA_API -// -// DATA-PULLING API -// - -static uint32 vorbis_find_page(stb_vorbis *f, uint32 *end, uint32 *last) -{ - for(;;) { - int n; - if (f->eof) return 0; - n = get8(f); - if (n == 0x4f) { // page header candidate - unsigned int retry_loc = stb_vorbis_get_file_offset(f); - int i; - // check if we're off the end of a file_section stream - if (retry_loc - 25 > f->stream_len) - return 0; - // check the rest of the header - for (i=1; i < 4; ++i) - if (get8(f) != ogg_page_header[i]) - break; - if (f->eof) return 0; - if (i == 4) { - uint8 header[27]; - uint32 i, crc, goal, len; - for (i=0; i < 4; ++i) - header[i] = ogg_page_header[i]; - for (; i < 27; ++i) - header[i] = get8(f); - if (f->eof) return 0; - if (header[4] != 0) goto invalid; - goal = header[22] + (header[23] << 8) + (header[24]<<16) + (header[25]<<24); - for (i=22; i < 26; ++i) - header[i] = 0; - crc = 0; - for (i=0; i < 27; ++i) - crc = crc32_update(crc, header[i]); - len = 0; - for (i=0; i < header[26]; ++i) { - int s = get8(f); - crc = crc32_update(crc, s); - len += s; - } - if (len && f->eof) return 0; - for (i=0; i < len; ++i) - crc = crc32_update(crc, get8(f)); - // finished parsing probable page - if (crc == goal) { - // we could now check that it's either got the last - // page flag set, OR it's followed by the capture - // pattern, but I guess TECHNICALLY you could have - // a file with garbage between each ogg page and recover - // from it automatically? So even though that paranoia - // might decrease the chance of an invalid decode by - // another 2^32, not worth it since it would hose those - // invalid-but-useful files? - if (end) - *end = stb_vorbis_get_file_offset(f); - if (last) { - if (header[5] & 0x04) - *last = 1; - else - *last = 0; - } - set_file_offset(f, retry_loc-1); - return 1; - } - } - invalid: - // not a valid page, so rewind and look for next one - set_file_offset(f, retry_loc); - } - } -} - - -#define SAMPLE_unknown 0xffffffff - -// seeking is implemented with a binary search, which narrows down the range to -// 64K, before using a linear search (because finding the synchronization -// pattern can be expensive, and the chance we'd find the end page again is -// relatively high for small ranges) -// -// two initial interpolation-style probes are used at the start of the search -// to try to bound either side of the binary search sensibly, while still -// working in O(log n) time if they fail. - -static int get_seek_page_info(stb_vorbis *f, ProbedPage *z) -{ - uint8 header[27], lacing[255]; - int i,len; - - // record where the page starts - z->page_start = stb_vorbis_get_file_offset(f); - - // parse the header - getn(f, header, 27); - if (header[0] != 'O' || header[1] != 'g' || header[2] != 'g' || header[3] != 'S') - return 0; - getn(f, lacing, header[26]); - - // determine the length of the payload - len = 0; - for (i=0; i < header[26]; ++i) - len += lacing[i]; - - // this implies where the page ends - z->page_end = z->page_start + 27 + header[26] + len; - - // read the last-decoded sample out of the data - z->last_decoded_sample = header[6] + (header[7] << 8) + (header[8] << 16) + (header[9] << 24); - - // restore file state to where we were - set_file_offset(f, z->page_start); - return 1; -} - -// rarely used function to seek back to the preceeding page while finding the -// start of a packet -static int go_to_page_before(stb_vorbis *f, unsigned int limit_offset) -{ - unsigned int previous_safe, end; - - // now we want to seek back 64K from the limit - if (limit_offset >= 65536 && limit_offset-65536 >= f->first_audio_page_offset) - previous_safe = limit_offset - 65536; - else - previous_safe = f->first_audio_page_offset; - - set_file_offset(f, previous_safe); - - while (vorbis_find_page(f, &end, NULL)) { - if (end >= limit_offset && stb_vorbis_get_file_offset(f) < limit_offset) - return 1; - set_file_offset(f, end); - } - - return 0; -} - -// implements the search logic for finding a page and starting decoding. if -// the function succeeds, current_loc_valid will be true and current_loc will -// be less than or equal to the provided sample number (the closer the -// better). -static int seek_to_sample_coarse(stb_vorbis *f, uint32 sample_number) -{ - ProbedPage left, right, mid; - int i, start_seg_with_known_loc, end_pos, page_start; - uint32 delta, stream_length, padding; - double offset, bytes_per_sample; - int probe = 0; - - // find the last page and validate the target sample - stream_length = stb_vorbis_stream_length_in_samples(f); - if (stream_length == 0) return error(f, VORBIS_seek_without_length); - if (sample_number > stream_length) return error(f, VORBIS_seek_invalid); - - // this is the maximum difference between the window-center (which is the - // actual granule position value), and the right-start (which the spec - // indicates should be the granule position (give or take one)). - padding = ((f->blocksize_1 - f->blocksize_0) >> 2); - if (sample_number < padding) - sample_number = 0; - else - sample_number -= padding; - - left = f->p_first; - while (left.last_decoded_sample == ~0U) { - // (untested) the first page does not have a 'last_decoded_sample' - set_file_offset(f, left.page_end); - if (!get_seek_page_info(f, &left)) goto error; - } - - right = f->p_last; - assert(right.last_decoded_sample != ~0U); - - // starting from the start is handled differently - if (sample_number <= left.last_decoded_sample) { - stb_vorbis_seek_start(f); - return 1; - } - - while (left.page_end != right.page_start) { - assert(left.page_end < right.page_start); - // search range in bytes - delta = right.page_start - left.page_end; - if (delta <= 65536) { - // there's only 64K left to search - handle it linearly - set_file_offset(f, left.page_end); - } else { - if (probe < 2) { - if (probe == 0) { - // first probe (interpolate) - double data_bytes = right.page_end - left.page_start; - bytes_per_sample = data_bytes / right.last_decoded_sample; - offset = left.page_start + bytes_per_sample * (sample_number - left.last_decoded_sample); - } else { - // second probe (try to bound the other side) - double error = ((double) sample_number - mid.last_decoded_sample) * bytes_per_sample; - if (error >= 0 && error < 8000) error = 8000; - if (error < 0 && error > -8000) error = -8000; - offset += error * 2; - } - - // ensure the offset is valid - if (offset < left.page_end) - offset = left.page_end; - if (offset > right.page_start - 65536) - offset = right.page_start - 65536; - - set_file_offset(f, (unsigned int) offset); - } else { - // binary search for large ranges (offset by 32K to ensure - // we don't hit the right page) - set_file_offset(f, left.page_end + (delta / 2) - 32768); - } - - if (!vorbis_find_page(f, NULL, NULL)) goto error; - } - - for (;;) { - if (!get_seek_page_info(f, &mid)) goto error; - if (mid.last_decoded_sample != ~0U) break; - // (untested) no frames end on this page - set_file_offset(f, mid.page_end); - assert(mid.page_start < right.page_start); - } - - // if we've just found the last page again then we're in a tricky file, - // and we're close enough. - if (mid.page_start == right.page_start) - break; - - if (sample_number < mid.last_decoded_sample) - right = mid; - else - left = mid; - - ++probe; - } - - // seek back to start of the last packet - page_start = left.page_start; - set_file_offset(f, page_start); - if (!start_page(f)) return error(f, VORBIS_seek_failed); - end_pos = f->end_seg_with_known_loc; - assert(end_pos >= 0); - - for (;;) { - for (i = end_pos; i > 0; --i) - if (f->segments[i-1] != 255) - break; - - start_seg_with_known_loc = i; - - if (start_seg_with_known_loc > 0 || !(f->page_flag & PAGEFLAG_continued_packet)) - break; - - // (untested) the final packet begins on an earlier page - if (!go_to_page_before(f, page_start)) - goto error; - - page_start = stb_vorbis_get_file_offset(f); - if (!start_page(f)) goto error; - end_pos = f->segment_count - 1; - } - - // prepare to start decoding - f->current_loc_valid = FALSE; - f->last_seg = FALSE; - f->valid_bits = 0; - f->packet_bytes = 0; - f->bytes_in_seg = 0; - f->previous_length = 0; - f->next_seg = start_seg_with_known_loc; - - for (i = 0; i < start_seg_with_known_loc; i++) - skip(f, f->segments[i]); - - // start decoding (optimizable - this frame is generally discarded) - vorbis_pump_first_frame(f); - return 1; - -error: - // try to restore the file to a valid state - stb_vorbis_seek_start(f); - return error(f, VORBIS_seek_failed); -} - -// the same as vorbis_decode_initial, but without advancing -static int peek_decode_initial(vorb *f, int *p_left_start, int *p_left_end, int *p_right_start, int *p_right_end, int *mode) -{ - int bits_read, bytes_read; - - if (!vorbis_decode_initial(f, p_left_start, p_left_end, p_right_start, p_right_end, mode)) - return 0; - - // either 1 or 2 bytes were read, figure out which so we can rewind - bits_read = 1 + ilog(f->mode_count-1); - if (f->mode_config[*mode].blockflag) - bits_read += 2; - bytes_read = (bits_read + 7) / 8; - - f->bytes_in_seg += bytes_read; - f->packet_bytes -= bytes_read; - skip(f, -bytes_read); - if (f->next_seg == -1) - f->next_seg = f->segment_count - 1; - else - f->next_seg--; - f->valid_bits = 0; - - return 1; -} - -int stb_vorbis_seek_frame(stb_vorbis *f, unsigned int sample_number) -{ - uint32 max_frame_samples; - - if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing); - - // fast page-level search - if (!seek_to_sample_coarse(f, sample_number)) - return 0; - - assert(f->current_loc_valid); - assert(f->current_loc <= sample_number); - - // linear search for the relevant packet - max_frame_samples = (f->blocksize_1*3 - f->blocksize_0) >> 2; - while (f->current_loc < sample_number) { - int left_start, left_end, right_start, right_end, mode, frame_samples; - if (!peek_decode_initial(f, &left_start, &left_end, &right_start, &right_end, &mode)) - return error(f, VORBIS_seek_failed); - // calculate the number of samples returned by the next frame - frame_samples = right_start - left_start; - if (f->current_loc + frame_samples > sample_number) { - return 1; // the next frame will contain the sample - } else if (f->current_loc + frame_samples + max_frame_samples > sample_number) { - // there's a chance the frame after this could contain the sample - vorbis_pump_first_frame(f); - } else { - // this frame is too early to be relevant - f->current_loc += frame_samples; - f->previous_length = 0; - maybe_start_packet(f); - flush_packet(f); - } - } - // the next frame will start with the sample - assert(f->current_loc == sample_number); - return 1; -} - -int stb_vorbis_seek(stb_vorbis *f, unsigned int sample_number) -{ - if (!stb_vorbis_seek_frame(f, sample_number)) - return 0; - - if (sample_number != f->current_loc) { - int n; - uint32 frame_start = f->current_loc; - stb_vorbis_get_frame_float(f, &n, NULL); - assert(sample_number > frame_start); - assert(f->channel_buffer_start + (int) (sample_number-frame_start) <= f->channel_buffer_end); - f->channel_buffer_start += (sample_number - frame_start); - } - - return 1; -} - -void stb_vorbis_seek_start(stb_vorbis *f) -{ - if (IS_PUSH_MODE(f)) { error(f, VORBIS_invalid_api_mixing); return; } - set_file_offset(f, f->first_audio_page_offset); - f->previous_length = 0; - f->first_decode = TRUE; - f->next_seg = -1; - vorbis_pump_first_frame(f); -} - -unsigned int stb_vorbis_stream_length_in_samples(stb_vorbis *f) -{ - unsigned int restore_offset, previous_safe; - unsigned int end, last_page_loc; - - if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing); - if (!f->total_samples) { - unsigned int last; - uint32 lo,hi; - char header[6]; - - // first, store the current decode position so we can restore it - restore_offset = stb_vorbis_get_file_offset(f); - - // now we want to seek back 64K from the end (the last page must - // be at most a little less than 64K, but let's allow a little slop) - if (f->stream_len >= 65536 && f->stream_len-65536 >= f->first_audio_page_offset) - previous_safe = f->stream_len - 65536; - else - previous_safe = f->first_audio_page_offset; - - set_file_offset(f, previous_safe); - // previous_safe is now our candidate 'earliest known place that seeking - // to will lead to the final page' - - if (!vorbis_find_page(f, &end, &last)) { - // if we can't find a page, we're hosed! - f->error = VORBIS_cant_find_last_page; - f->total_samples = 0xffffffff; - goto done; - } - - // check if there are more pages - last_page_loc = stb_vorbis_get_file_offset(f); - - // stop when the last_page flag is set, not when we reach eof; - // this allows us to stop short of a 'file_section' end without - // explicitly checking the length of the section - while (!last) { - set_file_offset(f, end); - if (!vorbis_find_page(f, &end, &last)) { - // the last page we found didn't have the 'last page' flag - // set. whoops! - break; - } - previous_safe = last_page_loc+1; - last_page_loc = stb_vorbis_get_file_offset(f); - } - - set_file_offset(f, last_page_loc); - - // parse the header - getn(f, (unsigned char *)header, 6); - // extract the absolute granule position - lo = get32(f); - hi = get32(f); - if (lo == 0xffffffff && hi == 0xffffffff) { - f->error = VORBIS_cant_find_last_page; - f->total_samples = SAMPLE_unknown; - goto done; - } - if (hi) - lo = 0xfffffffe; // saturate - f->total_samples = lo; - - f->p_last.page_start = last_page_loc; - f->p_last.page_end = end; - f->p_last.last_decoded_sample = lo; - - done: - set_file_offset(f, restore_offset); - } - return f->total_samples == SAMPLE_unknown ? 0 : f->total_samples; -} - -float stb_vorbis_stream_length_in_seconds(stb_vorbis *f) -{ - return stb_vorbis_stream_length_in_samples(f) / (float) f->sample_rate; -} - - - -int stb_vorbis_get_frame_float(stb_vorbis *f, int *channels, float ***output) -{ - int len, right,left,i; - if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing); - - if (!vorbis_decode_packet(f, &len, &left, &right)) { - f->channel_buffer_start = f->channel_buffer_end = 0; - return 0; - } - - len = vorbis_finish_frame(f, len, left, right); - for (i=0; i < f->channels; ++i) - f->outputs[i] = f->channel_buffers[i] + left; - - f->channel_buffer_start = left; - f->channel_buffer_end = left+len; - - if (channels) *channels = f->channels; - if (output) *output = f->outputs; - return len; -} - -#ifndef STB_VORBIS_NO_STDIO - -stb_vorbis * stb_vorbis_open_file_section(FILE *file, int close_on_free, int *error, const stb_vorbis_alloc *alloc, unsigned int length) -{ - stb_vorbis *f, p; - vorbis_init(&p, alloc); - p.f = file; - p.f_start = (uint32) ftell(file); - p.stream_len = length; - p.close_on_free = close_on_free; - if (start_decoder(&p)) { - f = vorbis_alloc(&p); - if (f) { - *f = p; - vorbis_pump_first_frame(f); - return f; - } - } - if (error) *error = p.error; - vorbis_deinit(&p); - return NULL; -} - -stb_vorbis * stb_vorbis_open_file(FILE *file, int close_on_free, int *error, const stb_vorbis_alloc *alloc) -{ - unsigned int len, start; - start = (unsigned int) ftell(file); - fseek(file, 0, SEEK_END); - len = (unsigned int) (ftell(file) - start); - fseek(file, start, SEEK_SET); - return stb_vorbis_open_file_section(file, close_on_free, error, alloc, len); -} - -stb_vorbis * stb_vorbis_open_filename(const char *filename, int *error, const stb_vorbis_alloc *alloc) -{ - FILE *f = fopen(filename, "rb"); - if (f) - return stb_vorbis_open_file(f, TRUE, error, alloc); - if (error) *error = VORBIS_file_open_failure; - return NULL; -} -#endif // STB_VORBIS_NO_STDIO - -stb_vorbis * stb_vorbis_open_memory(const unsigned char *data, int len, int *error, const stb_vorbis_alloc *alloc) -{ - stb_vorbis *f, p; - if (data == NULL) return NULL; - vorbis_init(&p, alloc); - p.stream = (uint8 *) data; - p.stream_end = (uint8 *) data + len; - p.stream_start = (uint8 *) p.stream; - p.stream_len = len; - p.push_mode = FALSE; - if (start_decoder(&p)) { - f = vorbis_alloc(&p); - if (f) { - *f = p; - vorbis_pump_first_frame(f); - return f; - } - } - if (error) *error = p.error; - vorbis_deinit(&p); - return NULL; -} - -#ifndef STB_VORBIS_NO_INTEGER_CONVERSION -#define PLAYBACK_MONO 1 -#define PLAYBACK_LEFT 2 -#define PLAYBACK_RIGHT 4 - -#define L (PLAYBACK_LEFT | PLAYBACK_MONO) -#define C (PLAYBACK_LEFT | PLAYBACK_RIGHT | PLAYBACK_MONO) -#define R (PLAYBACK_RIGHT | PLAYBACK_MONO) - -static int8 channel_position[7][6] = -{ - { 0 }, - { C }, - { L, R }, - { L, C, R }, - { L, R, L, R }, - { L, C, R, L, R }, - { L, C, R, L, R, C }, -}; - - -#ifndef STB_VORBIS_NO_FAST_SCALED_FLOAT - typedef union { - float f; - int i; - } float_conv; - typedef char stb_vorbis_float_size_test[sizeof(float)==4 && sizeof(int) == 4]; - #define FASTDEF(x) float_conv x - // add (1<<23) to convert to int, then divide by 2^SHIFT, then add 0.5/2^SHIFT to round - #define MAGIC(SHIFT) (1.5f * (1 << (23-SHIFT)) + 0.5f/(1 << SHIFT)) - #define ADDEND(SHIFT) (((150-SHIFT) << 23) + (1 << 22)) - #define FAST_SCALED_FLOAT_TO_INT(temp,x,s) (temp.f = (x) + MAGIC(s), temp.i - ADDEND(s)) - #define check_endianness() -#else - #define FAST_SCALED_FLOAT_TO_INT(temp,x,s) ((int) ((x) * (1 << (s)))) - #define check_endianness() - #define FASTDEF(x) -#endif - -static void copy_samples(short *dest, float *src, int len) -{ - int i; - check_endianness(); - for (i=0; i < len; ++i) { - FASTDEF(temp); - int v = FAST_SCALED_FLOAT_TO_INT(temp, src[i],15); - if ((unsigned int) (v + 32768) > 65535) - v = v < 0 ? -32768 : 32767; - dest[i] = v; - } -} - -static void compute_samples(int mask, short *output, int num_c, float **data, int d_offset, int len) -{ - #define BUFFER_SIZE 32 - float buffer[BUFFER_SIZE]; - int i,j,o,n = BUFFER_SIZE; - check_endianness(); - for (o = 0; o < len; o += BUFFER_SIZE) { - memset(buffer, 0, sizeof(buffer)); - if (o + n > len) n = len - o; - for (j=0; j < num_c; ++j) { - if (channel_position[num_c][j] & mask) { - for (i=0; i < n; ++i) - buffer[i] += data[j][d_offset+o+i]; - } - } - for (i=0; i < n; ++i) { - FASTDEF(temp); - int v = FAST_SCALED_FLOAT_TO_INT(temp,buffer[i],15); - if ((unsigned int) (v + 32768) > 65535) - v = v < 0 ? -32768 : 32767; - output[o+i] = v; - } - } -} - -static void compute_stereo_samples(short *output, int num_c, float **data, int d_offset, int len) -{ - #define BUFFER_SIZE 32 - float buffer[BUFFER_SIZE]; - int i,j,o,n = BUFFER_SIZE >> 1; - // o is the offset in the source data - check_endianness(); - for (o = 0; o < len; o += BUFFER_SIZE >> 1) { - // o2 is the offset in the output data - int o2 = o << 1; - memset(buffer, 0, sizeof(buffer)); - if (o + n > len) n = len - o; - for (j=0; j < num_c; ++j) { - int m = channel_position[num_c][j] & (PLAYBACK_LEFT | PLAYBACK_RIGHT); - if (m == (PLAYBACK_LEFT | PLAYBACK_RIGHT)) { - for (i=0; i < n; ++i) { - buffer[i*2+0] += data[j][d_offset+o+i]; - buffer[i*2+1] += data[j][d_offset+o+i]; - } - } else if (m == PLAYBACK_LEFT) { - for (i=0; i < n; ++i) { - buffer[i*2+0] += data[j][d_offset+o+i]; - } - } else if (m == PLAYBACK_RIGHT) { - for (i=0; i < n; ++i) { - buffer[i*2+1] += data[j][d_offset+o+i]; - } - } - } - for (i=0; i < (n<<1); ++i) { - FASTDEF(temp); - int v = FAST_SCALED_FLOAT_TO_INT(temp,buffer[i],15); - if ((unsigned int) (v + 32768) > 65535) - v = v < 0 ? -32768 : 32767; - output[o2+i] = v; - } - } -} - -static void convert_samples_short(int buf_c, short **buffer, int b_offset, int data_c, float **data, int d_offset, int samples) -{ - int i; - if (buf_c != data_c && buf_c <= 2 && data_c <= 6) { - static int channel_selector[3][2] = { {0}, {PLAYBACK_MONO}, {PLAYBACK_LEFT, PLAYBACK_RIGHT} }; - for (i=0; i < buf_c; ++i) - compute_samples(channel_selector[buf_c][i], buffer[i]+b_offset, data_c, data, d_offset, samples); - } else { - int limit = buf_c < data_c ? buf_c : data_c; - for (i=0; i < limit; ++i) - copy_samples(buffer[i]+b_offset, data[i]+d_offset, samples); - for ( ; i < buf_c; ++i) - memset(buffer[i]+b_offset, 0, sizeof(short) * samples); - } -} - -int stb_vorbis_get_frame_short(stb_vorbis *f, int num_c, short **buffer, int num_samples) -{ - float **output; - int len = stb_vorbis_get_frame_float(f, NULL, &output); - if (len > num_samples) len = num_samples; - if (len) - convert_samples_short(num_c, buffer, 0, f->channels, output, 0, len); - return len; -} - -static void convert_channels_short_interleaved(int buf_c, short *buffer, int data_c, float **data, int d_offset, int len) -{ - int i; - check_endianness(); - if (buf_c != data_c && buf_c <= 2 && data_c <= 6) { - assert(buf_c == 2); - for (i=0; i < buf_c; ++i) - compute_stereo_samples(buffer, data_c, data, d_offset, len); - } else { - int limit = buf_c < data_c ? buf_c : data_c; - int j; - for (j=0; j < len; ++j) { - for (i=0; i < limit; ++i) { - FASTDEF(temp); - float f = data[i][d_offset+j]; - int v = FAST_SCALED_FLOAT_TO_INT(temp, f,15);//data[i][d_offset+j],15); - if ((unsigned int) (v + 32768) > 65535) - v = v < 0 ? -32768 : 32767; - *buffer++ = v; - } - for ( ; i < buf_c; ++i) - *buffer++ = 0; - } - } -} - -int stb_vorbis_get_frame_short_interleaved(stb_vorbis *f, int num_c, short *buffer, int num_shorts) -{ - float **output; - int len; - if (num_c == 1) return stb_vorbis_get_frame_short(f,num_c,&buffer, num_shorts); - len = stb_vorbis_get_frame_float(f, NULL, &output); - if (len) { - if (len*num_c > num_shorts) len = num_shorts / num_c; - convert_channels_short_interleaved(num_c, buffer, f->channels, output, 0, len); - } - return len; -} - -int stb_vorbis_get_samples_short_interleaved(stb_vorbis *f, int channels, short *buffer, int num_shorts) -{ - float **outputs; - int len = num_shorts / channels; - int n=0; - int z = f->channels; - if (z > channels) z = channels; - while (n < len) { - int k = f->channel_buffer_end - f->channel_buffer_start; - if (n+k >= len) k = len - n; - if (k) - convert_channels_short_interleaved(channels, buffer, f->channels, f->channel_buffers, f->channel_buffer_start, k); - buffer += k*channels; - n += k; - f->channel_buffer_start += k; - if (n == len) break; - if (!stb_vorbis_get_frame_float(f, NULL, &outputs)) break; - } - return n; -} - -int stb_vorbis_get_samples_short(stb_vorbis *f, int channels, short **buffer, int len) -{ - float **outputs; - int n=0; - int z = f->channels; - if (z > channels) z = channels; - while (n < len) { - int k = f->channel_buffer_end - f->channel_buffer_start; - if (n+k >= len) k = len - n; - if (k) - convert_samples_short(channels, buffer, n, f->channels, f->channel_buffers, f->channel_buffer_start, k); - n += k; - f->channel_buffer_start += k; - if (n == len) break; - if (!stb_vorbis_get_frame_float(f, NULL, &outputs)) break; - } - return n; -} - -#ifndef STB_VORBIS_NO_STDIO -int stb_vorbis_decode_filename(const char *filename, int *channels, int *sample_rate, short **output) -{ - int data_len, offset, total, limit, error; - short *data; - stb_vorbis *v = stb_vorbis_open_filename(filename, &error, NULL); - if (v == NULL) return -1; - limit = v->channels * 4096; - *channels = v->channels; - if (sample_rate) - *sample_rate = v->sample_rate; - offset = data_len = 0; - total = limit; - data = (short *) malloc(total * sizeof(*data)); - if (data == NULL) { - stb_vorbis_close(v); - return -2; - } - for (;;) { - int n = stb_vorbis_get_frame_short_interleaved(v, v->channels, data+offset, total-offset); - if (n == 0) break; - data_len += n; - offset += n * v->channels; - if (offset + limit > total) { - short *data2; - total *= 2; - data2 = (short *) realloc(data, total * sizeof(*data)); - if (data2 == NULL) { - free(data); - stb_vorbis_close(v); - return -2; - } - data = data2; - } - } - *output = data; - stb_vorbis_close(v); - return data_len; -} -#endif // NO_STDIO - -int stb_vorbis_decode_memory(const uint8 *mem, int len, int *channels, int *sample_rate, short **output) -{ - int data_len, offset, total, limit, error; - short *data; - stb_vorbis *v = stb_vorbis_open_memory(mem, len, &error, NULL); - if (v == NULL) return -1; - limit = v->channels * 4096; - *channels = v->channels; - if (sample_rate) - *sample_rate = v->sample_rate; - offset = data_len = 0; - total = limit; - data = (short *) malloc(total * sizeof(*data)); - if (data == NULL) { - stb_vorbis_close(v); - return -2; - } - for (;;) { - int n = stb_vorbis_get_frame_short_interleaved(v, v->channels, data+offset, total-offset); - if (n == 0) break; - data_len += n; - offset += n * v->channels; - if (offset + limit > total) { - short *data2; - total *= 2; - data2 = (short *) realloc(data, total * sizeof(*data)); - if (data2 == NULL) { - free(data); - stb_vorbis_close(v); - return -2; - } - data = data2; - } - } - *output = data; - stb_vorbis_close(v); - return data_len; -} -#endif // STB_VORBIS_NO_INTEGER_CONVERSION - -int stb_vorbis_get_samples_float_interleaved(stb_vorbis *f, int channels, float *buffer, int num_floats) -{ - float **outputs; - int len = num_floats / channels; - int n=0; - int z = f->channels; - if (z > channels) z = channels; - while (n < len) { - int i,j; - int k = f->channel_buffer_end - f->channel_buffer_start; - if (n+k >= len) k = len - n; - for (j=0; j < k; ++j) { - for (i=0; i < z; ++i) - *buffer++ = f->channel_buffers[i][f->channel_buffer_start+j]; - for ( ; i < channels; ++i) - *buffer++ = 0; - } - n += k; - f->channel_buffer_start += k; - if (n == len) - break; - if (!stb_vorbis_get_frame_float(f, NULL, &outputs)) - break; - } - return n; -} - -int stb_vorbis_get_samples_float(stb_vorbis *f, int channels, float **buffer, int num_samples) -{ - float **outputs; - int n=0; - int z = f->channels; - if (z > channels) z = channels; - while (n < num_samples) { - int i; - int k = f->channel_buffer_end - f->channel_buffer_start; - if (n+k >= num_samples) k = num_samples - n; - if (k) { - for (i=0; i < z; ++i) - memcpy(buffer[i]+n, f->channel_buffers[i]+f->channel_buffer_start, sizeof(float)*k); - for ( ; i < channels; ++i) - memset(buffer[i]+n, 0, sizeof(float) * k); - } - n += k; - f->channel_buffer_start += k; - if (n == num_samples) - break; - if (!stb_vorbis_get_frame_float(f, NULL, &outputs)) - break; - } - return n; -} -#endif // STB_VORBIS_NO_PULLDATA_API - -/* Version history - 1.09 - 2016/04/04 - back out 'avoid discarding last frame' fix from previous version - 1.08 - 2016/04/02 - fixed multiple warnings; fix setup memory leaks; - avoid discarding last frame of audio data - 1.07 - 2015/01/16 - fixed some warnings, fix mingw, const-correct API - some more crash fixes when out of memory or with corrupt files - 1.06 - 2015/08/31 - full, correct support for seeking API (Dougall Johnson) - some crash fixes when out of memory or with corrupt files - 1.05 - 2015/04/19 - don't define __forceinline if it's redundant - 1.04 - 2014/08/27 - fix missing const-correct case in API - 1.03 - 2014/08/07 - Warning fixes - 1.02 - 2014/07/09 - Declare qsort compare function _cdecl on windows - 1.01 - 2014/06/18 - fix stb_vorbis_get_samples_float - 1.0 - 2014/05/26 - fix memory leaks; fix warnings; fix bugs in multichannel - (API change) report sample rate for decode-full-file funcs - 0.99996 - bracket #include for macintosh compilation by Laurent Gomila - 0.99995 - use union instead of pointer-cast for fast-float-to-int to avoid alias-optimization problem - 0.99994 - change fast-float-to-int to work in single-precision FPU mode, remove endian-dependence - 0.99993 - remove assert that fired on legal files with empty tables - 0.99992 - rewind-to-start - 0.99991 - bugfix to stb_vorbis_get_samples_short by Bernhard Wodo - 0.9999 - (should have been 0.99990) fix no-CRT support, compiling as C++ - 0.9998 - add a full-decode function with a memory source - 0.9997 - fix a bug in the read-from-FILE case in 0.9996 addition - 0.9996 - query length of vorbis stream in samples/seconds - 0.9995 - bugfix to another optimization that only happened in certain files - 0.9994 - bugfix to one of the optimizations that caused significant (but inaudible?) errors - 0.9993 - performance improvements; runs in 99% to 104% of time of reference implementation - 0.9992 - performance improvement of IMDCT; now performs close to reference implementation - 0.9991 - performance improvement of IMDCT - 0.999 - (should have been 0.9990) performance improvement of IMDCT - 0.998 - no-CRT support from Casey Muratori - 0.997 - bugfixes for bugs found by Terje Mathisen - 0.996 - bugfix: fast-huffman decode initialized incorrectly for sparse codebooks; fixing gives 10% speedup - found by Terje Mathisen - 0.995 - bugfix: fix to 'effective' overrun detection - found by Terje Mathisen - 0.994 - bugfix: garbage decode on final VQ symbol of a non-multiple - found by Terje Mathisen - 0.993 - bugfix: pushdata API required 1 extra byte for empty page (failed to consume final page if empty) - found by Terje Mathisen - 0.992 - fixes for MinGW warning - 0.991 - turn fast-float-conversion on by default - 0.990 - fix push-mode seek recovery if you seek into the headers - 0.98b - fix to bad release of 0.98 - 0.98 - fix push-mode seek recovery; robustify float-to-int and support non-fast mode - 0.97 - builds under c++ (typecasting, don't use 'class' keyword) - 0.96 - somehow MY 0.95 was right, but the web one was wrong, so here's my 0.95 rereleased as 0.96, fixes a typo in the clamping code - 0.95 - clamping code for 16-bit functions - 0.94 - not publically released - 0.93 - fixed all-zero-floor case (was decoding garbage) - 0.92 - fixed a memory leak - 0.91 - conditional compiles to omit parts of the API and the infrastructure to support them: STB_VORBIS_NO_PULLDATA_API, STB_VORBIS_NO_PUSHDATA_API, STB_VORBIS_NO_STDIO, STB_VORBIS_NO_INTEGER_CONVERSION - 0.90 - first public release -*/ - -#endif // STB_VORBIS_HEADER_ONLY -- cgit v1.2.3