diff options
Diffstat (limited to 'thirdparty/libtheora/mathops.c')
-rw-r--r-- | thirdparty/libtheora/mathops.c | 68 |
1 files changed, 43 insertions, 25 deletions
diff --git a/thirdparty/libtheora/mathops.c b/thirdparty/libtheora/mathops.c index d3fb909194..23c8f6e1ba 100644 --- a/thirdparty/libtheora/mathops.c +++ b/thirdparty/libtheora/mathops.c @@ -1,10 +1,8 @@ +#include "internal.h" #include "mathops.h" -#include <limits.h> /*The fastest fallback strategy for platforms with fast multiplication appears to be based on de Bruijn sequences~\cite{LP98}. - Tests confirmed this to be true even on an ARM11, where it is actually faster - than using the native clz instruction. Define OC_ILOG_NODEBRUIJN to use a simpler fallback on platforms where multiplication or table lookups are too expensive. @@ -15,8 +13,7 @@ year=1998, note="\url{http://supertech.csail.mit.edu/papers/debruijn.pdf}" }*/ -#if !defined(OC_ILOG_NODEBRUIJN)&& \ - !defined(OC_CLZ32)||!defined(OC_CLZ64)&&LONG_MAX<9223372036854775807LL +#if !defined(OC_ILOG_NODEBRUIJN)&&!defined(OC_CLZ32) static const unsigned char OC_DEBRUIJN_IDX32[32]={ 0, 1,28, 2,29,14,24, 3,30,22,20,15,25,17, 4, 8, 31,27,13,23,21,19,16, 7,26,12,18, 6,11, 5,10, 9 @@ -25,7 +22,7 @@ static const unsigned char OC_DEBRUIJN_IDX32[32]={ int oc_ilog32(ogg_uint32_t _v){ #if defined(OC_CLZ32) - return (OC_CLZ32_OFFS-OC_CLZ32(_v))&-!!_v; + return OC_CLZ32_OFFS-OC_CLZ32(_v)&-!!_v; #else /*On a Pentium M, this branchless version tested as the fastest version without multiplications on 1,000,000,000 random 32-bit integers, edging out a @@ -51,12 +48,12 @@ int oc_ilog32(ogg_uint32_t _v){ /*This de Bruijn sequence version is faster if you have a fast multiplier.*/ # else int ret; - ret=_v>0; _v|=_v>>1; _v|=_v>>2; _v|=_v>>4; _v|=_v>>8; _v|=_v>>16; + ret=_v&1; _v=(_v>>1)+1; ret+=OC_DEBRUIJN_IDX32[_v*0x77CB531U>>27&0x1F]; return ret; @@ -66,16 +63,21 @@ int oc_ilog32(ogg_uint32_t _v){ int oc_ilog64(ogg_int64_t _v){ #if defined(OC_CLZ64) - return (OC_CLZ64_OFFS-OC_CLZ64(_v))&-!!_v; + return OC_CLZ64_OFFS-OC_CLZ64(_v)&-!!_v; #else -# if defined(OC_ILOG_NODEBRUIJN) +/*If we don't have a fast 64-bit word implementation, split it into two 32-bit + halves.*/ +# if defined(OC_ILOG_NODEBRUIJN)|| \ + defined(OC_CLZ32)||LONG_MAX<9223372036854775807LL ogg_uint32_t v; int ret; int m; - ret=_v>0; m=(_v>0xFFFFFFFFU)<<5; v=(ogg_uint32_t)(_v>>m); - ret|=m; +# if defined(OC_CLZ32) + ret=m+OC_CLZ32_OFFS-OC_CLZ32(v)&-!!v; +# elif defined(OC_ILOG_NODEBRUIJN) + ret=v>0|m; m=(v>0xFFFFU)<<4; v>>=m; ret|=m; @@ -90,26 +92,19 @@ int oc_ilog64(ogg_int64_t _v){ ret|=m; ret+=v>1; return ret; -# else -/*If we don't have a 64-bit word, split it into two 32-bit halves.*/ -# if LONG_MAX<9223372036854775807LL - ogg_uint32_t v; - int ret; - int m; - ret=_v>0; - m=(_v>0xFFFFFFFFU)<<5; - v=(ogg_uint32_t)(_v>>m); - ret|=m; +# else v|=v>>1; v|=v>>2; v|=v>>4; v|=v>>8; v|=v>>16; + ret=v&1|m; v=(v>>1)+1; ret+=OC_DEBRUIJN_IDX32[v*0x77CB531U>>27&0x1F]; +# endif return ret; -/*Otherwise do it in one 64-bit operation.*/ -# else +/*Otherwise do it in one 64-bit multiply.*/ +# else static const unsigned char OC_DEBRUIJN_IDX64[64]={ 0, 1, 2, 7, 3,13, 8,19, 4,25,14,28, 9,34,20,40, 5,17,26,38,15,46,29,48,10,31,35,54,21,50,41,57, @@ -117,17 +112,16 @@ int oc_ilog64(ogg_int64_t _v){ 62,11,23,32,36,44,52,55,61,22,43,51,60,42,59,58 }; int ret; - ret=_v>0; _v|=_v>>1; _v|=_v>>2; _v|=_v>>4; _v|=_v>>8; _v|=_v>>16; _v|=_v>>32; + ret=(int)_v&1; _v=(_v>>1)+1; ret+=OC_DEBRUIJN_IDX64[_v*0x218A392CD3D5DBF>>58&0x3F]; return ret; -# endif # endif #endif } @@ -294,3 +288,27 @@ ogg_int64_t oc_blog64(ogg_int64_t _w){ } return OC_Q57(ipart)+z; } + +/*Polynomial approximation of a binary exponential. + Q10 input, Q0 output.*/ +ogg_uint32_t oc_bexp32_q10(int _z){ + unsigned n; + int ipart; + ipart=_z>>10; + n=(_z&(1<<10)-1)<<4; + n=(n*((n*((n*((n*3548>>15)+6817)>>15)+15823)>>15)+22708)>>15)+16384; + return 14-ipart>0?n+(1<<13-ipart)>>14-ipart:n<<ipart-14; +} + +/*Polynomial approximation of a binary logarithm. + Q0 input, Q10 output.*/ +int oc_blog32_q10(ogg_uint32_t _w){ + int n; + int ipart; + int fpart; + if(_w<=0)return -1; + ipart=OC_ILOGNZ_32(_w); + n=(ipart-16>0?_w>>ipart-16:_w<<16-ipart)-32768-16384; + fpart=(n*((n*((n*((n*-1402>>15)+2546)>>15)-5216)>>15)+15745)>>15)-6793; + return (ipart<<10)+(fpart>>4); +} |