diff options
Diffstat (limited to 'thirdparty/zstd/common/bitstream.h')
-rw-r--r-- | thirdparty/zstd/common/bitstream.h | 33 |
1 files changed, 24 insertions, 9 deletions
diff --git a/thirdparty/zstd/common/bitstream.h b/thirdparty/zstd/common/bitstream.h index 2e5a933ad3..84b6062ff3 100644 --- a/thirdparty/zstd/common/bitstream.h +++ b/thirdparty/zstd/common/bitstream.h @@ -143,10 +143,16 @@ MEM_STATIC unsigned BIT_highbit32 (U32 val) { # if defined(_MSC_VER) /* Visual */ # if STATIC_BMI2 == 1 - return _lzcnt_u32(val) ^ 31; + return _lzcnt_u32(val) ^ 31; # else - unsigned long r = 0; - return _BitScanReverse(&r, val) ? (unsigned)r : 0; + if (val != 0) { + unsigned long r; + _BitScanReverse(&r, val); + return (unsigned)r; + } else { + /* Should not reach this code path */ + __assume(0); + } # endif # elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */ return __builtin_clz (val) ^ 31; @@ -293,22 +299,22 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si switch(srcSize) { case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16); - /* fall-through */ + ZSTD_FALLTHROUGH; case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24); - /* fall-through */ + ZSTD_FALLTHROUGH; case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32); - /* fall-through */ + ZSTD_FALLTHROUGH; case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24; - /* fall-through */ + ZSTD_FALLTHROUGH; case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16; - /* fall-through */ + ZSTD_FALLTHROUGH; case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) << 8; - /* fall-through */ + ZSTD_FALLTHROUGH; default: break; } @@ -332,7 +338,16 @@ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getMiddleBits(size_t bitContainer, U32 c U32 const regMask = sizeof(bitContainer)*8 - 1; /* if start > regMask, bitstream is corrupted, and result is undefined */ assert(nbBits < BIT_MASK_SIZE); + /* x86 transform & ((1 << nbBits) - 1) to bzhi instruction, it is better + * than accessing memory. When bmi2 instruction is not present, we consider + * such cpus old (pre-Haswell, 2013) and their performance is not of that + * importance. + */ +#if defined(__x86_64__) || defined(_M_X86) + return (bitContainer >> (start & regMask)) & ((((U64)1) << nbBits) - 1); +#else return (bitContainer >> (start & regMask)) & BIT_mask[nbBits]; +#endif } MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits) |