summaryrefslogtreecommitdiff
path: root/thirdparty/zstd/common/zstd_internal.h
diff options
context:
space:
mode:
Diffstat (limited to 'thirdparty/zstd/common/zstd_internal.h')
-rw-r--r--thirdparty/zstd/common/zstd_internal.h95
1 files changed, 37 insertions, 58 deletions
diff --git a/thirdparty/zstd/common/zstd_internal.h b/thirdparty/zstd/common/zstd_internal.h
index 81b16eac2e..dcdcbdb81c 100644
--- a/thirdparty/zstd/common/zstd_internal.h
+++ b/thirdparty/zstd/common/zstd_internal.h
@@ -56,9 +56,9 @@ extern "C" {
/**
* Return the specified error if the condition evaluates to true.
*
- * In debug modes, prints additional information. In order to do that
- * (particularly, printing the conditional that failed), this can't just wrap
- * RETURN_ERROR().
+ * In debug modes, prints additional information.
+ * In order to do that (particularly, printing the conditional that failed),
+ * this can't just wrap RETURN_ERROR().
*/
#define RETURN_ERROR_IF(cond, err, ...) \
if (cond) { \
@@ -197,79 +197,56 @@ static void ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
static void ZSTD_copy16(void* dst, const void* src) { memcpy(dst, src, 16); }
#define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; }
-#define WILDCOPY_OVERLENGTH 8
-#define VECLEN 16
+#define WILDCOPY_OVERLENGTH 32
+#define WILDCOPY_VECLEN 16
typedef enum {
ZSTD_no_overlap,
- ZSTD_overlap_src_before_dst,
+ ZSTD_overlap_src_before_dst
/* ZSTD_overlap_dst_before_src, */
} ZSTD_overlap_e;
/*! ZSTD_wildcopy() :
- * custom version of memcpy(), can overwrite up to WILDCOPY_OVERLENGTH bytes (if length==0) */
+ * Custom version of memcpy(), can over read/write up to WILDCOPY_OVERLENGTH bytes (if length==0)
+ * @param ovtype controls the overlap detection
+ * - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart.
+ * - ZSTD_overlap_src_before_dst: The src and dst may overlap, but they MUST be at least 8 bytes apart.
+ * The src buffer must be before the dst buffer.
+ */
MEM_STATIC FORCE_INLINE_ATTR DONT_VECTORIZE
-void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e ovtype)
+void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e const ovtype)
{
ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src;
const BYTE* ip = (const BYTE*)src;
BYTE* op = (BYTE*)dst;
BYTE* const oend = op + length;
- assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff < -8));
- if (length < VECLEN || (ovtype == ZSTD_overlap_src_before_dst && diff < VECLEN)) {
- do
- COPY8(op, ip)
- while (op < oend);
- }
- else {
- if ((length & 8) == 0)
- COPY8(op, ip);
- do {
+ assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff <= -WILDCOPY_VECLEN));
+
+ if (ovtype == ZSTD_overlap_src_before_dst && diff < WILDCOPY_VECLEN) {
+ /* Handle short offset copies. */
+ do {
+ COPY8(op, ip)
+ } while (op < oend);
+ } else {
+ assert(diff >= WILDCOPY_VECLEN || diff <= -WILDCOPY_VECLEN);
+ /* Separate out the first two COPY16() calls because the copy length is
+ * almost certain to be short, so the branches have different
+ * probabilities.
+ * On gcc-9 unrolling once is +1.6%, twice is +2%, thrice is +1.8%.
+ * On clang-8 unrolling once is +1.4%, twice is +3.3%, thrice is +3%.
+ */
COPY16(op, ip);
- }
- while (op < oend);
- }
-}
-
-/*! ZSTD_wildcopy_16min() :
- * same semantics as ZSTD_wilcopy() except guaranteed to be able to copy 16 bytes at the start */
-MEM_STATIC FORCE_INLINE_ATTR DONT_VECTORIZE
-void ZSTD_wildcopy_16min(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e ovtype)
-{
- ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src;
- const BYTE* ip = (const BYTE*)src;
- BYTE* op = (BYTE*)dst;
- BYTE* const oend = op + length;
-
- assert(length >= 8);
- assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff < -8));
-
- if (ovtype == ZSTD_overlap_src_before_dst && diff < VECLEN) {
- do
- COPY8(op, ip)
- while (op < oend);
- }
- else {
- if ((length & 8) == 0)
- COPY8(op, ip);
- do {
COPY16(op, ip);
- }
- while (op < oend);
+ if (op >= oend) return;
+ do {
+ COPY16(op, ip);
+ COPY16(op, ip);
+ }
+ while (op < oend);
}
}
-MEM_STATIC void ZSTD_wildcopy_e(void* dst, const void* src, void* dstEnd) /* should be faster for decoding, but strangely, not verified on all platform */
-{
- const BYTE* ip = (const BYTE*)src;
- BYTE* op = (BYTE*)dst;
- BYTE* const oend = (BYTE*)dstEnd;
- do
- COPY8(op, ip)
- while (op < oend);
-}
-
/*-*******************************************
* Private declarations
@@ -323,7 +300,9 @@ MEM_STATIC U32 ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus
_BitScanReverse(&r, val);
return (unsigned)r;
# elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */
- return 31 - __builtin_clz(val);
+ return __builtin_clz (val) ^ 31;
+# elif defined(__ICCARM__) /* IAR Intrinsic */
+ return 31 - __CLZ(val);
# else /* Software version */
static const U32 DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
U32 v = val;