[Complex Text Layouts] Add third-party TextServer dependencies (ICU, HarfBuzz, Graphite).

author: bruvzg <7645683+bruvzg@users.noreply.github.com> 2020-08-11 12:10:23 +0300
committer: bruvzg <7645683+bruvzg@users.noreply.github.com> 2020-11-26 13:55:27 +0200
commit: b9f441e81e89728f284f61991e14748208817efd (patch)
tree: a3729bf0b71f11b324d2dff8838e914780917701 /thirdparty/icu4c/common/ucnvbocu.cpp
parent: 493da99269b3a111cf58e0352495e25ee49c5f84 (diff)
1 files changed, 1413 insertions, 0 deletions
diff --git a/thirdparty/icu4c/common/ucnvbocu.cpp b/thirdparty/icu4c/common/ucnvbocu.cpp
new file mode 100644
index 0000000000..7c2aab5655
--- /dev/null
+++ b/thirdparty/icu4c/common/ucnvbocu.cpp
@@ -0,0 +1,1413 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+*   Copyright (C) 2002-2016, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+******************************************************************************
+*   file name:  ucnvbocu.cpp
+*   encoding:   UTF-8
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2002mar27
+*   created by: Markus W. Scherer
+*
+*   This is an implementation of the Binary Ordered Compression for Unicode,
+*   in its MIME-friendly form as defined in http://www.unicode.org/notes/tn6/
+*/
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
+
+#include "unicode/ucnv.h"
+#include "unicode/ucnv_cb.h"
+#include "unicode/utf16.h"
+#include "putilimp.h"
+#include "ucnv_bld.h"
+#include "ucnv_cnv.h"
+#include "uassert.h"
+
+/* BOCU-1 constants and macros ---------------------------------------------- */
+
+/*
+ * BOCU-1 encodes the code points of a Unicode string as
+ * a sequence of byte-encoded differences (slope detection),
+ * preserving lexical order.
+ *
+ * Optimize the difference-taking for runs of Unicode text within
+ * small scripts:
+ *
+ * Most small scripts are allocated within aligned 128-blocks of Unicode
+ * code points. Lexical order is preserved if the "previous code point" state
+ * is always moved into the middle of such a block.
+ *
+ * Additionally, "prev" is moved from anywhere in the Unihan and Hangul
+ * areas into the middle of those areas.
+ *
+ * C0 control codes and space are encoded with their US-ASCII bytes.
+ * "prev" is reset for C0 controls but not for space.
+ */
+
+/* initial value for "prev": middle of the ASCII range */
+#define BOCU1_ASCII_PREV        0x40
+
+/* bounding byte values for differences */
+#define BOCU1_MIN               0x21
+#define BOCU1_MIDDLE            0x90
+#define BOCU1_MAX_LEAD          0xfe
+#define BOCU1_MAX_TRAIL         0xff
+#define BOCU1_RESET             0xff
+
+/* number of lead bytes */
+#define BOCU1_COUNT             (BOCU1_MAX_LEAD-BOCU1_MIN+1)
+
+/* adjust trail byte counts for the use of some C0 control byte values */
+#define BOCU1_TRAIL_CONTROLS_COUNT  20
+#define BOCU1_TRAIL_BYTE_OFFSET     (BOCU1_MIN-BOCU1_TRAIL_CONTROLS_COUNT)
+
+/* number of trail bytes */
+#define BOCU1_TRAIL_COUNT       ((BOCU1_MAX_TRAIL-BOCU1_MIN+1)+BOCU1_TRAIL_CONTROLS_COUNT)
+
+/*
+ * number of positive and negative single-byte codes
+ * (counting 0==BOCU1_MIDDLE among the positive ones)
+ */
+#define BOCU1_SINGLE            64
+
+/* number of lead bytes for positive and negative 2/3/4-byte sequences */
+#define BOCU1_LEAD_2            43
+#define BOCU1_LEAD_3            3
+#define BOCU1_LEAD_4            1
+
+/* The difference value range for single-byters. */
+#define BOCU1_REACH_POS_1   (BOCU1_SINGLE-1)
+#define BOCU1_REACH_NEG_1   (-BOCU1_SINGLE)
+
+/* The difference value range for double-byters. */
+#define BOCU1_REACH_POS_2   (BOCU1_REACH_POS_1+BOCU1_LEAD_2*BOCU1_TRAIL_COUNT)
+#define BOCU1_REACH_NEG_2   (BOCU1_REACH_NEG_1-BOCU1_LEAD_2*BOCU1_TRAIL_COUNT)
+
+/* The difference value range for 3-byters. */
+#define BOCU1_REACH_POS_3   \
+    (BOCU1_REACH_POS_2+BOCU1_LEAD_3*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT)
+
+#define BOCU1_REACH_NEG_3   (BOCU1_REACH_NEG_2-BOCU1_LEAD_3*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT)
+
+/* The lead byte start values. */
+#define BOCU1_START_POS_2   (BOCU1_MIDDLE+BOCU1_REACH_POS_1+1)
+#define BOCU1_START_POS_3   (BOCU1_START_POS_2+BOCU1_LEAD_2)
+#define BOCU1_START_POS_4   (BOCU1_START_POS_3+BOCU1_LEAD_3)
+     /* ==BOCU1_MAX_LEAD */
+
+#define BOCU1_START_NEG_2   (BOCU1_MIDDLE+BOCU1_REACH_NEG_1)
+#define BOCU1_START_NEG_3   (BOCU1_START_NEG_2-BOCU1_LEAD_2)
+#define BOCU1_START_NEG_4   (BOCU1_START_NEG_3-BOCU1_LEAD_3)
+     /* ==BOCU1_MIN+1 */
+
+/* The length of a byte sequence, according to the lead byte (!=BOCU1_RESET). */
+#define BOCU1_LENGTH_FROM_LEAD(lead) \
+    ((BOCU1_START_NEG_2<=(lead) && (lead)<BOCU1_START_POS_2) ? 1 : \
+     (BOCU1_START_NEG_3<=(lead) && (lead)<BOCU1_START_POS_3) ? 2 : \
+     (BOCU1_START_NEG_4<=(lead) && (lead)<BOCU1_START_POS_4) ? 3 : 4)
+
+/* The length of a byte sequence, according to its packed form. */
+#define BOCU1_LENGTH_FROM_PACKED(packed) \
+    ((uint32_t)(packed)<0x04000000 ? (packed)>>24 : 4)
+
+/*
+ * 12 commonly used C0 control codes (and space) are only used to encode
+ * themselves directly,
+ * which makes BOCU-1 MIME-usable and reasonably safe for
+ * ASCII-oriented software.
+ *
+ * These controls are
+ *  0   NUL
+ *
+ *  7   BEL
+ *  8   BS
+ *
+ *  9   TAB
+ *  a   LF
+ *  b   VT
+ *  c   FF
+ *  d   CR
+ *
+ *  e   SO
+ *  f   SI
+ *
+ * 1a   SUB
+ * 1b   ESC
+ *
+ * The other 20 C0 controls are also encoded directly (to preserve order)
+ * but are also used as trail bytes in difference encoding
+ * (for better compression).
+ */
+#define BOCU1_TRAIL_TO_BYTE(t) ((t)>=BOCU1_TRAIL_CONTROLS_COUNT ? (t)+BOCU1_TRAIL_BYTE_OFFSET : bocu1TrailToByte[t])
+
+/*
+ * Byte value map for control codes,
+ * from external byte values 0x00..0x20
+ * to trail byte values 0..19 (0..0x13) as used in the difference calculation.
+ * External byte values that are illegal as trail bytes are mapped to -1.
+ */
+static const int8_t
+bocu1ByteToTrail[BOCU1_MIN]={
+/*  0     1     2     3     4     5     6     7    */
+    -1,   0x00, 0x01, 0x02, 0x03, 0x04, 0x05, -1,
+
+/*  8     9     a     b     c     d     e     f    */
+    -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
+
+/*  10    11    12    13    14    15    16    17   */
+    0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d,
+
+/*  18    19    1a    1b    1c    1d    1e    1f   */
+    0x0e, 0x0f, -1,   -1,   0x10, 0x11, 0x12, 0x13,
+
+/*  20   */
+    -1
+};
+
+/*
+ * Byte value map for control codes,
+ * from trail byte values 0..19 (0..0x13) as used in the difference calculation
+ * to external byte values 0x00..0x20.
+ */
+static const int8_t
+bocu1TrailToByte[BOCU1_TRAIL_CONTROLS_COUNT]={
+/*  0     1     2     3     4     5     6     7    */
+    0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x10, 0x11,
+
+/*  8     9     a     b     c     d     e     f    */
+    0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19,
+
+/*  10    11    12    13   */
+    0x1c, 0x1d, 0x1e, 0x1f
+};
+
+/**
+ * Integer division and modulo with negative numerators
+ * yields negative modulo results and quotients that are one more than
+ * what we need here.
+ * This macro adjust the results so that the modulo-value m is always >=0.
+ *
+ * For positive n, the if() condition is always FALSE.
+ *
+ * @param n Number to be split into quotient and rest.
+ *          Will be modified to contain the quotient.
+ * @param d Divisor.
+ * @param m Output variable for the rest (modulo result).
+ */
+#define NEGDIVMOD(n, d, m) UPRV_BLOCK_MACRO_BEGIN { \
+    (m)=(n)%(d); \
+    (n)/=(d); \
+    if((m)<0) { \
+        --(n); \
+        (m)+=(d); \
+    } \
+} UPRV_BLOCK_MACRO_END
+
+/* Faster versions of packDiff() for single-byte-encoded diff values. */
+
+/** Is a diff value encodable in a single byte? */
+#define DIFF_IS_SINGLE(diff) (BOCU1_REACH_NEG_1<=(diff) && (diff)<=BOCU1_REACH_POS_1)
+
+/** Encode a diff value in a single byte. */
+#define PACK_SINGLE_DIFF(diff) (BOCU1_MIDDLE+(diff))
+
+/** Is a diff value encodable in two bytes? */
+#define DIFF_IS_DOUBLE(diff) (BOCU1_REACH_NEG_2<=(diff) && (diff)<=BOCU1_REACH_POS_2)
+
+/* BOCU-1 implementation functions ------------------------------------------ */
+
+#define BOCU1_SIMPLE_PREV(c) (((c)&~0x7f)+BOCU1_ASCII_PREV)
+
+/**
+ * Compute the next "previous" value for differencing
+ * from the current code point.
+ *
+ * @param c current code point, 0x3040..0xd7a3 (rest handled by macro below)
+ * @return "previous code point" state value
+ */
+static inline int32_t
+bocu1Prev(int32_t c) {
+    /* compute new prev */
+    if(/* 0x3040<=c && */ c<=0x309f) {
+        /* Hiragana is not 128-aligned */
+        return 0x3070;
+    } else if(0x4e00<=c && c<=0x9fa5) {
+        /* CJK Unihan */
+        return 0x4e00-BOCU1_REACH_NEG_2;
+    } else if(0xac00<=c /* && c<=0xd7a3 */) {
+        /* Korean Hangul */
+        return (0xd7a3+0xac00)/2;
+    } else {
+        /* mostly small scripts */
+        return BOCU1_SIMPLE_PREV(c);
+    }
+}
+
+/** Fast version of bocu1Prev() for most scripts. */
+#define BOCU1_PREV(c) ((c)<0x3040 || (c)>0xd7a3 ? BOCU1_SIMPLE_PREV(c) : bocu1Prev(c))
+
+/*
+ * The BOCU-1 converter uses the standard setup code in ucnv.c/ucnv_bld.c.
+ * The UConverter fields are used as follows:
+ *
+ * fromUnicodeStatus    encoder's prev (0 will be interpreted as BOCU1_ASCII_PREV)
+ *
+ * toUnicodeStatus      decoder's prev (0 will be interpreted as BOCU1_ASCII_PREV)
+ * mode                 decoder's incomplete (diff<<2)|count (ignored when toULength==0)
+ */
+
+/* BOCU-1-from-Unicode conversion functions --------------------------------- */
+
+/**
+ * Encode a difference -0x10ffff..0x10ffff in 1..4 bytes
+ * and return a packed integer with them.
+ *
+ * The encoding favors small absolute differences with short encodings
+ * to compress runs of same-script characters.
+ *
+ * Optimized version with unrolled loops and fewer floating-point operations
+ * than the standard packDiff().
+ *
+ * @param diff difference value -0x10ffff..0x10ffff
+ * @return
+ *      0x010000zz for 1-byte sequence zz
+ *      0x0200yyzz for 2-byte sequence yy zz
+ *      0x03xxyyzz for 3-byte sequence xx yy zz
+ *      0xwwxxyyzz for 4-byte sequence ww xx yy zz (ww>0x03)
+ */
+static int32_t
+packDiff(int32_t diff) {
+    int32_t result, m;
+
+    U_ASSERT(!DIFF_IS_SINGLE(diff)); /* assume we won't be called where diff==BOCU1_REACH_NEG_1=-64 */
+    if(diff>=BOCU1_REACH_NEG_1) {
+        /* mostly positive differences, and single-byte negative ones */
+#if 0   /* single-byte case handled in macros, see below */
+        if(diff<=BOCU1_REACH_POS_1) {
+            /* single byte */
+            return 0x01000000|(BOCU1_MIDDLE+diff);
+        } else
+#endif
+        if(diff<=BOCU1_REACH_POS_2) {
+            /* two bytes */
+            diff-=BOCU1_REACH_POS_1+1;
+            result=0x02000000;
+
+            m=diff%BOCU1_TRAIL_COUNT;
+            diff/=BOCU1_TRAIL_COUNT;
+            result|=BOCU1_TRAIL_TO_BYTE(m);
+
+            result|=(BOCU1_START_POS_2+diff)<<8;
+        } else if(diff<=BOCU1_REACH_POS_3) {
+            /* three bytes */
+            diff-=BOCU1_REACH_POS_2+1;
+            result=0x03000000;
+
+            m=diff%BOCU1_TRAIL_COUNT;
+            diff/=BOCU1_TRAIL_COUNT;
+            result|=BOCU1_TRAIL_TO_BYTE(m);
+
+            m=diff%BOCU1_TRAIL_COUNT;
+            diff/=BOCU1_TRAIL_COUNT;
+            result|=BOCU1_TRAIL_TO_BYTE(m)<<8;
+
+            result|=(BOCU1_START_POS_3+diff)<<16;
+        } else {
+            /* four bytes */
+            diff-=BOCU1_REACH_POS_3+1;
+
+            m=diff%BOCU1_TRAIL_COUNT;
+            diff/=BOCU1_TRAIL_COUNT;
+            result=BOCU1_TRAIL_TO_BYTE(m);
+
+            m=diff%BOCU1_TRAIL_COUNT;
+            diff/=BOCU1_TRAIL_COUNT;
+            result|=BOCU1_TRAIL_TO_BYTE(m)<<8;
+
+            /*
+             * We know that / and % would deliver quotient 0 and rest=diff.
+             * Avoid division and modulo for performance.
+             */
+            result|=BOCU1_TRAIL_TO_BYTE(diff)<<16;
+
+            result|=((uint32_t)BOCU1_START_POS_4)<<24;
+        }
+    } else {
+        /* two- to four-byte negative differences */
+        if(diff>=BOCU1_REACH_NEG_2) {
+            /* two bytes */
+            diff-=BOCU1_REACH_NEG_1;
+            result=0x02000000;
+
+            NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
+            result|=BOCU1_TRAIL_TO_BYTE(m);
+
+            result|=(BOCU1_START_NEG_2+diff)<<8;
+        } else if(diff>=BOCU1_REACH_NEG_3) {
+            /* three bytes */
+            diff-=BOCU1_REACH_NEG_2;
+            result=0x03000000;
+
+            NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
+            result|=BOCU1_TRAIL_TO_BYTE(m);
+
+            NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
+            result|=BOCU1_TRAIL_TO_BYTE(m)<<8;
+
+            result|=(BOCU1_START_NEG_3+diff)<<16;
+        } else {
+            /* four bytes */
+            diff-=BOCU1_REACH_NEG_3;
+
+            NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
+            result=BOCU1_TRAIL_TO_BYTE(m);
+
+            NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
+            result|=BOCU1_TRAIL_TO_BYTE(m)<<8;
+
+            /*
+             * We know that NEGDIVMOD would deliver
+             * quotient -1 and rest=diff+BOCU1_TRAIL_COUNT.
+             * Avoid division and modulo for performance.
+             */
+            m=diff+BOCU1_TRAIL_COUNT;
+            result|=BOCU1_TRAIL_TO_BYTE(m)<<16;
+
+            result|=BOCU1_MIN<<24;
+        }
+    }
+    return result;
+}
+
+
+static void U_CALLCONV
+_Bocu1FromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
+                             UErrorCode *pErrorCode) {
+    UConverter *cnv;
+    const UChar *source, *sourceLimit;
+    uint8_t *target;
+    int32_t targetCapacity;
+    int32_t *offsets;
+
+    int32_t prev, c, diff;
+
+    int32_t sourceIndex, nextSourceIndex;
+
+    /* set up the local pointers */
+    cnv=pArgs->converter;
+    source=pArgs->source;
+    sourceLimit=pArgs->sourceLimit;
+    target=(uint8_t *)pArgs->target;
+    targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
+    offsets=pArgs->offsets;
+
+    /* get the converter state from UConverter */
+    c=cnv->fromUChar32;
+    prev=(int32_t)cnv->fromUnicodeStatus;
+    if(prev==0) {
+        prev=BOCU1_ASCII_PREV;
+    }
+
+    /* sourceIndex=-1 if the current character began in the previous buffer */
+    sourceIndex= c==0 ? 0 : -1;
+    nextSourceIndex=0;
+
+    /* conversion loop */
+    if(c!=0 && targetCapacity>0) {
+        goto getTrail;
+    }
+
+fastSingle:
+    /* fast loop for single-byte differences */
+    /* use only one loop counter variable, targetCapacity, not also source */
+    diff=(int32_t)(sourceLimit-source);
+    if(targetCapacity>diff) {
+        targetCapacity=diff;
+    }
+    while(targetCapacity>0 && (c=*source)<0x3000) {
+        if(c<=0x20) {
+            if(c!=0x20) {
+                prev=BOCU1_ASCII_PREV;
+            }
+            *target++=(uint8_t)c;
+            *offsets++=nextSourceIndex++;
+            ++source;
+            --targetCapacity;
+        } else {
+            diff=c-prev;
+            if(DIFF_IS_SINGLE(diff)) {
+                prev=BOCU1_SIMPLE_PREV(c);
+                *target++=(uint8_t)PACK_SINGLE_DIFF(diff);
+                *offsets++=nextSourceIndex++;
+                ++source;
+                --targetCapacity;
+            } else {
+                break;
+            }
+        }
+    }
+    /* restore real values */
+    targetCapacity=(int32_t)((const uint8_t *)pArgs->targetLimit-target);
+    sourceIndex=nextSourceIndex; /* wrong if offsets==NULL but does not matter */
+
+    /* regular loop for all cases */
+    while(source<sourceLimit) {
+        if(targetCapacity>0) {
+            c=*source++;
+            ++nextSourceIndex;
+
+            if(c<=0x20) {
+                /*
+                 * ISO C0 control & space:
+                 * Encode directly for MIME compatibility,
+                 * and reset state except for space, to not disrupt compression.
+                 */
+                if(c!=0x20) {
+                    prev=BOCU1_ASCII_PREV;
+                }
+                *target++=(uint8_t)c;
+                *offsets++=sourceIndex;
+                --targetCapacity;
+
+                sourceIndex=nextSourceIndex;
+                continue;
+            }
+
+            if(U16_IS_LEAD(c)) {
+getTrail:
+                if(source<sourceLimit) {
+                    /* test the following code unit */
+                    UChar trail=*source;
+                    if(U16_IS_TRAIL(trail)) {
+                        ++source;
+                        ++nextSourceIndex;
+                        c=U16_GET_SUPPLEMENTARY(c, trail);
+                    }
+                } else {
+                    /* no more input */
+                    c=-c; /* negative lead surrogate as "incomplete" indicator to avoid c=0 everywhere else */
+                    break;
+                }
+            }
+
+            /*
+             * all other Unicode code points c==U+0021..U+10ffff
+             * are encoded with the difference c-prev
+             *
+             * a new prev is computed from c,
+             * placed in the middle of a 0x80-block (for most small scripts) or
+             * in the middle of the Unihan and Hangul blocks
+             * to statistically minimize the following difference
+             */
+            diff=c-prev;
+            prev=BOCU1_PREV(c);
+            if(DIFF_IS_SINGLE(diff)) {
+                *target++=(uint8_t)PACK_SINGLE_DIFF(diff);
+                *offsets++=sourceIndex;
+                --targetCapacity;
+                sourceIndex=nextSourceIndex;
+                if(c<0x3000) {
+                    goto fastSingle;
+                }
+            } else if(DIFF_IS_DOUBLE(diff) && 2<=targetCapacity) {
+                /* optimize 2-byte case */
+                int32_t m;
+
+                if(diff>=0) {
+                    diff-=BOCU1_REACH_POS_1+1;
+                    m=diff%BOCU1_TRAIL_COUNT;
+                    diff/=BOCU1_TRAIL_COUNT;
+                    diff+=BOCU1_START_POS_2;
+                } else {
+                    diff-=BOCU1_REACH_NEG_1;
+                    NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
+                    diff+=BOCU1_START_NEG_2;
+                }
+                *target++=(uint8_t)diff;
+                *target++=(uint8_t)BOCU1_TRAIL_TO_BYTE(m);
+                *offsets++=sourceIndex;
+                *offsets++=sourceIndex;
+                targetCapacity-=2;
+                sourceIndex=nextSourceIndex;
+            } else {
+                int32_t length; /* will be 2..4 */
+
+                diff=packDiff(diff);
+                length=BOCU1_LENGTH_FROM_PACKED(diff);
+
+                /* write the output character bytes from diff and length */
+                /* from the first if in the loop we know that targetCapacity>0 */
+                if(length<=targetCapacity) {
+                    switch(length) {
+                        /* each branch falls through to the next one */
+                    case 4:
+                        *target++=(uint8_t)(diff>>24);
+                        *offsets++=sourceIndex;
+                        U_FALLTHROUGH;
+                    case 3:
+                        *target++=(uint8_t)(diff>>16);
+                        *offsets++=sourceIndex;
+                        U_FALLTHROUGH;
+                    case 2:
+                        *target++=(uint8_t)(diff>>8);
+                        *offsets++=sourceIndex;
+                    /* case 1: handled above */
+                        *target++=(uint8_t)diff;
+                        *offsets++=sourceIndex;
+                        U_FALLTHROUGH;
+                    default:
+                        /* will never occur */
+                        break;
+                    }
+                    targetCapacity-=length;
+                    sourceIndex=nextSourceIndex;
+                } else {
+                    uint8_t *charErrorBuffer;
+
+                    /*
+                     * We actually do this backwards here:
+                     * In order to save an intermediate variable, we output
+                     * first to the overflow buffer what does not fit into the
+                     * regular target.
+                     */
+                    /* we know that 1<=targetCapacity<length<=4 */
+                    length-=targetCapacity;
+                    charErrorBuffer=(uint8_t *)cnv->charErrorBuffer;
+                    switch(length) {
+                        /* each branch falls through to the next one */
+                    case 3:
+                        *charErrorBuffer++=(uint8_t)(diff>>16);
+                        U_FALLTHROUGH;
+                    case 2:
+                        *charErrorBuffer++=(uint8_t)(diff>>8);
+                        U_FALLTHROUGH;
+                    case 1:
+                        *charErrorBuffer=(uint8_t)diff;
+                        U_FALLTHROUGH;
+                    default:
+                        /* will never occur */
+                        break;
+                    }
+                    cnv->charErrorBufferLength=(int8_t)length;
+
+                    /* now output what fits into the regular target */
+                    diff>>=8*length; /* length was reduced by targetCapacity */
+                    switch(targetCapacity) {
+                        /* each branch falls through to the next one */
+                    case 3:
+                        *target++=(uint8_t)(diff>>16);
+                        *offsets++=sourceIndex;
+                        U_FALLTHROUGH;
+                    case 2:
+                        *target++=(uint8_t)(diff>>8);
+                        *offsets++=sourceIndex;
+                        U_FALLTHROUGH;
+                    case 1:
+                        *target++=(uint8_t)diff;
+                        *offsets++=sourceIndex;
+                        U_FALLTHROUGH;
+                    default:
+                        /* will never occur */
+                        break;
+                    }
+
+                    /* target overflow */
+                    targetCapacity=0;
+                    *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+                    break;
+                }
+            }
+        } else {
+            /* target is full */
+            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+            break;
+        }
+    }
+
+    /* set the converter state back into UConverter */
+    cnv->fromUChar32= c<0 ? -c : 0;
+    cnv->fromUnicodeStatus=(uint32_t)prev;
+
+    /* write back the updated pointers */
+    pArgs->source=source;
+    pArgs->target=(char *)target;
+    pArgs->offsets=offsets;
+}
+
+/*
+ * Identical to _Bocu1FromUnicodeWithOffsets but without offset handling.
+ * If a change is made in the original function, then either
+ * change this function the same way or
+ * re-copy the original function and remove the variables
+ * offsets, sourceIndex, and nextSourceIndex.
+ */
+static void U_CALLCONV
+_Bocu1FromUnicode(UConverterFromUnicodeArgs *pArgs,
+                  UErrorCode *pErrorCode) {
+    UConverter *cnv;
+    const UChar *source, *sourceLimit;
+    uint8_t *target;
+    int32_t targetCapacity;
+
+    int32_t prev, c, diff;
+
+    /* set up the local pointers */
+    cnv=pArgs->converter;
+    source=pArgs->source;
+    sourceLimit=pArgs->sourceLimit;
+    target=(uint8_t *)pArgs->target;
+    targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
+
+    /* get the converter state from UConverter */
+    c=cnv->fromUChar32;
+    prev=(int32_t)cnv->fromUnicodeStatus;
+    if(prev==0) {
+        prev=BOCU1_ASCII_PREV;
+    }
+
+    /* conversion loop */
+    if(c!=0 && targetCapacity>0) {
+        goto getTrail;
+    }
+
+fastSingle:
+    /* fast loop for single-byte differences */
+    /* use only one loop counter variable, targetCapacity, not also source */
+    diff=(int32_t)(sourceLimit-source);
+    if(targetCapacity>diff) {
+        targetCapacity=diff;
+    }
+    while(targetCapacity>0 && (c=*source)<0x3000) {
+        if(c<=0x20) {
+            if(c!=0x20) {
+                prev=BOCU1_ASCII_PREV;
+            }
+            *target++=(uint8_t)c;
+        } else {
+            diff=c-prev;
+            if(DIFF_IS_SINGLE(diff)) {
+                prev=BOCU1_SIMPLE_PREV(c);
+                *target++=(uint8_t)PACK_SINGLE_DIFF(diff);
+            } else {
+                break;
+            }
+        }
+        ++source;
+        --targetCapacity;
+    }
+    /* restore real values */
+    targetCapacity=(int32_t)((const uint8_t *)pArgs->targetLimit-target);
+
+    /* regular loop for all cases */
+    while(source<sourceLimit) {
+        if(targetCapacity>0) {
+            c=*source++;
+
+            if(c<=0x20) {
+                /*
+                 * ISO C0 control & space:
+                 * Encode directly for MIME compatibility,
+                 * and reset state except for space, to not disrupt compression.
+                 */
+                if(c!=0x20) {
+                    prev=BOCU1_ASCII_PREV;
+                }
+                *target++=(uint8_t)c;
+                --targetCapacity;
+                continue;
+            }
+
+            if(U16_IS_LEAD(c)) {
+getTrail:
+                if(source<sourceLimit) {
+                    /* test the following code unit */
+                    UChar trail=*source;
+                    if(U16_IS_TRAIL(trail)) {
+                        ++source;
+                        c=U16_GET_SUPPLEMENTARY(c, trail);
+                    }
+                } else {
+                    /* no more input */
+                    c=-c; /* negative lead surrogate as "incomplete" indicator to avoid c=0 everywhere else */
+                    break;
+                }
+            }
+
+            /*
+             * all other Unicode code points c==U+0021..U+10ffff
+             * are encoded with the difference c-prev
+             *
+             * a new prev is computed from c,
+             * placed in the middle of a 0x80-block (for most small scripts) or
+             * in the middle of the Unihan and Hangul blocks
+             * to statistically minimize the following difference
+             */
+            diff=c-prev;
+            prev=BOCU1_PREV(c);
+            if(DIFF_IS_SINGLE(diff)) {
+                *target++=(uint8_t)PACK_SINGLE_DIFF(diff);
+                --targetCapacity;
+                if(c<0x3000) {
+                    goto fastSingle;
+                }
+            } else if(DIFF_IS_DOUBLE(diff) && 2<=targetCapacity) {
+                /* optimize 2-byte case */
+                int32_t m;
+
+                if(diff>=0) {
+                    diff-=BOCU1_REACH_POS_1+1;
+                    m=diff%BOCU1_TRAIL_COUNT;
+                    diff/=BOCU1_TRAIL_COUNT;
+                    diff+=BOCU1_START_POS_2;
+                } else {
+                    diff-=BOCU1_REACH_NEG_1;
+                    NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
+                    diff+=BOCU1_START_NEG_2;
+                }
+                *target++=(uint8_t)diff;
+                *target++=(uint8_t)BOCU1_TRAIL_TO_BYTE(m);
+                targetCapacity-=2;
+            } else {
+                int32_t length; /* will be 2..4 */
+
+                diff=packDiff(diff);
+                length=BOCU1_LENGTH_FROM_PACKED(diff);
+
+                /* write the output character bytes from diff and length */
+                /* from the first if in the loop we know that targetCapacity>0 */
+                if(length<=targetCapacity) {
+                    switch(length) {
+                        /* each branch falls through to the next one */
+                    case 4:
+                        *target++=(uint8_t)(diff>>24);
+                        U_FALLTHROUGH;
+                    case 3:
+                        *target++=(uint8_t)(diff>>16);
+                    /* case 2: handled above */
+                        *target++=(uint8_t)(diff>>8);
+                    /* case 1: handled above */
+                        *target++=(uint8_t)diff;
+                        U_FALLTHROUGH;
+                    default:
+                        /* will never occur */
+                        break;
+                    }
+                    targetCapacity-=length;
+                } else {
+                    uint8_t *charErrorBuffer;
+
+                    /*
+                     * We actually do this backwards here:
+                     * In order to save an intermediate variable, we output
+                     * first to the overflow buffer what does not fit into the
+                     * regular target.
+                     */
+                    /* we know that 1<=targetCapacity<length<=4 */
+                    length-=targetCapacity;
+                    charErrorBuffer=(uint8_t *)cnv->charErrorBuffer;
+                    switch(length) {
+                        /* each branch falls through to the next one */
+                    case 3:
+                        *charErrorBuffer++=(uint8_t)(diff>>16);
+                        U_FALLTHROUGH;
+                    case 2:
+                        *charErrorBuffer++=(uint8_t)(diff>>8);
+                        U_FALLTHROUGH;
+                    case 1:
+                        *charErrorBuffer=(uint8_t)diff;
+                        U_FALLTHROUGH;
+                    default:
+                        /* will never occur */
+                        break;
+                    }
+                    cnv->charErrorBufferLength=(int8_t)length;
+
+                    /* now output what fits into the regular target */
+                    diff>>=8*length; /* length was reduced by targetCapacity */
+                    switch(targetCapacity) {
+                        /* each branch falls through to the next one */
+                    case 3:
+                        *target++=(uint8_t)(diff>>16);
+                        U_FALLTHROUGH;
+                    case 2:
+                        *target++=(uint8_t)(diff>>8);
+                        U_FALLTHROUGH;
+                    case 1:
+                        *target++=(uint8_t)diff;
+                        U_FALLTHROUGH;
+                    default:
+                        /* will never occur */
+                        break;
+                    }
+
+                    /* target overflow */
+                    targetCapacity=0;
+                    *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+                    break;
+                }
+            }
+        } else {
+            /* target is full */
+            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+            break;
+        }
+    }
+
+    /* set the converter state back into UConverter */
+    cnv->fromUChar32= c<0 ? -c : 0;
+    cnv->fromUnicodeStatus=(uint32_t)prev;
+
+    /* write back the updated pointers */
+    pArgs->source=source;
+    pArgs->target=(char *)target;
+}
+
+/* BOCU-1-to-Unicode conversion functions ----------------------------------- */
+
+/**
+ * Function for BOCU-1 decoder; handles multi-byte lead bytes.
+ *
+ * @param b lead byte;
+ *          BOCU1_MIN<=b<BOCU1_START_NEG_2 or BOCU1_START_POS_2<=b<BOCU1_MAX_LEAD
+ * @return (diff<<2)|count
+ */
+static inline int32_t
+decodeBocu1LeadByte(int32_t b) {
+    int32_t diff, count;
+
+    if(b>=BOCU1_START_NEG_2) {
+        /* positive difference */
+        if(b<BOCU1_START_POS_3) {
+            /* two bytes */
+            diff=((int32_t)b-BOCU1_START_POS_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_POS_1+1;
+            count=1;
+        } else if(b<BOCU1_START_POS_4) {
+            /* three bytes */
+            diff=((int32_t)b-BOCU1_START_POS_3)*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT+BOCU1_REACH_POS_2+1;
+            count=2;
+        } else {
+            /* four bytes */
+            diff=BOCU1_REACH_POS_3+1;
+            count=3;
+        }
+    } else {
+        /* negative difference */
+        if(b>=BOCU1_START_NEG_3) {
+            /* two bytes */
+            diff=((int32_t)b-BOCU1_START_NEG_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_1;
+            count=1;
+        } else if(b>BOCU1_MIN) {
+            /* three bytes */
+            diff=((int32_t)b-BOCU1_START_NEG_3)*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_2;
+            count=2;
+        } else {
+            /* four bytes */
+            diff=-BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_3;
+            count=3;
+        }
+    }
+
+    /* return the state for decoding the trail byte(s) */
+    return (diff<<2)|count;
+}
+
+/**
+ * Function for BOCU-1 decoder; handles multi-byte trail bytes.
+ *
+ * @param count number of remaining trail bytes including this one
+ * @param b trail byte
+ * @return new delta for diff including b - <0 indicates an error
+ *
+ * @see decodeBocu1
+ */
+static inline int32_t
+decodeBocu1TrailByte(int32_t count, int32_t b) {
+    if(b<=0x20) {
+        /* skip some C0 controls and make the trail byte range contiguous */
+        b=bocu1ByteToTrail[b];
+        /* b<0 for an illegal trail byte value will result in return<0 below */
+#if BOCU1_MAX_TRAIL<0xff
+    } else if(b>BOCU1_MAX_TRAIL) {
+        return -99;
+#endif
+    } else {
+        b-=BOCU1_TRAIL_BYTE_OFFSET;
+    }
+
+    /* add trail byte into difference and decrement count */
+    if(count==1) {
+        return b;
+    } else if(count==2) {
+        return b*BOCU1_TRAIL_COUNT;
+    } else /* count==3 */ {
+        return b*(BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT);
+    }
+}
+
+static void U_CALLCONV
+_Bocu1ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
+                           UErrorCode *pErrorCode) {
+    UConverter *cnv;
+    const uint8_t *source, *sourceLimit;
+    UChar *target;
+    const UChar *targetLimit;
+    int32_t *offsets;
+
+    int32_t prev, count, diff, c;
+
+    int8_t byteIndex;
+    uint8_t *bytes;
+
+    int32_t sourceIndex, nextSourceIndex;
+
+    /* set up the local pointers */
+    cnv=pArgs->converter;
+    source=(const uint8_t *)pArgs->source;
+    sourceLimit=(const uint8_t *)pArgs->sourceLimit;
+    target=pArgs->target;
+    targetLimit=pArgs->targetLimit;
+    offsets=pArgs->offsets;
+
+    /* get the converter state from UConverter */
+    prev=(int32_t)cnv->toUnicodeStatus;
+    if(prev==0) {
+        prev=BOCU1_ASCII_PREV;
+    }
+    diff=cnv->mode; /* mode may be set to UCNV_SI by ucnv_bld.c but then toULength==0 */
+    count=diff&3;
+    diff>>=2;
+
+    byteIndex=cnv->toULength;
+    bytes=cnv->toUBytes;
+
+    /* sourceIndex=-1 if the current character began in the previous buffer */
+    sourceIndex=byteIndex==0 ? 0 : -1;
+    nextSourceIndex=0;
+
+    /* conversion "loop" similar to _SCSUToUnicodeWithOffsets() */
+    if(count>0 && byteIndex>0 && target<targetLimit) {
+        goto getTrail;
+    }
+
+fastSingle:
+    /* fast loop for single-byte differences */
+    /* use count as the only loop counter variable */
+    diff=(int32_t)(sourceLimit-source);
+    count=(int32_t)(pArgs->targetLimit-target);
+    if(count>diff) {
+        count=diff;
+    }
+    while(count>0) {
+        if(BOCU1_START_NEG_2<=(c=*source) && c<BOCU1_START_POS_2) {
+            c=prev+(c-BOCU1_MIDDLE);
+            if(c<0x3000) {
+                *target++=(UChar)c;
+                *offsets++=nextSourceIndex++;
+                prev=BOCU1_SIMPLE_PREV(c);
+            } else {
+                break;
+            }
+        } else if(c<=0x20) {
+            if(c!=0x20) {
+                prev=BOCU1_ASCII_PREV;
+            }
+            *target++=(UChar)c;
+            *offsets++=nextSourceIndex++;
+        } else {
+            break;
+        }
+        ++source;
+        --count;
+    }
+    sourceIndex=nextSourceIndex; /* wrong if offsets==NULL but does not matter */
+
+    /* decode a sequence of single and lead bytes */
+    while(source<sourceLimit) {
+        if(target>=targetLimit) {
+            /* target is full */
+            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+            break;
+        }
+
+        ++nextSourceIndex;
+        c=*source++;
+        if(BOCU1_START_NEG_2<=c && c<BOCU1_START_POS_2) {
+            /* Write a code point directly from a single-byte difference. */
+            c=prev+(c-BOCU1_MIDDLE);
+            if(c<0x3000) {
+                *target++=(UChar)c;
+                *offsets++=sourceIndex;
+                prev=BOCU1_SIMPLE_PREV(c);
+                sourceIndex=nextSourceIndex;
+                goto fastSingle;
+            }
+        } else if(c<=0x20) {
+            /*
+             * Direct-encoded C0 control code or space.
+             * Reset prev for C0 control codes but not for space.
+             */
+            if(c!=0x20) {
+                prev=BOCU1_ASCII_PREV;
+            }
+            *target++=(UChar)c;
+            *offsets++=sourceIndex;
+            sourceIndex=nextSourceIndex;
+            continue;
+        } else if(BOCU1_START_NEG_3<=c && c<BOCU1_START_POS_3 && source<sourceLimit) {
+            /* Optimize two-byte case. */
+            if(c>=BOCU1_MIDDLE) {
+                diff=((int32_t)c-BOCU1_START_POS_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_POS_1+1;
+            } else {
+                diff=((int32_t)c-BOCU1_START_NEG_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_1;
+            }
+
+            /* trail byte */
+            ++nextSourceIndex;
+            c=decodeBocu1TrailByte(1, *source++);
+            if(c<0 || (uint32_t)(c=prev+diff+c)>0x10ffff) {
+                bytes[0]=source[-2];
+                bytes[1]=source[-1];
+                byteIndex=2;
+                *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+                break;
+            }
+        } else if(c==BOCU1_RESET) {
+            /* only reset the state, no code point */
+            prev=BOCU1_ASCII_PREV;
+            sourceIndex=nextSourceIndex;
+            continue;
+        } else {
+            /*
+             * For multi-byte difference lead bytes, set the decoder state
+             * with the partial difference value from the lead byte and
+             * with the number of trail bytes.
+             */
+            bytes[0]=(uint8_t)c;
+            byteIndex=1;
+
+            diff=decodeBocu1LeadByte(c);
+            count=diff&3;
+            diff>>=2;
+getTrail:
+            for(;;) {
+                if(source>=sourceLimit) {
+                    goto endloop;
+                }
+                ++nextSourceIndex;
+                c=bytes[byteIndex++]=*source++;
+
+                /* trail byte in any position */
+                c=decodeBocu1TrailByte(count, c);
+                if(c<0) {
+                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+                    goto endloop;
+                }
+
+                diff+=c;
+                if(--count==0) {
+                    /* final trail byte, deliver a code point */
+                    byteIndex=0;
+                    c=prev+diff;
+                    if((uint32_t)c>0x10ffff) {
+                        *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+                        goto endloop;
+                    }
+                    break;
+                }
+            }
+        }
+
+        /* calculate the next prev and output c */
+        prev=BOCU1_PREV(c);
+        if(c<=0xffff) {
+            *target++=(UChar)c;
+            *offsets++=sourceIndex;
+        } else {
+            /* output surrogate pair */
+            *target++=U16_LEAD(c);
+            if(target<targetLimit) {
+                *target++=U16_TRAIL(c);
+                *offsets++=sourceIndex;
+                *offsets++=sourceIndex;
+            } else {
+                /* target overflow */
+                *offsets++=sourceIndex;
+                cnv->UCharErrorBuffer[0]=U16_TRAIL(c);
+                cnv->UCharErrorBufferLength=1;
+                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+                break;
+            }
+        }
+        sourceIndex=nextSourceIndex;
+    }
+endloop:
+
+    if(*pErrorCode==U_ILLEGAL_CHAR_FOUND) {
+        /* set the converter state in UConverter to deal with the next character */
+        cnv->toUnicodeStatus=BOCU1_ASCII_PREV;
+        cnv->mode=0;
+    } else {
+        /* set the converter state back into UConverter */
+        cnv->toUnicodeStatus=(uint32_t)prev;
+        cnv->mode=(diff<<2)|count;
+    }
+    cnv->toULength=byteIndex;
+
+    /* write back the updated pointers */
+    pArgs->source=(const char *)source;
+    pArgs->target=target;
+    pArgs->offsets=offsets;
+    return;
+}
+
+/*
+ * Identical to _Bocu1ToUnicodeWithOffsets but without offset handling.
+ * If a change is made in the original function, then either
+ * change this function the same way or
+ * re-copy the original function and remove the variables
+ * offsets, sourceIndex, and nextSourceIndex.
+ */
+static void U_CALLCONV
+_Bocu1ToUnicode(UConverterToUnicodeArgs *pArgs,
+                UErrorCode *pErrorCode) {
+    UConverter *cnv;
+    const uint8_t *source, *sourceLimit;
+    UChar *target;
+    const UChar *targetLimit;
+
+    int32_t prev, count, diff, c;
+
+    int8_t byteIndex;
+    uint8_t *bytes;
+
+    /* set up the local pointers */
+    cnv=pArgs->converter;
+    source=(const uint8_t *)pArgs->source;
+    sourceLimit=(const uint8_t *)pArgs->sourceLimit;
+    target=pArgs->target;
+    targetLimit=pArgs->targetLimit;
+
+    /* get the converter state from UConverter */
+    prev=(int32_t)cnv->toUnicodeStatus;
+    if(prev==0) {
+        prev=BOCU1_ASCII_PREV;
+    }
+    diff=cnv->mode; /* mode may be set to UCNV_SI by ucnv_bld.c but then toULength==0 */
+    count=diff&3;
+    diff>>=2;
+
+    byteIndex=cnv->toULength;
+    bytes=cnv->toUBytes;
+
+    /* conversion "loop" similar to _SCSUToUnicodeWithOffsets() */
+    if(count>0 && byteIndex>0 && target<targetLimit) {
+        goto getTrail;
+    }
+
+fastSingle:
+    /* fast loop for single-byte differences */
+    /* use count as the only loop counter variable */
+    diff=(int32_t)(sourceLimit-source);
+    count=(int32_t)(pArgs->targetLimit-target);
+    if(count>diff) {
+        count=diff;
+    }
+    while(count>0) {
+        if(BOCU1_START_NEG_2<=(c=*source) && c<BOCU1_START_POS_2) {
+            c=prev+(c-BOCU1_MIDDLE);
+            if(c<0x3000) {
+                *target++=(UChar)c;
+                prev=BOCU1_SIMPLE_PREV(c);
+            } else {
+                break;
+            }
+        } else if(c<=0x20) {
+            if(c!=0x20) {
+                prev=BOCU1_ASCII_PREV;
+            }
+            *target++=(UChar)c;
+        } else {
+            break;
+        }
+        ++source;
+        --count;
+    }
+
+    /* decode a sequence of single and lead bytes */
+    while(source<sourceLimit) {
+        if(target>=targetLimit) {
+            /* target is full */
+            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+            break;
+        }
+
+        c=*source++;
+        if(BOCU1_START_NEG_2<=c && c<BOCU1_START_POS_2) {
+            /* Write a code point directly from a single-byte difference. */
+            c=prev+(c-BOCU1_MIDDLE);
+            if(c<0x3000) {
+                *target++=(UChar)c;
+                prev=BOCU1_SIMPLE_PREV(c);
+                goto fastSingle;
+            }
+        } else if(c<=0x20) {
+            /*
+             * Direct-encoded C0 control code or space.
+             * Reset prev for C0 control codes but not for space.
+             */
+            if(c!=0x20) {
+                prev=BOCU1_ASCII_PREV;
+            }
+            *target++=(UChar)c;
+            continue;
+        } else if(BOCU1_START_NEG_3<=c && c<BOCU1_START_POS_3 && source<sourceLimit) {
+            /* Optimize two-byte case. */
+            if(c>=BOCU1_MIDDLE) {
+                diff=((int32_t)c-BOCU1_START_POS_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_POS_1+1;
+            } else {
+                diff=((int32_t)c-BOCU1_START_NEG_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_1;
+            }
+
+            /* trail byte */
+            c=decodeBocu1TrailByte(1, *source++);
+            if(c<0 || (uint32_t)(c=prev+diff+c)>0x10ffff) {
+                bytes[0]=source[-2];
+                bytes[1]=source[-1];
+                byteIndex=2;
+                *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+                break;
+            }
+        } else if(c==BOCU1_RESET) {
+            /* only reset the state, no code point */
+            prev=BOCU1_ASCII_PREV;
+            continue;
+        } else {
+            /*
+             * For multi-byte difference lead bytes, set the decoder state
+             * with the partial difference value from the lead byte and
+             * with the number of trail bytes.
+             */
+            bytes[0]=(uint8_t)c;
+            byteIndex=1;
+
+            diff=decodeBocu1LeadByte(c);
+            count=diff&3;
+            diff>>=2;
+getTrail:
+            for(;;) {
+                if(source>=sourceLimit) {
+                    goto endloop;
+                }
+                c=bytes[byteIndex++]=*source++;
+
+                /* trail byte in any position */
+                c=decodeBocu1TrailByte(count, c);
+                if(c<0) {
+                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+                    goto endloop;
+                }
+
+                diff+=c;
+                if(--count==0) {
+                    /* final trail byte, deliver a code point */
+                    byteIndex=0;
+                    c=prev+diff;
+                    if((uint32_t)c>0x10ffff) {
+                        *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+                        goto endloop;
+                    }
+                    break;
+                }
+            }
+        }
+
+        /* calculate the next prev and output c */
+        prev=BOCU1_PREV(c);
+        if(c<=0xffff) {
+            *target++=(UChar)c;
+        } else {
+            /* output surrogate pair */
+            *target++=U16_LEAD(c);
+            if(target<targetLimit) {
+                *target++=U16_TRAIL(c);
+            } else {
+                /* target overflow */
+                cnv->UCharErrorBuffer[0]=U16_TRAIL(c);
+                cnv->UCharErrorBufferLength=1;
+                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+                break;
+            }
+        }
+    }
+endloop:
+
+    if(*pErrorCode==U_ILLEGAL_CHAR_FOUND) {
+        /* set the converter state in UConverter to deal with the next character */
+        cnv->toUnicodeStatus=BOCU1_ASCII_PREV;
+        cnv->mode=0;
+    } else {
+        /* set the converter state back into UConverter */
+        cnv->toUnicodeStatus=(uint32_t)prev;
+        cnv->mode=(diff<<2)|count;
+    }
+    cnv->toULength=byteIndex;
+
+    /* write back the updated pointers */
+    pArgs->source=(const char *)source;
+    pArgs->target=target;
+    return;
+}
+
+/* miscellaneous ------------------------------------------------------------ */
+
+static const UConverterImpl _Bocu1Impl={
+    UCNV_BOCU1,
+
+    NULL,
+    NULL,
+
+    NULL,
+    NULL,
+    NULL,
+
+    _Bocu1ToUnicode,
+    _Bocu1ToUnicodeWithOffsets,
+    _Bocu1FromUnicode,
+    _Bocu1FromUnicodeWithOffsets,
+    NULL,
+
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+    ucnv_getCompleteUnicodeSet,
+
+    NULL,
+    NULL
+};
+
+static const UConverterStaticData _Bocu1StaticData={
+    sizeof(UConverterStaticData),
+    "BOCU-1",
+    1214, /* CCSID for BOCU-1 */
+    UCNV_IBM, UCNV_BOCU1,
+    1, 4, /* one UChar generates at least 1 byte and at most 4 bytes */
+    { 0x1a, 0, 0, 0 }, 1, /* BOCU-1 never needs to write a subchar */
+    FALSE, FALSE,
+    0,
+    0,
+    { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
+};
+
+const UConverterSharedData _Bocu1Data=
+        UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_Bocu1StaticData, &_Bocu1Impl);
+
+#endif
author	bruvzg <7645683+bruvzg@users.noreply.github.com>	2020-08-11 12:10:23 +0300
committer	bruvzg <7645683+bruvzg@users.noreply.github.com>	2020-11-26 13:55:27 +0200
commit	b9f441e81e89728f284f61991e14748208817efd (patch)
tree	a3729bf0b71f11b324d2dff8838e914780917701 /thirdparty/icu4c/common/ucnvbocu.cpp
parent	493da99269b3a111cf58e0352495e25ee49c5f84 (diff)